################################################################################# #Association of BCG vaccination policy with prevalence and mortality of COVID-19# #Codes by Giovanni Sala, Fujita University, 3/30/2020) # ################################################################################# library(openxlsx) library(bestNormalize) library(ggplot2) library(rsq) db <- file.choose() #select Supplementary Table.xlsx CV19<-read.xlsx(db) colnames(CV19) dim(CV19) CV19$BCG.type <- CV19$BCG.policy CV19$temp <- (CV19$Feb.temperature + CV19$Mar.temperature)/2 DATA <- CV19[, which (colnames(CV19) %in% c("Country", #"Continent", "TOT.Cases.1M.pop", "TOT.Death.1M.pop","temp","Life.Expectancy","BCG.type","Current.BCG.vaccination"))] colnames(DATA) #Dummy coding DATA$A <- ifelse(DATA$BCG.type == "A", 1, 0) DATA$B <- ifelse(DATA$BCG.type == "B", 1, 0) DATA$C <- ifelse(DATA$BCG.type == "C", 1, 0) DATA$DC_ratio <- DATA$TOT.Death.1M.pop/DATA$TOT.Cases.1M.pop DATA.CASES <- DATA[, which (colnames(DATA) %in% c("Country", #"Continent", "TOT.Cases.1M.pop", #"TOT.Death.1M.pop", "temp","Life.Expectancy","BCG.type","Current.BCG.vaccination","A","B","C"))] DATA.CASES.COMPLETE <- na.omit(DATA.CASES) dim(DATA.CASES.COMPLETE) DATA.DEATHS <- DATA[, which (colnames(DATA) %in% c("Country", #"Continent", #"TOT.Cases.1M.pop", "TOT.Death.1M.pop","temp","Life.Expectancy","BCG.type","Current.BCG.vaccination","A","B","C", "DC_ratio"))] DATA.DEATHS.COMPLETE <- na.omit(DATA.DEATHS) dim(DATA.DEATHS.COMPLETE) #Descriptives mean(DATA.CASES.COMPLETE$TOT.Cases.1M.pop, na.rm = TRUE) mean(DATA.CASES.COMPLETE$Life.Expectancy, na.rm = TRUE) mean(DATA.CASES.COMPLETE$temp, na.rm = TRUE) sd(DATA.CASES.COMPLETE$TOT.Cases.1M.pop, na.rm = TRUE) sd(DATA.CASES.COMPLETE$Life.Expectancy, na.rm = TRUE) sd(DATA.CASES.COMPLETE$temp, na.rm = TRUE) quantile(DATA.CASES.COMPLETE$TOT.Cases.1M.pop, na.rm = TRUE) quantile(DATA.CASES.COMPLETE$Life.Expectancy, na.rm = TRUE) quantile(DATA.CASES.COMPLETE$temp, na.rm = TRUE) sum(DATA.CASES.COMPLETE$A) sum(DATA.CASES.COMPLETE$B) sum(DATA.CASES.COMPLETE$C) mean(DATA.DEATHS.COMPLETE$TOT.Death.1M.pop, na.rm = TRUE) mean(DATA.DEATHS.COMPLETE$Life.Expectancy, na.rm = TRUE) mean(DATA.DEATHS.COMPLETE$temp, na.rm = TRUE) sd(DATA.DEATHS.COMPLETE$TOT.Death.1M.pop, na.rm = TRUE) sd(DATA.DEATHS.COMPLETE$Life.Expectancy, na.rm = TRUE) sd(DATA.DEATHS.COMPLETE$temp, na.rm = TRUE) quantile(DATA.DEATHS.COMPLETE$TOT.Death.1M.pop, na.rm = TRUE) quantile(DATA.DEATHS.COMPLETE$Life.Expectancy, na.rm = TRUE) quantile(DATA.DEATHS.COMPLETE$temp, na.rm = TRUE) sum(DATA.DEATHS.COMPLETE$A) sum(DATA.DEATHS.COMPLETE$B) sum(DATA.DEATHS.COMPLETE$C) #Normalization CASES BN_TOT.Cases.1M.pop <- orderNorm(DATA.CASES.COMPLETE$TOT.Cases.1M.pop) BN_TOT.Cases.1M.pop DATA.CASES.COMPLETE$TOT.Cases.1M.pop_N <- predict(BN_TOT.Cases.1M.pop) x <- predict(BN_TOT.Cases.1M.pop, newdata = DATA.CASES.COMPLETE$TOT.Cases.1M.pop_N, inverse = TRUE) all.equal(x, DATA.CASES.COMPLETE$TOT.Cases.1M.pop) hist(DATA.CASES.COMPLETE$TOT.Cases.1M.pop_N) BN_Life.Expectancy <- orderNorm(DATA.CASES.COMPLETE$Life.Expectancy) BN_Life.Expectancy DATA.CASES.COMPLETE$Life.Expectancy_N <- predict(BN_Life.Expectancy) x <- predict(BN_Life.Expectancy, newdata = DATA.CASES.COMPLETE$Life.Expectancy_N, inverse = TRUE) all.equal(x, DATA.CASES.COMPLETE$Life.Expectancy) hist(DATA.CASES.COMPLETE$Life.Expectancy_N) BN_temp <- orderNorm(DATA.CASES.COMPLETE$temp) BN_temp DATA.CASES.COMPLETE$temp_N <- predict(BN_temp) x <- predict(BN_temp, newdata = DATA.CASES.COMPLETE$temp_N, inverse = TRUE) all.equal(x, DATA.CASES.COMPLETE$temp) hist(DATA.CASES.COMPLETE$temp_N) #Normalization DEATHS BN_TOT.Death.1M.pop <- orderNorm(DATA.DEATHS.COMPLETE$TOT.Death.1M.pop) BN_TOT.Death.1M.pop DATA.DEATHS.COMPLETE$TOT.Death.1M.pop_N <- predict(BN_TOT.Death.1M.pop) x <- predict(BN_TOT.Death.1M.pop, newdata = DATA.DEATHS.COMPLETE$TOT.Death.1M.pop_N, inverse = TRUE) all.equal(x, DATA.DEATHS.COMPLETE$TOT.Death.1M.pop) hist(DATA.DEATHS.COMPLETE$TOT.Death.1M.pop_N) BN_Life.Expectancy <- orderNorm(DATA.DEATHS.COMPLETE$Life.Expectancy) BN_Life.Expectancy DATA.DEATHS.COMPLETE$Life.Expectancy_N <- predict(BN_Life.Expectancy) x <- predict(BN_Life.Expectancy, newdata = DATA.DEATHS.COMPLETE$Life.Expectancy_N, inverse = TRUE) all.equal(x, DATA.DEATHS.COMPLETE$Life.Expectancy) hist(DATA.DEATHS.COMPLETE$Life.Expectancy_N) BN_temp <- orderNorm(DATA.DEATHS.COMPLETE$temp) BN_temp DATA.DEATHS.COMPLETE$temp_N <- predict(BN_temp) x <- predict(BN_temp, newdata = DATA.DEATHS.COMPLETE$temp_N, inverse = TRUE) all.equal(x, DATA.DEATHS.COMPLETE$temp) hist(DATA.DEATHS.COMPLETE$temp_N) BN_DC_ratio <- orderNorm(DATA.DEATHS.COMPLETE$DC_ratio) BN_DC_ratio DATA.DEATHS.COMPLETE$DC_ratio_N <- predict(BN_DC_ratio) x <- predict(BN_DC_ratio, newdata = DATA.DEATHS.COMPLETE$DC_ratio_N, inverse = TRUE) all.equal(x, DATA.DEATHS.COMPLETE$DC_ratio) hist(DATA.DEATHS.COMPLETE$DC_ratio_N) #correlation matrix DATA.CASES.COMPLETE.CORR <- DATA.CASES.COMPLETE[, which (colnames(DATA.CASES.COMPLETE) %in% c( "TOT.Cases.1M.pop_N","temp_N","Life.Expectancy_N"))] cor(DATA.CASES.COMPLETE.CORR) #correlation matrix DATA.DEATHS.COMPLETE.CORR <- DATA.DEATHS.COMPLETE[, which (colnames(DATA.DEATHS.COMPLETE) %in% c( "TOT.Death.1M.pop_N","temp_N","Life.Expectancy_N"))] cor(DATA.DEATHS.COMPLETE.CORR) ###Linear models### ##Cases per million lm_TOT.Cases_BC_0 = lm(TOT.Cases.1M.pop_N ~ Life.Expectancy_N + temp_N + 0*B + 0*C +1, data = DATA.CASES.COMPLETE) #lm_TOT.Cases_BC_1 = lm(TOT.Cases.1M.pop_N ~ B + C + Life.Expectancy_N + temp_N, data = DATA.CASES.COMPLETE) lm_TOT.Cases_BC_1 = lm(TOT.Cases.1M.pop_N ~ BCG.type + Life.Expectancy_N + temp_N, data = DATA.CASES.COMPLETE) summary(lm_TOT.Cases_BC_0) summary(lm_TOT.Cases_BC_1) anova(lm_TOT.Cases_BC_0,lm_TOT.Cases_BC_1) (summary(lm_TOT.Cases_BC_1)$adj.r.squared - summary(lm_TOT.Cases_BC_0)$adj.r.squared)*100 rsq.partial(lm_TOT.Cases_BC_1, adj=TRUE) #above 78 years life expectancy DATA.CASES.COMPLETE_78 <- subset(DATA.CASES.COMPLETE, Life.Expectancy>78) aggregate(DATA.CASES.COMPLETE_78$Life.Expectancy~DATA.CASES.COMPLETE_78$BCG.type, FUN=mean) aggregate(DATA.CASES.COMPLETE_78$Life.Expectancy~DATA.CASES.COMPLETE_78$BCG.type, FUN=sd) bub.CASES_0<-lm(TOT.Cases.1M.pop_N ~ 0*B + 0*C + temp_N + 1,data = DATA.CASES.COMPLETE_78) #bub.CASES_1<-lm(TOT.Cases.1M.pop_N ~ B + C + temp_N + 1,data = DATA.CASES.COMPLETE_78) bub.CASES_1<-lm(TOT.Cases.1M.pop_N ~ BCG.type + temp_N + 1,data = DATA.CASES.COMPLETE_78) summary(bub.CASES_0) summary(bub.CASES_1) (summary(bub.CASES_1)$adj.r.squared - summary(bub.CASES_0)$adj.r.squared)*100 rsq.partial(bub.CASES_1, adj=TRUE) sum(rsq.partial(bub.CASES_1, adj=TRUE)$partial.rsq) #residual checks #resid(lm_TOT.Cases_BC_1) #List of residuals plot(density(resid(lm_TOT.Cases_BC_1))) #A density plot qqnorm(resid(lm_TOT.Cases_BC_1)) # A quantile normal plot - good for checking normality qqline(resid(lm_TOT.Cases_BC_1)) #plots c = ggplot(DATA.CASES.COMPLETE, aes(x=BCG.type, y=TOT.Cases.1M.pop_N, fill=BCG.type)) c = c + geom_boxplot() c = c + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y = element_text( size = 14)) c = c + geom_jitter(shape=16, position=position_jitter(0.1)) c = c + scale_fill_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) c = c + theme(legend.text=element_text(size=14)) c = c + ylab("Standardaized rate of COVID-19 Cases") + xlab("") c = c + ggtitle("Total COVID-19 cases per 1 million population") c = c + theme(text = element_text(size = 14)) #c = c + geom_boxplot(lwd=1.2) c ggsave(file="COVID19cases_boxplot.png", plot=c, dpi=300, width=7, height=5) d2 = ggplot(DATA.CASES.COMPLETE, aes(x=Life.Expectancy , y=TOT.Cases.1M.pop_N, color=BCG.type)) +geom_point() d2 = d2 + stat_smooth(method="lm",size = 1, colour="black") d2 = d2 + scale_color_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) d2 = d2 + theme(legend.text=element_text(size=14)) d2 = d2 + ylab("Standardaized COVID-19 cases") + xlab("Life Expectancy") d2 = d2 + ggtitle("COVID-19 cases per 1 million population by life expectancy") d2 = d2 + geom_vline(xintercept=78, linetype="dotted",size = 1) d2 = d2 + theme(text = element_text(size = 14)) d2 ggsave(file="scatterCOVID19cases_life_exp.png", plot=d2, dpi=300, width=7, height=5) d2 = ggplot(DATA.CASES.COMPLETE, aes(x=temp , y=TOT.Cases.1M.pop_N, color=BCG.type)) +geom_point() d2 = d2 + stat_smooth(method="lm",size = 1, colour="black") d2 = d2 + scale_color_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) d2 = d2 + theme(legend.text=element_text(size=14)) d2 = d2 + ylab("Standardaized COVID-19 cases") + xlab("Temperature") d2 = d2 + ggtitle("COVID-19 cases per 1 million population by temperature") #d2 = d2 + geom_vline(xintercept=78, linetype="dotted",size = 1) d2 = d2 + theme(text = element_text(size = 14)) d2 ggsave(file="scatterCOVID19cases_temp.png", plot=d2, dpi=300, width=7, height=5) c = ggplot(DATA.CASES.COMPLETE_78, aes(x=BCG.type, y=TOT.Cases.1M.pop_N, fill=BCG.type)) c = c + geom_boxplot() c = c + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y = element_text( size = 14)) c = c + geom_jitter(shape=16, position=position_jitter(0.1)) c = c + scale_fill_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) c = c + theme(legend.text=element_text(size=14)) c = c + ylab("Standardaized rate of COVID-19 Cases") + xlab("") c = c + ggtitle("Total COVID-19 cases in countries with life expectancy >78") c = c + theme(text = element_text(size = 14)) #c = c + geom_boxplot(lwd=1.2) c ggsave(file="COVID19cases_boxplot_78.png", plot=c, dpi=300, width=7, height=5) ##Deaths per million lm_TOT.Death_BC_0 = lm(TOT.Death.1M.pop_N ~ Life.Expectancy_N + temp_N + 0*B + 0*C +1, data = DATA.DEATHS.COMPLETE) #lm_TOT.Death_BC_1 = lm(TOT.Death.1M.pop_N ~ B + C + Life.Expectancy_N + temp_N, data = DATA.DEATHS.COMPLETE) lm_TOT.Death_BC_1 = lm(TOT.Death.1M.pop_N ~ BCG.type + Life.Expectancy_N + temp_N, data = DATA.DEATHS.COMPLETE) summary(lm_TOT.Death_BC_0) summary(lm_TOT.Death_BC_1) anova(lm_TOT.Death_BC_0,lm_TOT.Death_BC_1) (summary(lm_TOT.Death_BC_1)$adj.r.squared - summary(lm_TOT.Death_BC_0)$adj.r.squared)*100 rsq.partial(lm_TOT.Death_BC_1, adj=TRUE) DATA.DEATHS.COMPLETE_78 <- subset(DATA.DEATHS.COMPLETE, Life.Expectancy>78) bub.DEATHS_0<-lm(TOT.Death.1M.pop_N~ 0*B + 0*C + temp_N + 1,data = DATA.DEATHS.COMPLETE_78) #bub.DEATHS_1<-lm(TOT.Death.1M.pop_N ~ B + C + temp_N + 1,data = DATA.DEATHS.COMPLETE_78) bub.DEATHS_1<-lm(TOT.Death.1M.pop_N ~ BCG.type + temp_N + 1,data = DATA.DEATHS.COMPLETE_78) summary(bub.DEATHS_0) summary(bub.DEATHS_1) (summary(bub.DEATHS_1)$adj.r.squared - summary(bub.DEATHS_0)$adj.r.squared)*100 rsq.partial(bub.DEATHS_1, adj=TRUE) #residual checks #resid(lm_TOT.Death_BC_1) #List of residuals plot(density(resid(lm_TOT.Death_BC_1))) #A density plot qqnorm(resid(lm_TOT.Death_BC_1)) # A quantile normal plot - good for checking normality qqline(resid(lm_TOT.Death_BC_1)) #plots c = ggplot(DATA.DEATHS.COMPLETE, aes(x=BCG.type, y=TOT.Death.1M.pop_N, fill=BCG.type)) c = c + geom_boxplot() c = c + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y = element_text( size = 14)) c = c + geom_jitter(shape=16, position=position_jitter(0.1)) c = c + scale_fill_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) c = c + theme(legend.text=element_text(size=14)) c = c + ylab("Standardaized rate of COVID-19 deaths") + xlab("") c = c + ggtitle("Total COVID-19 deaths per 1 million population") c = c + theme(text = element_text(size = 14)) #c = c + geom_boxplot(lwd=1.2) c ggsave(file="COVID19deaths_boxplot.png", plot=c, dpi=300, width=7, height=5) d2 = ggplot(DATA.DEATHS.COMPLETE, aes(x=Life.Expectancy , y=TOT.Death.1M.pop_N, color=BCG.type)) +geom_point() d2 = d2 + stat_smooth(method="lm",size = 1, colour="black") d2 = d2 + scale_color_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) d2 = d2 + theme(legend.text=element_text(size=14)) d2 = d2 + ylab("Standardaized COVID-19 deaths") + xlab("Life Expectancy") d2 = d2 + ggtitle("COVID-19 deaths per 1 million population by life expectancy") d2 = d2 + geom_vline(xintercept=78, linetype="dotted",size = 1) d2 = d2 + theme(text = element_text(size = 14)) d2 ggsave(file="scatterCOVID19deaths_life_exp.png", plot=d2, dpi=300, width=7, height=5) d2 = ggplot(DATA.DEATHS.COMPLETE, aes(x=temp , y=TOT.Death.1M.pop_N, color=BCG.type)) +geom_point() d2 = d2 + stat_smooth(method="lm",size = 1, colour="black") d2 = d2 + scale_color_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) d2 = d2 + theme(legend.text=element_text(size=14)) d2 = d2 + ylab("Standardaized COVID-19 deaths") + xlab("Temperature") d2 = d2 + ggtitle("COVID-19 deaths per 1 million population by temperature") #d2 = d2 + geom_vline(xintercept=78, linetype="dotted",size = 1) d2 = d2 + theme(text = element_text(size = 14)) d2 ggsave(file="scatterCOVID19deaths_temp.png", plot=d2, dpi=300, width=7, height=5) c = ggplot(DATA.DEATHS.COMPLETE_78, aes(x=BCG.type, y=TOT.Death.1M.pop_N, fill=BCG.type)) c = c + geom_boxplot() c = c + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y = element_text( size = 14)) c = c + geom_jitter(shape=16, position=position_jitter(0.1)) c = c + scale_fill_discrete( name="BCG type", labels = c("A: Currently administering", "B: Used to administer", "C: Never administered")) c = c + theme(legend.text=element_text(size=14)) c = c + ylab("Standardaized rate of COVID-19 Cases") + xlab("") c = c + ggtitle("Total COVID-19 deaths in countries with life expectancy >78") c = c + theme(text = element_text(size = 14)) #c = c + geom_boxplot(lwd=1.2) c ggsave(file="COVID19deaths_boxplot_78.png", plot=c, dpi=300, width=7, height=5) ##Deaths ratio lm_TOT.Death_ratio_0 = lm(DC_ratio_N ~ Life.Expectancy_N + temp_N + 0*B + 0*C +1, data = DATA.DEATHS.COMPLETE) #lm_TOT.Death_ratio_1 = lm(DC_ratio_N ~ B + C + Life.Expectancy_N + temp_N, data = DATA.DEATHS.COMPLETE) lm_TOT.Death_ratio_1 = lm(DC_ratio_N ~ BCG.type + Life.Expectancy_N + temp_N, data = DATA.DEATHS.COMPLETE) summary(lm_TOT.Death_ratio_0) summary(lm_TOT.Death_ratio_1) anova(lm_TOT.Death_ratio_0,lm_TOT.Death_ratio_1) (summary(lm_TOT.Death_ratio_1)$adj.r.squared - summary(lm_TOT.Death_ratio_0)$adj.r.squared)*100 rsq.partial(lm_TOT.Death_ratio_1, adj=TRUE) #residual checks #resid(lm_TOT.Death_ratio_1) #List of residuals plot(density(resid(lm_TOT.Death_ratio_1))) #A density plot qqnorm(resid(lm_TOT.Death_ratio_1)) # A quantile normal plot - good for checking normality qqline(resid(lm_TOT.Death_ratio_1)) #Chisquare test on growth rates M <- matrix(c(26,18,5, 1,9,9), ncol=2 ) # merging Groups B and C M_78 <- matrix(c(15,6,1, 1,8,9), ncol=2 ) # merging Groups B and C M<-t(M) chisq.test(M) chisq.test(M, simulate.p.value = TRUE) M_78<-t(M_78) chisq.test(M_78) chisq.test(M_78, simulate.p.value = TRUE)