rm(list = ls(all.names = TRUE)) library(Amelia) library(bestNormalize) library(openxlsx) library(lubridate) library(purrr) library(dplyr) library(nlme) library(rsq) library(lm.beta) library(tidyverse) library(caret) library(leaps) library(ggplot2) library(reghelper) library(ggpubr) library(car) library(missMDA) library(psych) setwd("C:/Users/user/Dropbox/BCG-COVID-19/Ver 3/TF_SG") CV19 <- read.xlsx("Table_S1-9.xlsx", sheet=3) DATA <- CV19[, which (colnames(CV19) %in% c( "Country_code", "BCG.TB", "m_cases", "r_cases", "popData2018.x", #"Positive.cases.per.test", "temp", "BCG.type","Current.BCG.vaccination", "prevalence_overweight_2012", "Fraction_urban_population", "Population_ages_65_and_above_2018_percent", "TB.incidents.per.10.thosand.mean_2000_2018", "TB.Incidents.100"))] DATA_COMPLETE <- na.omit(DATA) dim(DATA_COMPLETE) DATA_COMPLETE$pop2018_N <- predict(orderNorm(DATA_COMPLETE$popData2018.x)) hist(DATA_COMPLETE$pop2018_N) DATA_COMPLETE$Ob_N <- predict(orderNorm(DATA_COMPLETE$prevalence_overweight_2012)) hist(DATA_COMPLETE$Ob_N) DATA_COMPLETE$TB_Z <- scale(DATA_COMPLETE$TB.incidents.per.10.thosand.mean_2000_2018)[,1] hist(DATA_COMPLETE$TB_Z) DATA_COMPLETE$temp_N <- predict(orderNorm(DATA_COMPLETE$temp)) hist(DATA_COMPLETE$temp_N) DATA_COMPLETE$frac_N <- predict(orderNorm(DATA_COMPLETE$Fraction_urban_population)) hist(DATA_COMPLETE$frac_N) DATA_COMPLETE$age_N <- predict(orderNorm(DATA_COMPLETE$Population_ages_65_and_above_2018_percent)) hist(DATA_COMPLETE$age_N) DATA_COMPLETE$Yy <- ifelse(DATA_COMPLETE$BCG.TB == "Y_y", 1, 0) DATA_COMPLETE$Yn <- ifelse(DATA_COMPLETE$BCG.TB == "Y_n", 1, 0) DATA_COMPLETE$Nn <- ifelse(DATA_COMPLETE$BCG.TB == "N_n", 1, 0) DATA_d <- CV19[, which (colnames(CV19) %in% c( "Country_code", "BCG.TB", "m_deaths", "r_deaths", "popData2018.x", #"Positive.cases.per.test", "temp", "BCG.type","Current.BCG.vaccination", "prevalence_overweight_2012", "Fraction_urban_population", "Population_ages_65_and_above_2018_percent", "TB.incidents.per.10.thosand.mean_2000_2018", "TB.Incidents.100"))] DATA_COMPLETE_d <- na.omit(DATA_d) dim(DATA_COMPLETE_d) DATA_COMPLETE_d$pop2018_N <- predict(orderNorm(DATA_COMPLETE_d$popData2018.x)) DATA_COMPLETE_d$Ob_N <- predict(orderNorm(DATA_COMPLETE_d$prevalence_overweight_2012)) DATA_COMPLETE_d$TB_Z <- scale(DATA_COMPLETE_d$TB.incidents.per.10.thosand.mean_2000_2018)[,1] DATA_COMPLETE_d$temp_N <- predict(orderNorm(DATA_COMPLETE_d$temp)) DATA_COMPLETE_d$frac_N <- predict(orderNorm(DATA_COMPLETE_d$Fraction_urban_population)) DATA_COMPLETE_d$age_N <- predict(orderNorm(DATA_COMPLETE_d$Population_ages_65_and_above_2018_percent)) ###Analysis### DATA_COMPLETE$m_cases_Z <- scale(DATA_COMPLETE$m_cases)[,1] summary(DATA_COMPLETE$r_cases) cases_gr_lm <- lm(m_cases ~ Current.BCG.vaccination + TB_Z + Ob_N + pop2018_N + temp_N + age_N + frac_N , weights=DATA_COMPLETE$r_cases, data = DATA_COMPLETE) summary(cases_gr_lm, corr=T) shapiro.test(resid(cases_gr_lm)) #List of residuals plot(density(resid(cases_gr_lm))) #A density plot rsq.partial(cases_gr_lm, adj=TRUE) lm.beta(cases_gr_lm) Anova(cases_gr_lm) models_cases_gr <- regsubsets(m_cases ~ Current.BCG.vaccination + TB_Z + Ob_N + temp_N + age_N + frac_N + pop2018_N, weights=DATA_COMPLETE$r_cases, data = DATA_COMPLETE, nvmax = 6) summary(models_cases_gr) res.sum <- summary(models_cases_gr) data.frame( Adj.R2 = which.max(res.sum$adjr2), CP = which.min(res.sum$cp), BIC = which.min(res.sum$bic) ) coef(models_cases_gr, 1:5) vcov(models_cases_gr, 5) cases_gr_lm_3 <- lm(m_cases ~ Current.BCG.vaccination + pop2018_N + temp_N, weights=DATA_COMPLETE$r_cases, data = DATA_COMPLETE) summary(cases_gr_lm_3, corr=T) rsq.partial(cases_gr_lm_3, adj=TRUE) lm.beta(cases_gr_lm_3) #example reported in the Discussion #confint(cases_gr_lm_3) log_f1<-summary(cases_gr_lm_3)$coef[1,1] log_f2<-summary(cases_gr_lm_3)$coef[2,1]+summary(cases_gr_lm_3)$coef[1,1] 100*(10^log_f1)^15 100*(10^log_f2)^15 100*(10^log_f1)^30 100*(10^log_f2)^30 #deaths deaths_gr_lm <- lm(m_deaths~ Current.BCG.vaccination + TB_Z + Ob_N + pop2018_N + temp_N + age_N + frac_N , weights=DATA_COMPLETE$r_deaths, data = DATA_COMPLETE_d) summary(deaths_gr_lm, corr=T) shapiro.test(resid(deaths_gr_lm)) #List of residuals plot(density(resid(deaths_gr_lm))) #A density plot rsq.partial(deaths_gr_lm, adj=TRUE) lm.beta(deaths_gr_lm) Anova(deaths_gr_lm) models_deaths_gr <- regsubsets(m_deaths ~ Current.BCG.vaccination + TB_Z + Ob_N + temp_N + age_N + frac_N + pop2018_N, weights=DATA_COMPLETE_d$r_deaths, data = DATA_COMPLETE_d, nvmax = 6) summary(models_deaths_gr) res.sum <- summary(models_deaths_gr) data.frame( Adj.R2 = which.max(res.sum$adjr2), CP = which.min(res.sum$cp), BIC = which.min(res.sum$bic) ) coef(models_deaths_gr, 1:5) vcov(models_deaths_gr, 5) deaths_gr_lm_2 <- lm(m_deaths ~ Current.BCG.vaccination + pop2018_N ,weights=DATA_COMPLETE_d$r_deaths, data = DATA_COMPLETE_d) summary(deaths_gr_lm_2, corr=T) rsq.partial(deaths_gr_lm_2, adj=TRUE) lm.beta(deaths_gr_lm_2) log_f1_d<-summary(deaths_gr_lm_2)$coef[1,1] log_f2_d<-summary(deaths_gr_lm_2)$coef[2,1]+summary(deaths_gr_lm_2)$coef[1,1] 100*(10^log_f1_d)^15 100*(10^log_f2_d)^15 100*(10^log_f1_d)^30 100*(10^log_f2_d)^30 ##### #PCA# cases_PCA_lm <- lm(m_cases ~ Current.BCG.vaccination + pop2018_N + TB_Z + RC1 + RC2 + RC3 , weights=CV19$r_cases, data = CV19) summary(cases_PCA_lm, corr=T) shapiro.test(resid(cases_PCA_lm)) #List of residuals plot(density(resid(cases_PCA_lm))) #A density plot rsq.partial(cases_PCA_lm, adj=TRUE) lm.beta(cases_PCA_lm) kruskal.test(RC1 ~ Current.BCG.vaccination, data = CV19) dataset_1 <- na.omit(CV19[ , which (colnames(CV19) %in% c( "m_deaths","r_deaths","Current.BCG.vaccination","TB_Z", "pop2018_N","RC1","RC2","RC3"))]) dataset_1<-na.omit(dataset_1) deaths_PCA_lm <- lm(m_deaths ~ Current.BCG.vaccination + pop2018_N + TB_Z + RC1 + RC2 + RC3 , weights=dataset_1$r_deaths, data = dataset_1) summary(deaths_PCA_lm, corr=T) shapiro.test(resid(deaths_PCA_lm)) #List of residuals plot(density(resid(deaths_PCA_lm))) #A density plot rsq.partial(deaths_PCA_lm, adj=TRUE) lm.beta(deaths_PCA_lm)