rm(list = ls(all.names = TRUE)) library(Amelia) library(bestNormalize) library(openxlsx) library(lubridate) library(purrr) library(dplyr) library(nlme) library(rsq) library(lm.beta) library(tidyverse) library(caret) library(leaps) library(ggplot2) library(reghelper) library(ggpubr) library(car) library(missMDA) library(psych) #setwd("C:/Users/user/Dropbox/.../") CV19 <- read.xlsx("Table_S1-9.xlsx", sheet=2) DATA <- CV19[, which (colnames(CV19) %in% c( "Country_code", "BCG.TB", "m_cases", "r_cases", #"popData2018.x", #"Positive.cases.per.test", "temp", "BCG.type","Current.BCG.vaccination", "prevalence_overweight_2012", "Fraction_urban_population", "Population_ages_65_and_above_2018_percent", "TB.incidents.per.10.thosand.mean_2000_2018", "TB.Incidents.100"))] DATA_COMPLETE <- na.omit(DATA) dim(DATA_COMPLETE) #DATA_COMPLETE$pop2018_N <- predict(orderNorm(DATA_COMPLETE$popData2018.x)) #hist(DATA_COMPLETE$pop2018_N) DATA_COMPLETE$TB_Z <- scale(DATA_COMPLETE$TB.incidents.per.10.thosand.mean_2000_2018)[,1] hist(DATA_COMPLETE$TB_Z) DATA_COMPLETE$Ob_N <- predict(orderNorm(DATA_COMPLETE$prevalence_overweight_2012)) hist(DATA_COMPLETE$Ob_N) DATA_COMPLETE$temp_N <- predict(orderNorm(DATA_COMPLETE$temp)) hist(DATA_COMPLETE$temp_N) DATA_COMPLETE$frac_N <- predict(orderNorm(DATA_COMPLETE$Fraction_urban_population)) hist(DATA_COMPLETE$frac_N) DATA_COMPLETE$age_N <- predict(orderNorm(DATA_COMPLETE$Population_ages_65_and_above_2018_percent)) hist(DATA_COMPLETE$age_N) DATA_COMPLETE$Yy <- ifelse(DATA_COMPLETE$BCG.TB == "Y_y", 1, 0) DATA_COMPLETE$Yn <- ifelse(DATA_COMPLETE$BCG.TB == "Y_n", 1, 0) DATA_COMPLETE$Nn <- ifelse(DATA_COMPLETE$BCG.TB == "N_n", 1, 0) ###Analysis### shapiro.test(DATA_COMPLETE$m_cases) DATA_COMPLETE$m_cases_Z <- scale(DATA_COMPLETE$m_cases)[,1] summary(DATA_COMPLETE$r_cases) cases_gr_lm <- lm(m_cases ~ Current.BCG.vaccination + TB_Z + Ob_N + temp_N + age_N + frac_N, weights=DATA_COMPLETE$r_cases, data = DATA_COMPLETE) summary(cases_gr_lm, corr=T) shapiro.test(resid(cases_gr_lm)) #List of residuals plot(density(resid(cases_gr_lm))) #A density plot rsq.partial(cases_gr_lm, adj=TRUE) lm.beta(cases_gr_lm) Anova(cases_gr_lm) models_cases_gr <- regsubsets(m_cases ~ Current.BCG.vaccination + TB_Z + Ob_N + temp_N + age_N + frac_N, weights=DATA_COMPLETE$r_cases, data = DATA_COMPLETE, nvmax = 6) summary(models_cases_gr) res.sum <- summary(models_cases_gr) data.frame( Adj.R2 = which.max(res.sum$adjr2), CP = which.min(res.sum$cp), BIC = which.min(res.sum$bic) ) coef(models_cases_gr, 1:5) vcov(models_cases_gr, 5) cases_gr_lm_2 <- lm(m_cases ~ Current.BCG.vaccination + age_N, weights=DATA_COMPLETE$r_cases, data = DATA_COMPLETE) summary(cases_gr_lm_2, corr=T) rsq.partial(cases_gr_lm_2, adj=TRUE) lm.beta(cases_gr_lm_2) DATA_d <- CV19[, which (colnames(CV19) %in% c( "Country_code", "BCG.TB", "m_deaths", "r_deaths", #"popData2018.x", #"Positive.cases.per.test", "temp", "BCG.type","Current.BCG.vaccination", "prevalence_overweight_2012", "Fraction_urban_population", "Population_ages_65_and_above_2018_percent", "TB.incidents.per.10.thosand.mean_2000_2018", "TB.Incidents.100"))] DATA_COMPLETE_d <- na.omit(DATA_d) dim(DATA_COMPLETE_d) #DATA_COMPLETE_d$pop2018_N <- predict(orderNorm(DATA_COMPLETE_d$popData2018.x)) #hist(DATA_COMPLETE_d$pop2018_N) DATA_COMPLETE_d$TB_Z <- scale(DATA_COMPLETE_d$TB.incidents.per.10.thosand.mean_2000_2018)[,1] hist(DATA_COMPLETE_d$TB_Z) DATA_COMPLETE_d$Ob_N <- predict(orderNorm(DATA_COMPLETE_d$prevalence_overweight_2012)) hist(DATA_COMPLETE_d$Ob_N) DATA_COMPLETE_d$temp_N <- predict(orderNorm(DATA_COMPLETE_d$temp)) hist(DATA_COMPLETE_d$temp_N) DATA_COMPLETE_d$frac_N <- predict(orderNorm(DATA_COMPLETE_d$Fraction_urban_population)) hist(DATA_COMPLETE_d$frac_N) DATA_COMPLETE_d$age_N <- predict(orderNorm(DATA_COMPLETE_d$Population_ages_65_and_above_2018_percent)) hist(DATA_COMPLETE_d$age_N) ###Analysis### shapiro.test(DATA_COMPLETE_d$m_deaths) DATA_COMPLETE_d$m_deaths_N <- predict(orderNorm(DATA_COMPLETE_d$m_deaths)) DATA_COMPLETE_d$m_deaths_Z <- scale(DATA_COMPLETE_d$m_deaths)[,1] summary(DATA_COMPLETE_d$r_deaths) deaths_gr_lm <- lm(m_deaths_N~ Current.BCG.vaccination + TB_Z + Ob_N + temp_N + age_N + frac_N, weights=DATA_COMPLETE_d$r_deaths, data = DATA_COMPLETE_d) summary(deaths_gr_lm, corr=T) shapiro.test(resid(deaths_gr_lm)) #List of residuals plot(density(resid(deaths_gr_lm))) #A density plot rsq.partial(deaths_gr_lm, adj=TRUE) lm.beta(deaths_gr_lm) Anova(deaths_gr_lm) models_deaths_gr <- regsubsets(m_deaths_N ~ Current.BCG.vaccination + TB_Z + Ob_N + temp_N + age_N + frac_N, weights=DATA_COMPLETE_d$r_cases, data = DATA_COMPLETE_d, nvmax = 6) summary(models_deaths_gr) res.sum <- summary(models_deaths_gr) data.frame( Adj.R2 = which.max(res.sum$adjr2), CP = which.min(res.sum$cp), BIC = which.min(res.sum$bic) ) coef(models_deaths_gr, 1:5) vcov(models_deaths_gr, 5) deaths_gr_lm_2 <- lm(m_deaths_N ~ Current.BCG.vaccination + age_N, weights=DATA_COMPLETE_d$r_cases, data = DATA_COMPLETE_d) summary(deaths_gr_lm_2, corr=T) rsq.partial(deaths_gr_lm_2, adj=TRUE) lm.beta(deaths_gr_lm_2) ##### #PCA# cases_PCA_lm <- lm(m_cases ~ Current.BCG.vaccination + TB_Z + RC1 + RC2 + RC3 , weights=CV19$r_cases, data = CV19) summary(cases_PCA_lm, corr=T) shapiro.test(resid(cases_PCA_lm)) #List of residuals plot(density(resid(cases_PCA_lm))) #A density plot rsq.partial(cases_PCA_lm, adj=TRUE) kruskal.test(RC1 ~ Current.BCG.vaccination, data = dataset) kruskal.test(RC2 ~ Current.BCG.vaccination, data = dataset) kruskal.test(RC3 ~ Current.BCG.vaccination, data = dataset) deaths_PCA_lm <- lm(m_deaths_N ~ Current.BCG.vaccination + TB_Z + RC1 + RC2 + RC3 , weights=r_deaths, data = CV19) summary(deaths_PCA_lm, corr=T) shapiro.test(resid(deaths_PCA_lm)) #List of residuals plot(density(resid(deaths_PCA_lm))) #A density plot rsq.partial(deaths_PCA_lm, adj=TRUE) kruskal.test(RC1 ~ Current.BCG.vaccination, data = CV19)