% medRxiv preprint describing the unified COVID-19 environmental-epidemiological dataset.
% The citation key is the one produced by the medRxiv export and is kept unchanged.
% The abstract field holds only the abstract; the submission-form statements that the
% export had fused onto it are kept, with their sections separated, in annote.
% NOTE(review): eprint holds the full-text PDF URL as exported, not an archive
% identifier; confirm how the target bibliography style renders it.
@article{Badr2021.05.05.21256712,
  author         = {Badr, Hamada S. and
                    Zaitchik, Benjamin F. and
                    Kerr, Gaige H. and
                    Nguyen, Nhat-Lan H. and
                    Chen, Yen-Ting and
                    Hinson, Patrick and
                    Colston, Josh M. and
                    Kosek, Margaret N. and
                    Dong, Ensheng and
                    Du, Hongru and
                    Marshall, Maximilian and
                    Nixon, Kristen and
                    Mohegh, Arash and
                    Goldberg, Daniel L. and
                    Anenberg, Susan C. and
                    Gardner, Lauren M.},
  title          = {Unified real-time environmental-epidemiological data for multiscale modeling of the {COVID-19} pandemic},
  journal        = {medRxiv},
  publisher      = {Cold Spring Harbor Laboratory Press},
  year           = {2021},
  elocation-id   = {2021.05.05.21256712},
  doi            = {10.1101/2021.05.05.21256712},
  url            = {https://www.medrxiv.org/content/early/2021/05/07/2021.05.05.21256712},
  eprint         = {https://www.medrxiv.org/content/early/2021/05/07/2021.05.05.21256712.full.pdf},
  abstract       = {An impressive number of COVID-19 data catalogs exist. None, however, are optimized for data science applications, e.g., inconsistent naming and data conventions, uneven quality control, and lack of alignment between disease data and potential predictors pose barriers to robust modeling and analysis. To address this gap, we generated a unified dataset that integrates and implements quality checks of the data from numerous leading sources of COVID-19 epidemiological and environmental data. We use a globally consistent hierarchy of administrative units to facilitate analysis within and across countries. The dataset applies this unified hierarchy to align COVID-19 case data with a number of other data types relevant to understanding and predicting COVID-19 risk, including hydrometeorological data, air quality, information on COVID-19 control policies, and key demographic characteristics.},
  annote         = {Competing Interest Statement: The authors have declared no competing interest.
                    Funding Statement: This work is supported by NASA Health \& Air Quality project 80NSSC18K0327, under a COVID-19 supplement, National Institute of Health (NIH) project 3U19AI135995-03S1 (``Consortium for Viral Systems Biology (CViSB)''; Collaboration with The Scripps Research Institute and UCLA), and NASA grant 80NSSC20K1122. Johns Hopkins Applied Physics Laboratory (APL), Data Services and Esri provide professional support on designing the automatic data collection structure, and maintaining the JHU CSSE GitHub repository.
                    Author Declarations: I confirm all relevant ethical guidelines have been followed, and any necessary IRB and/or ethics committee approvals have been obtained. Yes. The details of the IRB/oversight body that provided approval or exemption for the research described are given below: IRB approval is not required. All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived. Yes. I understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance). Yes. I have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable. Yes.
                    The source code used to clean, unify, aggregate, and merge the different data components from all sources will be available on GitHub. https://github.com/CSSEGISandData/COVID-19\_Unified-Dataset},
}