% medRxiv preprint: Wagner et al. (2020), augmented curation of clinical notes for COVID-19 symptoms.
% The abstract field holds only the abstract. The competing-interest, funding,
% author-declaration and data-availability statements that the export had fused
% into it are kept in separate non-standard fields (unknown fields are ignored
% by bibliography styles). The author "Morice" uses the "Last, Jr, First" form
% so that the suffix "II" is not parsed as the surname.
@article{Wagner2020.04.19.20067660,
  author            = {Wagner, Tyler and
                       Shweta, FNU and
                       Murugadoss, Karthik and
                       Awasthi, Samir and
                       Venkatakrishnan, AJ and
                       Bade, Sairam and
                       Puranik, Arjun and
                       Kang, Martin and
                       Pickering, Brian W. and
                       O'Horo, John C. and
                       Bauer, Philippe R. and
                       Razonable, Raymund R. and
                       Vergidis, Paschalis and
                       Temesgen, Zelalem and
                       Rizza, Stacey and
                       Mahmood, Maryam and
                       Wilson, Walter R. and
                       Challener, Douglas and
                       Anand, Praveen and
                       Liebers, Matt and
                       Doctor, Zainab and
                       Silvert, Eli and
                       Solomon, Hugo and
                       Anand, Akash and
                       Barve, Rakesh and
                       Gores, Gregory J. and
                       Williams, Amy W. and
                       Morice, II, William G. and
                       Halamka, John and
                       Badley, Andrew D. and
                       Soundararajan, Venky},
  title             = {Augmented Curation of Clinical Notes from a Massive {EHR} System Reveals Symptoms of Impending {COVID-19} Diagnosis},
  journal           = {medRxiv},
  publisher         = {Cold Spring Harbor Laboratory Press},
  year              = {2020},
  elocation-id      = {2020.04.19.20067660},
  doi               = {10.1101/2020.04.19.20067660},
  url               = {https://www.medrxiv.org/content/early/2020/06/11/2020.04.19.20067660},
  eprint            = {2020.04.19.20067660},
  eprinttype        = {medRxiv},
  pdfurl            = {https://www.medrxiv.org/content/early/2020/06/11/2020.04.19.20067660.full.pdf},
  abstract          = {Understanding temporal dynamics of COVID-19 patient symptoms could provide fine-grained resolution to guide clinical decision-making. Here, we use deep neural networks over an institution-wide platform for the augmented curation of clinical notes from 77,167 patients subjected to COVID-19 PCR testing. By contrasting Electronic Health Record (EHR)-derived symptoms of COVID-19-positive (COVIDpos; n=2,317) versus COVID-19-negative (COVIDneg; n=74,850) patients for the week preceding the PCR testing date, we identify anosmia/dysgeusia (27.1-fold), fever/chills (2.6-fold), respiratory difficulty (2.2-fold), cough (2.2-fold), myalgia/arthralgia (2-fold), and diarrhea (1.4-fold) as significantly amplified in COVIDpos over COVIDneg patients. The combination of cough and fever/chills has 4.2-fold amplification in COVIDpos patients during the week prior to PCR testing, and along with anosmia/dysgeusia, constitutes the earliest EHR-derived signature of COVID-19. This study introduces an Augmented Intelligence platform for the real-time synthesis of institutional biomedical knowledge. The platform holds tremendous potential for scaling up curation throughput, thus enabling EHR-powered early disease diagnosis.},
  competing-interests = {The authors are all employees of nference or the Mayo Clinic. The authors from nference have financial interests in the company. One or more of the investigators associated with this project and Mayo Clinic have a Financial Conflict of Interest in technology used in the research and that the investigator(s) and Mayo Clinic may stand to gain financially from the successful outcome of the research. This research has been reviewed by the Mayo Clinic Conflict of Interest Review Board and is being conducted in compliance with Mayo Clinic Conflict of Interest policies. ADB is a consultant for Abbvie, is on scientific advisory boards for Nference and Zentalis, and is founder and President of Splissen therapeutics.},
  funding           = {ADB is supported by Grants AI 110173 and AI120698 from NIAID, 109593-62-RGRL from Amfar, and the HH Sheikh Khalifa Bin Zayed Al-Nahyan named professorship from Mayo Clinic.},
  author-declarations = {All relevant ethical guidelines have been followed; any necessary IRB and/or ethics committee approvals have been obtained and details of the IRB/oversight body are included in the manuscript. Yes. All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived. Yes. I understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance). Yes. I have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable. Yes.},
  data-availability = {The EHR dataset where augmented curation was conducted from the Mayo Clinic records was accessed under IRB 20-003278, "Study of COVID-19 patient characteristics with augmented curation of Electronic Health Records (EHR) to inform strategic and operational decisions". The EHR data cannot be shared or released due to HIPAA regulations. Contact corresponding authors for additional details regarding the IRB, and please refer to the Mayo Clinic IRB website for further details on our commitment to patient privacy (https://www.mayo.edu/research/institutional-review-board/overview). The summary statistics derived from the EHRs are enclosed within the manuscript.},
}