% medRxiv preprint; the url below points at the version dated 2020/05/26.
% The research group is a corporate author, so it carries an extra pair of
% braces to keep it from being parsed as a personal name.
% The submission statements exported together with the abstract are stored in
% separate non-standard fields (competing-interests, funding,
% author-declarations, data-availability).
@article{Izquierdo2020.05.22.20109959,
  author              = {Izquierdo, Jose L. and Ancochea, Julio and {Savana COVID-19 Research Group} and Soriano, Joan B.},
  title               = {Clinical Characteristics and Prognostic Factors for {ICU} Admission of Patients with {Covid-19} Using Machine Learning and Natural Language Processing},
  journal             = {medRxiv},
  publisher           = {Cold Spring Harbor Laboratory Press},
  year                = {2020},
  elocation-id        = {2020.05.22.20109959},
  doi                 = {10.1101/2020.05.22.20109959},
  url                 = {https://www.medrxiv.org/content/early/2020/05/26/2020.05.22.20109959},
  eprint              = {https://www.medrxiv.org/content/early/2020/05/26/2020.05.22.20109959.full.pdf},
  abstract            = {There remain many unknowns regarding the onset and clinical course of the ongoing COVID-19 pandemic. We used a combination of classic epidemiological methods, natural language processing (NLP), and machine learning (for predictive modeling), to analyse the electronic health records (EHRs) of patients with COVID-19. We explored the unstructured free text in the EHRs within the SESCAM Healthcare Network (Castilla La-Mancha, Spain) from the entire population with available EHRs (1,364,924 patients) from January 1st to March 29th, 2020. We extracted related clinical information upon diagnosis, progression and outcome for all COVID-19 cases, focusing in those requiring ICU admission. A total of 10,504 patients with a clinical or PCR-confirmed diagnosis of COVID-19 were identified, 52.5\% males, with age of 58.2{\textpm}19.7 years. Upon admission, the most common symptoms were cough, fever, and dyspnoea, but all in less than half of cases. Overall, 6\% of hospitalized patients required ICU admission. Using a machine-learning, data-driven algorithm we identified that a combination of age, fever, and tachypnoea was the most parsimonious predictor of ICU admission: those younger than 56 years, without tachypnoea, and temperature $<$39{\textdegree}C, (or $>$39{\textdegree}C without respiratory crackles), were free of ICU admission. On the contrary, COVID-19 patients aged 40 to 79 years were likely to be admitted to the ICU if they had tachypnoea and delayed their visit to the ER after being seen in primary care. Our results show that a combination of easily obtainable clinical variables (age, fever, and tachypnoea with/without respiratory crackles) predicts which COVID-19 patients require ICU admission.},
  competing-interests = {This study was sponsored by SAVANA (https://www.savanamed.com/) and some authors are employees of SAVANA. There are no other conflicts of interest to report.},
  funding             = {This study was sponsored by SAVANA (https://www.savanamed.com/)},
  author-declarations = {I confirm all relevant ethical guidelines have been followed, and any necessary IRB and/or ethics committee approvals have been obtained. Yes. The details of the IRB/oversight body that provided approval or exemption for the research described are given below: This study was classified as a non-post-authorization study (EPA) by the Spanish Agency of Medicines and Health Products (AEMPS), and it was approved by the Research Ethics Committee at the University Hospital of Guadalajara (Spain). All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived. Yes. I understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance). Yes. I have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable. Yes.},
  data-availability   = {Tabulated data is available upon request to the authors and subject to all EU regulations https://bigcovidata.savanamed.com/},
}