% medRxiv preprint (record exported from medrxiv.org).
% The abstract field holds only the paper's abstract. The submission
% statements that the export had appended to it (competing interests,
% funding, author declarations, data availability) are kept in the
% non-standard fields below; unknown fields are ignored by standard styles.
% The pdf field keeps the full-text link that the export had placed in eprint.
@article{Sheu2021.08.04.21261512,
  author               = {Sheu, Yi-han and Magdamo, Colin and Miller, Matthew and Das, Sudeshna and Blacker, Deborah and Smoller, Jordan W.},
  title                = {Phenotyping Antidepressant Treatment Response with Deep Learning in Electronic Health Records},
  journal              = {medRxiv},
  publisher            = {Cold Spring Harbor Laboratory Press},
  year                 = {2021},
  elocation-id         = {2021.08.04.21261512},
  doi                  = {10.1101/2021.08.04.21261512},
  eprint               = {2021.08.04.21261512},
  eprinttype           = {medRxiv},
  url                  = {https://www.medrxiv.org/content/early/2021/08/04/2021.08.04.21261512},
  pdf                  = {https://www.medrxiv.org/content/early/2021/08/04/2021.08.04.21261512.full.pdf},
  abstract             = {Efficient, accurate phenotyping for antidepressant treatment response in
    electronic health records (EHRs) could facilitate precision psychiatry applications but remains
    a challenge. Increasingly, artificial intelligence methods using
    {\textquotedblleft}deep learning{\textquotedblright} applied to clinical data have shown promise
    in complex classification problems. Here, we systematically evaluate the performance of eight
    deep-learning-based natural language processing models in classifying response to
    antidepressants in a large real-world healthcare setting. We obtained data spanning 1990-2018
    for adults with depression and a co-occurring antidepressant prescription from the EHR data
    warehouse of the Mass General Brigham healthcare system (n=111,572). Clinical notes were
    collected for the following time windows after antidepressant initiation: (1) 2 days to 4 weeks,
    (2) 4{\textendash}12 weeks, and (3) 12{\textendash}26 weeks. A stratified random sample of these
    note sets (total 4,299 across time periods) were manually reviewed to classify response status
    as {\textquotedblleft}improved{\textquotedblright} or
    {\textquotedblleft}no evidence of improvement{\textquotedblright} in depression symptoms. All
    models performed well, with areas under the receiver operator curve (AUROC) of at least 0.80.
    Positive predictive values (PPVs) ranged from 0.72 {\textendash} 0.91. In general, models
    incorporating more information-dense and longer text sequences performed better than others.
    The best performing model (Longformer-large with sliding window) had an AUROC = 0.88 and
    PPV = 0.84 at a specificity of 0.88. Our results indicate that deep learning methods applied to
    EHR data can accurately classify antidepressant response in a real-world healthcare setting.
    Automated treatment response classification may facilitate a range of research and clinical
    decision support applications.},
  competing-interests  = {JWS is a member of the Leon Levy Foundation Neuroscience Advisory Board and
    received an honorarium for an internal seminar at Biogen, Inc. He is PI of a collaborative study
    of the genetics of depression and bipolar disorder sponsored by 23andMe for which 23andMe
    provides analysis time as in-kind support but no payments.},
  funding              = {No external funding was received for the purpose of this study.},
  author-declarations  = {I confirm all relevant ethical guidelines have been followed, and any
    necessary IRB and/or ethics committee approvals have been obtained. Yes. The details of the
    IRB/oversight body that provided approval or exemption for the research described are given
    below: This study was reviewed and approved by institutional review board of the Mass General
    Brigham (MGB) Healthcare System (Boston, MA, USA; Protocol number: \#2018P000765), which granted
    permission to process and analyze the electronic healthcare data provided by MGB for the purpose
    of this study. All necessary patient/participant consent has been obtained and the appropriate
    institutional forms have been archived. Yes. I understand that all clinical trials and any other
    prospective interventional studies must be registered with an ICMJE-approved registry, such as
    ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered
    and the trial registration ID is provided (note: if posting a prospective study registered
    retrospectively, please provide a statement in the trial ID field explaining why the study was
    not registered in advance). Yes. I have followed all appropriate research reporting guidelines
    and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent
    material as supplementary files, if applicable. Yes.},
  data-availability    = {The electronic health records data used in this study are not available to
    the public or per request for privacy protection.},
}