{"markup":"\u003C?xml version=\u00221.0\u0022 encoding=\u0022UTF-8\u0022 ?\u003E\n    \u003Chtml version=\u0022HTML+RDFa+MathML 1.1\u0022\n    xmlns:content=\u0022http:\/\/purl.org\/rss\/1.0\/modules\/content\/\u0022\n    xmlns:dc=\u0022http:\/\/purl.org\/dc\/terms\/\u0022\n    xmlns:foaf=\u0022http:\/\/xmlns.com\/foaf\/0.1\/\u0022\n    xmlns:og=\u0022http:\/\/ogp.me\/ns#\u0022\n    xmlns:rdfs=\u0022http:\/\/www.w3.org\/2000\/01\/rdf-schema#\u0022\n    xmlns:sioc=\u0022http:\/\/rdfs.org\/sioc\/ns#\u0022\n    xmlns:sioct=\u0022http:\/\/rdfs.org\/sioc\/types#\u0022\n    xmlns:skos=\u0022http:\/\/www.w3.org\/2004\/02\/skos\/core#\u0022\n    xmlns:xsd=\u0022http:\/\/www.w3.org\/2001\/XMLSchema#\u0022\n    xmlns:mml=\u0022http:\/\/www.w3.org\/1998\/Math\/MathML\u0022\u003E\n  \u003Chead\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_YjAJQgxDlFX6S-O02jj9jCrVbrwlY3CGgCg1FzPlvBs.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nif(typeof window.MathJax === \u0022undefined\u0022) window.MathJax = { menuSettings: { zoom: \u0022Click\u0022 } };\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_waP91NpgGpectm_6Y2XDEauLJ8WCSCBKmmA87unpp2E.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.googletagmanager.com\/gtag\/js?id=G-0K57TCX5BY\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nwindow.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments)};gtag(\u0022js\u0022, new 
Date());gtag(\u0022set\u0022, \u0022developer_id.dMDhkMT\u0022, true);gtag(\u0022config\u0022, \u0022G-0K57TCX5BY\u0022, {\u0022groups\u0022:\u0022default\u0022,\u0022anonymize_ip\u0022:true});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\njQuery.extend(Drupal.settings, {\u0022basePath\u0022:\u0022\\\/\u0022,\u0022pathPrefix\u0022:\u0022\u0022,\u0022highwire\u0022:{\u0022ac\u0022:{\u0022medrxiv;2026.01.23.26344677v1\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022medrxiv;2026.01.23.26344677v1\u0022,\u0022apath\u0022:\u0022\u0022,\u0022jcode\u0022:\u0022medrxiv\u0022}},\u0022processed\u0022:[\u0022highwire_math\u0022],\u0022markup\u0022:[{\u0022requested\u0022:\u0022full-text\u0022,\u0022variant\u0022:\u0022full-text\u0022,\u0022view\u0022:\u0022full\u0022,\u0022pisa\u0022:\u0022medrxiv;2026.01.23.26344677v1\u0022}]},\u0022instances\u0022:\u0022{\\u0022highwire_abstract_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:20,\\u0022height\\u0022:20,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-abstract-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-abstract-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022right center\\u0022,\\u0022my\\u0022:\\u0022left center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022shift\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter click \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_author_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-author-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-author-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022top center\\u0022,\\u0022my\\u0022:\\u0022bottom center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_reflinks_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022mimic\\u0022:\\u0022top center\\u0022,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-ref-link-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-ref-link-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022bottom left\\u0022,\\u0022my\\u0022:\\u0022top left\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022flip\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}}}\u0022,\u0022qtipDebug\u0022:\u0022{\\u0022leaveElement\\u0022:0}\u0022,\u0022googleanalytics\u0022:{\u0022account\u0022:[\u0022G-0K57TCX5BY\u0022],\u0022trackOutbound\u0022:1,\u0022trackMailto\u0022:1,\u0022trackDownload\u0022:1,\u0022trackDownloadExtensions\u0022:\u00227z|aac|arc|arj|asf|asx|avi|bin|csv|doc(x|m)?|dot(x|m)?|exe|flv|gif|gz|gzip|hqx|jar|jpe?g|js|mp(2|3|4|e?g)|mov(ie)?|msi|msp|pdf|phps|png|ppt(x|m)?|pot(x|m)?|pps(x|m)?|ppam|sld(x|m)?|thmx|qtm?|ra(m|r)?|sea|sit|tar|tgz|torrent|txt|wav|wma|wmv|wpd|xls(x|m|b)?|xlt(x|m)|xlam|xml|z|zip\u0022,\u0022trackColorbox\u0022:1},\u0022ajaxPageState\u0022:{\u0022js\u0022:{\u0022\\\/\\\/cdn.jsdelivr.net\\\/qtip2\\\/2.2.1\\\/jquery.qtip.min.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_article_reference_popup.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_at_symbol.js\u0022:1,\u00220\u0022:1,\u0022sites\\\/all\\\/modules\\\/contrib\\\/google_analytics\\\/googleanalytics.js\u0022:1,\u0022https:\\\/\\\/www.googletagmanager.com\\\/gtag\\\/js?id=G-0K57TCX5BY\u0022:1,\u00221\u0022:1}}});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__dn-cpI1YtkU_iLHgA5WhlkxgYWyat_IxjF_B-WSYrpE__a9hIbt0eaZ7d5nhwnm2weG8R_2eXK4EvoOx9dOxouHE__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 
href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__HGACIFBlu2o05y3afvqlt5wrE_5Dn6MXsexfuEpeIwg__t4SOPxucAPoV3Os7g8dXqyMB1HRXQridRJ82X7nE33E__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink rel=\u0027stylesheet\u0027 type=\u0027text\/css\u0027 href=\u0027\/sites\/all\/modules\/contrib\/panels\/plugins\/layouts\/onecol\/onecol.css\u0027 \/\u003E\u003C\/head\u003E\u003Cbody\u003E\u003Cdiv class=\u0022panels-ajax-tab-panel panels-ajax-tab-panel-article-tab-full-text\u0022\u003E\u003Cdiv class=\u0022panel-display panel-1col clearfix\u0022 \u003E\n  \u003Cdiv class=\u0022panel-panel panel-col\u0022\u003E\n    \u003Cdiv\u003E\u003Cdiv class=\u0022panel-pane pane-highwire-markup\u0022 \u003E\n  \n      \n  \n  \u003Cdiv class=\u0022pane-content\u0022\u003E\n    \u003Cdiv class=\u0022highwire-markup\u0022\u003E\u003Cdiv xmlns=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022 data-highwire-cite-ref-tooltip-instance=\u0022highwire_reflinks_tooltip\u0022 class=\u0022content-block-markup\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cdiv class=\u0022article fulltext-view \u0022\u003E\u003Cspan class=\u0022highwire-journal-article-marker-start\u0022\u003E\u003C\/span\u003E\u003Cdiv class=\u0022section abstract\u0022 id=\u0022abstract-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAbstract\u003C\/h2\u003E\u003Cp id=\u0022p-2\u0022\u003EFoundation models trained on patient electronic health records (EHRs) hold promise for transforming clinical care by enabling effective decision support and personalized healthcare delivery, but have been limited by a focus on intensive care objectives. Here we present a multi-domain transformer-based EHR foundation model designed to predict two liver disease outcomes in patients with Chronic Hepatitis B, an infection characterized by diverse and uncertain medical trajectories. 
Through case studies employing attention maps, we demonstrate that the transformer model identifies patterns similar to one-liners employed by clinical staff and depends on distinct clinical events to estimate disease progression. Our findings underscore both the utility and challenges of EHR foundation models in clinical care and the necessity to evaluate EHR-models on less-regimented diseases.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E1 Introduction\u003C\/h2\u003E\u003Cp id=\u0022p-15\u0022\u003EDespite the existence of a safe and effective vaccine as well as chronic suppressive therapy, Chronic Hepatitis B (CHB) infection results in over 800 000 deaths annually and 1.5 million people are newly infected each year [\u003Ca id=\u0022xref-ref-19-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-19\u0022\u003E19\u003C\/a\u003E]. Liver cirrhosis and hepatocellular carcinoma (HCC), collectively referred to as C\/HCC, are the leading causes of mortality among CHB patients [\u003Ca id=\u0022xref-ref-3-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-3\u0022\u003E3\u003C\/a\u003E]. The clinical progression among CHB patients to C\/HCC is a continuous but non-linear process with subtle and nonspecific early warning signs. Providing individualized risk assessments for developing liver diseases could enable early intervention and promote medication adherence to prevent severe outcomes. Their impact on the liver\u2019s synthetic function typically occurs only at advanced stages, when hepatocyte function ultimately collapses. 
These medical characteristics hold the potential for large-scale EHR models to learn long-range dependencies and sequential symptoms in the medical trajectory.\u003C\/p\u003E\u003Cp id=\u0022p-16\u0022\u003ENumerous studies have developed and evaluated pre-trained language models for clinical prediction tasks [\u003Ca id=\u0022xref-ref-17-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-20-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-20\u0022\u003E20\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-16-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-16\u0022\u003E16\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-4-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-13-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-13\u0022\u003E13\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-21-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-21\u0022\u003E21\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-12-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-12\u0022\u003E12\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-7-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-7\u0022\u003E7\u003C\/a\u003E][\u003Ca id=\u0022xref-ref-8-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E]. CLMBR [\u003Ca id=\u0022xref-ref-17-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E] introduced a framework that learns patient representations from a large EHR database using a language objective and a Gated Recurrent Unit (GRU), which was subsequently extended [\u003Ca id=\u0022xref-ref-20-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-20\u0022\u003E20\u003C\/a\u003E] with a transformer-based architecture. 
Medical pre-trained models have also been adapted to a time-to-event training objective [\u003Ca id=\u0022xref-ref-16-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-16\u0022\u003E16\u003C\/a\u003E] and state-space representations, including the incorporation of a Mamba block [\u003Ca id=\u0022xref-ref-4-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-17\u0022\u003EMost of the popular EHR datasets are focused on the intensive care unit (ICU) setting, such as MIMIC-III [\u003Ca id=\u0022xref-ref-5-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-5\u0022\u003E5\u003C\/a\u003E] and MIMIC-IV [\u003Ca id=\u0022xref-ref-6-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-6\u0022\u003E6\u003C\/a\u003E]. ICU care is notable for intense short-term monitoring, relatively protocolized treatments and similar patient trajectories. This is in contrast to the patient trajectories from chronic diseases\u2013with intermittent monitoring, inconsistent emergence of signs of disease progression, and variable clinical practices for monitoring. Application of EHR foundation models to indolent and chronic progressive diseases remains understudied, and it is still unclear whether predictive performance on ICU trajectories reported in the literature holds in these cases. We are among the first [\u003Ca id=\u0022xref-ref-13-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-13\u0022\u003E13\u003C\/a\u003E] to develop clinical pre-trained models on the Optum\u00ae de-identified Electronic Health Record data set (Optum\u00ae EHR) [\u003Ca id=\u0022xref-ref-2-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-2\u0022\u003E2\u003C\/a\u003E]. 
This is a rich multimodal dataset of de-identified longitudinal patient data, including encounters, procedures, medications, and clinical notes.\u003C\/p\u003E\u003Cp id=\u0022p-18\u0022\u003EOur contributions are as follows:\u003C\/p\u003E\u003Col class=\u0022list-romanlower \u0022 id=\u0022list-1\u0022\u003E\u003Cli id=\u0022list-item-1\u0022\u003E\u003Cp id=\u0022p-19\u0022\u003EWe develop a multi-domain EHR foundation model, and explore its utility to predict liver cirrhosis and liver cancer for chronic hepatitis B patients.\u003C\/p\u003E\u003C\/li\u003E\u003Cli id=\u0022list-item-2\u0022\u003E\u003Cp id=\u0022p-20\u0022\u003EWe analyse attention maps of transformer blocks contained in our models and provide a medical interpretation of their decision-making process in the case of insidious diseases such as C\/HCC.\u003C\/p\u003E\u003C\/li\u003E\u003Cli id=\u0022list-item-3\u0022\u003E\u003Cp id=\u0022p-21\u0022\u003EWe discuss the medical perspective on current literature evaluating EHR foundation models on diseases with stereotypical treatment paths, and argue for the necessity to evaluate on unclear patient trajectories.\u003C\/p\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-2\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E2 Methods\u003C\/h2\u003E\u003Cp id=\u0022p-22\u0022\u003EWe present an overview of the analysis workflow in \u003Ca id=\u0022xref-fig-2-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFigure A1\u003C\/a\u003E.\u003C\/p\u003E\u003Cdiv id=\u0022sec-3\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEHR data\u003C\/h3\u003E\u003Cp id=\u0022p-23\u0022\u003EWe trained our models on multi-center EHR data [\u003Ca id=\u0022xref-ref-2-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-2\u0022\u003E2\u003C\/a\u003E], which comprises de-identified, longitudinal records from over 100 million patients across more than 7,000 U.S. hospitals and clinics. 
The database captures multi-domain data, extracted from structured medical information and written medical notes\u2014including patient demographics, diagnoses, and prescriptions\u2014recorded between 2007 and 2024 (see Appendix A.1 for more details). The analysis of liver disease prediction focuses solely on a target population of patients living with chronic hepatitis B (CHB). For finetuning and an end-to-end trained transformer, we therefore define the \u003Cem\u003ECHB target population\u003C\/em\u003E as all patients with at least one CHB diagnosis code, resulting in a dataset comprised of more than 78 000 patients (see \u003Ca id=\u0022xref-table-wrap-2-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T2\u0022\u003ETable A1\u003C\/a\u003E). To obtain our pre-training dataset, we filtered the full EHR database for patients not contained in the CHB target population and for medical codes occurring in no fewer than 7 000 patients and included only patients with between 20 and 600 events. The resulting pretraining dataset is comprised of over 55 million patient trajectories with an average of 168 events per patient.\u003C\/p\u003E\u003Cp id=\u0022p-24\u0022\u003EThe tables considered in the presented analysis are \u003Cem\u003EPatients\u003C\/em\u003E (e.g. birth year, gender, ethnicity), \u003Cem\u003EDiagnosis\u003C\/em\u003E (e.g. ICD9, ICD10, SNOMED codes), \u003Cem\u003ELabs\u003C\/em\u003E (e.g. LOINC codes), \u003Cem\u003EImmunization\u003C\/em\u003E (vaccinations), \u003Cem\u003EObservation\u003C\/em\u003E (numerical e.g. SBP, temperature), \u003Cem\u003EPrescription\u003C\/em\u003E (e.g. NDC codes), \u003Cem\u003EVisit\u003C\/em\u003E (categorical visit types e.g. inpatient or emergency), \u003Cem\u003EProcedures\u003C\/em\u003E (e.g. CPT4, ICD9, SNOMED codes), and \u003Cem\u003EAdministrations\u003C\/em\u003E (e.g. 
NDC codes).\u003C\/p\u003E\u003Cp id=\u0022p-25\u0022\u003EThe EHR data includes in-patient stays with intensive surveillance, yielding up to 26 500 recorded events per patient. To reduce the sequence length and the dominance of high-surveillance periods, events are aggregated over monthly intervals. This aggregation window corresponds to the medically relevant period for detecting changes in liver health. Let \u003Cem\u003ET\u003C\/em\u003E\u003Csub\u003Emax\u003C\/sub\u003E be the maximum number of months with at least one event recorded for all patients in a batch, then the batch can be represented by a tuple of patients \u003Cem\u003EP\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ei\u003C\/em\u003E\u003C\/sub\u003E, with each \u003Cem\u003EP\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ei\u003C\/em\u003E\u003C\/sub\u003E:\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-1.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-1.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\nGiven the expanding literature on clinical pre-trained models, the Medical Event Data Standard (MEDS)[\u003Ca id=\u0022xref-ref-1-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-1\u0022\u003E1\u003C\/a\u003E] has been proposed to maximize interoperability across datasets, tools, and model architectures. 
Our work follows the MEDS standard.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-4\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEndpoint definition\u003C\/h3\u003E\u003Cp id=\u0022p-26\u0022\u003EWe predict the probability of developing cirrhosis or liver cancer after four different time delays (1, 3, 5, and 10 years). These time delays correspond to clinically relevant periods for liver function deterioration. Both outcomes are studied in separate models.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-5\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEHR tabular representation\u003C\/h3\u003E\u003Cp id=\u0022p-27\u0022\u003EWhile literature on EHR foundation models has demonstrated notable improvements over simpler baselines, historical evidence indicates that clinical machine learning models have frequently been outperformed by less complex approaches. Consequently, we challenge the EHR foundation model approach by establishing a robust and straightforward baseline using XGBoost, converting each patient sequence into a tabular representation of the sequence. The construction of aggregated timepoints and corresponding labels matches the process of the embedding models. However, instead of embedding each event for a patient, a table is constructed. 
Given \u003Cem\u003EN\u003C\/em\u003E\u003Csub\u003Eevents\u003C\/sub\u003E is the maximum number of event types present in the data, the tabular data representation takes the form \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E. The presence of categorical features in a patient\u2019s sequence is one-hot encoded. Numerical events are described by two columns; the first recording the numerical value (in case multiple numerical events are recorded for the same event type, the most recent measurement is taken) and the second indicating presence\/absence of the value, to be able to differentiate between true zero-valued features and not recorded features. 
We employed \u003Ckbd\u003Esklearn\u003C\/kbd\u003E\u2019s [\u003Ca id=\u0022xref-ref-10-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-10\u0022\u003E10\u003C\/a\u003E] \u003Ckbd\u003ESelectKBest\u003C\/kbd\u003E feature selection on the validation set with the criterion \u003Ckbd\u003Ef_classif\u003C\/kbd\u003E (ANOVA F-value) to reduce the number of features in the table from \u003Cem\u003EN\u003C\/em\u003E\u003Csub\u003Eall_events\u003C\/sub\u003E + \u003Cem\u003EN\u003C\/em\u003E\u003Csub\u003Enumerical\u003C\/sub\u003E to 1000.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-6\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EEHR sequence embedding\u003C\/h3\u003E\u003Cp id=\u0022p-28\u0022\u003EDepending on the feature domain, each medical event in time-bin \u003Cem\u003EM\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ej\u003C\/em\u003E\u003C\/sub\u003E is described by an obligatory categorical feature defined by a medical code or text, and by an optional numerical value. Each event is embedded according to its event type into a common model dimension \u003Cem\u003ED\u003C\/em\u003E. All embeddings within the monthly aggregation time window are averaged, leading to each \u003Cem\u003EM\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ej\u003C\/em\u003E\u003C\/sub\u003E in \u003Cem\u003EP\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ei\u003C\/em\u003E\u003C\/sub\u003E being represented by a single embedding vector \u003Cstrong\u003EX\u003C\/strong\u003E \u2208 \u211d\u003Csup\u003E\u003Cem\u003ED\u003C\/em\u003E\u003C\/sup\u003E. 
Therefore, a batch tensor with \u003Cem\u003EB\u003C\/em\u003E padded sequences takes the shape \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-2\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-2.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-2.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E.\u003C\/p\u003E\u003Cp id=\u0022p-29\u0022\u003EFor each \u003Cem\u003EM\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ej\u003C\/em\u003E\u003C\/sub\u003E, two additional embeddings are constructed and added: a trainable categorical embedding indicating age \u003Cstrong\u003EA\u003C\/strong\u003E \u2208 \u211d\u003Csup\u003E\u003Cem\u003ED\u003C\/em\u003E\u003C\/sup\u003E and a fixed positional encoding \u003Cstrong\u003EP\u003C\/strong\u003E \u2208 \u211d\u003Csup\u003E\u003Cem\u003ED\u003C\/em\u003E\u003C\/sup\u003E. 
The final tensor given to the transformer \u003Cstrong\u003EZ\u003C\/strong\u003E is constructed as \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-3\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-3.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-3.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-7\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E2.1 An EHR foundation model\u003C\/h3\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-8\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003Eend-to-end trained model\u003C\/h3\u003E\u003Cp id=\u0022p-30\u0022\u003EWe base our model architecture largely on existing methods [\u003Ca id=\u0022xref-ref-20-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-20\u0022\u003E20\u003C\/a\u003E] [\u003Ca id=\u0022xref-ref-17-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E]. The end-to-end trained model consists of a transformer encoder with \u003Cem\u003EL\u003C\/em\u003E identical layers, employing a masked multi-head self-attention considering all subsequences for training. We combine the transformer position-wise embeddings with a binary multi-task MLP prediction head with sigmoid activation. 
All time horizon outcomes are treated as a multi-task prediction objective, leading to \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-4\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-4.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-4.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E. The model is trained using the \u003Ckbd\u003EAdamW\u003C\/kbd\u003E optimiser and stopped upon reaching the validation loss minimum.\u003C\/p\u003E\u003Cdiv id=\u0022sec-9\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003Epretrained model\u003C\/h4\u003E\u003Cp id=\u0022p-31\u0022\u003EFor the pre-trained and fine-tuned models, an additional embedded time offset \u003Cstrong\u003E\u0394T\u003C\/strong\u003E in days describing time passed between events is added to the input \u003Cstrong\u003EZ\u003C\/strong\u003E\u003Csup\u003E\u2217\u003C\/sup\u003E = \u003Cstrong\u003EZ\u003C\/strong\u003E + \u003Cstrong\u003E\u0394T\u003C\/strong\u003E [\u003Ca id=\u0022xref-ref-21-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-21\u0022\u003E21\u003C\/a\u003E]. As each position in \u003Cstrong\u003EZ\u003C\/strong\u003E\u003Csup\u003E\u2217\u003C\/sup\u003E consists of multiple aggregated events, we use a multi-token prediction setup for pretraining the EHR foundation model. 
The pretraining prediction head is a multi-layer perceptron (MLP) providing a logit tensor for each code in the dataset \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-5\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-5.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-5.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E, where \u003Cem\u003EC\u003C\/em\u003E is the vocabulary size, i.e. the number of medical event codes in the dataset.\u003C\/p\u003E\u003Cp id=\u0022p-32\u0022\u003EThe loss function used to optimise the pretrained model consists of three parts:\u003C\/p\u003E\u003Col class=\u0022list-ord \u0022 id=\u0022list-2\u0022\u003E\u003Cli id=\u0022list-item-4\u0022\u003E\u003Cp id=\u0022p-33\u0022\u003Ea self-supervised multi-token objective consists in predicting the set of medical events present in the next aggregated time window, by optimising a multi-label cross-entropy loss \u2112\u003Csub\u003Enext\u003C\/sub\u003E (\u003Ca id=\u0022xref-disp-formula-2-1\u0022 class=\u0022xref-disp-formula\u0022 href=\u0022#disp-formula-2\u0022\u003EEq. 
1\u003C\/a\u003E),\u003C\/p\u003E\u003C\/li\u003E\u003Cli id=\u0022list-item-5\u0022\u003E\u003Cp id=\u0022p-34\u0022\u003Efor each time window \u003Cem\u003EM\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ei\u003C\/em\u003E\u003C\/sub\u003E, the model fits a regression head to predict a time offset in days to the earliest event in the next window \u003Cem\u003EM\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ei\u003C\/em\u003E+1\u003C\/sub\u003E, using a mean squared error loss \u2112\u003Csub\u003Etime\u003C\/sub\u003E (\u003Ca id=\u0022xref-disp-formula-3-1\u0022 class=\u0022xref-disp-formula\u0022 href=\u0022#disp-formula-3\u0022\u003EEq. 2\u003C\/a\u003E),\u003C\/p\u003E\u003C\/li\u003E\u003Cli id=\u0022list-item-6\u0022\u003E\u003Cp id=\u0022p-35\u0022\u003Eand for the numerical targets in the dataset, a Gaussian prediction head estimates the mean and log-variance of the distribution through a negative log-likelihood loss \u2112\u003Csub\u003Enum\u003C\/sub\u003E (\u003Ca id=\u0022xref-disp-formula-4-1\u0022 class=\u0022xref-disp-formula\u0022 href=\u0022#disp-formula-4\u0022\u003EEq. 3\u003C\/a\u003E).\u003C\/p\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003Cp id=\u0022p-36\u0022\u003EThe total loss is defined as \u2112\u003Csub\u003Etotal\u003C\/sub\u003E = \u2112\u003Csub\u003Enext\u003C\/sub\u003E + \u2112\u003Csub\u003Etime\u003C\/sub\u003E + \u2112\u003Csub\u003Enum\u003C\/sub\u003E. Multiple model sizes were evaluated in our study (2.1.1). 
Results presented are based on the 170M model.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-10\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003E2.1.1 Loss functions and training details\u003C\/h4\u003E\u003Cp id=\u0022p-37\u0022\u003EGiven the target \u003Cstrong\u003EY\u003C\/strong\u003E and the padding boolean mask \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-6\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-6.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-6.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E, the self-supervised multi-token objective consists in predicting the set of medical events present in the next aggregated time window, by optimising a multi-label cross-entropy (CE) loss:\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-2\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-2.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 
src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-2.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\nAdditionally, for each token, the model fits a regression head to predict the time offset to the next time point \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-7\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-7.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-7.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E measured in days between the aggregated event tokens. 
The loss is a mean squared error (MSE):\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-3\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-3.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-3.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\nGiven a numerical target \u003Cstrong\u003ENum\u003C\/strong\u003E \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-8\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-8.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-8.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E for the \u003Cem\u003EC\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Enum\u003C\/em\u003E\u003C\/sub\u003E numerical events of the dataset, a Gaussian prediction head estimates the mean and log-variance of the distribution. 
Considering a numerical mask \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-9\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-9.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/inline-graphic-9.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E which indicates whether a given numerical code is included in the batch, the negative log-likelihood loss is:\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-4\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-4.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/embed\/graphic-4.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\nThree model sizes were evaluated in our study: 43M, 170M and 860M parameters. We obtained equally high performances for the 170M and 860M models, and are presenting the 170M model in more detail. 
The 43M model is trained in full precision, while mixed precision is used for the larger models. Distributed Data Parallel (DDP) training is applied to the 43M and 170M models, and DeepSpeed ZeRO-2 [\u003Ca id=\u0022xref-ref-11-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-11\u0022\u003E11\u003C\/a\u003E] is utilized for the 860M model. Learning rate scheduling follows a cosine annealing schedule with warm-up. Pre-trained models are fine-tuned on the CHB target population training data, initializing weights from pre-training and fine-tuning only the final transformer layer and the prediction head, with all other weights frozen.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-11\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E3 Results\u003C\/h2\u003E\u003Cp id=\u0022p-38\u0022\u003EThe prediction results are displayed in \u003Ca id=\u0022xref-table-wrap-1-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E. The results indicate that end-to-end transformers trained directly on the CHB target population do not predict as well as the XGBoost baseline or the finetuned foundation model. 
While the finetuned foundation models show clear gains in performance compared to the end-to-end transformers, they reach a similar performance to but never outperform a simple tabular XGBoost baseline.\u003C\/p\u003E\u003Cdiv id=\u0022T1\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/1145280\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca href=\u0022\/highwire\/markup\/1145280\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/1145280\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 1.\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EModel comparison between 
tabular XGBoost baseline (e2e-xgb-tab), end-to-end transformer (e2e-tfm) and finetuned EHR foundation model (fm-ft-tfm).\u003C\/span\u003E\u003Cp id=\u0022p-39\u0022 class=\u0022first-child\u0022\u003EWe report mean \u00b1 standard deviation over the same five fixed train-test splits used in all experiments. Metrics are area under the ROC curve (AUC) and specificity at 90 percent sensitivity (sp@90sen).\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-40\u0022\u003EWith known advantages and limitations in attention interpretability [\u003Ca id=\u0022xref-ref-14-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-14\u0022\u003E14\u003C\/a\u003E], we gather insights into transformer model mechanics by analyzing attention weights patterns observed regularly over a large number of patients, and showcase them in two exemplary case studies in \u003Ca id=\u0022xref-fig-1-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFigure 1\u003C\/a\u003E. We regularly observe that one attention head is focusing solely on the first event, which constitutes a \u003Cem\u003Econtext token\u003C\/em\u003E of fixed patient characteristics: gender, year of birth, race, ethnicity, and geographical region (\u003Ca id=\u0022xref-fig-1-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFigure 1a\u003C\/a\u003E). Remarkably, this resembles the way that doctors and nurses communicate efficiently about patients through \u2018one-liners\u2019. The one-liner starts with a patient identifier providing clinically important information and paints a picture of the patient in the reader\u2019s mind [\u003Ca id=\u0022xref-ref-15-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-15\u0022\u003E15\u003C\/a\u003E], quickly sharing a baseline pretest probability for causes of the presenting medical concern. 
We find the emergence of this convergent behavior in our models quite astonishing, while also noting the weaknesses of learning misleading correlations (e.g. race as a marker for socioeconomic determinants of health). A second pattern is that of attention heads placing \u003Cem\u003Efull attention on the latest relevant medical event\u003C\/em\u003E, initially the context token, potentially switching full focus to a newly occurring event in the sequence (\u003Ca id=\u0022xref-fig-1-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFigure 1b\u003C\/a\u003E). Investigating the medical trajectory of the studied patient, attention in head no. 5 was initially fixed on the context token, then fully switched to a token for a medical code indicating abnormal findings on diagnostic imaging of liver, and switching again to a token recording a hepatic function panel. The same attention head placed attention on abnormal findings on diagnostic imaging of liver in other patients as well. This could indicate that this specific attention head has learned to focus on hepatic function for predicting HCC.\u003C\/p\u003E\u003Cdiv id=\u0022F1\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F1.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Attention weights of end-to-end transformer. Attention maps for the first model layer (L) and selected prediction heads (H) for each time index. Color scale normalized to the maximum value within each row (darkest color) and zero (brightest color). 
Attention across all heads and predictions for the showcased patients can be found in Figure A2.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-206886724\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;span xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot; class=\u0026quot;caption-title\u0026quot;\u0026gt;Attention weights of end-to-end transformer.\u0026lt;\/span\u0026gt; Attention maps for the first model layer (L) and selected prediction heads (H) for each time index. Color scale normalized to the maximum value within each row (darkest color) and zero (brightest color). Attention across all heads and predictions for the showcased patients can be found in Figure A2.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 1.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022107\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 1.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022107\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F1.large.jpg?download=true\u0022 class=\u0022highwire-figure-link 
highwire-figure-link-download\u0022 title=\u0022Download Figure 1.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F1.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 1.\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EAttention weights of end-to-end transformer.\u003C\/span\u003E\u003Cp id=\u0022p-41\u0022 class=\u0022first-child\u0022\u003EAttention maps for the first model layer (L) and selected prediction heads (H) for each time index. Color scale normalized to the maximum value within each row (darkest color) and zero (brightest color). Attention across all heads and predictions for the showcased patients can be found in \u003Ca id=\u0022xref-fig-3-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFigure A2\u003C\/a\u003E.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-42\u0022\u003EFurther, we find several indications that the EHR-models use \u003Cem\u003Esequence aggregation\u003C\/em\u003E operations. In end-to-end transformers, performance increases with larger time aggregation windows (\u003Ca id=\u0022xref-fig-5-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFigure A4\u003C\/a\u003E). 
This aggregation can also be seen in the pretrained model trained on the multi-token objective, where horizontal lines in pretrained foundation models indicate that some query tokens place attention on all other key tokens, effectively leading to an averaging on the signal of all tokens (\u003Ca id=\u0022xref-fig-4-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFigure A3\u003C\/a\u003E).\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-12\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E4 Discussion\u003C\/h2\u003E\u003Cp id=\u0022p-43\u0022\u003EIn this work we present a novel multi-domain EHR foundation model intended for long-term disease monitoring, and evaluate its capability for predicting chronic progressive diseases in the case of liver cirrhosis and liver cancer in CHB patients. We have demonstrated its utility by outperforming an end-to-end transformer trained directly on the target population, while critically comparing it to a non-temporal XGBoost prediction model, against which it demonstrates a similar performance. Emergent behavior mirroring clinical care is observed by placing attention to the \u2018one-liner\u2019 summarizing a patient\u2019s baseline health status. This behavior speaks to the potential of tackling difficult clinical problems. We present indications that the transformer models struggle to take advantage of the temporal aspect of the EHR sequences, and fall back to leveraging simpler techniques such as sequence aggregation to base their predictions by favoring large time aggregation windows and showing averaging steps in attention maps. We hypothesize that vanilla EHR foundation model architectures may not fully capture the complexity of medical event codes and their temporal interactions due to the relatively small number of EHR sequences compared to the training data available in language domains. 
The XGBoost setup likely created a more focused space of medical events by employing simple feature selection. A promising development would be to introduce greater inductive bias into the models, such as improving medical tokenization [\u003Ca id=\u0022xref-ref-18-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-18\u0022\u003E18\u003C\/a\u003E] or increasing training size by combining EHR datasets.\u003C\/p\u003E\u003Cp id=\u0022p-44\u0022\u003EWe are now further giving a medical perspective on the current literature and utilization of EHR-based foundation models: Prior work has demonstrated the utility of EHR foundation models over simpler baselines, such as logistic regression [\u003Ca id=\u0022xref-ref-17-4\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E] and XGBoost [\u003Ca id=\u0022xref-ref-4-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E]. The choice of endpoints used for performance evaluation is often motivated by the focus on ICU datasets, e.g. inpatient mortality [\u003Ca id=\u0022xref-ref-17-5\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E] [\u003Ca id=\u0022xref-ref-4-4\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E], long admission [\u003Ca id=\u0022xref-ref-17-6\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E], ICU transfer [\u003Ca id=\u0022xref-ref-17-7\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E], or readmission [\u003Ca id=\u0022xref-ref-17-8\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E] [\u003Ca id=\u0022xref-ref-4-5\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E]. The medical problems underlying these endpoints\u2013e.g. 
heart failure, sepsis and cardiogenic shock\u2013have well-developed management pathways that are systematically enforced by healthcare providers via payment rate benchmarks [\u003Ca id=\u0022xref-ref-9-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E]. This results in most patients having a fairly regimented management pathway that tends to be documented in a stereotypical manner in health records, enabling sequence-based encoders to effectively capture and impute the continuation of these regimented medical protocols.\u003C\/p\u003E\u003Cp id=\u0022p-45\u0022\u003EHowever, this is the exception rather than the norm; many common conditions, such as liver abnormalities, exhibit ad hoc diagnostics and less predictable treatment trajectories. As we think forward to foundational EHR-trained models as decision support tools for physicians and health systems, being able to grapple with complex and contradictory patient information offers a key opportunity for advancement in health. The presented results call into question whether the superior performance of EHR foundation models has been demonstrated in the use case of predicting patient trajectories uncertain to the medical professional. In an era of increasingly constrained provider time with patients at health check-ins, it is often these slowly progressive diseases that get missed and are only addressed when the disease has progressed to an overt and often irreversible state. 
An EHR-trained model capable of flagging the subtle signs of earlier disease progression could be an invaluable partner to the primary care provider, enabling a focus on disease suppression, treatment adherence, and even direction to novel curative therapies.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section data-availability\u0022 id=\u0022sec-13\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EData Availability\u003C\/h2\u003E\u003Cp id=\u0022p-46\u0022\u003EAll data produced in the present study are available upon reasonable request to the authors.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-14\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EA Technical Appendix and Supplementary Material\u003C\/h2\u003E\u003Cdiv id=\u0022F2\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F2.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022C\/HCC prediction workflow overview. The workflow of the presented risk prediction models is depicted from the bottom of the plot upwards. The common initial processing includes extracting the patients with chronic hepatitis B, extracting the medical events sequence and the respective endpoints at each timepoint. The workflow then splits into three downstream paths, A. one using a tabular data representation defining a feature vector for each patient, B. directly uses a mean feature embedding to train an end-to-end transformer, and C. 
pretrains a model on a multi-token loss first and is then finetuned on the C\/HCC prediction objective.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-206886724\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;div xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot;\u0026gt;\u0026lt;span class=\u0026quot;caption-title\u0026quot;\u0026gt;C\/HCC prediction workflow overview.\u0026lt;\/span\u0026gt; The workflow of the presented risk prediction models is depicted from the bottom of the plot upwards. The common initial processing includes extracting the patients with chronic hepatitis B, extracting the medical events sequence and the respective endpoints at each timepoint. The workflow then splits into three downstream paths, \u0026lt;strong\u0026gt;A\u0026lt;\/strong\u0026gt;. one using a tabular data representation defining a feature vector for each patient, \u0026lt;strong\u0026gt;B\u0026lt;\/strong\u0026gt;. directly uses a mean feature embedding to train an end-to-end transformer, and \u0026lt;strong\u0026gt;C\u0026lt;\/strong\u0026gt;. 
pretrains a model on a multi-token loss first and is then finetuned on the C\/HCC prediction objective.\u0026lt;\/div\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure A1:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022317\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure A1:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022317\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F2.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure A1:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F2.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan 
class=\u0022fig-label\u0022\u003EFigure A1:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EC\/HCC prediction workflow overview.\u003C\/span\u003E\u003Cp id=\u0022p-47\u0022 class=\u0022first-child\u0022\u003EThe workflow of the presented risk prediction models is depicted from the bottom of the plot upwards. The common initial processing includes extracting the patients with chronic hepatitis B, extracting the medical events sequence and the respective endpoints at each timepoint. The workflow then splits into three downstream paths, \u003Cstrong\u003EA\u003C\/strong\u003E. one using a tabular data representation defining a feature vector for each patient, \u003Cstrong\u003EB\u003C\/strong\u003E. directly uses a mean feature embedding to train an end-to-end transformer, and \u003Cstrong\u003EC\u003C\/strong\u003E. pretrains a model on a multi-token loss first and is then finetuned on the C\/HCC prediction objective.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-15\u0022\u003E\u003Ch3\u003EA.1 EHR multi-domain database\u003C\/h3\u003E\u003Cp id=\u0022p-48\u0022\u003EOptum\u00ae de-identified Electronic Health Record data set (Optum\u00ae EHR) is a longitudinal electronic health record repository derived from dozens of healthcare provider organizations in the United States. Administrative medical data is obtained from both Inpatient and Ambulatory electronic health records (EHRs), practice management systems, and other internal systems and is processed, normalized, and standardized across the continuum of care from both acute inpatient stays and outpatient visits. 
The data is statistically de-identified under the HIPAA Privacy Rule\u2019s Expert Determination method and managed according to Optum\u00ae customer data use agreements.\u003C\/p\u003E\u003Cdiv id=\u0022T2\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/1145276\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca href=\u0022\/highwire\/markup\/1145276\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/1145276\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable A1:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EFilter conditions for chronic Hepatitis B 
patients\u003C\/span\u003E\u003Cp id=\u0022p-49\u0022 class=\u0022first-child\u0022\u003Eby medical code. All individuals that have at least one entry corresponding to these codes are included in the analysis. Abbr.: \u2018Chronic viral hepatitis B\u2019 was abbreviated to CHB.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-16\u0022\u003E\u003Ch4\u003EData access statement\u003C\/h4\u003E\u003Cp id=\u0022p-50\u0022\u003EThe source data used for the present study were licensed from the Optum\u00ae de-identified EHR database (\u003Ca href=\u0022https:\/\/www.optum.com\/\u0022\u003Ehttps:\/\/www.optum.com\/\u003C\/a\u003E), with restrictions that do not allow for the data to be redistributed or made publicly available. However, for accredited researchers, the Optum\u00ae de-identified EHR database is available for licensing at Optum, Inc. Data access may require a data-sharing agreement and may incur data access fees.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-17\u0022\u003E\u003Ch4\u003EHigh positive class imbalance\u003C\/h4\u003E\u003Cp id=\u0022p-51\u0022\u003EVery few events get a positive class label. The positive class ratio of having either cirrhosis or liver cancer within the prediction time horizons of 1, 3, 5 or 10 years changes slightly, but ranges from 3 to 7 percent. 
For the end-to-end and finetuned models, the option to adjust the weight of the positive class in the BCE loss for the class ratio through \u003Ckbd\u003Epos_weight\u003C\/kbd\u003E is given as a hyperparameter.\u003C\/p\u003E\u003Cdiv id=\u0022F3\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F3.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Full attention weights of end-to-end transformer. Left: Attention maps for the first model layer (L) and all prediction heads (H) for each time index. Color scale normalized to the maximum value within each row (darkest color) and zero (brightest color). Right: Predictions and labels for each time index (blue: 0, white: 0.5, red: 1).\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-206886724\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;span xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot; class=\u0026quot;caption-title\u0026quot;\u0026gt;Full attention weights of end-to-end transformer.\u0026lt;\/span\u0026gt; Left: Attention maps for the first model layer (L) and all prediction heads (H) for each time index. Color scale normalized to the maximum value within each row (darkest color) and zero (brightest color). 
Right: Predictions and labels for each time index (blue: 0, white: 0.5, red: 1).\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure A2:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F3.medium.gif\u0022 width=\u0022370\u0022 height=\u0022440\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure A2:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F3.medium.gif\u0022 width=\u0022370\u0022 height=\u0022440\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F3.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure A2:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F3.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure A2:\u003C\/span\u003E \u003Cspan 
class=\u0022caption-title\u0022\u003EFull attention weights of end-to-end transformer.\u003C\/span\u003E\u003Cp id=\u0022p-52\u0022 class=\u0022first-child\u0022\u003ELeft: Attention maps for the first model layer (L) and all prediction heads (H) for each time index. Color scale normalized to the maximum value within each row (darkest color) and zero (brightest color). Right: Predictions and labels for each time index (blue: 0, white: 0.5, red: 1).\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022F4\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F4.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Attention maps of first layer of pretrained foundation model. High attention on the first token in the sequence can be observed. This is the context token, describing a number of set patient characteristics: gender, year of birth, race, ethnicity, and geographical region. Several heads spread out remaining attention across a wide range of tokens, giving all tokens the chance to attend to each other.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-206886724\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;span xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot; class=\u0026quot;caption-title\u0026quot;\u0026gt;Attention maps of first layer of pretrained foundation model.\u0026lt;\/span\u0026gt; High attention on the first token in the sequence can be observed. 
This is the context token, describing a number of set patient characteristics: gender, year of birth, race, ethnicity, and geographical region. Several heads spread out remaining attention across a wide range of tokens, giving all tokens the chance to attend to each other.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure A3:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022420\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure A3:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022420\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F4.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure A3:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F4.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new 
tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure A3:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EAttention maps of first layer of pretrained foundation model.\u003C\/span\u003E\u003Cp id=\u0022p-53\u0022 class=\u0022first-child\u0022\u003EHigh attention on the first token in the sequence can be observed. This is the context token, describing a number of set patient characteristics: gender, year of birth, race, ethnicity, and geographical region. Several heads spread out remaining attention across a wide range of tokens, giving all tokens the chance to attend to each other.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022F5\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F5.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022End-to-end trained transformer predictive performance vs. aggregation time window. An increase in AUC can be observed until 1 month, with a decrease for larger aggregation windows of 2 months.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-206886724\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;span xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot; class=\u0026quot;caption-title\u0026quot;\u0026gt;End-to-end trained transformer predictive performance vs. 
aggregation time window.\u0026lt;\/span\u0026gt; An increase in AUC can be observed until 1 month, with a decrease for larger aggregation windows of 2 months.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure A4:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022264\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure A4:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022264\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F5.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure A4:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/01\/24\/2026.01.23.26344677\/F5.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan 
class=\u0022fig-label\u0022\u003EFigure A4:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EEnd-to-end trained transformer predictive performance vs. aggregation time window.\u003C\/span\u003E\u003Cp id=\u0022p-54\u0022 class=\u0022first-child\u0022\u003EAn increase in AUC can be observed until 1 month, with a decrease for larger aggregation windows of 2 months.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ref-list\u0022 id=\u0022ref-list-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EReferences\u003C\/h2\u003E\u003Col class=\u0022cit-list ref-use-labels\u0022\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[1].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-1-1\u0022 title=\u0022View reference [1] in text\u0022 id=\u0022ref-1\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.01.23.26344677v1.1\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-source\u0022\u003EMedical event data standard (MEDS) format\u003C\/span\u003E. 
\u003Ca href=\u0022https:\/\/github.com\/Medical-Event-Data-Standard\/meds\u0022\u003Ehttps:\/\/github.com\/Medical-Event-Data-Standard\/meds\u003C\/a\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[2].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-2-1\u0022 title=\u0022View reference [2] in text\u0022 id=\u0022ref-2\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.01.23.26344677v1.2\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EOptum\u00ae EHR\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/business.optum.com\/en\/data-analytics\/life-sciences\/real-world-data\/ehr-data.html\u0022\u003Ehttps:\/\/business.optum.com\/en\/data-analytics\/life-sciences\/real-world-data\/ehr-data.html\u003C\/a\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EOptum\u00ae de-identified Electronic Health Record data set (Optum\u00ae EHR) is a longitudinal electronic health record repository derived from dozens of healthcare provider organizations in the United States. Administrative medical data is obtained from both Inpatient and Ambulatory electronic health records (EHRs), practice management systems, and other internal systems and is processed, normalized, and standardized across the continuum of care from both acute inpatient stays and outpatient visits. 
The data is statistically de-identified under the HIPAA Privacy Rule\u2019s Expert Determination method and managed according to Optum\u00ae customer data use agreements\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[3].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-3-1\u0022 title=\u0022View reference [3] in text\u0022 id=\u0022ref-3\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.3\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBixler\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EZhong\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK. N.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELy\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. C.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMoorman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP. R.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESpradling\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE. 
H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETeshale\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL. B.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERupp\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. C.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGordon\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBoscarino\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESchmidt\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY. G.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDaida\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. D.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHolmberg\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. D.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHolmberg\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE. H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETeshale\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP. 
R.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESpradling\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. C.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMoorman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EXing\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EZhong\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. C.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGordon\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED. R.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ENerenz\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELu\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELamerato\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELi\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL. 
B.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERupp\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAkkerman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EZhang\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETrudeau\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EZhou\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.-H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWu\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBoscarino\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EZ. S.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDaar\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER. E.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESmith\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY. 
G.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDaida\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. M.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETrinacty\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. W.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELai\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. P.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWong\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESchmidt\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. L.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDonald\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EMortality among patients with chronic hepatitis B infection: The chronic hepatitis cohort study (CHeCS)\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EClinical Infectious Diseases\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E68\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E6\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E956\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E963\u003C\/span\u003E, \u003Cspan class=\u0022cit-month\u0022\u003EJuly\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DClinical%2BInfectious%2BDiseases%26rft.volume%253D68%26rft.spage%253D956%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[4].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-4-1\u0022 title=\u0022View reference [4] in text\u0022 id=\u0022ref-4\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.4\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFallahpour\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAlinoori\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E 
\u003Cspan class=\u0022cit-name-given-names\u0022\u003EW.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EYe\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECao\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAfkanpour\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKrishnan\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EEHRMamba: Towards generalizable and scalable foundation models for electronic health records\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[5].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-5-1\u0022 title=\u0022View reference [5] in text\u0022 id=\u0022ref-5\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.5\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. E.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJohnson\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET. 
J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPollard\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShen\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.-w. H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELehman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFeng\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGhassemi\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMoody\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESzolovits\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL. Anthony\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECeli\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER. G.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMark\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EMIMIC-III, a freely accessible critical care database\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EScientific Data\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E3\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-month\u0022\u003EMay\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2016\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[6].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-6-1\u0022 title=\u0022View reference [6] in text\u0022 id=\u0022ref-6\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.6\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. E. W.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJohnson\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBulgarelli\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShen\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGayles\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShammout\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan 
class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHorng\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET. J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPollard\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHao\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMoody\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGow\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.-w. H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELehman\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECeli\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER. G.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMark\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EMIMIC-IV, a freely accessible electronic health record dataset\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EScientific Data\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E10\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-month\u0022\u003EJan\u003C\/span\u003E. \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[7].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-7-1\u0022 title=\u0022View reference [7] in text\u0022 id=\u0022ref-7\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.7\u0022 data-doi=\u002210.1038\/s42256-024-00974-9\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMataraso\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EEspinosa\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESeong\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. 
M.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EReincke\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBerson\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. D.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EReiss\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKim\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGhanem\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.-H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShu\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJames\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETan\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShome\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI. 
A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EStelzer\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFeyaerts\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER. J.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWong\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG. M.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShaw\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM. S.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAngst\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGaudilliere\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED. K.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EStevenson\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAghaeepour\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EA machine learning approach to leveraging electronic health records for enhanced omics analysis\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENature Machine Intelligence\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E7\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E2\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E293\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E306\u003C\/span\u003E, \u003Cspan class=\u0022cit-month\u0022\u003EJan\u003C\/span\u003E. \u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNature%2BMachine%2BIntelligence%26rft.volume%253D7%26rft.spage%253D293%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs42256-024-00974-9%26rft_id%253Dinfo%253Apmid%252F40008295%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s42256-024-00974-9\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=40008295\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F01%2F24%2F2026.01.23.26344677.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[8].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-8-1\u0022 title=\u0022View reference [8] in text\u0022 
id=\u0022ref-8\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.8\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM. B. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMcDermott\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ENestor\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EArgaw\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKohane\u003C\/span\u003E\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EEvent stream gpt: A data pre-processing and modeling library for generative, pre-trained transformers over continuous-time sequences of complex events\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[9].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-9-1\u0022 title=\u0022View reference [9] in text\u0022 id=\u0022ref-9\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.01.23.26344677v1.9\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EMilliman\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003ECommercial reimbursement benchmarking\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/www.milliman.com\/en\/insight\/commercial-reimbursement-benchmarking\u0022\u003Ehttps:\/\/www.milliman.com\/en\/insight\/commercial-reimbursement-benchmarking\u003C\/a\u003E. Accessed: \u003Cspan class=\u0022cit-date-in-citation\u0022\u003EOctober 2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[10].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-10-1\u0022 title=\u0022View reference [10] in text\u0022 id=\u0022ref-10\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.10\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPedregosa\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EVaroquaux\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGramfort\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EV.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMichel\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB.\u003C\/span\u003E \u003Cspan 
class=\u0022cit-name-surname\u0022\u003EThirion\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGrisel\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBlondel\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPrettenhofer\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWeiss\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EV.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDubourg\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EVanderplas\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPassos\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECournapeau\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan 
class=\u0022cit-name-surname\u0022\u003EBrucher\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPerrot\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EDuchesnay\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EScikit-learn: Machine learning in Python\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJournal of Machine Learning Research\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E12\u003C\/span\u003E:\u003Cspan class=\u0022cit-fpage\u0022\u003E2825\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E2830\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2011\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJournal%2Bof%2BMachine%2BLearning%2BResearch%26rft.volume%253D12%26rft.spage%253D2825%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[11].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-11-1\u0022 title=\u0022View reference [11] in text\u0022 id=\u0022ref-11\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.01.23.26344677v1.11\u0022\u003E\u003Cdiv 
class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERajbhandari\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERasley\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERuwase\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHe\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-chapter-title\u0022\u003EZero: Memory optimizations toward training trillion parameter models\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EIn SC20: International Conference for High Performance Computing, Networking, Storage and Analysis\u003C\/span\u003E, pages \u003Cspan class=\u0022cit-fpage\u0022\u003E1\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E16\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-publ-name\u0022\u003EIEEE\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[12].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-12-1\u0022 title=\u0022View reference [12] in text\u0022 id=\u0022ref-12\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.12\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERenc\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJia\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. E.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESamir\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWas\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELi\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED. 
W.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBates\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESitek\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EZero shot health trajectory prediction using transformer\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003Enpj Digital Medicine\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E7\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-month\u0022\u003ESept\u003C\/span\u003E. \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[13].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-13-1\u0022 title=\u0022View reference [13] in text\u0022 id=\u0022ref-13\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.01.23.26344677v1.13\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ERupp\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPeter\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPattipaka\u003C\/span\u003E\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003EExBEHRT: Extended Transformer for Electronic Health Records\u003C\/span\u003E, page \u003Cspan class=\u0022cit-fpage\u0022\u003E73\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E84\u003C\/span\u003E. \u003Cspan class=\u0022cit-publ-name\u0022\u003ESpringer Nature Switzerland\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[14].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-14-1\u0022 title=\u0022View reference [14] in text\u0022 id=\u0022ref-14\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.01.23.26344677v1.14\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Col class=\u0022duplicate\u0022\u003E\u003Cli\u003E\u003Cspan class=\u0022cit-ed\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKorhonen\u003C\/span\u003E\u003C\/span\u003E, \u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022cit-ed\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETraum\u003C\/span\u003E\u003C\/span\u003E, and \u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022cit-ed\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EM\u00e0rquez\u003C\/span\u003E\u003C\/span\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESerrano\u003C\/span\u003E\u003C\/span\u003E and \u003Cspan 
class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESmith\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-chapter-title\u0022\u003EIs attention interpretable?\u003C\/span\u003E In \u003Cspan class=\u0022cit-ed\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKorhonen\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-ed\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ETraum\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-ed\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EM\u00e0rquez\u003C\/span\u003E\u003C\/span\u003E, editors, \u003Cspan class=\u0022cit-source\u0022\u003EProceedings of the 57th Annual Meeting of the Association for Computational Linguistics\u003C\/span\u003E, pages \u003Cspan class=\u0022cit-fpage\u0022\u003E2931\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E2951\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EFlorence, Italy\u003C\/span\u003E, \u003Cspan class=\u0022cit-month\u0022\u003EJuly\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-publ-name\u0022\u003EAssociation for Computational Linguistics\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[15].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-15-1\u0022 title=\u0022View reference [15] in text\u0022 id=\u0022ref-15\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.01.23.26344677v1.15\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003ESOAPAssist\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EThe soap note one-liner\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/soapassist.com\/the-soap-note-one-liner\/\u0022\u003Ehttps:\/\/soapassist.com\/the-soap-note-one-liner\/\u003C\/a\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E. 
Accessed: \u003Cspan class=\u0022cit-date-in-citation\u0022\u003E2023-10-31\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[16].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-16-1\u0022 title=\u0022View reference [16] in text\u0022 id=\u0022ref-16\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.16\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESteinberg\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFries\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EXu\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShah\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EMotor: A time-to-event foundation model for structured medical records\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[17].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-17-1\u0022 title=\u0022View reference [17] in text\u0022 id=\u0022ref-17\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.17\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESteinberg\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJung\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFries\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. K.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECorbin\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. R.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPfohl\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN. H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShah\u003C\/span\u003E\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-article-title\u0022\u003ELanguage models are an effective patient representation learning technique for electronic health record data\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[18].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-18-1\u0022 title=\u0022View reference [18] in text\u0022 id=\u0022ref-18\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.18\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESu\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMessica\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHuang\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EJohnson\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFesser\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan 
class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EGao\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESahneh\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EZitnik\u003C\/span\u003E\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EMultimodal medical code tokenizer\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[19].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-19-1\u0022 title=\u0022View reference [19] in text\u0022 id=\u0022ref-19\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.01.23.26344677v1.19\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EWorld Health Organisation (WHO\u003C\/span\u003E). \u003Cspan class=\u0022cit-source\u0022\u003EHepatitis B fact sheet\u003C\/span\u003E. 
\u003Ca href=\u0022https:\/\/www.who.int\/news-room\/fact-sheets\/detail\/hepatitis-b\u0022\u003Ehttps:\/\/www.who.int\/news-room\/fact-sheets\/detail\/hepatitis-b\u003C\/a\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[20].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-20-1\u0022 title=\u0022View reference [20] in text\u0022 id=\u0022ref-20\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.20\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWornow\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EThapa\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ESteinberg\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. A.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFries\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN. H.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EShah\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EEHRSHOT: An EHR benchmark for few-shot evaluation of foundation models\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E[21].\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-21-1\u0022 title=\u0022View reference [21] in text\u0022 id=\u0022ref-21\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.01.23.26344677v1.21\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EZ.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EYang\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMitra\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELiu\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBerlowitz\u003C\/span\u003E\u003C\/span\u003E, and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EYu\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003ETransformEHR: transformer-based encoder-decoder generative model to enhance prediction of disease outcomes using electronic health records\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENature communications\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E14\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E):\u003Cspan class=\u0022cit-fpage\u0022\u003E7857\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNature%2Bcommunications%26rft.volume%253D14%26rft.spage%253D7857%26rft_id%253Dinfo%253Apmid%252F38030638%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=38030638\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F01%2F24%2F2026.01.23.26344677.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003C\/div\u003E\u003Cspan class=\u0022highwire-journal-article-marker-end\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003Cspan class=\u0022related-urls\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E  \u003C\/div\u003E\n\n  \n  \u003C\/div\u003E\n\u003C\/div\u003E\n  \u003C\/div\u003E\n\u003C\/div\u003E\n\u003C\/div\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_zP7WWIfzbyzvaM63L39cNV2juU_1XVH7wduFK9gcMNI.js\u0022\u003E\u003C\/script\u003E\n\u003C\/body\u003E\u003C\/html\u003E"}