{"markup":"\u003C?xml version=\u00221.0\u0022 encoding=\u0022UTF-8\u0022 ?\u003E\n    \u003Chtml version=\u0022HTML+RDFa+MathML 1.1\u0022\n    xmlns:content=\u0022http:\/\/purl.org\/rss\/1.0\/modules\/content\/\u0022\n    xmlns:dc=\u0022http:\/\/purl.org\/dc\/terms\/\u0022\n    xmlns:foaf=\u0022http:\/\/xmlns.com\/foaf\/0.1\/\u0022\n    xmlns:og=\u0022http:\/\/ogp.me\/ns#\u0022\n    xmlns:rdfs=\u0022http:\/\/www.w3.org\/2000\/01\/rdf-schema#\u0022\n    xmlns:sioc=\u0022http:\/\/rdfs.org\/sioc\/ns#\u0022\n    xmlns:sioct=\u0022http:\/\/rdfs.org\/sioc\/types#\u0022\n    xmlns:skos=\u0022http:\/\/www.w3.org\/2004\/02\/skos\/core#\u0022\n    xmlns:xsd=\u0022http:\/\/www.w3.org\/2001\/XMLSchema#\u0022\n    xmlns:mml=\u0022http:\/\/www.w3.org\/1998\/Math\/MathML\u0022\u003E\n  \u003Chead\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_YjAJQgxDlFX6S-O02jj9jCrVbrwlY3CGgCg1FzPlvBs.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nif(typeof window.MathJax === \u0022undefined\u0022) window.MathJax = { menuSettings: { zoom: \u0022Click\u0022 } };\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_waP91NpgGpectm_6Y2XDEauLJ8WCSCBKmmA87unpp2E.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.googletagmanager.com\/gtag\/js?id=G-0K57TCX5BY\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nwindow.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments)};gtag(\u0022js\u0022, new 
Date());gtag(\u0022set\u0022, \u0022developer_id.dMDhkMT\u0022, true);gtag(\u0022config\u0022, \u0022G-0K57TCX5BY\u0022, {\u0022groups\u0022:\u0022default\u0022,\u0022anonymize_ip\u0022:true});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\njQuery.extend(Drupal.settings, {\u0022basePath\u0022:\u0022\\\/\u0022,\u0022pathPrefix\u0022:\u0022\u0022,\u0022highwire\u0022:{\u0022ac\u0022:{\u0022medrxiv;2026.03.02.26347469v1\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022medrxiv;2026.03.02.26347469v1\u0022,\u0022apath\u0022:\u0022\u0022,\u0022jcode\u0022:\u0022medrxiv\u0022}},\u0022processed\u0022:[\u0022highwire_math\u0022],\u0022markup\u0022:[{\u0022requested\u0022:\u0022full-text\u0022,\u0022variant\u0022:\u0022full-text\u0022,\u0022view\u0022:\u0022full\u0022,\u0022pisa\u0022:\u0022medrxiv;2026.03.02.26347469v1\u0022}]},\u0022instances\u0022:\u0022{\\u0022highwire_abstract_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:20,\\u0022height\\u0022:20,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-abstract-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-abstract-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022right center\\u0022,\\u0022my\\u0022:\\u0022left center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022shift\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter click \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_author_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-author-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-author-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022top center\\u0022,\\u0022my\\u0022:\\u0022bottom center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_reflinks_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022mimic\\u0022:\\u0022top center\\u0022,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-ref-link-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-ref-link-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022bottom left\\u0022,\\u0022my\\u0022:\\u0022top left\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022flip\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}}}\u0022,\u0022qtipDebug\u0022:\u0022{\\u0022leaveElement\\u0022:0}\u0022,\u0022googleanalytics\u0022:{\u0022account\u0022:[\u0022G-0K57TCX5BY\u0022],\u0022trackOutbound\u0022:1,\u0022trackMailto\u0022:1,\u0022trackDownload\u0022:1,\u0022trackDownloadExtensions\u0022:\u00227z|aac|arc|arj|asf|asx|avi|bin|csv|doc(x|m)?|dot(x|m)?|exe|flv|gif|gz|gzip|hqx|jar|jpe?g|js|mp(2|3|4|e?g)|mov(ie)?|msi|msp|pdf|phps|png|ppt(x|m)?|pot(x|m)?|pps(x|m)?|ppam|sld(x|m)?|thmx|qtm?|ra(m|r)?|sea|sit|tar|tgz|torrent|txt|wav|wma|wmv|wpd|xls(x|m|b)?|xlt(x|m)|xlam|xml|z|zip\u0022,\u0022trackColorbox\u0022:1},\u0022ajaxPageState\u0022:{\u0022js\u0022:{\u0022\\\/\\\/cdn.jsdelivr.net\\\/qtip2\\\/2.2.1\\\/jquery.qtip.min.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_article_reference_popup.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_at_symbol.js\u0022:1,\u00220\u0022:1,\u0022sites\\\/all\\\/modules\\\/contrib\\\/google_analytics\\\/googleanalytics.js\u0022:1,\u0022https:\\\/\\\/www.googletagmanager.com\\\/gtag\\\/js?id=G-0K57TCX5BY\u0022:1,\u00221\u0022:1}}});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__dn-cpI1YtkU_iLHgA5WhlkxgYWyat_IxjF_B-WSYrpE__a9hIbt0eaZ7d5nhwnm2weG8R_2eXK4EvoOx9dOxouHE__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 
href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__HGACIFBlu2o05y3afvqlt5wrE_5Dn6MXsexfuEpeIwg__t4SOPxucAPoV3Os7g8dXqyMB1HRXQridRJ82X7nE33E__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink rel=\u0027stylesheet\u0027 type=\u0027text\/css\u0027 href=\u0027\/sites\/all\/modules\/contrib\/panels\/plugins\/layouts\/onecol\/onecol.css\u0027 \/\u003E\u003C\/head\u003E\u003Cbody\u003E\u003Cdiv class=\u0022panels-ajax-tab-panel panels-ajax-tab-panel-article-tab-full-text\u0022\u003E\u003Cdiv class=\u0022panel-display panel-1col clearfix\u0022 \u003E\n  \u003Cdiv class=\u0022panel-panel panel-col\u0022\u003E\n    \u003Cdiv\u003E\u003Cdiv class=\u0022panel-pane pane-highwire-markup\u0022 \u003E\n  \n      \n  \n  \u003Cdiv class=\u0022pane-content\u0022\u003E\n    \u003Cdiv class=\u0022highwire-markup\u0022\u003E\u003Cdiv xmlns=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022 data-highwire-cite-ref-tooltip-instance=\u0022highwire_reflinks_tooltip\u0022 class=\u0022content-block-markup\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cdiv class=\u0022article fulltext-view \u0022\u003E\u003Cspan class=\u0022highwire-journal-article-marker-start\u0022\u003E\u003C\/span\u003E\u003Cdiv class=\u0022section abstract\u0022 id=\u0022abstract-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAbstract\u003C\/h2\u003E\u003Cp id=\u0022p-3\u0022\u003ERare diseases affect over 300 million people worldwide, yet patients often endure years-long diagnostic delays that limit timely intervention and trial opportunities. Computational rare disease recognition (RDR) remains constrained by knowledge resources that are often incomplete, heterogeneous, and dependent on extensive multi-disciplinary expert curation that cannot scale. 
Large language models (LLMs) applied directly for end-to-end diagnosis or disease discrimination face similar knowledge bottlenecks while also raising concerns around cost, reproducibility, and data governance. Here, we introduce GEN-KnowRD, a knowledge-layer-first framework that leverages LLMs to generate schema-guided rare disease profiles, systematically assesses their quality, and constructs a computable knowledge base (PheMAP-RD) for local deployment. GEN-KnowRD integrates this knowledge into lightweight inference pipelines for both general-purpose disease screening and specialized early discrimination from longitudinal electronic health records. Across six public benchmarks for general-purpose screening (9,290 patients spanning 798 rare diseases), GEN-KnowRD significantly improves disease ranking compared to a state-of-the-art, HPO-centered diagnostic framework (up to 345.8% improvement in top-1 success), advanced end-to-end LLM reasoning (up to 129.1% improvement), and a variant of GEN-KnowRD instantiated with expert-curated knowledge rather than LLM-generated profiles. In two real-world cohorts for early diagnosis of idiopathic pulmonary fibrosis (511 patients) as a use case, GEN-KnowRD also demonstrates robust discrimination performance gains, supporting effective RDR during the pre-diagnostic window. 
These findings demonstrate that repositioning LLMs from diagnostic reasoning to the knowledge layer\u2014decoupling knowledge construction from patient-level inference\u2014yields stronger RDR, while providing scalable, continuously updatable, and reusable infrastructure for diagnosis, screening, and clinical research across the rare disease landscape.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EMain\u003C\/h2\u003E\u003Cp id=\u0022p-16\u0022\u003ERare diseases collectively affect an estimated 300 million people worldwide,\u003Csup\u003E\u003Ca id=\u0022xref-ref-1-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-1\u0022\u003E1\u003C\/a\u003E\u003C\/sup\u003E yet their clinical complexity and rarity leave most clinicians with limited experience in recognizing them.\u003Csup\u003E\u003Ca id=\u0022xref-ref-2-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-2\u0022\u003E2\u003C\/a\u003E\u2013\u003Ca id=\u0022xref-ref-4-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E\u003C\/sup\u003E As a result, patients often endure years-long diagnostic odysseys, missing critical windows for treatment, clinical trial participation, and therapeutic development.\u003Csup\u003E\u003Ca id=\u0022xref-ref-5-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-5\u0022\u003E5\u003C\/a\u003E\u003C\/sup\u003E Computational approaches to rare disease recognition (RDR) have sought to close this gap by extracting phenotypic (and, where available, genotypic) features from patient records, mapping them to standardized representations such as Human Phenotype Ontology (HPO) terms, and matching them against human-curated knowledge resources\u003Csup\u003E\u003Ca id=\u0022xref-ref-6-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-6\u0022\u003E6\u003C\/a\u003E\u2013\u003Ca id=\u0022xref-ref-17-1\u0022 class=\u0022xref-bibr\u0022 
href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E\u003C\/sup\u003E to prioritize candidate diseases for investigation. However, the knowledge resources this paradigm depends on are typically heterogeneous, incomplete, biased toward well-studied conditions, and reliant on extensive manual curation that is expensive to maintain and cannot keep pace with rapidly evolving medical evidence\u003Csup\u003E\u003Ca id=\u0022xref-ref-18-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-18\u0022\u003E18\u003C\/a\u003E\u2013\u003Ca id=\u0022xref-ref-24-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-24\u0022\u003E24\u003C\/a\u003E\u003C\/sup\u003E, which increasingly constrain the reliability, scalability, and equity of RDR.\u003C\/p\u003E\u003Cp id=\u0022p-17\u0022\u003ERecent advances in large language models (LLMs) have sparked enthusiasm for end-to-end diagnostic reasoning.\u003Csup\u003E\u003Ca id=\u0022xref-ref-25-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-25\u0022\u003E25\u003C\/a\u003E\u2013\u003Ca id=\u0022xref-ref-29-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-29\u0022\u003E29\u003C\/a\u003E\u003C\/sup\u003E However, directly applying LLMs as diagnosticians or discriminators does not fundamentally resolve the knowledge infrastructure problem. 
The knowledge encoded in LLMs\u2019 parameters is implicit and difficult to govern or trace, while retrieval-augmented approaches remain only as reliable as the underlying corpora and the retrieval pipelines,\u003Csup\u003E\u003Ca id=\u0022xref-ref-30-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-30\u0022\u003E30\u003C\/a\u003E\u2013\u003Ca id=\u0022xref-ref-33-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-33\u0022\u003E33\u003C\/a\u003E\u003C\/sup\u003E offering no assurance that the evidence most relevant to a given patient in terms of semantics and granularity will be reliably retrieved,\u003Csup\u003E\u003Ca id=\u0022xref-ref-34-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-34\u0022\u003E34\u003C\/a\u003E\u003C\/sup\u003E correctly grounded,\u003Csup\u003E\u003Ca id=\u0022xref-ref-35-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-35\u0022\u003E35\u003C\/a\u003E\u003C\/sup\u003E or consistently applied.\u003Csup\u003E\u003Ca id=\u0022xref-ref-36-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-36\u0022\u003E36\u003C\/a\u003E\u003C\/sup\u003E These limitations are compounded by practical constraints. Deploying frontier LLMs for routine clinical queries is computationally and financially prohibitive at health-system scale, and transmitting sensitive patient information to proprietary models or external services they invoke raises concerns around data privacy and governance.\u003Csup\u003E\u003Ca id=\u0022xref-ref-37-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-37\u0022\u003E37\u003C\/a\u003E\u003C\/sup\u003E Together, these factors motivate an alternative strategy, one that harnesses LLM capabilities upstream, while keeping patient-level inference lightweight, local, and reliable.\u003C\/p\u003E\u003Cp id=\u0022p-18\u0022\u003EWe hypothesize that the core limitation of computational RDR lies not in the sophistication of downstream inference but upstream, in how disease knowledge is synthesized, structured, and made computable. 
Rather than using LLMs as per-patient reasoners that reconstruct knowledge context for each patient-level inference, we reposition them to the knowledge layer where they generate reusable disease representations. Under this paradigm, inference can be executed through lightweight, deterministic pipelines grounded in a shared knowledge layer, thereby decoupling knowledge synthesis from patient-level reasoning.\u003C\/p\u003E\u003Cp id=\u0022p-19\u0022\u003EHere, we present GEN-KnowRD (GENerative Knowledge-driven Rare Disease recognition framework; \u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1a\u003C\/a\u003E\u003C\/strong\u003E), a framework that transforms LLM use for RDR by moving LLMs from the point of diagnosis or discrimination to the knowledge layer. Using the rare disease report catalog of the National Organization for Rare Disorders (NORD) database\u003Csup\u003E\u003Ca id=\u0022xref-ref-38-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-38\u0022\u003E38\u003C\/a\u003E\u003C\/sup\u003E, GEN-KnowRD 1) prompts multiple advanced LLMs (Claude Sonnet 4\u003Csup\u003E\u003Ca id=\u0022xref-ref-39-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-39\u0022\u003E39\u003C\/a\u003E\u003C\/sup\u003E, DeepSeek R1\u003Csup\u003E\u003Ca id=\u0022xref-ref-40-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-40\u0022\u003E40\u003C\/a\u003E\u003C\/sup\u003E, Gemini 2.5 Pro\u003Csup\u003E\u003Ca id=\u0022xref-ref-41-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-41\u0022\u003E41\u003C\/a\u003E\u003C\/sup\u003E, and OpenAI o3\u003Csup\u003E\u003Ca id=\u0022xref-ref-42-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-42\u0022\u003E42\u003C\/a\u003E\u003C\/sup\u003E) to produce schema-guided disease profiles; 2) conducts multi-faceted quality assessment of the resulting disease profiles; and 3) converts them into a computable knowledge base, PheMAP-RD.\u003C\/p\u003E\u003Cdiv id=\u0022F1\u0022 
class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F1.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022An overview of GEN-KnowRD architecture, evaluation strategy, and datasets used. a, For each rare disease in the NORD rare disease report catalog, multiple large language models (LLM) are independently prompted to generate a schema-guided disease profile comprising ten clinically meaningful sections; the quality of LLM-generated rare disease profiles is assessed using a multi-faceted framework that combines objective quantitative metrics with clinical expert review; a general-purpose rare disease screening pipeline then compares both sparse and dense representations of patient clinical presentation description against the corresponding representations of rare diseases to derive an initial disease ranking (Stage 1), followed by a knowledge-boosted reranker that refines the top candidate list (Stage 2); a specialized rare disease discrimination pipeline, illustrated using IPF as a use case, extracts disease-relevant clinical evidence and quantifies symptom presence and severity from longitudinal clinical notes prior to diagnosis, followed by lightweight classification models that distinguish IPF from non-IPF patients and from suspected IPF patients. b, A summary of the public benchmarks used to evaluate the performance of the general-purpose rare disease screening pipeline, as well as the utility of disease profiles for rare disease recognition. c, Composition of the public benchmarks used for evaluation. d, Disease distribution across the public benchmarks after excluding patients whose diseases fall outside of the scope of the NORD report catalog. 
e, Distribution of disease complexity across the public benchmarks, based on the Orphanet disease classification and defined by the number of affected body systems. f, A summary of the private benchmark and tasks from Vanderbilt University Medical Center used to evaluate the performance of the specialized rare disease discrimination pipeline, as well as the utility of disease profiles for rare disease recognition. NORD: National Organization for Rare Disorders; LLM: large language model; UMLS: unified medical language system; AUROC: area under the receiver operating characteristic curve; MRR: mean reciprocal rank; IPF: idiopathic pulmonary fibrosis.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2117183985\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;div xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot;\u0026gt;\u0026lt;span class=\u0026quot;caption-title\u0026quot;\u0026gt;An overview of GEN-KnowRD architecture, evaluation strategy, and datasets used.\u0026lt;\/span\u0026gt; \u0026lt;strong\u0026gt;a,\u0026lt;\/strong\u0026gt; For each rare disease in the NORD rare disease report catalog, multiple large language models (LLM) are independently prompted to generate a schema-guided disease profile comprising ten clinically meaningful sections; the quality of LLM-generated rare disease profiles is assessed using a multi-faceted framework that combines objective quantitative metrics with clinical expert review; a general-purpose rare disease screening pipeline then compares both sparse and dense representations of patient clinical presentation description against the corresponding representations of rare diseases to derive an initial disease ranking (Stage 1), followed by a knowledge-boosted reranker that refines the top candidate list (Stage 2); a specialized rare disease discrimination pipeline, illustrated using IPF as a use case, 
extracts disease-relevant clinical evidence and quantifies symptom presence and severity from longitudinal clinical notes prior to diagnosis, followed by lightweight classification models that distinguish IPF from non-IPF patients and from suspected IPF patients. \u0026lt;strong\u0026gt;b,\u0026lt;\/strong\u0026gt; A summary of the public benchmarks used to evaluate the performance of the general-purpose rare disease screening pipeline, as well as the utility of disease profiles for rare disease recognition. \u0026lt;strong\u0026gt;c,\u0026lt;\/strong\u0026gt; Composition of the public benchmarks used for evaluation. \u0026lt;strong\u0026gt;d,\u0026lt;\/strong\u0026gt; Disease distribution across the public benchmarks after excluding patients whose diseases fall outside of the scope of the NORD report catalog. \u0026lt;strong\u0026gt;e,\u0026lt;\/strong\u0026gt; Distribution of disease complexity across the public benchmarks, based on the Orphanet disease classification and defined by the number of affected body systems. \u0026lt;strong\u0026gt;f,\u0026lt;\/strong\u0026gt; A summary of the private benchmark and tasks from Vanderbilt University Medical Center used to evaluate the performance of the specialized rare disease discrimination pipeline, as well as the utility of disease profiles for rare disease recognition. NORD: National Organization for Rare Disorders; LLM: large language model; UMLS: unified medical language system; AUROC: area under the receiver operating characteristic curve; MRR: mean reciprocal rank; IPF: idiopathic pulmonary fibrosis.\u0026lt;\/div\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 
1:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022230\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 1:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022230\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F1.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 1:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F1.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 
1:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EAn overview of GEN-KnowRD architecture, evaluation strategy, and datasets used.\u003C\/span\u003E\u003Cp id=\u0022p-20\u0022 class=\u0022first-child\u0022\u003E\u003Cstrong\u003Ea,\u003C\/strong\u003E For each rare disease in the NORD rare disease report catalog, multiple large language models (LLM) are independently prompted to generate a schema-guided disease profile comprising ten clinically meaningful sections; the quality of LLM-generated rare disease profiles is assessed using a multi-faceted framework that combines objective quantitative metrics with clinical expert review; a general-purpose rare disease screening pipeline then compares both sparse and dense representations of patient clinical presentation description against the corresponding representations of rare diseases to derive an initial disease ranking (Stage 1), followed by a knowledge-boosted reranker that refines the top candidate list (Stage 2); a specialized rare disease discrimination pipeline, illustrated using IPF as a use case, extracts disease-relevant clinical evidence and quantifies symptom presence and severity from longitudinal clinical notes prior to diagnosis, followed by lightweight classification models that distinguish IPF from non-IPF patients and from suspected IPF patients. \u003Cstrong\u003Eb,\u003C\/strong\u003E A summary of the public benchmarks used to evaluate the performance of the general-purpose rare disease screening pipeline, as well as the utility of disease profiles for rare disease recognition. \u003Cstrong\u003Ec,\u003C\/strong\u003E Composition of the public benchmarks used for evaluation. \u003Cstrong\u003Ed,\u003C\/strong\u003E Disease distribution across the public benchmarks after excluding patients whose diseases fall outside of the scope of the NORD report catalog. 
\u003Cstrong\u003Ee,\u003C\/strong\u003E Distribution of disease complexity across the public benchmarks, based on the Orphanet disease classification and defined by the number of affected body systems. \u003Cstrong\u003Ef,\u003C\/strong\u003E A summary of the private benchmark and tasks from Vanderbilt University Medical Center used to evaluate the performance of the specialized rare disease discrimination pipeline, as well as the utility of disease profiles for rare disease recognition. NORD: National Organization for Rare Disorders; LLM: large language model; UMLS: unified medical language system; AUROC: area under the receiver operating characteristic curve; MRR: mean reciprocal rank; IPF: idiopathic pulmonary fibrosis.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-21\u0022\u003EBuilding on this resource, GEN-KnowRD then implements two inferential pipelines that enable rich clinical evidence distillation: one designed for general-purpose disease screening that produces a ranked list of candidate diseases, and another tailored to specialized disease discrimination that classifies whether a patient has a target disease. In our design, these pipelines operate on patient-level clinical evidence derived from existing health records, while PheMAP-RD is applied as a reusable, local knowledge resource to the patient data without requiring patient information to be sent to any LLM. 
Extensive evaluations across both public (9,290 patients spanning 798 distinct rare diseases) and private (longitudinal cohorts for idiopathic pulmonary fibrosis diagnosis that involve 511 patients from Vanderbilt University Medical Center) benchmarks demonstrate that GEN-KnowRD consistently 1) outperforms a state-of-the-art, two-stage HPO-centered diagnostic framework (PhenoBrain\u003Csup\u003E\u003Ca id=\u0022xref-ref-43-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-43\u0022\u003E43\u003C\/a\u003E\u003C\/sup\u003E), 2) surpasses pipelines based on human-curated knowledge bases (i.e., NORD rare disease reports\u003Csup\u003E\u003Ca id=\u0022xref-ref-38-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-38\u0022\u003E38\u003C\/a\u003E\u003C\/sup\u003E), and 3) achieves performance that is better than OpenAI GPT-5, a state-of-the-art LLM baseline performing end-to-end inference. Most notably, GEN-KnowRD based on the knowledge source generated by Claude Sonnet 4 delivers the strongest overall performance and robust gains, surpassing alternative LLM-generated disease profiles across evaluation settings. We have released PheMAP-RD through our public service platform\u003Csup\u003E\u003Ca id=\u0022xref-ref-44-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-44\u0022\u003E44\u003C\/a\u003E\u003C\/sup\u003E (Supplementary \u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFigs. 1\u003C\/a\u003E,\u003Ca id=\u0022xref-fig-2-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003E2\u003C\/a\u003E\u003C\/strong\u003E) as a reusable resource that can be readily integrated into future diagnostic systems, including retrieval-augmented and agentic LLM pipelines. 
Rather than competing with end-to-end LLM- or agent-based diagnostic approaches, GEN-KnowRD complements them by providing better-structured knowledge and principled processing modules that can strengthen evidence retrieval, grounding, and downstream decision support.\u003C\/p\u003E\u003Cdiv id=\u0022F2\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F2.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Evaluation of LLM-generated RDK using semantic-based analysis and expert review. a, Readability of rare disease profiles measured by the Simple Measure of Gobbledygook (SMOG). b, Average number of references generated per disease. c, Distribution of reference source types. d, Number of unique matched UMLS concepts per disease across semantic groups (each concept is counted once per profile, even if matched multiple times). e, Jaccard similarity of UMLS concept sets among models within each semantic group. 
f, Expert evaluation of rare disease profiles for four randomly sampled rare diseases, with scores averaged across two clinical experts blinded to the source of disease profiles.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2117183985\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;div xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot;\u0026gt;\u0026lt;span class=\u0026quot;caption-title\u0026quot;\u0026gt;Evaluation of LLM-generated RDK using semantic-based analysis and expert review.\u0026lt;\/span\u0026gt; \u0026lt;strong\u0026gt;a,\u0026lt;\/strong\u0026gt; Readability of rare disease profiles measured by the Simple Measure of Gobbledygook (SMOG). \u0026lt;strong\u0026gt;b,\u0026lt;\/strong\u0026gt; Average number of references generated per disease. \u0026lt;strong\u0026gt;c,\u0026lt;\/strong\u0026gt; Distribution of reference source types. \u0026lt;strong\u0026gt;d,\u0026lt;\/strong\u0026gt; Number of unique matched UMLS concepts per disease across semantic groups (each concept is counted once per profile, even if matched multiple times). \u0026lt;strong\u0026gt;e,\u0026lt;\/strong\u0026gt; Jaccard similarity of UMLS concept sets among models within each semantic group. \u0026lt;strong\u0026gt;f,\u0026lt;\/strong\u0026gt; Expert evaluation of rare disease profiles for four randomly sampled rare diseases, with scores averaged across two clinical experts blinded to the source of disease profiles.\u0026lt;\/div\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 
2:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022174\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 2:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022174\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F2.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 2:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F2.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 2:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EEvaluation of LLM-generated RDK using semantic-based analysis and expert review.\u003C\/span\u003E\u003Cp id=\u0022p-22\u0022 class=\u0022first-child\u0022\u003E\u003Cstrong\u003Ea,\u003C\/strong\u003E Readability of rare disease profiles measured by the Simple Measure of Gobbledygook (SMOG). 
\u003Cstrong\u003Eb,\u003C\/strong\u003E Average number of references generated per disease. \u003Cstrong\u003Ec,\u003C\/strong\u003E Distribution of reference source types. \u003Cstrong\u003Ed,\u003C\/strong\u003E Number of unique matched UMLS concepts per disease across semantic groups (each concept is counted once per profile, even if matched multiple times). \u003Cstrong\u003Ee,\u003C\/strong\u003E Jaccard similarity of UMLS concept sets among models within each semantic group. \u003Cstrong\u003Ef,\u003C\/strong\u003E Expert evaluation of rare disease profiles for four randomly sampled rare diseases, with scores averaged across two clinical experts blinded to the source of disease profiles.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-2\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EAn overview of GEN-KnowRD and datasets\u003C\/h3\u003E\u003Cp id=\u0022p-23\u0022\u003EGEN-KnowRD is comprised of four components (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1a\u003C\/a\u003E\u003C\/strong\u003E): 1) LLM-based rare disease profile creation, 2) multi-faceted RDK quality evaluation, 3) general-purpose rare disease screening, and 4) specialized rare disease discrimination. Rather than relying on the knowledge encoded in LLMs or on existing external knowledge for RDR, GEN-KnowRD harnesses LLMs to construct a systematic, well-structured, and reusable RDK layer in a machine-actionable form. 
We scope the rare disease profile creation to the 1,320 diseases in the NORD Rare Disease Database\u003Csup\u003E\u003Ca id=\u0022xref-ref-38-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-38\u0022\u003E38\u003C\/a\u003E\u003C\/sup\u003E (which incorporates knowledge from OMIM and Orphanet) for which NORD provides expert-curated, in-depth disease reports covering disease characteristics, diagnosis evaluation, management, and prognosis. This subset serves as a rigorously defined evaluation backbone for three reasons. First, these diseases are supported by structured, clinically vetted descriptions that enable systematic comparison between LLM-generated and expert-curated knowledge under controlled conditions. Second, they correspond to rare diseases with sufficiently annotated patient cohorts across public benchmarks, permitting reliable, label-verified performance evaluation. Third, constraining the candidate space during initial validation allows us to isolate the impact of knowledge-layer quality and inference architecture without confounding effects from diseases lacking standardized descriptions or benchmark cases. Importantly, GEN-KnowRD is modular and not restricted to this subset. The framework can be extended to additional rare or common diseases as structured knowledge sources and labeled cohorts become available. 
The present scope therefore provides a high-quality, controlled testbed to evaluate knowledge-layer construction and its downstream impact, rather than a structural limitation of the approach.\u003C\/p\u003E\u003Cp id=\u0022p-24\u0022\u003EWe selected four representative LLMs, i.e., Claude Sonnet 4\u003Csup\u003E\u003Ca id=\u0022xref-ref-39-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-39\u0022\u003E39\u003C\/a\u003E\u003C\/sup\u003E, DeepSeek R1\u003Csup\u003E\u003Ca id=\u0022xref-ref-40-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-40\u0022\u003E40\u003C\/a\u003E\u003C\/sup\u003E, Gemini 2.5 Pro\u003Csup\u003E\u003Ca id=\u0022xref-ref-41-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-41\u0022\u003E41\u003C\/a\u003E\u003C\/sup\u003E, and OpenAI o3\u003Csup\u003E\u003Ca id=\u0022xref-ref-42-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-42\u0022\u003E42\u003C\/a\u003E\u003C\/sup\u003E, because they demonstrate state-of-the-art performance across diverse model families and offer strong reasoning and synthesis capabilities well-suited for knowledge layer construction. Each model was then instructed to generate a schema-guided multi-section profile for each disease. The profiles cover ten prespecified axes: 1) disease overview, 2) synonyms and abbreviations, 3) subtypes or variants, 4) epidemiology, 5) etiology and pathogenesis, 6) clinical presentation, 7) diagnostic evaluation, 8) management and standard therapy, 9) investigational or emerging therapies, and 10) prognosis. 
The prompt template used is detailed in Supplementary \u003Cstrong\u003E\u003Ca id=\u0022xref-table-wrap-1-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E\u003C\/strong\u003E.\u003C\/p\u003E\u003Cdiv id=\u0022T1\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/1169637\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca href=\u0022\/highwire\/markup\/1169637\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/1169637\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 1:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003ESummary of 
LLM-produced rare disease profiles and their generation process.\u003C\/span\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-25\u0022\u003EBased on the disease profiles generated from each model, we then completed the corresponding RDK by extracting the disease-relevant clinical concepts. Unlike prior research that has relied primarily on HPO terms for RDR tasks\u003Csup\u003E\u003Ca id=\u0022xref-ref-43-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-43\u0022\u003E43\u003C\/a\u003E\u003C\/sup\u003E, GEN-KnowRD extracts the unified medical language system (UMLS) concepts of selected semantic categories from the collected rare disease profiles, enabling a more informative representation of rare diseases. The rationale for extracting UMLS concepts is twofold. First, HPO is designed to represent phenotypic abnormalities, whereas the UMLS integrates broader and richer information spanning drugs, disorders, procedures, laboratory tests, devices, anatomy, genes, proteins and more that can be used to capture non-phenotype signals for more robust RDR. Second, UMLS concepts aggregate extensive synonyms, abbreviations, and lexical variations that commonly appear in clinical texts, and thus improve the ability to match diverse textual expressions of the same concept across heterogeneous real-world clinical documentation. The extracted UMLS concepts, together with the LLM-produced disease profiles from which the concepts were derived, constitute the RDK and are organized into a unified knowledge base, PheMAP-RD, allowing a direct mapping between clinical narratives and machine-actionable concepts. While PheMAP-RD focuses on disease-level, concept-centric knowledge, more contextual signals are considered in our downstream pipelines. 
To preserve provenance and support source-specific assessment, knowledge from different sources (a specific LLM or NORD reports) is maintained and evaluated separately.\u003C\/p\u003E\u003Cp id=\u0022p-26\u0022\u003EThe multi-faceted RDK quality evaluation combines automated quality checks with targeted expert review. Automated metrics capture key properties of the generation process and the resulting RDK, including execution time, token usage, readability, reference composition, and clinical concept coverage. These analyses are complemented by clinical expert review (V.E.K. and M.G.) of the produced RDK layer for multiple randomly selected rare diseases. This evaluation framework is intended to be applied at each release of disease profiles, with automated checks run for every update and expert review used for quality assurance on sampled diseases and high-impact changes. These expert reviews are part of GEN-KnowRD\u2019s evaluation process and thus are distinct from the expert curation of NORD reports. See Methods for details.\u003C\/p\u003E\u003Cp id=\u0022p-27\u0022\u003EGEN-KnowRD supports general-purpose rare disease screening by introducing a novel two-stage pipeline that ranks the most likely candidate diseases based on a patient\u2019s clinical presentation description (e.g., a summarized document or raw clinical notes). In the first stage, the clinical presentation of a patient first undergoes the same UMLS concept extraction process as mentioned earlier, such that a sparse representation is constructed in the form of a discrete UMLS concept list. GEN-KnowRD then applies the BM25 algorithm\u003Csup\u003E\u003Ca id=\u0022xref-ref-45-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-45\u0022\u003E45\u003C\/a\u003E\u003C\/sup\u003E to calculate the similarity between this sparse representation and that of each rare disease profile, resulting in a concept-based ranked list of rare diseases. 
Rather than solely relying on sparse representations that focus on lexical overlap, GEN-KnowRD also calculates semantic similarity using embedding-based dense representations to generate an independent ranked list of diseases. We selected and finetuned Qwen3-Embedding-8B\u003Csup\u003E\u003Ca id=\u0022xref-ref-46-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-46\u0022\u003E46\u003C\/a\u003E\u003C\/sup\u003E for embedding generation due to its superior performance (Supplementary \u003Cstrong\u003ETable 2\u003C\/strong\u003E). We then used reciprocal rank fusion\u003Csup\u003E\u003Ca id=\u0022xref-ref-47-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-47\u0022\u003E47\u003C\/a\u003E\u003C\/sup\u003E to aggregate sparse and dense disease rankings to derive a fused ranking score at the end of the first stage. The second stage starts with the top 20 diseases according to the fused scores, where each candidate disease (in the form of the combination of disease name and selected sections in the corresponding disease profile) is re-evaluated against the patient\u2019s clinical presentation description using a reranker (Qwen3-reranker-8B\u003Csup\u003E\u003Ca id=\u0022xref-ref-46-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-46\u0022\u003E46\u003C\/a\u003E\u003C\/sup\u003E) to derive an updated disease ranking.\u003C\/p\u003E\u003Cp id=\u0022p-28\u0022\u003EGEN-KnowRD can also perform specialized rare disease discrimination, where the goal is to evaluate a single candidate diagnosis. Given the clinical presentation description of a patient and a hypothesized diagnosis, a task-specific model determines whether the presentation is consistent with that diagnosis. 
To facilitate early identification of idiopathic pulmonary fibrosis (IPF) as a use case, a condition known for prolonged symptom-to-diagnosis delays\u003Csup\u003E\u003Ca id=\u0022xref-ref-48-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-48\u0022\u003E48\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-49-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-49\u0022\u003E49\u003C\/a\u003E\u003C\/sup\u003E, we designed two diagnostic classification studies: 1) distinguishing IPF from non-IPF pulmonary diseases that may present with similar nonspecific symptoms and are often documented earlier in the diagnostic journey, and 2) distinguishing IPF from suspected IPF cases, targeting a clinically important pre-clinical suspicion window of high diagnostic uncertainty when accurately discriminating true IPF among suspected cases can accelerate referral and enable timely entry into IPF-focused care pathways. Using the established IPF RDK, GEN-KnowRD extracts key temporal features from patients\u2019 longitudinal clinical notes and fits classifiers to adjudicate the diagnosis.\u003C\/p\u003E\u003Cp id=\u0022p-29\u0022\u003ETo evaluate the utility of LLM-generated rare disease profiles, as well as the performance of the supported general-purpose disease screening pipeline, we assembled six public rare disease benchmarks (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 
1b\u003C\/a\u003E\u003C\/strong\u003E), including 1) HMS\u003Csup\u003E\u003Ca id=\u0022xref-ref-50-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-50\u0022\u003E50\u003C\/a\u003E\u003C\/sup\u003E, 2) LIRICAL\u003Csup\u003E\u003Ca id=\u0022xref-ref-51-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-51\u0022\u003E51\u003C\/a\u003E\u003C\/sup\u003E, 3) MME\u003Csup\u003E\u003Ca id=\u0022xref-ref-52-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-52\u0022\u003E52\u003C\/a\u003E\u003C\/sup\u003E, 4) RAMEDIS\u003Csup\u003E\u003Ca id=\u0022xref-ref-53-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-53\u0022\u003E53\u003C\/a\u003E\u003C\/sup\u003E, 5) MyGene2\u003Csup\u003E\u003Ca id=\u0022xref-ref-54-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-54\u0022\u003E54\u003C\/a\u003E\u003C\/sup\u003E, and 6) PMC-Patients\u003Csup\u003E\u003Ca id=\u0022xref-ref-55-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-55\u0022\u003E55\u003C\/a\u003E\u003C\/sup\u003E. We combined the first five benchmarks into a single one (Non-PMC) and converted the HPO terms used to describe patient clinical presentation into free-text descriptions. The PMC benchmark (i.e., PMC-Patients; free-text patient summaries) accounts for 92.6% (8,601\/9,290) of patients and includes 651 unique rare diseases, whereas Non-PMC contains 7.4% (689\/9,290) of patients and 40 unique rare diseases (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1b,c\u003C\/a\u003E\u003C\/strong\u003E). The two benchmarks share 107 overlapping diseases (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-6\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1d\u003C\/a\u003E\u003C\/strong\u003E). 
Disease complexity, defined as the number of Orphanet classification assignments (31 categories in total) according to affected body systems\u003Csup\u003E\u003Ca id=\u0022xref-ref-56-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-56\u0022\u003E56\u003C\/a\u003E\u003C\/sup\u003E, exhibits a long-tailed distribution, with most diseases assigned to relatively small complexity (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-7\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1e\u003C\/a\u003E\u003C\/strong\u003E). We also constructed two real-world private diagnostic benchmarks (511 patients in total; Supplementary \u003Cstrong\u003ETable 3\u003C\/strong\u003E) based on longitudinal electronic health records (EHR) data from Vanderbilt University Medical Center for specialized rare disease discrimination (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-8\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1f\u003C\/a\u003E\u003C\/strong\u003E). Detailed dataset preprocessing information is provided in the Methods.\u003C\/p\u003E\u003Cp id=\u0022p-30\u0022\u003EWe evaluated three baseline approaches for comparison. The first is the NORD rare disease reports\u003Csup\u003E\u003Ca id=\u0022xref-ref-38-4\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-38\u0022\u003E38\u003C\/a\u003E\u003C\/sup\u003E, which serves as a gold-standard knowledge source. We compared their utility against LLM-generated rare disease profiles for supporting RDR while keeping the GEN-KnowRD\u2019s RDR pipelines fixed. The second baseline is PhenoBrain\u003Csup\u003E\u003Ca id=\u0022xref-ref-43-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-43\u0022\u003E43\u003C\/a\u003E\u003C\/sup\u003E, a state-of-the-art, HPO-first RDR framework that reflects the prevailing two-stage RDR paradigm. This approach first maps a patient\u2019s clinical presentation description into HPO terms and then ranks diseases using an ensemble model grounded in human-curated knowledge. 
Third, we evaluated GPT-5 as a representative advanced LLM baseline for RDR to assess whether GEN-KnowRD, a lightweight framework, can achieve performance comparable to substantially more computationally intensive end-to-end LLM reasoning approaches.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-3\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EQuality of generative-AI based RDK\u003C\/h3\u003E\u003Cp id=\u0022p-31\u0022\u003ERare disease profile generation demonstrates distinct patterns across LLMs (\u003Cstrong\u003E\u003Ca id=\u0022xref-table-wrap-1-2\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E\u003C\/strong\u003E). First, OpenAI o3 and Gemini 2.5 Pro show the shortest prompt-to-response times, generating each disease profile in approximately 88 seconds, whereas Claude Sonnet 4 and DeepSeek R1 require significantly longer amounts of time (more than 140 seconds per disease). Second, Claude Sonnet 4 consumes the largest prompt contexts (323,766 tokens per disease), due to external web search and the integration of extensive retrieved information; it also generates the most verbose rare disease profiles (5,230 tokens per disease). OpenAI o3 ranks second for both input and output token usage. By contrast, Gemini 2.5 Pro and DeepSeek R1 consume input contexts that are roughly two orders of magnitude smaller and output shorter disease profiles. Third, other than Claude Sonnet 4, whose reasoning information is not available, OpenAI o3 triggers the reasoning function the most (37.5%) and leads to more reasoning tokens per disease (1,288), followed by Gemini 2.5 Pro and DeepSeek R1.\u003C\/p\u003E\u003Cp id=\u0022p-32\u0022\u003EWe observe multiple notable semantic characteristics in the rare disease profiles that were produced. 
Readability scores for all LLM-generated disease profiles, as well as the NORD reports, fall within the college-to-graduate range based on the Simple Measure of Gobbledygook metric (SMOG) (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-2-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2a\u003C\/a\u003E\u003C\/strong\u003E), consistent with our prompt\u2019s intent that the summaries \u201cserve as a reliable Wikipedia entry for use by medical researchers and healthcare professionals\u201d. Among the models, Gemini 2.5 Pro produces the most readable texts (corresponding to the lowest readability score), whereas OpenAI o3 shows the highest linguistic complexity and thus requires greater reading capability. Claude Sonnet 4 and DeepSeek R1 yield readability levels that are the most similar to the expert-curated reports. Since LLMs employ different evidence retrieval strategies, they exhibit distinct citation patterns. Gemini 2.5 Pro and Claude Sonnet 4 produce roughly 20 references per disease, whereas OpenAI o3 generates about 14 (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-2-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2b\u003C\/a\u003E\u003C\/strong\u003E). Given that DeepSeek R1 does not support web search through an application programming interface (API) at the time of our experiments, it does not produce any references. Across models, references are most frequently drawn from government or official biomedical databases, such as PubMed, PMC, and NCBI, which index much of the authoritative peer-reviewed literature (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-2-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2c\u003C\/a\u003E\u003C\/strong\u003E). The LLMs also rely heavily on specialized rare disease databases like Orphanet, which offer curated, high-specificity information about rare disorders. 
In addition, clinical or medical reference (e.g., Medscape, Cleveland Clinic Health Library, and Mayo Clinic Diseases and Conditions) and encyclopedia (e.g., Wikipedia) platforms are commonly cited for practical details on disease symptoms, diagnosis, and treatment guidelines. Academic journals or research portals, such as ScienceDirect, Wiley Online Library, MDPI, and ResearchGate, also contribute a nontrivial portion of citation coverage. Nonetheless, the usage of these three information categories is more variable across LLMs compared to government or official biomedical databases. Overall, Claude Sonnet 4 shows wider citation coverage than other models.\u003C\/p\u003E\u003Cp id=\u0022p-33\u0022\u003EAt the concept level, we observe that the number of extracted unique UMLS concepts varies across clinically relevant semantic groups and across LLMs (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-2-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2d\u003C\/a\u003E\u003C\/strong\u003E). The \u201csymptoms \u0026amp; conditions\u201d group contributes the largest share of matches, followed by \u201cgenetics \u0026amp; molecular biology\u201d, which is consistent with the goal of orienting disease profiles to support RDR. By contrast, the \u201cdrugs \u0026amp; procedures\u201d and \u201cdiagnostics \u0026amp; laboratory findings\u201d groups contain far fewer concepts across models, reflecting the lack of effective treatments and diagnostic approaches for rare diseases. Across model outputs, the disease profiles produced by Claude Sonnet 4 match the largest number of distinct UMLS concepts in all semantic groups, suggesting greater capability for evidence retrieval in RDR; OpenAI o3 and DeepSeek R1 consistently rank second and third. In comparison, the expert-curated NORD reports and Gemini 2.5 Pro match the fewest UMLS concepts across groups. 
Group-level similarity analysis suggests that UMLS concepts in \u201csymptoms \u0026amp; conditions\u201d are the most consistent among all groups, whereas \u201cgenetics \u0026amp; molecular biology\u201d shows the greatest divergence (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-2-6\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2e\u003C\/a\u003E\u003C\/strong\u003E). On average, the NORD reports exhibit lower similarity to LLM-produced profiles across all groups than the similarity observed among LLM-produced profiles themselves. Taken together with their smaller counts in unique UMLS concepts, this pattern suggests a narrower scope than the broader generative coverage produced by LLMs, which may enrich or extend the conceptual space beyond expert-curated content.\u003C\/p\u003E\u003Cp id=\u0022p-34\u0022\u003ETwo clinical experts independently assessed profiles for four randomly selected diseases (Behcet\u2019s syndrome, granulomatosis with polyangiitis, IPF, and myasthenia gravis) across four dimensions: factual accuracy, clinical completeness, utility, and specificity. Inter-rater agreement measured with Cohen\u2019s Kappa is substantial (0.644), according to Landis and Koch criteria.\u003Csup\u003E\u003Ca id=\u0022xref-ref-57-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-57\u0022\u003E57\u003C\/a\u003E\u003C\/sup\u003E Across these diseases, LLM-generated profiles consistently exceed the expert-curated NORD reports in most dimensions (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-2-7\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2f\u003C\/a\u003E\u003C\/strong\u003E). NORD reports rank lowest in clinical completeness, utility and disease specificity, and lower than DeepSeek R1- and Gemini 2.5 Pro-generated profiles in factual accuracy. 
Expert reviewers attributed specific shortcomings in the NORD IPF report to reliance on outdated clinical guidelines and conflation of drug-induced and radiation-induced pulmonary fibrosis with IPF. Among LLM-generated profiles, DeepSeek R1 and OpenAI o3 receive the highest ratings for clinical completeness, utility and disease specificity, while DeepSeek R1 and Gemini 2.5 Pro rank highest in factual accuracy. Claude Sonnet 4 ranks in the middle tier across most dimensions. Expert reviewers noted that the Claude Sonnet 4-generated IPF profile references clinically unavailable diagnostic tests (e.g., BAL SP-A levels, genetic testing) as though they play a routine role in diagnosis. These findings suggest that, at least for the diseases evaluated, LLM-generated profiles can achieve clinician-assessed quality that surpasses or equals established expert-curated resources, while also revealing dimension-specific variation across models. Interestingly, these expert ratings do not directly predict downstream computational performance as shown in the following sections.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-4\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EGeneral-purpose rare disease screening evaluation\u003C\/h3\u003E\u003Cp id=\u0022p-35\u0022\u003EThere are several notable findings for the general-purpose rare disease screening using GEN-KnowRD. First, across all knowledge sources, Stage 2 reranking uniformly refines Stage 1 disease rankings, improving Recall@1, @3, and @5 (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-3-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3a\u003C\/a\u003E\u003C\/strong\u003E). For example, with expert-curated NORD rare disease reports, Stage 2 yields a Recall@1 of 0.837, which surpasses Stage 1\u2019s rankings (0.613) by 36.5%. Second, LLM-produced rare disease profiles achieve consistently better RDR performance across both stages compared to expert-curated reports. 
Notably, Claude Sonnet 4 achieves a Stage 1 Recall@1 of 0.690, corresponding to a 12.6% increase over NORD reports (0.613). This advantage persists in Stage 2, although the improvement reduces to 2.7%. Third, Claude Sonnet 4 outperforms other LLMs on Recall@1, and, together with OpenAI o3, achieves the highest Recall@3 and Recall@5. This suggests that the rare disease profiles generated by Claude Sonnet 4 provide stronger support for RDR. Fourth, a Stage 2 variant that reranks diseases using only the disease names (without incorporating knowledge) performs uniformly worse than standard Stage 2 across all LLMs, highlighting the value of Stage 2\u2019s knowledge-boosted design. Fifth, mean reciprocal rank (MRR), calculated as the average of the reciprocal ranks of the ground-truth diseases (with a range from 0 to 1), is leveraged to summarize the overall ranking quality for each knowledge source \u003Cstrong\u003E(\u003Ca id=\u0022xref-fig-3-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3b\u003C\/a\u003E)\u003C\/strong\u003E. The LLM-produced knowledge layer (i.e., disease profiles) consistently outperforms NORD reports, yielding statistically significantly higher MRR across Stages 1 and 2. Among these sources, Claude Sonnet 4 performs best in Stage 1, whereas Claude Sonnet 4 and OpenAI o3 both lead in Stage 2.\u003C\/p\u003E\u003Cdiv id=\u0022F3\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F3.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Stage-wise evaluation of GEN-KnowRD\u0026#x2019;s general-purpose rare disease screening. 
a, Recall for Stage 1 and Stage 2 across knowledge sources, along with a Stage 2 variant that reranks diseases using only disease names. b, Distributions of the mean reciprocal rank (MRR) of the ground-truth disease across knowledge sources. MRR differences between each LLM-based knowledge source and the NORD reports were evaluated using two-sided Wilcoxon signed-rank tests. c, Recall@1 of disease rankings out of Stage 1 across disease complexity categories based on Orphanet classification. The number of patients within each group is indicated. d, Recall@1 of disease rankings out of Stage 2 across disease complexity categories based on Orphanet classification. e, UpSet plot showing the number of rank gains of the ground-truth rare diseases attributable to knowledge-boosted reranking in Stage 2. The leftmost bar indicates the number of patients for whom the ground-truth disease moves up from Stage 1 position. f, Relationship categories identified in cases where Claude Sonnet 4 ranks the ground-truth rare disease second. *** indicates p\u0026lt;0.001 with Holm-Bonferroni correction.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2117183985\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;div xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot;\u0026gt;\u0026lt;span class=\u0026quot;caption-title\u0026quot;\u0026gt;Stage-wise evaluation of GEN-KnowRD\u0026#x2019;s general-purpose rare disease screening.\u0026lt;\/span\u0026gt; \u0026lt;strong\u0026gt;a,\u0026lt;\/strong\u0026gt; Recall for Stage 1 and Stage 2 across knowledge sources, along with a Stage 2 variant that reranks diseases using only disease names. \u0026lt;strong\u0026gt;b,\u0026lt;\/strong\u0026gt; Distributions of the mean reciprocal rank (MRR) of the ground-truth disease across knowledge sources. 
MRR differences between each LLM-based knowledge source and the NORD reports were evaluated using two-sided Wilcoxon signed-rank tests. \u0026lt;strong\u0026gt;c,\u0026lt;\/strong\u0026gt; Recall@1 of disease rankings out of Stage 1 across disease complexity categories based on Orphanet classification. The number of patients within each group is indicated. \u0026lt;strong\u0026gt;d,\u0026lt;\/strong\u0026gt; Recall@1 of disease rankings out of Stage 2 across disease complexity categories based on Orphanet classification. \u0026lt;strong\u0026gt;e,\u0026lt;\/strong\u0026gt; UpSet plot showing the number of rank gains of the ground-truth rare diseases attributable to knowledge-boosted reranking in Stage 2. The leftmost bar indicates the number of patients for whom the ground-truth disease moves up from Stage 1 position. \u0026lt;strong\u0026gt;f,\u0026lt;\/strong\u0026gt; Relationship categories identified in cases where Claude Sonnet 4 ranks the ground-truth rare disease second. *** indicates p\u0026lt;0.001 with Holm-Bonferroni correction.\u0026lt;\/div\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 3:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F3.medium.gif\u0022 width=\u0022440\u0022 height=\u0022270\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 
3:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F3.medium.gif\u0022 width=\u0022440\u0022 height=\u0022270\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F3.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 3:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F3.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 3:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EStage-wise evaluation of GEN-KnowRD\u2019s general-purpose rare disease screening.\u003C\/span\u003E\u003Cp id=\u0022p-36\u0022 class=\u0022first-child\u0022\u003E\u003Cstrong\u003Ea,\u003C\/strong\u003E Recall for Stage 1 and Stage 2 across knowledge sources, along with a Stage 2 variant that reranks diseases using only disease names. \u003Cstrong\u003Eb,\u003C\/strong\u003E Distributions of the mean reciprocal rank (MRR) of the ground-truth disease across knowledge sources. MRR differences between each LLM-based knowledge source and the NORD reports were evaluated using two-sided Wilcoxon signed-rank tests. 
\u003Cstrong\u003Ec,\u003C\/strong\u003E Recall@1 of disease rankings out of Stage 1 across disease complexity categories based on Orphanet classification. The number of patients within each group is indicated. \u003Cstrong\u003Ed,\u003C\/strong\u003E Recall@1 of disease rankings out of Stage 2 across disease complexity categories based on Orphanet classification. \u003Cstrong\u003Ee,\u003C\/strong\u003E UpSet plot showing the number of rank gains of the ground-truth rare diseases attributable to knowledge-boosted reranking in Stage 2. The leftmost bar indicates the number of patients for whom the ground-truth disease moves up from Stage 1 position. \u003Cstrong\u003Ef,\u003C\/strong\u003E Relationship categories identified in cases where Claude Sonnet 4 ranks the ground-truth rare disease second. *** indicates p\u0026lt;0.001 with Holm-Bonferroni correction.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-37\u0022\u003EWe further evaluated GEN-KnowRD by disease complexity and observed heterogeneity in RDR performance \u003Cstrong\u003E(\u003Ca id=\u0022xref-fig-3-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3c,d\u003C\/a\u003E)\u003C\/strong\u003E. The variability within and across complexity levels is more pronounced in Stage 1 than in Stage 2, consistent with the observed recall patterns above. At Stage 1, GEN-KnowRD, on average, exhibits a lower performance for diseases affecting four (Recall@1=0.624 across knowledge sources), five (0.600), or six (0.568) body systems (overall mean Recall@1=0.597), relative to those with fewer affected systems (overall mean Recall@1=0.672) or more systems (overall mean Recall@1=0.691). A similar phenomenon persists in Stage 2, although the variability is largely attenuated by the knowledge-boosted reranking. 
Across knowledge sources, Claude Sonnet 4-generated disease profiles achieve the strongest results, ranking best in six of nine complexity levels at Stage 1 and five out of nine at Stage 2 (Supplementary \u003Cstrong\u003EFig. 3\u003C\/strong\u003E). By contrast, NORD reports often perform worst (seven of nine levels at Stage 1 and five of nine at Stage 2).\u003C\/p\u003E\u003Cp id=\u0022p-38\u0022\u003EThe benefit of knowledge-boosted reranking (Stage 2) is further reflected in rank gains for ground-truth diagnoses (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-3-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3e\u003C\/a\u003E\u003C\/strong\u003E). Across knowledge sources, approximately 1,000 patients have their Stage 1 ranks of the true diagnoses improved, highlighting the reranker\u2019s ability to leverage nuanced clinical evidence when aligning patient presentation with disease knowledge. Of these improvements, about 60% (612 patients) are shared across all knowledge sources, representing the largest subset. The second largest subset of rank gains is shared by all knowledge sources from LLMs (99 patients), whereas improvements involving the NORD reports occur in substantially smaller subsets, each comprising no more than half of that size.\u003C\/p\u003E\u003Cp id=\u0022p-39\u0022\u003EBeyond aggregate RDR performance, we performed a ranking discrepancy analysis using GEN-KnowRD with the Claude Sonnet 4-produced disease profiles as an example (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-3-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3f\u003C\/a\u003E\u003C\/strong\u003E). We used three LLMs as judges (Gemma-3-27B, OpenAI GPT-5, and OpenEvidence) and took their consensus to characterize the relationship between the top-rank disease and the true diagnosis. 
Among the 502 patients for whom the true disease is ranked second (accounting for approximately 80% of top-3 ranking discrepancies), the most common pattern is a broader-narrower relationship, where one disease is a parent entity or subtype of the other, accounting for 26.7% of the discrepancies. The remaining discrepancies are attributed to differential diagnosis pairs that are commonly confused clinically (25.1%), etiologic relationships where one can cause or lead to the other (18.9%), diseases within the same family (17.7%), and unrelated pairs with no meaningful links (11.6%). See detailed process in Methods.\u003C\/p\u003E\u003Cp id=\u0022p-40\u0022\u003EBeyond evaluating a single knowledge source for RDR, we further examined the feasibility of leveraging all LLM-produced knowledge sources jointly. Specifically, we explored a gated ensemble strategy (GEN-KnowRD-Ensemble) that considers rankings from GEN-KnowRD instantiated with different LLM-produced knowledge sources, and compared it with two state-of-the-art RDR approaches (i.e., PhenoBrain and OpenAI GPT-5), as well as with GEN-KnowRD using the NORD reports. Among 7,038 patient cases with valid outputs from all approaches, GEN-KnowRD-Ensemble outperforms OpenAI GPT-5 with statistical significance across Recall@1, Recall@2, and Recall@3 (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-4-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFig. 4a-c\u003C\/a\u003E\u003C\/strong\u003E). One exception is Recall@1 on PMC (6,364 patients), where OpenAI GPT-5 shows a slightly better performance. This might be attributable to the free-text nature of PMC and potential data leakage during model pretraining. In contrast, OpenAI GPT-5 performs significantly worse on Non-PMC (674 patients) across Recall@1, Recall@2, and Recall@3. For example, GEN-KnowRD-Ensemble achieves a Recall@1 improvement of 129.1% over OpenAI GPT-5. 
This result might be attributable to Non-PMC benchmarks encoding a patient\u2019s clinical presentation as individual HPO identifiers, for which LLM pretraining is less likely to capture the meaningful associations between these terms and true diagnoses at the free-text level.\u003C\/p\u003E\u003Cdiv id=\u0022F4\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F4.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Evaluation of GEN-KnowRD-Ensemble against representative RDR approaches. a, Recall@1. b, Recall@2. c, Recall@3. d, Distribution of knowledge sources that GEN-KnowRD-Ensemble adopts in determining the final rare disease rankings. ***, **, and * indicate p\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2117183985\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;div xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot;\u0026gt;\u0026lt;span class=\u0026quot;caption-title\u0026quot;\u0026gt;Evaluation of GEN-KnowRD-Ensemble against representative RDR approaches.\u0026lt;\/span\u0026gt; \u0026lt;strong\u0026gt;a,\u0026lt;\/strong\u0026gt; Recall@1. \u0026lt;strong\u0026gt;b,\u0026lt;\/strong\u0026gt; Recall@2. \u0026lt;strong\u0026gt;c,\u0026lt;\/strong\u0026gt; Recall@3. \u0026lt;strong\u0026gt;d,\u0026lt;\/strong\u0026gt; Distribution of knowledge sources that GEN-KnowRD-Ensemble adopts in determining the final rare disease rankings. ***, **, and * indicate p\u0026lt;0.001, p\u0026lt;0.01, and p\u0026lt;0.05, respectively, from a two-sided Wilcoxon signed-rank test with Holm-Bonferroni correction. 
The prompt used for OpenAI GPT-5 is provided in Supplementary \u0026lt;strong\u0026gt;Table 4\u0026lt;\/strong\u0026gt;. NS: not significant.\u0026lt;\/div\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 4:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022122\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 4:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022122\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F4.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 
4:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F4.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 4:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EEvaluation of GEN-KnowRD-Ensemble against representative RDR approaches.\u003C\/span\u003E\u003Cp id=\u0022p-41\u0022 class=\u0022first-child\u0022\u003E\u003Cstrong\u003Ea,\u003C\/strong\u003E Recall@1. \u003Cstrong\u003Eb,\u003C\/strong\u003E Recall@2. \u003Cstrong\u003Ec,\u003C\/strong\u003E Recall@3. \u003Cstrong\u003Ed,\u003C\/strong\u003E Distribution of knowledge sources that GEN-KnowRD-Ensemble adopts in determining the final rare disease rankings. ***, **, and * indicate p\u0026lt;0.001, p\u0026lt;0.01, and p\u0026lt;0.05, respectively, from a two-sided Wilcoxon signed-rank test with Holm-Bonferroni correction. The prompt used for OpenAI GPT-5 is provided in Supplementary \u003Cstrong\u003ETable 4\u003C\/strong\u003E. NS: not significant.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-42\u0022\u003EMoreover, GEN-KnowRD-Ensemble also yields significantly higher recall than PhenoBrain across benchmarks, underscoring the limitations of pipelines that depend primarily on HPO extraction and mapping. For example, GEN-KnowRD-Ensemble achieves a Recall@1 gain of 345.8% relative to PhenoBrain on PMC. 
Nevertheless, PhenoBrain still outperforms OpenAI GPT-5 on Non-PMC, highlighting the value of compact, task-specialized models for RDR. Furthermore, GEN-KnowRD-Ensemble surpasses GEN-KnowRD with NORD reports, especially on Non-PMC (corresponding to a Recall@1 gain of 8.3%), demonstrating the added benefit of combining multiple LLM-derived knowledge sources through the gated ensemble mechanism. Detailed comparison results between GEN-KnowRD-Ensemble and GEN-KnowRD with individual knowledge sources are provided in Supplementary \u003Cstrong\u003ETable 5\u003C\/strong\u003E. Analysis of the knowledge source selected by the gating mechanism in GEN-KnowRD-Ensemble suggests that Claude Sonnet 4-derived knowledge source is used most frequently (56.1%), followed by Gemini 2.5 Pro (23.5%), DeepSeek R1 (12.9%), and OpenAI o3 (7.5%) (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-4-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFig. 4d\u003C\/a\u003E\u003C\/strong\u003E).\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-5\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ESpecialized rare disease discrimination evaluation\u003C\/h3\u003E\u003Cp id=\u0022p-43\u0022\u003EWe used IPF as a case study to evaluate GEN-KnowRD for specialized rare disease discrimination. In this setting, the number of extracted UMLS concepts from the IPF disease profiles varies across knowledge sources (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5a\u003C\/a\u003E\u003C\/strong\u003E). The NORD IPF report contains 278 distinct UMLS concepts (the largest set), whereas OpenAI o3-generated IPF disease profile contains 154 concepts, representing the smallest set. Only 33 concepts overlap across all five sources, while 10 concepts are shared across the LLM-generated disease profiles, suggesting the strong uniqueness and diversity of knowledge representations across sources. 
Using the UMLS concepts extracted from each knowledge source, we identified their meaningful mentions in longitudinal clinical notes from the constructed cohorts in both early diagnosis studies (Supplementary \u003Cstrong\u003EFig. 4\u003C\/strong\u003E) and then calculated, for each source, the difference in the total number of matched concepts between cases and controls. In Study 1 (IPF vs non-IPF), the disease profile generated by Claude Sonnet 4 leads to the largest case-control differences (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5b\u003C\/a\u003E\u003C\/strong\u003E), indicating richer and more discriminative coverage of IPF-related characteristics, even though this source does not have the highest number of distinct IPF concepts overall. By contrast, in Study 2 (IPF vs suspected IPF), the NORD IPF report corresponds to the largest case-control concept difference (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5c\u003C\/a\u003E\u003C\/strong\u003E). Across both studies, OpenAI o3-generated IPF profile produces the smallest case-control concept differences.\u003C\/p\u003E\u003Cdiv id=\u0022F5\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F5.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Evaluation of GEN-KnowRD in early diagnosis of idiopathic pulmonary fibrosis (IPF). a, UpSet plot showing overlap among UMLS concepts extracted from the clinical presentation sections of rare disease profiles. 
b, Differences in the total number of matched UMLS concepts extracted from longitudinal clinical notes of IPF group versus non-IPF group (169 patients per group; Study 1). Multiple mentions of the same concept within a single note are counted only once. c, Differences in the total number of matched UMLS concepts extracted from longitudinal clinical notes of IPF group versus suspected IPF group (171 patients per group; Study 2). d, AUROC and F1 scores of GEN-KnowRD with TF-IDF-weighted concept importance for early diagnosis of IPF in Study 1. e, AUROC and F1 scores of GEN-KnowRD with SIS-weighted concept importance for early diagnosis of IPF in Study 1. f, AUROC and F1 scores of GEN-KnowRD with TF-IDF-weighted concept importance for early diagnosis of IPF in Study 2. g, AUROC and F1 scores of GEN-KnowRD with SIS-weighted concept importance for early diagnosis of IPF in Study 2. Youden index is applied to determine cut-off thresholds and 95% confidence intervals for AUROC and F1 are presented for knowledge sources.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2117183985\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;\u0026lt;div xmlns=\u0026quot;http:\/\/www.w3.org\/1999\/xhtml\u0026quot;\u0026gt;\u0026lt;span class=\u0026quot;caption-title\u0026quot;\u0026gt;Evaluation of GEN-KnowRD in early diagnosis of idiopathic pulmonary fibrosis (IPF).\u0026lt;\/span\u0026gt; \u0026lt;strong\u0026gt;a,\u0026lt;\/strong\u0026gt; UpSet plot showing overlap among UMLS concepts extracted from the clinical presentation sections of rare disease profiles. \u0026lt;strong\u0026gt;b,\u0026lt;\/strong\u0026gt; Differences in the total number of matched UMLS concepts extracted from longitudinal clinical notes of IPF group versus non-IPF group (169 patients per group; Study 1). Multiple mentions of the same concept within a single note are counted only once. 
\u0026lt;strong\u0026gt;c,\u0026lt;\/strong\u0026gt; Differences in the total number of matched UMLS concepts extracted from longitudinal clinical notes of IPF group versus suspected IPF group (171 patients per group; Study 2). \u0026lt;strong\u0026gt;d,\u0026lt;\/strong\u0026gt; AUROC and F1 scores of GEN-KnowRD with TF-IDF-weighted concept importance for early diagnosis of IPF in Study 1. \u0026lt;strong\u0026gt;e,\u0026lt;\/strong\u0026gt; AUROC and F1 scores of GEN-KnowRD with SIS-weighted concept importance for early diagnosis of IPF in Study 1. \u0026lt;strong\u0026gt;f,\u0026lt;\/strong\u0026gt; AUROC and F1 scores of GEN-KnowRD with TF-IDF-weighted concept importance for early diagnosis of IPF in Study 2. \u0026lt;strong\u0026gt;g,\u0026lt;\/strong\u0026gt; AUROC and F1 scores of GEN-KnowRD with SIS-weighted concept importance for early diagnosis of IPF in Study 2. Youden index is applied to determine cut-off thresholds and 95% confidence intervals for AUROC and F1 are presented for knowledge sources.\u0026lt;\/div\u0026gt;\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 5:\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022158\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 
5:\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022158\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F5.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 5:\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/F5.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 5:\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EEvaluation of GEN-KnowRD in early diagnosis of idiopathic pulmonary fibrosis (IPF).\u003C\/span\u003E\u003Cp id=\u0022p-44\u0022 class=\u0022first-child\u0022\u003E\u003Cstrong\u003Ea,\u003C\/strong\u003E UpSet plot showing overlap among UMLS concepts extracted from the clinical presentation sections of rare disease profiles. \u003Cstrong\u003Eb,\u003C\/strong\u003E Differences in the total number of matched UMLS concepts extracted from longitudinal clinical notes of IPF group versus non-IPF group (169 patients per group; Study 1). Multiple mentions of the same concept within a single note are counted only once. 
\u003Cstrong\u003Ec,\u003C\/strong\u003E Differences in the total number of matched UMLS concepts extracted from longitudinal clinical notes of IPF group versus suspected IPF group (171 patients per group; Study 2). \u003Cstrong\u003Ed,\u003C\/strong\u003E AUROC and F1 scores of GEN-KnowRD with TF-IDF-weighted concept importance for early diagnosis of IPF in Study 1. \u003Cstrong\u003Ee,\u003C\/strong\u003E AUROC and F1 scores of GEN-KnowRD with SIS-weighted concept importance for early diagnosis of IPF in Study 1. \u003Cstrong\u003Ef,\u003C\/strong\u003E AUROC and F1 scores of GEN-KnowRD with TF-IDF-weighted concept importance for early diagnosis of IPF in Study 2. \u003Cstrong\u003Eg,\u003C\/strong\u003E AUROC and F1 scores of GEN-KnowRD with SIS-weighted concept importance for early diagnosis of IPF in Study 2. Youden index is applied to determine cut-off thresholds and 95% confidence intervals for AUROC and F1 are presented for knowledge sources.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-45\u0022\u003EThere are multiple notable findings in the performance of IPF early diagnosis. First, Studies 1 and 2 differ markedly in difficulty, as evidenced by substantially higher AUROC and F1 scores achieved in Study 1 than in Study 2 across all concept-weighting strategies (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5d-g\u003C\/a\u003E\u003C\/strong\u003E). This difference is expected as IPF and other distinct pulmonary diseases tend to share fewer common clinical and physiological signatures in clinical notes than IPF and suspected IPF cases, whereas suspected IPF cases are enriched for fibrotic descriptors and diagnostic uncertainty, making the negative class highly confusable and the task inherently more difficult. 
Consequently, Study 2 requires resolving fine-grained, trajectory-dependent evidence beyond keywords, motivating more sophisticated approaches. Second, in Study 1, the OpenAI o3-produced IPF profile yields significantly lower performance than the NORD IPF report and other LLM-based sources, with AUROC of 0.731 (TF-IDF-weighted concept importance) and 0.710 (SIS-weighted) and F1 of 0.652 (TF-IDF) and 0.619 (SIS) (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5d,e\u003C\/a\u003E\u003C\/strong\u003E). This result may be partially explained by the OpenAI o3-produced IPF clinical presentation lacking key terminology related to pulmonary fibrosis. In contrast, the corresponding AUROC and F1 scores for the NORD IPF report and other LLM-based sources are all above 0.800. Third, compared to TF-IDF-weighting, SIS-based weighting not only sharpens performance separation across knowledge sources, but also improves overall performance. Notably, the Claude Sonnet 4-based IPF profile achieves the highest AUROC (0.992) and F1 (0.956), corresponding to gains of 10.8% and 16.0% over the TF-IDF weighting setting, where most knowledge sources, except OpenAI o3, offer largely similar support for early diagnosis. Fourth, the differences of early diagnosis performance in Study 2 are more modest overall (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-5-6\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5f,g\u003C\/a\u003E\u003C\/strong\u003E). Nevertheless, the NORD IPF profile shows relatively weaker performance than the LLM-produced disease profiles in terms of AUROC and F1. Interestingly, the average AUROC and F1 of OpenAI o3 rank highest in Study 2, suggesting that its symptom representations capture clinically meaningful distinctions within this challenging borderline population. 
The Claude Sonnet 4-generated IPF profile ranks second, achieving AUROC of 0.660 (TF-IDF) and 0.666 (SIS) and F1 of 0.557 for both weighting strategies.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-6\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EDiscussion\u003C\/h2\u003E\u003Cp id=\u0022p-46\u0022\u003EThe current limitations of RDR pose a major challenge that hinders timely intervention, cohort identification of patients for enrollment in clinical trials, and advances in therapeutic development. Yet current computational approaches for RDR depend on knowledge resources that are expensive to curate, structurally heterogeneous, incomplete, and slow to incorporate emerging evidence. To address these constraints at their source, we introduce GEN-KnowRD, a knowledge-first framework that generates schema-guided rare disease profiles, converts them into an AI-ready knowledge base (PheMAP-RD), and then links this resource with lightweight inference pipelines. Across six public benchmarks for general-purpose rare disease screening and two private longitudinal EHR cohorts for IPF early diagnosis, GEN-KnowRD consistently improves RDR relative to a state-of-the-art, HPO-centered baseline and to pipelines grounded in expert-curated NORD reports, while achieving stronger results than end-to-end LLM-based reasoning. These findings support our hypothesis that RDR is often constrained by how domain knowledge is presented and leveraged, and how clinical evidence is distilled into actionable signals, such that strengthening upstream knowledge layer and evidence processing can enable strong performance even with lightweight downstream strategies that can be deployed locally.\u003C\/p\u003E\u003Cp id=\u0022p-47\u0022\u003EOur results imply broader translational value of GEN-KnowRD. 
Since PheMAP-RD provides structured, computable rare disease profiles that are decoupled from any single inference method, it can naturally function as shared infrastructure across clinical applications. For screening and triage, the general-purpose pipeline can flag patients whose records suggest unrecognized rare conditions, potentially supporting earlier specialist referral and prompting targeted genetic testing. For clinical research, the same knowledge base can be repurposed to match patient phenotypic profiles against trial eligibility criteria, accelerating cohort identification for rare disease studies. Critically, the modular and reusable design of the knowledge layer means that PheMAP-RD is not limited to the pipelines presented in this paper. As end-to-end and agentic AI diagnostic systems continue to advance, exemplified by recent work such as DeepRare\u003Csup\u003E\u003Ca id=\u0022xref-ref-28-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-28\u0022\u003E28\u003C\/a\u003E\u003C\/sup\u003E, the quality of knowledge they retrieve and reason over will remain a key determinant of their reliability. GEN-KnowRD is designed to complement such systems by providing a quality-assessed knowledge base that can strengthen evidence retrieval, grounding, and decision support. In this sense, GEN-KnowRD\u2019s contribution is not a competing diagnostic tool, but a foundational layer that can raise the performance ceiling of the rare disease AI ecosystem.\u003C\/p\u003E\u003Cp id=\u0022p-48\u0022\u003EUnderneath the performance gains, the central contribution of GEN-KnowRD is a shift in how LLMs (and AI more broadly) are used in RDR. GEN-KnowRD employs LLMs to construct a disease knowledge layer that can be systematically evaluated, versioned, processed, and reused. This addresses multiple bottlenecks of the current knowledge-driven paradigm. 
First, by scaling the creation of disease profiles, it substantially reduces the multi-disciplinary human curation effort required to construct and maintain rare disease knowledge resources. Traditional curation typically relies upon repeated manual synthesis by clinicians, geneticists, and clinical informaticians to perform literature review, reconcile fragmented evidence across publications and guidelines, normalize inconsistent terminology, and harmonize variable documentation practices across sources. In contrast, GEN-KnowRD shifts the workflow from labor-intensive end-to-end drafting to targeted review and governance, allowing scarce expert time to be focused on validation and auditing. This design also streamlines downstream processing since LLM-produced disease profiles become easier to evaluate, update, and integrate into an AI-ready knowledge base that retrieval and reranking pipelines can consume directly, with fewer failures attributable to formatting issues or missing sections. Over time, this also enables faster refresh cycles, enabling a living resource in which updates can be applied systematically and consistently with minimal additional human effort.\u003C\/p\u003E\u003Cp id=\u0022p-49\u0022\u003ESecond, GEN-KnowRD strengthens the reliability and stability of knowledge-driven RDR by enforcing a shared schema and editorial constraints that make disease evidence easier to compare across conditions and less sensitive to source-specific idiosyncrasies. Compared to ad hoc, heterogeneous disease knowledge sources typically used for retrieval in end-to-end LLM workflows, this approach reduces representational bias and noise arising from uneven coverage, variable emphasis, and inconsistent terminology, leading to cleaner retrieval signals and more consistent disease prioritization. 
For example, in the heterogeneous knowledge setting, one disease may be described with detailed guideline-style sections, whereas another is captured in a short narrative that uses different synonyms and omits important information (e.g., testing cues). This can cause signal retrieval to disproportionately favor the better-structured disease simply because it contains more searchable anchors, rather than because it aligns more closely with the patient\u2019s presentation.\u003C\/p\u003E\u003Cp id=\u0022p-50\u0022\u003EThird, GEN-KnowRD enables controllable evidence pathways by separating knowledge processing from inference, which potentially supports the ability to diagnose and localize failures, as inaccurate prioritization or discrimination can be explicitly traced to specific stages (e.g., knowledge quality, concept extraction, retrieval, reranking, or feature selection), rather than being entangled within one big end-to-end, opaque reasoning process typically used by LLMs. Importantly, because inference is executed through fixed, lightweight components instead of interactive, per-query LLM generation, the behavior of GEN-KnowRD is more reproducible across runs and less vulnerable to prompt- or context-dependent variability that can otherwise complicate validation.\u003C\/p\u003E\u003Cp id=\u0022p-51\u0022\u003EFourth, compared to RDR workflows that directly invoke advanced proprietary LLMs and require transmitting sensitive patient health information to external services, GEN-KnowRD offers a more privacy-preserving and security-aligned deployment model. By shifting LLM use to an offline knowledge layer construction step and performing online inference via lightweight local computation, institutions can run the entire RDR workflow on-premise or within a secured private cloud, keeping protected health information inside their own network boundary. 
This minimizes reliance on third-party data handling and limits potential retention or secondary-use risks, while achieving tighter control than is typically feasible with external LLM calls.\u003C\/p\u003E\u003Cp id=\u0022p-52\u0022\u003EFifth, our design substantially cuts the computational and financial costs of RDR while achieving comparable or even greater performance than state-of-the-art approaches. GEN-KnowRD, along with PheMAP-RD, amortizes the expense of knowledge layer construction using LLMs across future patients and institutions, paving the way for scalable deployment without requiring per-query invocation of expensive LLM models or repeated long-context reasoning at inference time. These savings are especially pronounced in RDR based on EHR data, where longitudinal histories and clinical notes can be prohibitively lengthy to process directly with LLMs in routine workflows. Notably, PheMAP-RD is a durable community asset that can be freely reused by researchers and hospitals so that the primary cost is paid once during creation, while the benefits accumulate broadly through repeated downstream use.\u003C\/p\u003E\u003Cp id=\u0022p-53\u0022\u003EIn addition to these efficiency and scalability advantages, GEN-KnowRD also suggests a scalable pathway for maintaining a living rare disease knowledge layer, rather than a static resource that risks falling behind advances in clinical evidence. Because PheMAP-RD is built through a modular pipeline, it can refresh iteratively as stronger LLMs and new evidence become available. In an updated cycle, disease profiles can be regenerated or revised, clinical concept representation re-extracted, and the resulting knowledge base re-benchmarked before release. This design enables PheMAP-RD to evolve with medical knowledge and language modeling technology while lowering the maintenance burden relative to full manual re-curation. 
It also creates a practical path toward a community-facing rare disease knowledge resource that improves through transparent, versioned updates.\u003C\/p\u003E\u003Cp id=\u0022p-54\u0022\u003EBeyond this forward-looking implication, the present study also reveals that LLM-constructed rare disease knowledge layers exhibit distinct characteristics that translate into measurable differences in RDR. Notably, they vary in reference patterns, linguistic complexity, and the breadth of clinical concepts captured across semantic groups. These differences reflect variations in LLMs\u2019 retrieval strategies and synthesis style. Importantly, expert review indicates that human-curated NORD reports are not superior to LLM-produced disease profiles in key quality axes, including factual accuracy, clinical completeness, clinical utility, and clinical specificity. In fact, several LLM-generated disease profiles match or exceed NORD reports on these criteria. However, we note that expert ratings are not expected to be fully consistent with downstream RDR performance. This is because RDR hinges not only on human perceived value but also on how well knowledge can be operationalized for computational matching, especially the coverage and discriminative value of extracted clinical concepts, as well as the extent to which retrieved evidence can be reliably aligned with heterogeneous clinical presentations of patients. The consistently strong performance of Claude Sonnet 4-produced disease profiles across evaluation settings likely stems from their more comprehensive and clinically grounded representations, which in turn enable better evidence retrieval and disease ranking. 
Meanwhile, the diversity across LLMs motivates a source-level perspective that heterogeneous knowledge sources can be complementary, and the aggregation through a gated ensemble can offer robustness beyond any single model\u2019s knowledge synthesis capability.\u003C\/p\u003E\u003Cp id=\u0022p-55\u0022\u003EOur results further suggest that RDR performance is influenced by how patient evidence is represented and linked to disease knowledge. Prior approaches that rely solely on HPO term extraction followed by phenotype matching based on the extracted terms (PhenoBrain as an example) inherit several structural disadvantages that become salient in real-world clinical narratives. First, HPO terms capture primarily phenotypic abnormalities and only partially reflect the breadth of diagnostically decisive evidence in rare diseases. Discarding non-phenotypic evidence and contextual information leads to a representation bottleneck that is difficult to overcome by sophisticated matching strategies alone. Second, an HPO-first pipeline relies on accurate phenotype normalization from noisy text, yet clinical documentation is often ambiguous, contains negation and uncertainty, varies widely in granularity, and encodes important temporal information. All of these factors can induce systematic omissions or mis-mappings that propagate through the pipeline and irreversibly constrain the candidate disease set. Third, the phenotype-centered knowledge-matching can overemphasize the canonical presentations while underweighting atypical or evolving manifestations. When coupled with knowledge publication and curation biases in underlying resources, this can penalize ultra-rare diseases or evolving manifestations. Together, these factors explain why current HPO-first approaches may reach a performance ceiling in real-world RDR settings. 
In contrast, expanding the representation to UMLS concepts broadens the clinical signal space and improves robustness to lexical and documentation variation, while GEN-KnowRD\u2019s lightweight downstream pipelines incorporate temporal and contextual cues during inference.\u003C\/p\u003E\u003Cp id=\u0022p-56\u0022\u003EConsistent with this design rationale, our two-stage pipeline for general-purpose rare disease screening illustrates that knowledge-boosted reranking in Stage 2 consistently improves Recall@1, @2, and @5, compared to Stage 1 and brings performance to a more comparable range across knowledge sources. This convergence underscores the value of reranking as a knowledge-boosted refinement step. In particular, by re-evaluating the top-20 candidates with richer patient-disease interactions, reranking can recover signals that are only weakly captured by first-stage lexical or embedding similarity and mitigate source-specific noise in the initial retrieval rankings. Interestingly, reranking even without additional knowledge (e.g., using only disease names) can boost RDR performance to roughly the same level for all knowledge sources (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-3-6\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3a\u003C\/a\u003E\u003C\/strong\u003E). This reflects the reranker\u2019s (Qwen3-reranker-8B) ability to perform stronger cross-text alignment and better calibration within a small candidate set. Our observations are consistent with recent findings that highlight the value of reranking across various health applications\u003Csup\u003E\u003Ca id=\u0022xref-ref-58-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-58\u0022\u003E58\u003C\/a\u003E\u2013\u003Ca id=\u0022xref-ref-60-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-60\u0022\u003E60\u003C\/a\u003E\u003C\/sup\u003E. 
We also note that the pretraining of the base model underlying the reranker might have been exposed to knowledge about diseases or medicine more broadly. Nonetheless, the consistently best results are achieved when reranking is guided by the core disease knowledge, underscoring the added value of reusable RDK beyond the name-only setting.\u003C\/p\u003E\u003Cp id=\u0022p-57\u0022\u003EThe ranking discrepancy analysis surfaces common weaknesses of current RDR approaches and assessment strategies and, at the same time, reveals future directions for improvement. The high prevalence of broader-narrower confusions suggests that GEN-KnowRD frequently identifies the correct diseases but cannot fully resolve fine-grained disease identity. These confusions may reflect issues in label granularity mismatches across catalogs and disease profiles that may emphasize shared manifestations more than discriminative subtype features. Addressing this limitation will require ontology-aware normalization, subtype-specific knowledge enrichment (e.g., adding more details that highlight what distinguishes a subtype from its parent), and refined evidence-aware reranking that prioritizes high-specificity cues. The large share of differential diagnosis and same-family discrepancies further reflects the RDR difficulty in settings where diseases share common symptoms and workups, arguably one of the biggest challenges for clinicians as well. Resolving these cases will require not only more comprehensive patient information collection but also higher-resolution modeling that captures temporality, severity trajectories, and discriminative evidence across similar diseases. 
Meanwhile, etiologically-related discrepancies suggest that the pipeline can prioritize diseases that are causally adjacent rather than the intended primary diagnosis, which might be addressed by exploiting causal directionality knowledge and explicitly representing the relationships between primary diseases and their comorbidities and complications.\u003C\/p\u003E\u003Cp id=\u0022p-58\u0022\u003EDespite the merits of this work, our study has several limitations that should be acknowledged. First, we focused on approximately 1,300 diseases that have corresponding high-quality human-curated rare disease reports. Scaling GEN-KnowRD to the broader rare disease spectrum will require further validation of knowledge quality, consistency, and coverage, especially for ultra-rare and newly characterized conditions, and, critically, sufficiently rigorous labeled cohorts for benchmarking and evaluation. Second, although our baseline comparisons are conducted fairly, we ranked diseases within a rare disease candidate pool and did not explicitly include common conditions. This setting may simplify the real-world practice, where common diseases often compete strongly in the differential and can be confused with rare diseases due to shared manifestations. In future work, we intend to extend and rigorously evaluate GEN-KnowRD against the entire human disease catalog and enable it to function as an autonomous assistant for real-time early diagnosis across diverse clinical data environments, including EHRs. Third, LLM-generated disease profiles can vary with the choice of model, prompting strategy, retrieval behavior, and (when applicable) tool use, and the resulting knowledge may contain omissions or subtle inaccuracies. Although we conducted multi-faceted quality evaluation, fully characterizing provenance, timeliness, and factual reliability at scale remains challenging. 
At the same time, how best to optimize the RDK construction process and to maximize its downstream benefit for RDR are still open questions. Fourth, while GEN-KnowRD can acknowledge genotype and multimodal cues that can be expressed in text and mapped to UMLS concepts, the current pipelines, including ours, primarily operate on text-derived clinical evidence and concept-level representations. The incorporation of genetic variants, imaging findings, and laboratory patterns as first-class signals is likely to further improve both screening and discrimination, particularly for those defined by genotype-phenotype specificity or characteristic biomarker signatures. This may be addressed by integrating multi-modal foundation models to encode multi-modal information into latent representations and by coupling these embeddings with structured genotype features, which collectively enable joint retrieval and reranking over multi-modal evidence. Fifth, the two specialized IPF discrimination studies are derived from a single healthcare organization. 
As performance may vary across sites with different note writing practice, population characteristics, and care pathways, multi-center validation is needed to assess the transportability of our observations.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-7\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EMethods\u003C\/h2\u003E\u003Cdiv id=\u0022sec-8\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EDataset\u003C\/h3\u003E\u003Cdiv id=\u0022sec-9\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003ENORD Rare Disease Database and Reports\u003C\/h4\u003E\u003Cp id=\u0022p-59\u0022\u003EThe NORD Rare Disease Database compiles a comprehensive list of more than 10,000 rare diseases by aggregating information from established rare disease ontologies and reference sources like Orphanet and OMIM.\u003Csup\u003E\u003Ca id=\u0022xref-ref-38-5\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-38\u0022\u003E38\u003C\/a\u003E\u003C\/sup\u003E From this extensive index, NORD content experts curate the full rare disease reports for a core subset of 1,320 diseases, each presenting summaries of clinical features, diagnosis, management, prognosis and additional resources. These reports are produced, reviewed, and maintained by medical experts who synthesize clinically vetted information from the medical literature, and they are freely available to support understanding, diagnosis, and care for rare diseases. We generated disease profiles for the 1,320 diseases that NORD has prioritized and use their reports as the primary knowledge baseline for comparison (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-9\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1a\u003C\/a\u003E\u003C\/strong\u003E) given that they provide publicly accessible, clinically oriented, and well-organized information that is closely aligned with RDR using patients\u2019 clinical notes or clinical presentation summaries. 
Other resources either are not uniformly designed as a clinically oriented description of diseases (e.g., OMIM) or vary substantially by content categories about a disease (e.g., Orphanet), making them less suitable to serve as a strong knowledge baseline.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-10\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EPublic Benchmarks\u003C\/h4\u003E\u003Cp id=\u0022p-60\u0022\u003EWe use six public RDR benchmarks that have been widely referenced in prior research\u003Csup\u003E\u003Ca id=\u0022xref-ref-28-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-28\u0022\u003E28\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-29-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-29\u0022\u003E29\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-43-4\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-43\u0022\u003E43\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-61-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-61\u0022\u003E61\u003C\/a\u003E\u003C\/sup\u003E: 1) HMS\u003Csup\u003E\u003Ca id=\u0022xref-ref-50-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-50\u0022\u003E50\u003C\/a\u003E\u003C\/sup\u003E, 2) LIRICAL\u003Csup\u003E\u003Ca id=\u0022xref-ref-51-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-51\u0022\u003E51\u003C\/a\u003E\u003C\/sup\u003E, 3) MME\u003Csup\u003E\u003Ca id=\u0022xref-ref-52-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-52\u0022\u003E52\u003C\/a\u003E\u003C\/sup\u003E, 4) RAMEDIS\u003Csup\u003E\u003Ca id=\u0022xref-ref-53-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-53\u0022\u003E53\u003C\/a\u003E\u003C\/sup\u003E, 5) MyGene2\u003Csup\u003E\u003Ca id=\u0022xref-ref-54-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-54\u0022\u003E54\u003C\/a\u003E\u003C\/sup\u003E, and 6) PMC-Patients\u003Csup\u003E\u003Ca id=\u0022xref-ref-55-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-55\u0022\u003E55\u003C\/a\u003E\u003C\/sup\u003E. 
PMC-Patients provides free-text patient summaries extracted from published case reports in PubMed Central, whereas the other five benchmarks are provided in a structured format consisting of HPO terms paired with ground-truth rare disease labels. We first mapped the disease labels of these benchmarks to the NORD rare disease report catalog through ORPHAcode and then converted HPO terms to their corresponding free-text descriptions, so that all input patient information in our experiments is represented as free text. For evaluation purposes, we combined the five benchmarks other than PMC-Patients into a single dataset (Non-PMC), and, for convenience, we refer to PMC-Patients as PMC (\u003Cstrong\u003E\u003Ca id=\u0022xref-fig-1-10\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1a\u003C\/a\u003E\u003C\/strong\u003E). In total, 9,290 patients with 689 rare diseases are used in general-purpose disease screening. Note that PMC may not be suitable for evaluating early diagnosis, since some patients in the set have already been diagnosed. Instead, it serves as an appropriate benchmark for rare disease identification. Previous research has used LLMs to rewrite patient descriptions to remove diagnosis-related information to better align the input with the diagnostic task.\u003Csup\u003E\u003Ca id=\u0022xref-ref-28-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-28\u0022\u003E28\u003C\/a\u003E\u003C\/sup\u003E However, this practice risks altering clinical meaning, potentially biasing the assessment of knowledge sources and leading to false conclusions. 
We therefore use the raw PMC data in our evaluation.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-11\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EPrivate Benchmarks\u003C\/h4\u003E\u003Cp id=\u0022p-61\u0022\u003EIn addition to the public benchmarks, we constructed a real-world early-diagnosis benchmark using raw, longitudinal clinical notes from patients at Vanderbilt University Medical Center. We designed two diagnostic tasks using IPF as an example to demonstrate 1) GEN-KnowRD\u2019s capacity to discriminate rare diseases, and 2) the value of LLM-generated disease profiles. For the task of distinguishing IPF from non-IPF patients using data prior to their diagnosis, we define cases as individuals with not only preliminary ICD diagnosis codes but also billed ICD diagnosis codes (J84.112 or 516.31) documented in the EHR. We define controls as individuals with respiratory disease who received care from the same pulmonary specialists responsible for diagnosing IPF cases with no recorded IPF diagnosis codes. For each of the cases, we selected a control matched on sex, race, and age at disease onset within a \u00b15-year window. In addition, included patients must meet the following criteria: 1) disease onset age \u226545 years; 2) at least 90 days between the first documented visit and initial diagnosis; 3) at least 90 days between initial diagnosis and the last documented visit; 4) at least 30 clinical notes recorded between 18 and 3 months prior to diagnosis; and 5) at least 180-day spanning time of these notes within the pre-diagnosis window. For the task of distinguishing IPF from suspected IPF patients, we use the same definition of cases, while defining controls as individuals flagged in Vanderbilt University Medical Center\u2019s clinical EHR database as only having preliminary IPF diagnosis codes but no corresponding billed IPF codes, using the database\u2019s native indicators for preliminary versus billed diagnoses. 
Using these criteria, we identified 169 cases (and 169 matched controls) for IPF vs non-IPF comparison and 171 cases (and 171 matched controls) for IPF vs suspected IPF comparison. Detailed cohort construction is provided in Supplementary \u003Cstrong\u003E\u003Ca id=\u0022xref-fig-4-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFig. 4\u003C\/a\u003E.\u003C\/strong\u003E\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-12\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EGeneration of RDK\u003C\/h3\u003E\u003Cdiv id=\u0022sec-13\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003ERDK creation from generative AI models\u003C\/h4\u003E\u003Cp id=\u0022p-62\u0022\u003EWe used Claude Sonnet 4, Gemini 2.5 Pro, OpenAI o3, and DeepSeek R1 to generate rare disease profiles with a prompt template approved by two clinical phenotyping experts (W.Q.W. and V.E.K.). We leveraged the corresponding application programming interfaces (API) for efficient data collection. To improve factual grounding and reduce hallucinated content and outdated information, we enabled the web search function in Claude Sonnet 4, Gemini 2.5 Pro, and OpenAI o3. In contrast, DeepSeek R1 did not provide a comparable capability through API during our experiments and thus was used without web search. To facilitate subsequent analysis, we employed Databricks MLflow Tracing to capture all details of the rare disease profile generation, including inputs, outputs, intermediate reasoning traces (if any), and metadata for each LLM invocation. 
For each rare disease and each LLM, a markdown file was created to store the corresponding disease profile.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-14\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EConcept extraction\u003C\/h4\u003E\u003Cp id=\u0022p-63\u0022\u003EWe applied SciSpacy\u003Csup\u003E\u003Ca id=\u0022xref-ref-62-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-62\u0022\u003E62\u003C\/a\u003E\u003C\/sup\u003E, a widely used open-source biomedical NLP toolkit, to extract relevant clinical concepts as it provides a practical, scalable pipeline with strong usability. We used its UMLS EntityLinker for concept extraction, with the similarity threshold set to 0.8. To identify concepts that are most relevant to the RDR tasks, we limited the concept extraction to four UMLS semantic groups: 1) symptoms \u0026amp; conditions, 2) diagnostics \u0026amp; laboratory findings, 3) drugs \u0026amp; procedures, and 4) genetics \u0026amp; molecular biology. The included semantic types under each group are summarized in Supplementary \u003Cstrong\u003ETable 6\u003C\/strong\u003E. We paired LLM-generated rare disease profiles with the corresponding extracted UMLS concepts to construct a rare disease knowledge base termed PheMAP-RD.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-15\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EQuality evaluation of generative-AI produced RDK\u003C\/h3\u003E\u003Cdiv id=\u0022sec-16\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EObjective metrics\u003C\/h4\u003E\u003Cp id=\u0022p-64\u0022\u003EObjective quality evaluation of rare disease profiles encompasses both the profile generation process and the properties of the produced profiles themselves. For the disease profile generation process for each LLM, we report running time, the number of input and output tokens, reasoning ratios, and external queries. 
Based on the generated disease profiles, we compare the following quantities across knowledge sources: 1) the categories and distributions of supporting references, 2) UMLS concepts matched across clinical semantic groups, and 3) text readability.\u003C\/p\u003E\u003Cp id=\u0022p-65\u0022\u003EReadability is measured using the Simple Measure of Gobbledygook (SMOG)\u003Csup\u003E\u003Ca id=\u0022xref-ref-63-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-63\u0022\u003E63\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-64-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-64\u0022\u003E64\u003C\/a\u003E\u003C\/sup\u003E score, an established approach that estimates the education level required to understand a text. Specifically, SMOG calculates the number of polysyllabic words per 30 sentences and a higher value indicates more complex, less accessible writing, whereas lower scores reflect clearer and more readable content. In general, SMOG scores of 13-16 correspond to college-level readability, 17-18 to graduate-level, and 19+ to post-graduate or doctoral-level readability.\u003Csup\u003E\u003Ca id=\u0022xref-ref-64-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-64\u0022\u003E64\u003C\/a\u003E\u003C\/sup\u003E\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-17\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EExpert review\u003C\/h4\u003E\u003Cp id=\u0022p-66\u0022\u003ETwo experts (V.E.K. and M.G.) with extensive experience in clinical phenotyping conducted quality evaluations of the collected rare disease profiles for four randomly selected diseases: Behcet\u2019s syndrome, granulomatosis with polyangiitis, idiopathic pulmonary fibrosis, and myasthenia gravis. In addition to disease profiles produced by LLMs, the corresponding NORD rare disease reports were also evaluated using the same criteria. The source of each disease profile remains blind to review experts to minimize potential bias. 
The evaluation covers four major dimensions using a scale from 1 (worst) to 5 (best): 1) factual accuracy, which measures the degree to which statements, data, and claims in the disease profile are verifiable, correct, and consistent with current peer-reviewed evidence and established clinical guidelines, 2) clinical completeness, which measures the degree to which the disease profile covers essential clinical domains necessary for comprehensive understanding and management of the disease, without critical omissions, 3) clinical utility, which measures the degree to which the disease profile provides actionable, practically applicable information that supports clinical reasoning, decision-making, and patient care workflows, and 4) clinical specificity, which measures the degree to which the disease profile captures unique characteristics, distinguishing features, and context-specific considerations particular to the rare disease being described, avoiding generic or non-specific content. See Supplementary \u003Cstrong\u003ETable 7\u003C\/strong\u003E for detailed evaluation criteria.\u003C\/p\u003E\u003Cp id=\u0022p-67\u0022\u003ECohen\u2019s Kappa with quadratic weights is leveraged to quantify inter-rater agreement, and the mean score across the two experts for each evaluation dimension is used to compare the quality across knowledge sources. We conducted dimension-wise error analysis based on the review documents collected from the review process.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-18\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EGeneral-purpose rare disease screening\u003C\/h3\u003E\u003Cdiv id=\u0022sec-19\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EStage 1: Sparse-dense similarity measurement\u003C\/h4\u003E\u003Cp id=\u0022p-68\u0022\u003EThe overall goal of Stage 1 is to produce a short list of rare disease candidates that includes the ground-truth diagnosis. 
To generate an initial ranked list, we calculated the similarity between a patient\u2019s clinical presentation descriptions and rare disease profiles using both sparse and dense representations. We use the BM25\u003Csup\u003E\u003Ca id=\u0022xref-ref-45-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-45\u0022\u003E45\u003C\/a\u003E\u003C\/sup\u003E algorithm for sparse matching and embedding-based similarity for dense semantic matching. BM25 emphasizes exact term overlap and thus favors scenarios where disease-specific terminology and explicit descriptors are present. In contrast, embedding-based similarity captures broader semantic relationships and can identify relevant diseases even when key terms are expressed differently or not stated verbatim. Because these methods excel under different linguistic conditions, combining them can leverage complementary signals and potentially yield more robust candidate prioritization than either method alone.\u003C\/p\u003E\u003Cp id=\u0022p-69\u0022\u003EWe focus on four core sections of the generated rare disease profiles that capture the most clinically relevant information: 1) clinical presentation, 2) diagnostic evaluation, 3) subtypes or variants, and 4) management and standard therapy. Given that NORD rare disease reports do not consistently follow these section designs, we used an LLM to restructure the original reports into the same four categories without rewriting or adding new information. 
Specifically, we prompt Qwen3-235B-A22B to assign each sentence to one of the four sections (See the prompt in Supplementary \u003Cstrong\u003ETable 8\u003C\/strong\u003E).\u003C\/p\u003E\u003Cp id=\u0022p-70\u0022\u003EAmong the top three embedding models on the Massive Text Embedding Benchmark (MTEB) leaderboard\u003Csup\u003E\u003Ca id=\u0022xref-ref-65-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-65\u0022\u003E65\u003C\/a\u003E\u003C\/sup\u003E as of September 2025, Qwen3-Embedding-8B\u003Csup\u003E\u003Ca id=\u0022xref-ref-46-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-46\u0022\u003E46\u003C\/a\u003E\u003C\/sup\u003E and Llama-Embed-Nemotron-8B\u003Csup\u003E\u003Ca id=\u0022xref-ref-66-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-66\u0022\u003E66\u003C\/a\u003E\u003C\/sup\u003E are open-source and suitable for further development. To determine their capability in real-world clinical retrieval settings, we constructed a disease prioritization dataset using 11 rare diseases (see the list in Supplementary \u003Cstrong\u003EFig. 5\u003C\/strong\u003E) and clinical notes of patients from Vanderbilt University Medical Center spanning six months before to six months after first diagnosis. This dataset comprises 984 patient-disease pairs and 23,384 clinical notes, with note titles falling within: 1) progress notes, 2) assessment and plan, 3) subjective and objective, 4) admission history and physical exam, 5) discharge summary, 6) emergency provider notes, and 7) general clinic notes. We evaluated embedding models using semantic match rate (SMR), which is calculated in three steps: 1) for each rare disease candidate and each patient, we identify the patient\u2019s clinical note with the highest embedding similarity to that disease, 2) we rank diseases based on the values of maximum similarity for the patient, and 3) we calculate the top-1 hit rate across patients. 
To simplify the embedding model selection, we first embedded only the name of each rare disease, where Qwen3-Embedding-8B achieves a significantly higher top-1 SMR than Llama-Embed-Nemotron-8B (Supplementary \u003Cstrong\u003ETable 2\u003C\/strong\u003E). The embedding visualizations of clinical notes and disease names using t-SNE\u003Csup\u003E\u003Ca id=\u0022xref-ref-67-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-67\u0022\u003E67\u003C\/a\u003E\u003C\/sup\u003E confirm the superiority of Qwen3-Embedding-8B (Supplementary \u003Cstrong\u003EFig. 5\u003C\/strong\u003E). We then enriched disease representation by incorporating the four core sections described above, and further finetuned a Qwen3-Embedding-8B model for each LLM-generated disease profile using the same dataset through contrastive learning, where we constructed positive pairs using patients\u2019 clinical notes and their corresponding diagnosed rare diseases and negative pairs through random selection. Because the finetuned Qwen3-Embedding-8B achieves the highest top-1 SMR (Supplementary \u003Cstrong\u003ETable 2\u003C\/strong\u003E), it is thus chosen as GEN-KnowRD\u2019s embedding model. In this study, Qwen3-Embedding-8B was used to embed the full patient and disease descriptions without truncation.\u003C\/p\u003E\u003Cp id=\u0022p-71\u0022\u003ETo fuse the sparse and dense signals, we use the reciprocal rank fusion (RRF) method\u003Csup\u003E\u003Ca id=\u0022xref-ref-47-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-47\u0022\u003E47\u003C\/a\u003E\u003C\/sup\u003E, a simple and robust strategy for aggregating ranked lists. 
The RRF-fused score of candidate disease \u003Cem\u003Ed\u003C\/em\u003E is defined as:\n\u003Cspan class=\u0022disp-formula\u0022 id=\u0022disp-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/embed\/graphic-7.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/embed\/graphic-7.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\n\n\u003C\/span\u003E\nwhere \u003Cem\u003Es\u003C\/em\u003E denotes a smoothing constant, set to 60 following the original study\u003Csup\u003E\u003Ca id=\u0022xref-ref-47-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-47\u0022\u003E47\u003C\/a\u003E\u003C\/sup\u003E, and \u003Cem\u003Erank\u003Csub\u003Edense\u003C\/sub\u003E\u003C\/em\u003E(\u003Cem\u003Ed\u003C\/em\u003E) and \u003Cem\u003Erank\u003Csub\u003Esparse\u003C\/sub\u003E\u003C\/em\u003E(\u003Cem\u003Ed\u003C\/em\u003E) denote disease \u003Cem\u003Ed\u003C\/em\u003E\u2019s positions in the dense and sparse rankings, respectively. Overall, RRF upweights diseases that appear near the top of either list and downweights those that rank poorly in both, yielding a fused ranking score that is robust to noise and complementary failure modes across retrieval methods. We evaluated recall@5 to justify this fusion design of GEN-KnowRD, which outperforms either sparse-only or dense-only retrieval (Supplementary \u003Cstrong\u003EFig. 
6\u003C\/strong\u003E).\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-20\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EStage 2: Rare disease reranking\u003C\/h4\u003E\u003Cp id=\u0022p-72\u0022\u003ETo refine the disease rankings from Stage 1, which is built on the whole disease space, we applied a reranker to the top K diseases. Specifically, we use Qwen3-reranker-8B\u003Csup\u003E\u003Ca id=\u0022xref-ref-46-4\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-46\u0022\u003E46\u003C\/a\u003E\u003C\/sup\u003E to re-evaluate pairwise relevance between a patient\u2019s clinical presentation description and each of the K diseases. The inputs of the reranker are 1) a patient\u2019s clinical presentation description, and 2) a disease document formed by combining the disease name with the core RDK sections. The reranker ingests the concatenated pair to align fine-grained clinical cues with disease-specific knowledge and then outputs a scalar relevance score reflecting how well the disease explains the patient\u2019s clinical presentation. The relevance score is calculated independently for each of the K candidates, which are then sorted according to the calculated scores to form the final ranked list. Compared to the first stage that relies on lexical overlap and embedding similarity, the reranker in the second stage can exploit richer interaction features to upweight candidates supported by multiple concordant findings and downweight those contradicted by explicitly negated evidence. 
Because we applied reranking only to a small candidate set (K=20), it remains computationally feasible while substantially improving precision at the top of the list.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-21\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EEnsemble\u003C\/h4\u003E\u003Cp id=\u0022p-73\u0022\u003EAs Stages 1 and 2 leverage the rare disease profiles produced by a single LLM, we further explored whether combining disease prioritizations from different LLMs can boost RDR performance. We implemented a Spearman rank correlation-based ensemble gating method that selects, for each patient, the most consensus-aligned disease ranking. Specifically, we computed the sum of pairwise rank correlations between each ranking and all others, and chose the ranking with the highest total agreement, aiming to reduce the chance of introducing spurious disease candidates from noisy lower-ranked predictions. We chose the top three diseases for correlation calculation since disease rankings beyond this threshold are more error-prone and less confident. We view this ensemble variant, GEN-KnowRD-Ensemble, as an aggregated pipeline for comparison with baseline models.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-22\u0022 class=\u0022subsection\u0022\u003E\u003Ch4\u003EDiscrepancy analysis\u003C\/h4\u003E\u003Cp id=\u0022p-74\u0022\u003ESince Claude Sonnet 4-produced rare disease profiles demonstrate the highest Recall@\u003Cem\u003Ex\u003C\/em\u003E for RDR compared to other LLMs and its performance gets saturated quickly from Recall@2, we examined all cases (n=502), where ground-truth diseases are ranked second by GEN-KnowRD, to summarize the relationship between those ranked first and the ground-truth diseases. We use Gemma-3-27B and GPT-5 to independently classify the relationships. When they output the same category, the relationship is accepted. 
When they identify different relationships, OpenEvidence is invoked to derive the final relationship. We define five relationship categories: 1) broader-narrower, where the disease ranked first (rank-1 disease) is a broader class or a specific subtype of the disease ranked second (rank-2 disease), 2) same-category, where both diseases belong to the same disease family or superclass, 3) etiologically related, where one disease may cause or lead to the other, 4) differential diagnosis, where the two diseases are commonly confused clinically, and 5) unrelated, where they involve different systems or etiologies with no meaningful links.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-23\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ESpecialized rare disease discrimination\u003C\/h3\u003E\u003Cp id=\u0022p-75\u0022\u003EClinical notes within the pre-diagnosis window (between 18 months and 3 months prior to diagnosis) from included patients first undergo the same UMLS concept extraction procedure as described above. Using the IPF disease profile generated by an LLM, the UMLS concepts appearing in its \u201cclinical presentation\u201d section are selected as the target set for clinical evidence extraction, as they capture clinically actionable, symptom-level descriptors most relevant to IPF diagnosis. To mitigate inaccurate evidence matching, we first applied MedGemma-27B\u003Csup\u003E\u003Ca id=\u0022xref-ref-68-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-68\u0022\u003E68\u003C\/a\u003E\u003C\/sup\u003E to process all clinical notes and classify each concept as one of the following categories: family history, historical condition, hypothetical condition, negated condition, possible condition, or others. Only concepts classified as possible condition or others are used in the subsequent analysis. 
For each patient-week and each symptom concept, we assigned a three-level code, where 0, 1, and 2 indicate no mention of the symptom in that week\u2019s notes, symptom mentioned that week, and symptom not mentioned despite other symptoms being mentioned in the same week, respectively. This results in a complete symptom-week matrix per patient, in which we identified all symptom concept pairs that co-occur within a week (i.e., both symptoms coded as 1). For each symptom pair, we compute two clinically motivated quantities: 1) the number of distinct co-occurrence weeks (\u003Cem\u003Epair\u003C\/em\u003E_\u003Cem\u003Eweek\u003C\/em\u003E_\u003Cem\u003Esingle\u003C\/em\u003E) and 2) the number of co-occurrence weeks in which either symptom is labeled as worsening (\u003Cem\u003Eworsen\u003C\/em\u003E_\u003Cem\u003Ecount\u003C\/em\u003E_\u003Cem\u003Esingle\u003C\/em\u003E) by MedGemma-27B. These pair-level quantities are then aggregated to the patient level by summing across all symptom pairs to represent overall co-occurrence activity (\u003Cem\u003Epair\u003C\/em\u003E_\u003Cem\u003Eweek\u003C\/em\u003E_\u003Cem\u003Eagg\u003C\/em\u003E) and worsening burden ( \u003Cem\u003Eworsen\u003C\/em\u003E_\u003Cem\u003Ecount\u003C\/em\u003E_\u003Cem\u003Eagg\u003C\/em\u003E). 
To weigh symptoms by discriminative relevance, each symptom concept \u003Cem\u003Ec\u003C\/em\u003E receives a symptom importance score (SIS) based on prevalence difference between IPF cases and controls, calculated as \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2026\/03\/03\/2026.03.02.26347469\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E, and a patient-specific symptom weight, calculated as \u003Cem\u003ESIS\u003C\/em\u003E(\u003Cem\u003Ec\u003C\/em\u003E) \u22c5 (1 + \u003Cem\u003Efreq\u003Csub\u003Epa\u003C\/sub\u003E(c)\u003C\/em\u003E), where \u003Cem\u003Efreq\u003Csub\u003Epa\u003C\/sub\u003E\u003C\/em\u003E(\u003Cem\u003Ec\u003C\/em\u003E) is the number of weeks in which \u003Cem\u003Ec\u003C\/em\u003E is present for that patient. We also calculate the term frequency-inverse document frequency (TF-IDF) value of concept \u003Cem\u003Ec\u003C\/em\u003E as an alternative importance score for comparison. To avoid double counting symptoms that appear in multiple co-occurring pairs, we pool all symptoms participating in any pair, deduplicated at the (patient, symptom) level, and sum the unique symptom weights to obtain total symptom weight (\u003Cem\u003Etotal\u003C\/em\u003E_\u003Cem\u003Esymptom\u003C\/em\u003E_\u003Cem\u003Eweight\u003C\/em\u003E). 
Finally, we normalize overall symptom burden by co-occurrence activity using \u003Cem\u003Enormalized\u003C\/em\u003E_\u003Cem\u003Eweight\u003C\/em\u003E = \u003Cem\u003Etotal\u003C\/em\u003E_\u003Cem\u003Esymptom\u003C\/em\u003E_\u003Cem\u003Eweight\u003C\/em\u003E\/(\u003Cem\u003Epair\u003C\/em\u003E_\u003Cem\u003Eweek\u003C\/em\u003E_\u003Cem\u003Eagg\u003C\/em\u003E + 1). The downstream classifiers use \u003Cem\u003Enormalized\u003C\/em\u003E_\u003Cem\u003Eweight\u003C\/em\u003E, \u003Cem\u003Epair\u003C\/em\u003E_\u003Cem\u003Eweek\u003C\/em\u003E_\u003Cem\u003Eagg\u003C\/em\u003E, and \u003Cem\u003Eworsen\u003C\/em\u003E_\u003Cem\u003Ecount\u003C\/em\u003E_\u003Cem\u003Eagg\u003C\/em\u003E as patient-level features to distinguish IPF cases from two different types of controls.\u003C\/p\u003E\u003Cp id=\u0022p-76\u0022\u003EFor each task, we fit a logistic regression model and evaluate model performance using stratified out-of-bag bootstrap validation\u003Csup\u003E\u003Ca id=\u0022xref-ref-69-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-69\u0022\u003E69\u003C\/a\u003E\u003C\/sup\u003E, with a 2:1 training-test ratio and 100 repetitions. Each trained classifier is evaluated on the held-out test set using standard measures that quantify both classification performance and the utility of the LLM-generated rare disease profiles for supporting early IPF diagnosis.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-24\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EData Availability\u003C\/h2\u003E\u003Cp id=\u0022p-77\u0022\u003ELLM-generated rare disease profiles and the corresponding extracted UMLS concepts for each rare disease (i.e., PheMAP-RD) are available at \u003Ca href=\u0022https:\/\/wei-phenolib-api.app.vumc.org\/\u0022\u003Ehttps:\/\/wei-phenolib-api.app.vumc.org\/\u003C\/a\u003E. 
This service also hosts the patient clinical presentation descriptions (9,290) used in our public benchmark evaluations and GEN-KnowRD-Ensemble results. The private benchmarks for IPF early diagnosis are available upon request and IRB approval by Vanderbilt University Medical Center.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-26\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAuthor Contribution Statement\u003C\/h2\u003E\u003Cp id=\u0022p-79\u0022\u003EW.Q.W., C.Y., and W.S. conceived and designed this study. W.S. and C.Y. conducted data preprocessing, performed the experiments, and analyzed the results. W.S. and Y.X. performed LLM data collection. M.E.G. and V.E.K. reviewed and evaluated LLM-generated rare disease profiles as clinical experts. C.Y. and W.S. summarized the major experimental findings and drafted the manuscript. W.Q.W., H.L., L.W., J.W., and R.L. assisted in interpreting the results and provided significant intellectual feedback. A.L.D., Q.F., C.S., V.A.B., J.L., C.M.S., K.W., P.J.E., and B.A.M. extensively revised the manuscript. W.Q.W. supervised the study. 
All authors participated in manuscript preparation and approved the final version.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-27\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EEthics Declarations\u003C\/h2\u003E\u003Cp id=\u0022p-80\u0022\u003EThere is no conflict of interest for this study.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ack\u0022 id=\u0022ack-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAcknowledgements\u003C\/h2\u003E\u003Cp id=\u0022p-78\u0022\u003EThis work is supported in part by National Institutes of Health grants R01HG012748, K99LM014428, R00LM014429, R01HG013031, R01AG084550, R01HL171809, R01HG012748, R01LM012806, P50HD106446, R01GM139891, and UL1TR002243.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section fn-group\u0022 id=\u0022fn-group-1\u0022\u003E\u003Ch2\u003EFootnotes\u003C\/h2\u003E\u003Cul\u003E\u003Cli class=\u0022fn\u0022 id=\u0022fn-1\u0022\u003E\u003Cp id=\u0022p-1\u0022\u003E\u003Ca class=\u0022rev-xref\u0022 href=\u0022#xref-fn-1-1\u0022\u003E\u21b5\u003C\/a\u003E\u003Cspan class=\u0022fn-label\u0022\u003E*\u003C\/span\u003E Co-first authors\u003C\/p\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ref-list\u0022 id=\u0022ref-list-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EReferences\u003C\/h2\u003E\u003Col class=\u0022cit-list ref-use-labels\u0022\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E1.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-1-1\u0022 title=\u0022View reference 1. in text\u0022 id=\u0022ref-1\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.1\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-article-title\u0022\u003EThe Lancet Global Health. The landscape for rare diseases in 2024\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ELancet Glob. Health\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E12\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee341\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DLancet%2BGlob.%2BHealth%26rft.volume%253D12%26rft.spage%253De341%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E2.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-2-1\u0022 title=\u0022View reference 2. in text\u0022 id=\u0022ref-2\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.2\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBaynam\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EGlobal health for rare diseases through primary care\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ELancet Glob. 
Health\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E12\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee1192\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003Ee1199\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DLancet%2BGlob.%2BHealth%26rft.volume%253D12%26rft.spage%253De1192%26rft_id%253Dinfo%253Apmid%252F38876765%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=38876765\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E3.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.3\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ETaruscio\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGahl\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW. A\u003C\/span\u003E\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-article-title\u0022\u003ERare diseases: Challenges and opportunities for research and public health\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENat. Rev. Dis. Primer\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E10\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E13\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E4.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-4-1\u0022 title=\u0022View reference 4. in text\u0022 id=\u0022ref-4\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.4\u0022 data-doi=\u002210.1038\/s41431-024-01604-z\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EFaye\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ETime to diagnosis and determinants of diagnostic delays of people living with a rare disease: Results of a Rare Barometer retrospective patient survey\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EEur. J. Hum. Genet\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E32\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E1116\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E1126\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DEur.%2BJ.%2BHum.%2BGenet%26rft.volume%253D32%26rft.spage%253D1116%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41431-024-01604-z%26rft_id%253Dinfo%253Apmid%252F38755315%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41431-024-01604-z\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=38755315\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E5.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-5-1\u0022 title=\u0022View reference 5. 
in text\u0022 id=\u0022ref-5\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.5\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESchaefer\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELehne\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESchepers\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPrasser\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EThun\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EThe use of machine learning in rare diseases: A scoping review\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EOrphanet J. Rare Dis\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E15\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E145\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E6.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-6-1\u0022 title=\u0022View reference 6. in text\u0022 id=\u0022ref-6\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.6\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELagorce\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EPhenotypic similarity-based approach for variant prioritization for unsolved rare disease: A preliminary methodological report\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EEur. J. Hum. Genet\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E32\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E182\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E189\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DEur.%2BJ.%2BHum.%2BGenet%26rft.volume%253D32%26rft.spage%253D182%26rft_id%253Dinfo%253Apmid%252F37926714%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=37926714\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E7.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.7\u0022 data-doi=\u002210.1038\/s41467-024-52407-1\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESmail\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EComplex trait associations in rare diseases and impacts on Mendelian variant interpretation\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENat. Commun\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E15\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E8196\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNat.%2BCommun%26rft.volume%253D15%26rft.spage%253D8196%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41467-024-52407-1%26rft_id%253Dinfo%253Apmid%252F39294130%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41467-024-52407-1\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=39294130\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E8.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.8\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EChimirri\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E 
\u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EConsistent performance of large language models in rare disease diagnosis across ten languages and 4917 cases\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EeBioMedicine\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E121\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E105957\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E9.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.9\u0022 data-doi=\u002210.1093\/bib\/bbad172\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhai\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHuang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EShen\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhu\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-article-title\u0022\u003EPhen2Disease: A phenotype-driven model for disease and gene prioritization by bidirectional maximum matching semantic similarities\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EBrief. Bioinform\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E24\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ebbad172\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DBrief.%2BBioinform%26rft.volume%253D24%26rft.spage%253Dbbad172%26rft_id%253Dinfo%253Adoi%252F10.1093%252Fbib%252Fbbad172%26rft_id%253Dinfo%253Apmid%252F37248747%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1093\/bib\/bbad172\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=37248747\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E10.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.10\u0022 data-doi=\u002210.1038\/s41436-018-0072-y\u0022\u003E\u003Cdiv 
class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJagadeesh\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK. A.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EPhrank measures phenotype sets similarity to greatly improve Mendelian diagnostic disease prioritization\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EGenet. Med\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E21\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E464\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E470\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DGenet.%2BMed%26rft.volume%253D21%26rft.spage%253D464%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41436-018-0072-y%26rft_id%253Dinfo%253Apmid%252F29997393%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41436-018-0072-y\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=29997393\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles 
cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E11.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.11\u0022 data-doi=\u002210.1038\/s41436-019-0439-8\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELi\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhao\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBustamante\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. D.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMa\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWong\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW. H\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EXrare: A machine learning method jointly modeling phenotypes and genetic evidence for rare disease diagnosis\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EGenet. Med\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E21\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E2126\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E2134\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DGenet.%2BMed%26rft.volume%253D21%26rft.spage%253D2126%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41436-019-0439-8%26rft_id%253Dinfo%253Apmid%252F30675030%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41436-019-0439-8\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=30675030\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E12.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.12\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJia\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan 
class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ERDAD: A machine learning system to support phenotype-based rare disease diagnosis\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EFront. Genet\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E9\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E587\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E13.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-book no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.13\u0022 data-doi=\u002210.1109\/BIBM.2016.7822617\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPeng\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-chapter-title\u0022\u003EMeasuring phenotype semantic similarity using Human Phenotype Ontology\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003Ein 2016 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)\u003C\/span\u003E \u003Cspan class=\u0022cit-fpage\u0022\u003E763\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E766\u003C\/span\u003E (\u003Cspan class=\u0022cit-publ-name\u0022\u003EIEEE\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EShenzhen, China\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2016\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.1109\/BIBM.2016.7822617\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253Din%2B2016%2BIEEE%2BInternational%2BConference%2Bon%2BBioinformatics%2Band%2BBiomedicine%2B%2528BIBM%2529%26rft_id%253Dinfo%253Adoi%252F10.1109%252FBIBM.2016.7822617%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1109\/BIBM.2016.7822617\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E14.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.14\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan 
class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhao\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EPhen2Gene: Rapid phenotype-driven gene prioritization for rare diseases\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENAR Genomics Bioinforma\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E2\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Elqaa032\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNAR%2BGenomics%2BBioinforma%26rft.volume%253D2%26rft.spage%253Dlqaa032%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E15.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.15\u0022 data-doi=\u002210.1371\/journal.pone.0170365\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPavan\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EClinical practice guidelines for rare 
diseases: The Orphanet database\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EPLOS ONE\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E12\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee0170365\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2017\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DPLOS%2BONE%26rft.volume%253D12%26rft.spage%253De0170365%26rft_id%253Dinfo%253Adoi%252F10.1371%252Fjournal.pone.0170365%26rft_id%253Dinfo%253Apmid%252F28099516%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1371\/journal.pone.0170365\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=28099516\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E16.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.16\u0022 data-doi=\u002210.1093\/nar\/gki033\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHamosh\u003C\/span\u003E,  
\u003Cspan class=\u0022cit-name-given-names\u0022\u003EA\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EOnline Mendelian Inheritance in Man (OMIM), a knowledgebase of human genes and genetic disorders\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENucleic Acids Res\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E33\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003ED514\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003ED517\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2004\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNucleic%2BAcids%2BRes%26rft.volume%253D33%26rft.spage%253DD514%26rft_id%253Dinfo%253Adoi%252F10.1093%252Fnar%252Fgki033%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1093\/nar\/gki033\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=000226524300106\u0026amp;link_type=ISI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-newisilink cit-ref-sprinkles-webofscience\u0022\u003E\u003Cspan\u003EWeb of Science\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E17.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-17-1\u0022 title=\u0022View reference 17. 
in text\u0022 id=\u0022ref-17\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.17\u0022 data-doi=\u002210.1093\/nar\/gkv1222\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELandrum\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM. J.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EClinVar: Public archive of interpretations of clinically relevant variants\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENucleic Acids Res\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E44\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003ED862\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003ED868\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2016\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNucleic%2BAcids%2BRes%26rft_id%253Dinfo%253Adoi%252F10.1093%252Fnar%252Fgkv1222%26rft_id%253Dinfo%253Apmid%252F26582918%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1093\/nar\/gkv1222\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca 
href=\u0022\/lookup\/external-ref?access_num=26582918\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E18.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-18-1\u0022 title=\u0022View reference 18. in text\u0022 id=\u0022ref-18\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.18\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EShourick\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWack\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJannot\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.-S\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EAssessing rare diseases prevalence using literature quantification\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EOrphanet J. Rare Dis\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E16\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E139\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2021\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E19.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.19\u0022 data-doi=\u002210.1038\/s41591-024-03190-5\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EIba\u00f1ez\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EIncreased frequency of repeat expansion mutations across different populations\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENat. Med\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E30\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E3357\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E3368\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNat.%2BMed%26rft.volume%253D30%26rft.spage%253D3357%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41591-024-03190-5%26rft_id%253Dinfo%253Apmid%252F39354197%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41591-024-03190-5\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=39354197\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E20.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.20\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKariampuzha\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW. 
Z.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EPrecision information extraction for rare disease epidemiology at scale\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Transl. Med\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E21\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E157\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E21.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.21\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ENicholson\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED. N.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHimmelstein\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED. S.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGreene\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. S\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EExpanding a database-derived biomedical knowledge graph via multi-relation extraction from biomedical abstracts\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EBioData Min\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E15\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E26\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2022\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E22.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.22\u0022 data-doi=\u002210.1093\/nar\/gku1205\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EAmberger\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. S.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBocchini\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. A.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESchiettecatte\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EScott\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. F.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHamosh\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EOMIM.org: Online Mendelian Inheritance in Man (OMIM\u00ae), an online catalog of human genes and genetic disorders\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENucleic Acids Res\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E43\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003ED789\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003ED798\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2015\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNucleic%2BAcids%2BRes%26rft_id%253Dinfo%253Adoi%252F10.1093%252Fnar%252Fgku1205%26rft_id%253Dinfo%253Apmid%252F25428349%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1093\/nar\/gku1205\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=25428349\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E23.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.23\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EVasilevsky\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN. 
A.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EMondo: Integrating disease terminology across communities\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EGENETICS\u003C\/abbr\u003E \u003Cspan class=\u0022cit-issue\u0022\u003Eiyaf215\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E24.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-24-1\u0022 title=\u0022View reference 24. in text\u0022 id=\u0022ref-24\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.24\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMart\u00ednez-deMiguel\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESegura-Bedmar\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EChac\u00f3n-Solano\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGuerrero-Aspizua\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-article-title\u0022\u003EThe RareDis corpus: A corpus annotated with rare diseases, their signs and symptoms\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Biomed. Inform\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E125\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E103961\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2022\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E25.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-25-1\u0022 title=\u0022View reference 25. in text\u0022 id=\u0022ref-25\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.25\u0022 data-doi=\u002210.1002\/ajmg.a.63878\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYoung\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. C.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EDiagnostic accuracy of a custom large language model on rare pediatric disease case reports\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAm. J. Med. Genet. A\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E197\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee63878\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAm.%2BJ.%2BMed.%2BGenet.%2BA%26rft.volume%253D197%26rft.spage%253De63878%26rft_id%253Dinfo%253Adoi%252F10.1002%252Fajmg.a.63878%26rft_id%253Dinfo%253Apmid%252F39268988%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1002\/ajmg.a.63878\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=39268988\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E26.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.26\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EShyr\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E 
\u003Cspan class=\u0022cit-article-title\u0022\u003EIdentifying and extracting rare diseases and their phenotypes with large language models\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Healthc. Inform. Res\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E8\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E438\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E461\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJ.%2BHealthc.%2BInform.%2BRes%26rft.volume%253D8%26rft.spage%253D438%26rft_id%253Dinfo%253Apmid%252F38681753%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=38681753\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E27.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.27\u0022 data-doi=\u002210.1001\/jamanetworkopen.2025.28538\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EShyr\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan 
class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ELarge language models for rare disease diagnosis at the undiagnosed diseases network\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJAMA Netw. Open\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E8\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee2528538\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJAMA%2BNetw.%2BOpen%26rft.volume%253D8%26rft.spage%253De2528538%26rft_id%253Dinfo%253Adoi%252F10.1001%252Fjamanetworkopen.2025.28538%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1001\/jamanetworkopen.2025.28538\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E28.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-28-1\u0022 title=\u0022View reference 28. 
in text\u0022 id=\u0022ref-28\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.28\u0022 data-doi=\u002210.1038\/s41586-025-10097-9\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhao\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EAn agentic system for rare disease diagnosis with traceable reasoning\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENature\u003C\/abbr\u003E\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1038\/s41586-025-10097-9\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2026\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNature%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41586-025-10097-9%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41586-025-10097-9\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi 
cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E29.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-29-1\u0022 title=\u0022View reference 29. in text\u0022 id=\u0022ref-29\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.03.02.26347469v1.29\u0022 data-doi=\u002210.1145\/3637528.3671576\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EChen\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-chapter-title\u0022\u003ERareBench: Can LLMs Serve as Rare Diseases Specialists?\u003C\/span\u003E \u003Cspan class=\u0022cit-source\u0022\u003EIn Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining\u003C\/span\u003E \u003Cspan class=\u0022cit-fpage\u0022\u003E4850\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E4861\u003C\/span\u003E (\u003Cspan class=\u0022cit-publ-name\u0022\u003EACM\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EBarcelona Spain\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.1145\/3637528.3671576\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca 
href=\u0022{openurl}?query=rft.jtitle%253DIn%2BProceedings%2Bof%2Bthe%2B30th%2BACM%2BSIGKDD%2BConference%2Bon%2BKnowledge%2BDiscovery%2Band%2BData%2BMining%26rft_id%253Dinfo%253Adoi%252F10.1145%252F3637528.3671576%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1145\/3637528.3671576\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E30.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-30-1\u0022 title=\u0022View reference 30. in text\u0022 id=\u0022ref-30\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.30\u0022 data-doi=\u002210.48550\/arXiv.2510.10161\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ELarge language model sourcing: A survey\u003C\/span\u003E. 
Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/arXiv.2510.10161\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft_id%253Dinfo%253Adoi%252F10.48550%252FarXiv.2510.10161%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.48550\/arXiv.2510.10161\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E31.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.31\u0022 data-doi=\u002210.48550\/ARXIV.2504.13079\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPrasad\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan 
class=\u0022cit-name-surname\u0022\u003EStengel-Eskin\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBansal\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ERetrieval-augmented generation with conflicting evidence\u003C\/span\u003E. Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/ARXIV.2504.13079\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft_id%253Dinfo%253Adoi%252F10.48550%252FARXIV.2504.13079%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.48550\/ARXIV.2504.13079\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E32.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.32\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan 
class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMasanneck\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMeuth\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES. G.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPawlitzki\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EEvaluating base and retrieval augmented LLMs with document or online support for evidence based neurology\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENpj Digit. Med\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E8\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E137\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E33.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-33-1\u0022 title=\u0022View reference 33. 
in text\u0022 id=\u0022ref-33\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.33\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EKnowledge editing for large language models: A survey\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EACM Comput. Surv\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E57\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E1\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E37\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DACM%2BComput.%2BSurv%26rft.volume%253D57%26rft.spage%253D1%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E34.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-34-1\u0022 title=\u0022View reference 34. 
in text\u0022 id=\u0022ref-34\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.34\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYoran\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWolfson\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERam\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBerant\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EMaking retrieval-augmented language models robust to irrelevant context\u003C\/span\u003E. Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/arXiv.2310.01558\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E35.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-35-1\u0022 title=\u0022View reference 35. 
in text\u0022 id=\u0022ref-35\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.35\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWu\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EAn automated framework for assessing how well LLMs cite relevant medical references\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENat. Commun\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E16\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E3615\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNat.%2BCommun%26rft.volume%253D16%26rft.spage%253D3615%26rft_id%253Dinfo%253Apmid%252F40240349%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=40240349\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E36.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 
href=\u0022#xref-ref-36-1\u0022 title=\u0022View reference 36. in text\u0022 id=\u0022ref-36\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.36\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERahman\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EGeneralization in healthcare AI: Evaluation of a clinical large language model\u003C\/span\u003E. Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/arXiv.2402.10965\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E37.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-37-1\u0022 title=\u0022View reference 37. 
in text\u0022 id=\u0022ref-37\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.37\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPeng\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EFrom GPT to DeepSeek: Significant gaps remain in realizing AI in healthcare\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Biomed. Inform\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E163\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E104791\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E38.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-38-1\u0022 title=\u0022View reference 38. in text\u0022 id=\u0022ref-38\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.38\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-article-title\u0022\u003ENational Organization for Rare Disorders. Rare disease database\u003C\/span\u003E. 
\u003Ca href=\u0022https:\/\/rarediseases.org\/rare-diseases\/\u0022\u003Ehttps:\/\/rarediseases.org\/rare-diseases\/\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E39.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-39-1\u0022 title=\u0022View reference 39. in text\u0022 id=\u0022ref-39\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.39\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EAnthropic\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EIntroducing Claude 4\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/www.anthropic.com\/news\/claude-4\u0022\u003Ehttps:\/\/www.anthropic.com\/news\/claude-4\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E40.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-40-1\u0022 title=\u0022View reference 40. 
in text\u0022 id=\u0022ref-40\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.40\u0022 data-doi=\u002210.48550\/ARXIV.2501.12948\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EDeepSeek-AI\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EDeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning\u003C\/span\u003E. Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/ARXIV.2501.12948\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft_id%253Dinfo%253Adoi%252F10.48550%252FARXIV.2501.12948%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.48550\/ARXIV.2501.12948\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E41.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-41-1\u0022 title=\u0022View reference 41. 
in text\u0022 id=\u0022ref-41\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.41\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-article-title\u0022\u003EGoogle DeepMind\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EGemini 2.5: Our Most Intelligent AI Model\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/blog.google\/innovation-and-ai\/models-and-research\/google-deepmind\/gemini-model-thinking-updates-march-2025\u0022\u003Ehttps:\/\/blog.google\/innovation-and-ai\/models-and-research\/google-deepmind\/gemini-model-thinking-updates-march-2025\u003C\/a\u003E \/ \u003Ca href=\u0022https:\/\/blog.google\/innovation-and-ai\/models-and-research\/google-deepmind\/gemini-model-thinking-updates-march-2025\/\u0022\u003Ehttps:\/\/blog.google\/innovation-and-ai\/models-and-research\/google-deepmind\/gemini-model-thinking-updates-march-2025\/\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E42.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-42-1\u0022 title=\u0022View reference 42. in text\u0022 id=\u0022ref-42\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.42\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EOpenAI\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EIntroducing OpenAI O3 and O4-Mini\u003C\/span\u003E. 
\u003Ca href=\u0022https:\/\/openai.com\/index\/introducing-o3-and-o4-mini\/\u0022\u003Ehttps:\/\/openai.com\/index\/introducing-o3-and-o4-mini\/\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E43.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-43-1\u0022 title=\u0022View reference 43. in text\u0022 id=\u0022ref-43\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.43\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMao\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EA phenotype-based AI pipeline outperforms human experts in differentially diagnosing rare diseases using EHRs\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENpj Digit. Med\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E8\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E68\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E44.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-44-1\u0022 title=\u0022View reference 44. 
in text\u0022 id=\u0022ref-44\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.44\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EPheMAP-RD. PheMAP-RD\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/wei-phenolib-api.app.vumc.org\/\u0022\u003Ehttps:\/\/wei-phenolib-api.app.vumc.org\/\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2026\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E45.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-45-1\u0022 title=\u0022View reference 45. in text\u0022 id=\u0022ref-45\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.45\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERobertson\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZaragoza\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EThe Probabilistic relevance framework: BM25 and beyond\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EFound. Trends\u00ae Inf. Retr\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E3\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E333\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E389\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2009\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DFound.%2BTrends%25AE%2BInf.%2BRetr%26rft.volume%253D3%26rft.spage%253D333%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E46.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-46-1\u0022 title=\u0022View reference 46. in text\u0022 id=\u0022ref-46\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.46\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EQwen3 embedding: Advancing text embedding and reranking through foundation models\u003C\/span\u003E. 
Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/ARXIV.2506.05176\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E47.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-47-1\u0022 title=\u0022View reference 47. in text\u0022 id=\u0022ref-47\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.03.02.26347469v1.47\u0022 data-doi=\u002210.1145\/1571941.1572114\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ECormack\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG. V.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EClarke\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC. L. A.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBuettcher\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-chapter-title\u0022\u003EReciprocal rank fusion outperforms condorcet and individual rank learning methods\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003EIn Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval\u003C\/span\u003E \u003Cspan class=\u0022cit-fpage\u0022\u003E758\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E759\u003C\/span\u003E (\u003Cspan class=\u0022cit-publ-name\u0022\u003EACM\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EBoston MA USA\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2009\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.1145\/1571941.1572114\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DIn%2BProceedings%2Bof%2Bthe%2B32nd%2Binternational%2BACM%2BSIGIR%2Bconference%2Bon%2BResearch%2Band%2Bdevelopment%2Bin%2Binformation%2Bretrieval%26rft_id%253Dinfo%253Adoi%252F10.1145%252F1571941.1572114%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1145\/1571941.1572114\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E48.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-48-1\u0022 title=\u0022View reference 48. 
in text\u0022 id=\u0022ref-48\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.48\u0022 data-doi=\u002210.1136\/bmjresp-2024-002333\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGrant-Orser\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EThe diagnostic pathway for patients with interstitial lung disease: A mixed-methods study of patients and physicians\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EBMJ Open Respir. Res\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E11\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee002333\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DBMJ%2BOpen%2BRespiratory%2BResearch%26rft.stitle%253DBMJ%2BOpen%2BResp%2BRes%26rft.aulast%253DGrant-Orser%26rft.auinit1%253DA.%26rft.volume%253D11%26rft.issue%253D1%26rft.spage%253De002333%26rft.epage%253De002333%26rft.atitle%253DThe%2Bdiagnostic%2Bpathway%2Bfor%2Bpatients%2Bwith%2Binterstitial%2Blung%2Bdisease%253A%2Ba%2Bmixed-methods%2Bstudy%2Bof%2Bpatients%2Band%2Bphysicians%26rft_id%253Dinfo%253Adoi%252F10.1136%252Fbmjresp-2024-002333%26rft_id%253Dinfo%253Apmid%252F38688689%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl 
cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/ijlink\/YTozOntzOjQ6InBhdGgiO3M6MTQ6Ii9sb29rdXAvaWpsaW5rIjtzOjU6InF1ZXJ5IjthOjQ6e3M6ODoibGlua1R5cGUiO3M6NDoiQUJTVCI7czoxMToiam91cm5hbENvZGUiO3M6NzoiYm1qcmVzcCI7czo1OiJyZXNpZCI7czoxMjoiMTEvMS9lMDAyMzMzIjtzOjQ6ImF0b20iO3M6NTA6Ii9tZWRyeGl2L2Vhcmx5LzIwMjYvMDMvMDMvMjAyNi4wMy4wMi4yNjM0NzQ2OS5hdG9tIjt9czo4OiJmcmFnbWVudCI7czowOiIiO30=\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-ijlink\u0022\u003E\u003Cspan\u003E\u003Cspan class=\u0022cit-reflinks-abstract\u0022\u003EAbstract\u003C\/span\u003E\u003Cspan class=\u0022cit-sep cit-reflinks-variant-name-sep\u0022\u003E\/\u003C\/span\u003E\u003Cspan class=\u0022cit-reflinks-full-text\u0022\u003E\u003Cspan class=\u0022free-full-text\u0022\u003EFREE \u003C\/span\u003EFull Text\u003C\/span\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E49.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-49-1\u0022 title=\u0022View reference 49. in text\u0022 id=\u0022ref-49\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.49\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EOnishchenko\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EScreening for idiopathic pulmonary fibrosis using comorbidity signatures in electronic health records\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENat. Med\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E28\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E2107\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E2116\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2022\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNat.%2BMed%26rft.volume%253D28%26rft.spage%253D2107%26rft_id%253Dinfo%253Apmid%252F36175678%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=36175678\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E50.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-50-1\u0022 title=\u0022View reference 50. 
in text\u0022 id=\u0022ref-50\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.50\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERonicke\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ECan a decision support system accelerate rare disease diagnosis? Evaluating the potential impact of Ada DX in a retrospective study\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EOrphanet J. Rare Dis\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E14\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E69\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E51.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-51-1\u0022 title=\u0022View reference 51. in text\u0022 id=\u0022ref-51\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.51\u0022 data-doi=\u002210.1016\/j.ajhg.2020.06.021\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERobinson\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP. 
N.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EInterpretable clinical genomics with a likelihood ratio paradigm\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAm. J. Hum. Genet\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E107\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E403\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E417\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAm.%2BJ.%2BHum.%2BGenet%26rft.volume%253D107%26rft.spage%253D403%26rft_id%253Dinfo%253Adoi%252F10.1016%252Fj.ajhg.2020.06.021%26rft_id%253Dinfo%253Apmid%252F32755546%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1016\/j.ajhg.2020.06.021\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=32755546\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E52.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 
href=\u0022#xref-ref-52-1\u0022 title=\u0022View reference 52. in text\u0022 id=\u0022ref-52\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.52\u0022 data-doi=\u002210.1002\/humu.22858\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPhilippakis\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA. A.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EThe Matchmaker exchange: A platform for rare disease gene discovery\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EHum. Mutat\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E36\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E915\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E921\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2015\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DHum.%2BMutat%26rft.volume%253D36%26rft.spage%253D915%26rft_id%253Dinfo%253Adoi%252F10.1002%252Fhumu.22858%26rft_id%253Dinfo%253Apmid%252F26295439%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1002\/humu.22858\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi 
cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=26295439\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E53.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-53-1\u0022 title=\u0022View reference 53. in text\u0022 id=\u0022ref-53\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.53\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ETopel\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EScheible\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ETrefz\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHofestadt\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003ERAMEDIS: A comprehensive information system for variations and corresponding phenotypes of rare metabolic diseases\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EHum. Mutat\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E31\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003EE1081\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003EE1088\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2010\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DHuman%2Bmutation%26rft.stitle%253DHum%2BMutat%26rft.aulast%253DTopel%26rft.auinit1%253DT.%26rft.volume%253D31%26rft.issue%253D1%26rft.spage%253DE1081%26rft.epage%253DE1088%26rft.atitle%253DRAMEDIS%253A%2Ba%2Bcomprehensive%2Binformation%2Bsystem%2Bfor%2Bvariations%2Band%2Bcorresponding%2Bphenotypes%2Bof%2Brare%2Bmetabolic%2Bdiseases.%26rft_id%253Dinfo%253Apmid%252F19953641%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=19953641\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E54.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-54-1\u0022 title=\u0022View reference 54. in text\u0022 id=\u0022ref-54\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.54\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth cit-collab\u0022\u003EUniversity of Washington\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-article-title\u0022\u003EMyGene2\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/mygene2.org\/MyGene2\/\u0022\u003Ehttps:\/\/mygene2.org\/MyGene2\/\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2026\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E55.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-55-1\u0022 title=\u0022View reference 55. in text\u0022 id=\u0022ref-55\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.55\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhao\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EZ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJin\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EChen\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPeng\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYu\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-article-title\u0022\u003EA large-scale dataset of patient summaries for retrieval-based clinical decision support systems\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ESci. Data\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E10\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E909\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E56.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-56-1\u0022 title=\u0022View reference 56. in text\u0022 id=\u0022ref-56\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.56\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-article-title\u0022\u003EOrphanet. Orphanet Disease Classification\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/www.orpha.net\/en\/disease\/classification\/heads\u0022\u003Ehttps:\/\/www.orpha.net\/en\/disease\/classification\/heads\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E57.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-57-1\u0022 title=\u0022View reference 57. 
in text\u0022 id=\u0022ref-57\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.57\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELandis\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ. R.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKoch\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG. G\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EThe measurement of observer agreement for categorical data\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EBiometrics\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E33\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E159\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E1977\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E58.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-58-1\u0022 title=\u0022View reference 58. 
in text\u0022 id=\u0022ref-58\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.03.02.26347469v1.58\u0022 data-doi=\u002210.18653\/v1\/2024.naacl-long.273\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMercer\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERudzicz\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-chapter-title\u0022\u003EMulti-stage retrieve and re-rank model for automatic medical coding recommendation\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003EIn Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies\u003C\/span\u003E (\u003Cem\u003EVolume\u003C\/em\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E1\u003C\/span\u003E: \u003Cem\u003ELong Papers\u003C\/em\u003E) \u003Cspan class=\u0022cit-fpage\u0022\u003E4881\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E4891\u003C\/span\u003E (\u003Cspan class=\u0022cit-publ-name\u0022\u003EAssociation for Computational Linguistics\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EMexico City, Mexico\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.18653\/v1\/2024.naacl-long.273\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DIn%2BProceedings%2Bof%2Bthe%2B2024%2BConference%2Bof%2Bthe%2BNorth%2BAmerican%2BChapter%2Bof%2Bthe%2BAssociation%2Bfor%2BComputational%2BLinguistics%253A%2BHuman%2BLanguage%2BTechnologies%26rft.volume%253D1%26rft.spage%253D4881%26rft_id%253Dinfo%253Adoi%252F10.18653%252Fv1%252F2024.naacl-long.273%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.18653\/v1\/2024.naacl-long.273\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi 
cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E59.\u003C\/span\u003E\u003Cdiv class=\u0022cit ref-cit ref-book no-rev-xref\u0022 id=\u0022cit-2026.03.02.26347469v1.59\u0022 data-doi=\u002210.18653\/v1\/2024.bionlp-1.13\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-chapter-title\u0022\u003EKG-Rank: Enhancing large language models for medical QA with knowledge graphs and ranking techniques\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EIn Proceedings of the 23rd Workshop on Biomedical Natural Language Processing\u003C\/span\u003E \u003Cspan class=\u0022cit-fpage\u0022\u003E155\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E166\u003C\/span\u003E (\u003Cspan class=\u0022cit-publ-name\u0022\u003EAssociation for Computational Linguistics\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EBangkok, Thailand\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.18653\/v1\/2024.bionlp-1.13\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca 
href=\u0022{openurl}?query=rft.jtitle%253DIn%2BProceedings%2Bof%2Bthe%2B23rd%2BWorkshop%2Bon%2BBiomedical%2BNatural%2BLanguage%2BProcessing%26rft_id%253Dinfo%253Adoi%252F10.18653%252Fv1%252F2024.bionlp-1.13%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.18653\/v1\/2024.bionlp-1.13\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E60.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-60-1\u0022 title=\u0022View reference 60. in text\u0022 id=\u0022ref-60\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.60\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKusa\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMendoza\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003E\u00d3. 
E.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKnoth\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPasi\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHanbury\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EEffective matching of patients to clinical trials using entity extraction and neural re-ranking\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Biomed. Inform\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E144\u003C\/span\u003E, \u003Cspan class=\u0022cit-issue\u0022\u003E104444\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E61.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-61-1\u0022 title=\u0022View reference 61. 
in text\u0022 id=\u0022ref-61\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.61\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EA specialized large language model for clinical reasoning and diagnosis in rare diseases\u003C\/span\u003E. Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/ARXIV.2511.14638\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E62.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-62-1\u0022 title=\u0022View reference 62. 
in text\u0022 id=\u0022ref-62\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2026.03.02.26347469v1.62\u0022 data-doi=\u002210.18653\/v1\/W19-5034\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ENeumann\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKing\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBeltagy\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EAmmar\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-chapter-title\u0022\u003EScispaCy: Fast and robust models for biomedical natural language processing\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003EIn Proceedings of the 18th BioNLP Workshop and Shared Task\u003C\/span\u003E \u003Cspan class=\u0022cit-fpage\u0022\u003E319\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E327\u003C\/span\u003E (\u003Cspan class=\u0022cit-publ-name\u0022\u003EAssociation for Computational Linguistics\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EFlorence, Italy\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id-scheme\u0022\u003Edoi:\u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E10.18653\/v1\/W19-5034\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DIn%2BProceedings%2Bof%2Bthe%2B18th%2BBioNLP%2BWorkshop%2Band%2BShared%2BTask%26rft_id%253Dinfo%253Adoi%252F10.18653%252Fv1%252FW19-5034%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.18653\/v1\/W19-5034\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E63.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-63-1\u0022 title=\u0022View reference 63. 
in text\u0022 id=\u0022ref-63\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.63\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-article-title\u0022\u003EHarvard Center for Health Communication. SMOG readability formula: Your tool for clearer health communication\u003C\/span\u003E. \u003Ca href=\u0022https:\/\/hsph.harvard.edu\/research\/healthcommunication\/resources\/smog\/?utm_source=chatgpt.com\u0022\u003Ehttps:\/\/hsph.harvard.edu\/research\/healthcommunication\/resources\/smog\/?utm_source=chatgpt.com\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E64.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-64-1\u0022 title=\u0022View reference 64. in text\u0022 id=\u0022ref-64\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.64\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELaughlin\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG. H. M\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003ESMOG grading-a new readability formula\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Read\u003C\/abbr\u003E. 
\u003Cspan class=\u0022cit-vol\u0022\u003E12\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E639\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E648\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E1969\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJ.%2BRead%26rft.volume%253D12%26rft.spage%253D639%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=A1969ZM49800004\u0026amp;link_type=ISI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-newisilink cit-ref-sprinkles-webofscience\u0022\u003E\u003Cspan\u003EWeb of Science\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E65.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-65-1\u0022 title=\u0022View reference 65. in text\u0022 id=\u0022ref-65\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.65\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-source\u0022\u003EMTEB Leaderboard\u003C\/span\u003E. 
\u003Ca href=\u0022https:\/\/huggingface.co\/spaces\/mteb\/leaderboard\u0022\u003Ehttps:\/\/huggingface.co\/spaces\/mteb\/leaderboard\u003C\/a\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E66.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-66-1\u0022 title=\u0022View reference 66. in text\u0022 id=\u0022ref-66\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.66\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBabakhin\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ELlama-Embed-Nemotron-8B: A universal text embedding model for multilingual and cross-lingual tasks\u003C\/span\u003E. Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/ARXIV.2511.07025\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E67.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-67-1\u0022 title=\u0022View reference 67. 
in text\u0022 id=\u0022ref-67\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.67\u0022 data-doi=\u002210.1007\/s10479-011-0841-3\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMaaten\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL. van der\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHinton\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EVisualizing data using t-SNE\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Mach. Learn. Res\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E9\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E2579\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E2605\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2008\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJ.%2BMach.%2BLearn.%2BRes%26rft.volume%253D9%26rft.spage%253D2579%26rft_id%253Dinfo%253Adoi%252F10.1007%252Fs10479-011-0841-3%26rft_id%253Dinfo%253Apmid%252F25143956%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1007\/s10479-011-0841-3\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles 
cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=25143956\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2026%2F03%2F03%2F2026.03.02.26347469.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=000262637600007\u0026amp;link_type=ISI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-newisilink cit-ref-sprinkles-webofscience\u0022\u003E\u003Cspan\u003EWeb of Science\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E68.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-68-1\u0022 title=\u0022View reference 68. in text\u0022 id=\u0022ref-68\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2026.03.02.26347469v1.68\u0022 data-doi=\u002210.48550\/ARXIV.2507.05201\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESellergren\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EMedGemma technical report\u003C\/span\u003E. 
Preprint at\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/ARXIV.2507.05201\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft_id%253Dinfo%253Adoi%252F10.48550%252FARXIV.2507.05201%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.48550\/ARXIV.2507.05201\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E69.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-69-1\u0022 title=\u0022View reference 69. in text\u0022 id=\u0022ref-69\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2026.03.02.26347469v1.69\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EFernandez-Felix\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB. 
M.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGarc\u00eda-Esquinas\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMuriel\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERoyuela\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E \u0026amp; \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZamora\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003EBootstrap internal validation command for predictive logistic regression models\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EStata J. Promot. Commun. Stat. 
Stata\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E21\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E498\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E509\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2021\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DStata%2BJ.%2BPromot.%2BCommun.%2BStat.%2BStata%26rft.volume%253D21%26rft.spage%253D498%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003C\/div\u003E\u003Cspan class=\u0022highwire-journal-article-marker-end\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003Cspan class=\u0022related-urls\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E  \u003C\/div\u003E\n\n  \n  \u003C\/div\u003E\n\u003C\/div\u003E\n  \u003C\/div\u003E\n\u003C\/div\u003E\n\u003C\/div\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_zP7WWIfzbyzvaM63L39cNV2juU_1XVH7wduFK9gcMNI.js\u0022\u003E\u003C\/script\u003E\n\u003C\/body\u003E\u003C\/html\u003E"}