{"markup":"\u003C?xml version=\u00221.0\u0022 encoding=\u0022UTF-8\u0022 ?\u003E\n    \u003Chtml version=\u0022HTML+RDFa+MathML 1.1\u0022\n    xmlns:content=\u0022http:\/\/purl.org\/rss\/1.0\/modules\/content\/\u0022\n    xmlns:dc=\u0022http:\/\/purl.org\/dc\/terms\/\u0022\n    xmlns:foaf=\u0022http:\/\/xmlns.com\/foaf\/0.1\/\u0022\n    xmlns:og=\u0022http:\/\/ogp.me\/ns#\u0022\n    xmlns:rdfs=\u0022http:\/\/www.w3.org\/2000\/01\/rdf-schema#\u0022\n    xmlns:sioc=\u0022http:\/\/rdfs.org\/sioc\/ns#\u0022\n    xmlns:sioct=\u0022http:\/\/rdfs.org\/sioc\/types#\u0022\n    xmlns:skos=\u0022http:\/\/www.w3.org\/2004\/02\/skos\/core#\u0022\n    xmlns:xsd=\u0022http:\/\/www.w3.org\/2001\/XMLSchema#\u0022\n    xmlns:mml=\u0022http:\/\/www.w3.org\/1998\/Math\/MathML\u0022\u003E\n  \u003Chead\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_YjAJQgxDlFX6S-O02jj9jCrVbrwlY3CGgCg1FzPlvBs.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nif(typeof window.MathJax === \u0022undefined\u0022) window.MathJax = { menuSettings: { zoom: \u0022Click\u0022 } };\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_waP91NpgGpectm_6Y2XDEauLJ8WCSCBKmmA87unpp2E.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.googletagmanager.com\/gtag\/js?id=G-0K57TCX5BY\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nwindow.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments)};gtag(\u0022js\u0022, new Date());gtag(\u0022set\u0022, \u0022developer_id.dMDhkMT\u0022, true);gtag(\u0022config\u0022, \u0022G-0K57TCX5BY\u0022, {\u0022groups\u0022:\u0022default\u0022,\u0022anonymize_ip\u0022:true});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\njQuery.extend(Drupal.settings, {\u0022basePath\u0022:\u0022\\\/\u0022,\u0022pathPrefix\u0022:\u0022\u0022,\u0022highwire\u0022:{\u0022ac\u0022:{\u0022\\\/medrxiv\\\/early\\\/2026\\\/01\\\/23\\\/2026.01.22.26344603.atom\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022\u0022,\u0022apath\u0022:\u0022\\\/medrxiv\\\/early\\\/2026\\\/01\\\/23\\\/2026.01.22.26344603.atom\u0022,\u0022jcode\u0022:\u0022medrxiv\u0022},\u0022medrxiv;2026.01.22.26344603v1\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022medrxiv;2026.01.22.26344603v1\u0022,\u0022apath\u0022:\u0022\u0022,\u0022jcode\u0022:\u0022medrxiv\u0022}},\u0022processed\u0022:[\u0022highwire_math\u0022],\u0022markup\u0022:[{\u0022requested\u0022:\u0022abstract\u0022,\u0022variant\u0022:\u0022abstract\u0022,\u0022view\u0022:\u0022abstract\u0022,\u0022pisa\u0022:\u0022medrxiv;2026.01.22.26344603v1\u0022}]},\u0022instances\u0022:\u0022{\\u0022highwire_abstract_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:20,\\u0022height\\u0022:20,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-abstract-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-abstract-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022right center\\u0022,\\u0022my\\u0022:\\u0022left center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022shift\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter click \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_author_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-author-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-author-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022top center\\u0022,\\u0022my\\u0022:\\u0022bottom center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_reflinks_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022mimic\\u0022:\\u0022top center\\u0022,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-ref-link-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-ref-link-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022bottom left\\u0022,\\u0022my\\u0022:\\u0022top left\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022flip\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}}}\u0022,\u0022qtipDebug\u0022:\u0022{\\u0022leaveElement\\u0022:0}\u0022,\u0022googleanalytics\u0022:{\u0022account\u0022:[\u0022G-0K57TCX5BY\u0022],\u0022trackOutbound\u0022:1,\u0022trackMailto\u0022:1,\u0022trackDownload\u0022:1,\u0022trackDownloadExtensions\u0022:\u00227z|aac|arc|arj|asf|asx|avi|bin|csv|doc(x|m)?|dot(x|m)?|exe|flv|gif|gz|gzip|hqx|jar|jpe?g|js|mp(2|3|4|e?g)|mov(ie)?|msi|msp|pdf|phps|png|ppt(x|m)?|pot(x|m)?|pps(x|m)?|ppam|sld(x|m)?|thmx|qtm?|ra(m|r)?|sea|sit|tar|tgz|torrent|txt|wav|wma|wmv|wpd|xls(x|m|b)?|xlt(x|m)|xlam|xml|z|zip\u0022,\u0022trackColorbox\u0022:1},\u0022ajaxPageState\u0022:{\u0022js\u0022:{\u0022\\\/\\\/cdn.jsdelivr.net\\\/qtip2\\\/2.2.1\\\/jquery.qtip.min.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_article_reference_popup.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_at_symbol.js\u0022:1,\u00220\u0022:1,\u0022sites\\\/all\\\/modules\\\/contrib\\\/google_analytics\\\/googleanalytics.js\u0022:1,\u0022https:\\\/\\\/www.googletagmanager.com\\\/gtag\\\/js?id=G-0K57TCX5BY\u0022:1,\u00221\u0022:1}}});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__uXgUByez87OKDsgffPHe7u5qNUzr7zOnqWrSJ87THKk__I8zmferlWQG1DHWX_fZmeyRd733gqStwZcOGe0mM0T4__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__HGACIFBlu2o05y3afvqlt5wrE_5Dn6MXsexfuEpeIwg__t4SOPxucAPoV3Os7g8dXqyMB1HRXQridRJ82X7nE33E__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink rel=\u0027stylesheet\u0027 type=\u0027text\/css\u0027 href=\u0027\/sites\/all\/modules\/contrib\/panels\/plugins\/layouts\/onecol\/onecol.css\u0027 \/\u003E\u003C\/head\u003E\u003Cbody\u003E\u003Cdiv class=\u0022panels-ajax-tab-panel panels-ajax-tab-panel-biorxiv-tab-art\u0022\u003E\u003Cdiv class=\u0022panel-display panel-1col clearfix\u0022 \u003E\n  \u003Cdiv class=\u0022panel-panel panel-col\u0022\u003E\n    \u003Cdiv\u003E\u003Cdiv class=\u0022panel-pane pane-highwire-markup\u0022 \u003E\n  \n      \n  \n  \u003Cdiv class=\u0022pane-content\u0022\u003E\n    \u003Cdiv class=\u0022highwire-markup\u0022\u003E\u003Cdiv xmlns=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022 data-highwire-cite-ref-tooltip-instance=\u0022highwire_reflinks_tooltip\u0022 class=\u0022content-block-markup\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cdiv class=\u0022article abstract-view \u0022\u003E\u003Cspan class=\u0022highwire-journal-article-marker-start\u0022\u003E\u003C\/span\u003E\u003Cdiv class=\u0022section abstract\u0022 id=\u0022abstract-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAbstract\u003C\/h2\u003E\u003Cdiv id=\u0022sec-1\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-3\u0022\u003E\u003Cstrong\u003EPurpose\u003C\/strong\u003E Natural Language Processing (NLP) has the potential to extract structured clinical knowledge from unstructured Electronic Health Records (EHRs). However, the limited availability of annotated datasets for algorithm training restricts its application in clinical practice. This study investigates the use of transformer-based NLP models to structure Italian EHRs in cardiac settings, addressing this gap.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-2\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-4\u0022\u003E\u003Cstrong\u003EMethods\u003C\/strong\u003E We implemented and evaluated three named entity recognition algorithms: SpaCy, Flair, and Multiconer. The experiments utilized three datasets comprising 2235 anamneses from patients at the Fondazione Toscana Gabriele Monasterio, Italy.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-3\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-5\u0022\u003E\u003Cstrong\u003EResults\u003C\/strong\u003E The SpaCy model achieved the highest performance with an F1-score of 97% in identifying clinical features on explicitly mentioned entities (Presence\/Absence classification). However, features are not always mentioned, as clinicians selectively document only clinically relevant information in real-world practice. External validation shows model generalizability: EVD-100 dataset (considering 12 features, 97.13% F1) and STEMI dataset (considering 3 shared features, 88.29% F1). These structured variables were subsequently used to train machine learning algorithms (Logistic Regression, XGBoost, CatBoost) for classifying amyloidosis in heart failure patients. The classifiers trained on SpaCy-structured data attained an average F1-score of 66.70%, closely matching the 66.99% F1-score from classifiers using clinician-annotated data.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-4\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-6\u0022\u003E\u003Cstrong\u003EConclusion\u003C\/strong\u003E This study shows the feasibility of using NLP for structuring Italian EHRs in realistic clinical settings, highlighting its potential to enhance computer-assisted detection despite selective documentation patterns. The comparable performance across annotation methods suggests NLP\u2019s capability to bridge the gap in dataset annotation, paving the way for its integration into clinical practice.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Ch3\u003ECompeting Interest Statement\u003C\/h3\u003E\u003Cp id=\u0022p-7\u0022\u003EThe authors have declared no competing interest.\u003C\/p\u003E\u003Ch3\u003EFunding Statement\u003C\/h3\u003E\u003Cp id=\u0022p-8\u0022\u003EThis study did not receive any funding\u003C\/p\u003E\u003Ch3\u003EAuthor Declarations\u003C\/h3\u003E\u003Cp id=\u0022p-9\u0022\u003EI confirm all relevant ethical guidelines have been followed, and any necessary IRB and\/or ethics committee approvals have been obtained.\u003C\/p\u003E\u003Cp id=\u0022p-10\u0022\u003EYes\u003C\/p\u003E\u003Cp id=\u0022p-11\u0022\u003EThe details of the IRB\/oversight body that provided approval or exemption for the research described are given below:\u003C\/p\u003E\u003Cp id=\u0022p-12\u0022\u003EEthics Committee of Fondazione Toscana Gabriele Monasterio gave ethical approval for this work (Decree No. 3854, 02 December 2023).\u003C\/p\u003E\u003Cp id=\u0022p-13\u0022\u003EI confirm that all necessary patient\/participant consent has been obtained and the appropriate institutional forms have been archived, and that any patient\/participant\/sample identifiers included were not known to anyone (e.g., hospital staff, patients or participants themselves) outside the research group so cannot be used to identify individuals.\u003C\/p\u003E\u003Cp id=\u0022p-14\u0022\u003EYes\u003C\/p\u003E\u003Cp id=\u0022p-15\u0022\u003EI understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance).\u003C\/p\u003E\u003Cp id=\u0022p-16\u0022\u003EYes\u003C\/p\u003E\u003Cp id=\u0022p-17\u0022\u003EI have followed all appropriate research reporting guidelines, such as any relevant EQUATOR Network research reporting checklist(s) and other pertinent material, if applicable.\u003C\/p\u003E\u003Cp id=\u0022p-18\u0022\u003EYes\u003C\/p\u003E\u003Cdiv class=\u0022section fn-group\u0022 id=\u0022fn-group-1\u0022\u003E\u003Ch2\u003EFootnotes\u003C\/h2\u003E\u003Cul\u003E\u003Cli class=\u0022fn-others\u0022 id=\u0022fn-1\u0022\u003E\u003Cp id=\u0022p-1\u0022\u003EContributing authors: \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Eandrea.bandini{at}santannapisa.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Edaniele.sartiano{at}iit.cnr.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Egiuseppe.vergara{at}santannapisa.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Edalmiani{at}ftgm.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Emichele.emdin{at}santannapisa.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Esilvestro.micera{at}santannapisa.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Ecalogero.oddo{at}santannapisa.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Eclaudio.passino{at}santannapisa.it\u003C\/span\u003E\u003C\/span\u003E; \u003Cspan class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Esara.moccia{at}unich.it\u003C\/span\u003E\u003C\/span\u003E\u003C\/p\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section data-availability\u0022 id=\u0022sec-24\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EData Availability\u003C\/h2\u003E\u003Cp id=\u0022p-89\u0022\u003EThe data that support the findings of this study are not publicly available due to ethical and legal restrictions, but are available from the corresponding author upon reasonable request and with approval from the Ethics Committee.\u003C\/p\u003E\u003C\/div\u003E\u003Cspan class=\u0022highwire-journal-article-marker-end\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003Cspan class=\u0022related-urls\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E  \u003C\/div\u003E\n\n  \n  \u003C\/div\u003E\n\u003Cdiv class=\u0022panel-separator\u0022\u003E\u003C\/div\u003E\u003Cdiv class=\u0022panel-pane pane-biorxiv-copyright\u0022 \u003E\n  \n      \n  \n  \u003Cdiv class=\u0022pane-content\u0022\u003E\n    \u003Cdiv class=\u0022field field-name-field-highwire-copyright field-type-text field-label-inline clearfix\u0022\u003E\u003Cdiv class=\u0022field-label\u0022\u003ECopyright\u0026nbsp;\u003C\/div\u003E\u003Cdiv class=\u0022field-items\u0022\u003E\u003Cdiv class=\u0022field-item even\u0022\u003EThe copyright holder for this preprint is the author\/funder, who has granted medRxiv a license to display the preprint in perpetuity.\u003Cspan class=\u0022license-type\u0022\u003E It is made available under a \u003Ca href=\u0022http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/\u0022 class=\u0022\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003ECC-BY-NC-ND 4.0 International license\u003C\/a\u003E.\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E  \u003C\/div\u003E\n\n  \n  \u003C\/div\u003E\n\u003C\/div\u003E\n  \u003C\/div\u003E\n\u003C\/div\u003E\n\u003C\/div\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_SXHPyYQMndPSjH0oAPTy1xd0XLtmYCIziRIiNb0RJd8.js\u0022\u003E\u003C\/script\u003E\n\u003C\/body\u003E\u003C\/html\u003E"}