{"markup":"\u003C?xml version=\u00221.0\u0022 encoding=\u0022UTF-8\u0022 ?\u003E\n    \u003Chtml version=\u0022HTML+RDFa+MathML 1.1\u0022\n    xmlns:content=\u0022http:\/\/purl.org\/rss\/1.0\/modules\/content\/\u0022\n    xmlns:dc=\u0022http:\/\/purl.org\/dc\/terms\/\u0022\n    xmlns:foaf=\u0022http:\/\/xmlns.com\/foaf\/0.1\/\u0022\n    xmlns:og=\u0022http:\/\/ogp.me\/ns#\u0022\n    xmlns:rdfs=\u0022http:\/\/www.w3.org\/2000\/01\/rdf-schema#\u0022\n    xmlns:sioc=\u0022http:\/\/rdfs.org\/sioc\/ns#\u0022\n    xmlns:sioct=\u0022http:\/\/rdfs.org\/sioc\/types#\u0022\n    xmlns:skos=\u0022http:\/\/www.w3.org\/2004\/02\/skos\/core#\u0022\n    xmlns:xsd=\u0022http:\/\/www.w3.org\/2001\/XMLSchema#\u0022\n    xmlns:mml=\u0022http:\/\/www.w3.org\/1998\/Math\/MathML\u0022\u003E\n  \u003Chead\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_YjAJQgxDlFX6S-O02jj9jCrVbrwlY3CGgCg1FzPlvBs.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nif(typeof window.MathJax === \u0022undefined\u0022) window.MathJax = { menuSettings: { zoom: \u0022Click\u0022 } };\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_waP91NpgGpectm_6Y2XDEauLJ8WCSCBKmmA87unpp2E.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.googletagmanager.com\/gtag\/js?id=G-0K57TCX5BY\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nwindow.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments)};gtag(\u0022js\u0022, new 
Date());gtag(\u0022set\u0022, \u0022developer_id.dMDhkMT\u0022, true);gtag(\u0022config\u0022, \u0022G-0K57TCX5BY\u0022, {\u0022groups\u0022:\u0022default\u0022,\u0022anonymize_ip\u0022:true});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\njQuery.extend(Drupal.settings, {\u0022basePath\u0022:\u0022\\\/\u0022,\u0022pathPrefix\u0022:\u0022\u0022,\u0022highwire\u0022:{\u0022ac\u0022:{\u0022medrxiv;2025.07.27.25332177v1\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022medrxiv;2025.07.27.25332177v1\u0022,\u0022apath\u0022:\u0022\u0022,\u0022jcode\u0022:\u0022medrxiv\u0022}},\u0022processed\u0022:[\u0022highwire_math\u0022],\u0022markup\u0022:[{\u0022requested\u0022:\u0022full-text\u0022,\u0022variant\u0022:\u0022full-text\u0022,\u0022view\u0022:\u0022full\u0022,\u0022pisa\u0022:\u0022medrxiv;2025.07.27.25332177v1\u0022}]},\u0022instances\u0022:\u0022{\\u0022highwire_abstract_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:20,\\u0022height\\u0022:20,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-abstract-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-abstract-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022right center\\u0022,\\u0022my\\u0022:\\u0022left center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022shift\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter click \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_author_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-author-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-author-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022top center\\u0022,\\u0022my\\u0022:\\u0022bottom center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_reflinks_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022mimic\\u0022:\\u0022top center\\u0022,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-ref-link-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-ref-link-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022bottom left\\u0022,\\u0022my\\u0022:\\u0022top left\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022flip\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}}}\u0022,\u0022qtipDebug\u0022:\u0022{\\u0022leaveElement\\u0022:0}\u0022,\u0022googleanalytics\u0022:{\u0022account\u0022:[\u0022G-0K57TCX5BY\u0022],\u0022trackOutbound\u0022:1,\u0022trackMailto\u0022:1,\u0022trackDownload\u0022:1,\u0022trackDownloadExtensions\u0022:\u00227z|aac|arc|arj|asf|asx|avi|bin|csv|doc(x|m)?|dot(x|m)?|exe|flv|gif|gz|gzip|hqx|jar|jpe?g|js|mp(2|3|4|e?g)|mov(ie)?|msi|msp|pdf|phps|png|ppt(x|m)?|pot(x|m)?|pps(x|m)?|ppam|sld(x|m)?|thmx|qtm?|ra(m|r)?|sea|sit|tar|tgz|torrent|txt|wav|wma|wmv|wpd|xls(x|m|b)?|xlt(x|m)|xlam|xml|z|zip\u0022,\u0022trackColorbox\u0022:1},\u0022ajaxPageState\u0022:{\u0022js\u0022:{\u0022\\\/\\\/cdn.jsdelivr.net\\\/qtip2\\\/2.2.1\\\/jquery.qtip.min.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_article_reference_popup.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_at_symbol.js\u0022:1,\u00220\u0022:1,\u0022sites\\\/all\\\/modules\\\/contrib\\\/google_analytics\\\/googleanalytics.js\u0022:1,\u0022https:\\\/\\\/www.googletagmanager.com\\\/gtag\\\/js?id=G-0K57TCX5BY\u0022:1,\u00221\u0022:1}}});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__dn-cpI1YtkU_iLHgA5WhlkxgYWyat_IxjF_B-WSYrpE__a9hIbt0eaZ7d5nhwnm2weG8R_2eXK4EvoOx9dOxouHE__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 
href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__HGACIFBlu2o05y3afvqlt5wrE_5Dn6MXsexfuEpeIwg__t4SOPxucAPoV3Os7g8dXqyMB1HRXQridRJ82X7nE33E__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink rel=\u0027stylesheet\u0027 type=\u0027text\/css\u0027 href=\u0027\/sites\/all\/modules\/contrib\/panels\/plugins\/layouts\/onecol\/onecol.css\u0027 \/\u003E\u003C\/head\u003E\u003Cbody\u003E\u003Cdiv class=\u0022panels-ajax-tab-panel panels-ajax-tab-panel-article-tab-full-text\u0022\u003E\u003Cdiv class=\u0022panel-display panel-1col clearfix\u0022 \u003E\n  \u003Cdiv class=\u0022panel-panel panel-col\u0022\u003E\n    \u003Cdiv\u003E\u003Cdiv class=\u0022panel-pane pane-highwire-markup\u0022 \u003E\n  \n      \n  \n  \u003Cdiv class=\u0022pane-content\u0022\u003E\n    \u003Cdiv class=\u0022highwire-markup\u0022\u003E\u003Cdiv xmlns=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022 data-highwire-cite-ref-tooltip-instance=\u0022highwire_reflinks_tooltip\u0022 class=\u0022content-block-markup\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cdiv class=\u0022article fulltext-view \u0022\u003E\u003Cspan class=\u0022highwire-journal-article-marker-start\u0022\u003E\u003C\/span\u003E\u003Cdiv class=\u0022section abstract\u0022 id=\u0022abstract-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAbstract\u003C\/h2\u003E\u003Cdiv id=\u0022sec-1\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-3\u0022\u003E\u003Cstrong\u003EBackground\u003C\/strong\u003E The privacy protection of medical patients has remained a critical concern in healthcare information management during the digital era. Conventional approaches have predominantly relied on rule-based protocols and data encryption systems, which typically require substantial involvement of IT professionals for implementation. 
Recent advancements in Large Language Models (LLMs) have introduced novel approaches for privacy protection of electronic medical records (EMRs), simultaneously enabling clinical practitioners to utilize these tools for specific data tasks.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-2\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-4\u0022\u003E\u003Cstrong\u003EObjectives\u003C\/strong\u003E This study aims to leverage LLMs through a no-code framework to achieve structured processing of patient privacy data in Chinese EMRs and formulate privacy policies, while evaluating the practical efficacy of LLMs.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-3\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-5\u0022\u003E\u003Cstrong\u003EMethods\u003C\/strong\u003E This study employs a disease-specific data subset from Peking Union Medical College Hospital (PUMCH), comprising data from approximately 160,000 patients, using a prompt engineering approach to enable LLMs to perform sensitive information annotation in lengthy EMR narratives. Simultaneously, it automates the classification of privacy levels for identified sensitive data and develops targeted protection strategies based on risk tiers, thereby mitigating non-essential exposure of patient privacy during data sharing. The research utilizes the Qwen model, with its entire workflow being exclusively driven by medical natural language prompts and self-evolving knowledge bases, requiring no supplementary programming or code development. 
These strategies were validated using the hospital\u2019s test text dataset, with primary evaluation metrics focusing on precision rates (including accuracy of information extraction and privacy-level classification) and recall rate assessments for critical sensitive data categories.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-4\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-6\u0022\u003E\u003Cstrong\u003EResults\u003C\/strong\u003E Utilizing 4 million text entries from PUMCH, we conducted sampled data observation and performed privacy annotation via LLM prompts across seven categories: names, addresses, contact details, national ID numbers, hospital names, sexually transmitted disease (STD) information, and pregnancy-related patient data. Through iterative prompt refinement via error analysis, optimal performance was achieved on the test set, demonstrating an average precision of 97% and recall of 95% across these seven entity types. Furthermore, sensitivity tier classification was implemented for three high-risk categories: addresses, STD information, and pregnancy-related data, attaining average precision of 95% and recall of 90% in sensitivity-level determination.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-5\u0022 class=\u0022subsection\u0022\u003E\u003Cp id=\u0022p-7\u0022\u003E\u003Cstrong\u003EDiscussion\u003C\/strong\u003E We propose a novel codeless privacy protection framework leveraging LLMs, enabling intelligent anonymization of medical data through natural language interaction. 
This solution employs a three-tiered hierarchical protection mechanism that dynamically adapts privacy strategies to clinical scenario requirements, ensuring data security while maximizing data utility.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-6\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EIntroduction\u003C\/h2\u003E\u003Cp id=\u0022p-20\u0022\u003EThe evolution of electronic medical records (EMRs) has transformed how healthcare institutions store, access, and share patient data. Modern EMR systems integrate clinical information in digitized, standardized formats, enhancing data integrity and usability [\u003Ca id=\u0022xref-ref-1-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-1\u0022\u003E1\u003C\/a\u003E]. Concurrently, this advancement enables cross-institutional data sharing, which has become a critical driver for leveraging real-world evidence to complement clinical trial outcomes and healthcare capabilities across institutions, regions, and even nations [\u003Ca id=\u0022xref-ref-2-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-2\u0022\u003E2\u003C\/a\u003E]. Multi-center data utilization accelerates insights into disease patterns and treatment efficacy while facilitating longitudinal analyses of patient trends across demographics, thereby advancing epidemiological research, risk prediction, and personalized care [\u003Ca id=\u0022xref-ref-3-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-3\u0022\u003E3\u003C\/a\u003E]. This, in turn, advances precision medicine and population health research.\u003C\/p\u003E\u003Cp id=\u0022p-21\u0022\u003EHowever, patient privacy breaches during data sharing remain a pivotal challenge. Inadequate protection of personally identifiable information (PII) and sensitive health data\u2014including diagnoses and treatment for critical conditions\u2014can severely impact patients. 
Unauthorized access may lead to social stigmatization, employment or insurance discrimination, and significant emotional distress. Furthermore, such breaches erode public trust in healthcare systems, deterring patients from sharing crucial data in the future and ultimately jeopardizing both individual well-being and public health initiatives [\u003Ca id=\u0022xref-ref-4-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-5-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-5\u0022\u003E5\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-22\u0022\u003EData anonymization\u2014a process involving the modification or \u201cmasking\u201d of sensitive fields to reduce identifiability while preserving clinical utility\u2014represents an underexplored yet promising approach. While many studies focus on heavy-duty techniques like encryption, access control, federated learning, and differential privacy, anonymization enables finer-grained control over what patient information is shared and how. By precisely targeting specific data types and scopes, healthcare organizations can maximize data sharing for research and operational purposes while minimizing privacy risks [\u003Ca id=\u0022xref-ref-6-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-6\u0022\u003E6\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-23\u0022\u003ETraditional anonymization typically relies on rudimentary models that first identify sensitive entities and then mask or transform them. In practice, this often involves entity recognition using regular expression-based methods or basic machine learning techniques applied to patient data. Once extracted, sensitive entities undergo systematic anonymization to mitigate re-identification risks while retaining analytical utility. These conventional approaches remain popular due to their simplicity, though they may lack the contextual nuance captured by advanced methods. 
Nevertheless, they remain essential components of many data-sharing workflows for ensuring patient privacy [\u003Ca id=\u0022xref-ref-7-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-7\u0022\u003E7\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-8-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-24\u0022\u003EOur study leverages large language models (LLMs) to advance data anonymization. LLMs offer a transformative alternative to traditional methods. Unlike rule-based regex or conventional machine learning models, LLMs harness advanced transformer architectures to better capture contextual subtleties in clinical narratives [\u003Ca id=\u0022xref-ref-9-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E]. This enhanced contextual understanding improves identification accuracy for sensitive entities embedded in unstructured text, such as patient names, birthdates, and other identifiers. Crucially, through natural language prompting, clinical researchers can directly guide the anonymization process. This user-friendly approach eliminates dependency on technical experts, democratizes access to sophisticated anonymization tools, and enables more efficient, accurate patient data sharing without compromising privacy [\u003Ca id=\u0022xref-ref-10-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-10\u0022\u003E10\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-25\u0022\u003EThe application of LLMs in medicine marks a significant advancement, particularly in empowering clinicians to perform natural language processing (NLP) tasks\u2014such as data anonymization, information extraction, and summarization\u2014that previously required specialized expertise. 
By interacting with LLMs via natural language prompts, clinicians can now execute these tasks, making advanced NLP capabilities widely accessible.\u003C\/p\u003E\u003Cp id=\u0022p-26\u0022\u003EOur goal is to utilize LLMs combined with prompt engineering and Retrieval-Augmented Generation (RAG) to achieve structured extraction of patient privacy data from Chinese EMRs and formulate corresponding protection strategies, while systematically evaluating model performance throughout this process.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-7\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EMethods\u003C\/h2\u003E\u003Cdiv id=\u0022sec-8\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EData Source\u003C\/h3\u003E\u003Cp id=\u0022p-27\u0022\u003EThis study is based on a multicenter clinical research infrastructure developed through PUMCH\u2019s disease-specific data platform (\u003Ca id=\u0022xref-fig-1-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFigure 1\u003C\/a\u003E). The system rigorously adheres to the Observational Medical Outcomes Partnership (OMOP) Common Data Model (CDM) standard, integrating comprehensive, multidimensional clinical data from 160,000 patients with selected diseases collected between 2012 and 2025. 
At the data architecture level, structured data undergo standardized mapping via OMOP-CDM specifications, while unstructured textual data are uniformly stored in a NOTE table containing 13 categories of core clinical documentation: outpatient\/emergency records, ward round notes, discharge summaries, admission notes, initial progress note, 24-hour admission\/discharge records, preoperative discussion notes, surgical reports, initial postoperative progress note, specialty-specific notes, periodic summaries, resuscitation records, and death certificates.\u003C\/p\u003E\u003Cdiv id=\u0022F1\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F1.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Structure of PUMCH\u0026#x2019;s disease-specific data platform. CDM structure suitable for Chinese EMRs under the OMOP system.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2022453215\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Structure of PUMCH\u0026#x2019;s disease-specific data platform. 
CDM structure suitable for Chinese EMRs under the OMOP system.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 1.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022284\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 1.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022284\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F1.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 1.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F1.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 
1.\u003C\/span\u003E \u003Cp id=\u0022p-28\u0022 class=\u0022first-child\u0022\u003EStructure of PUMCH\u2019s disease-specific data platform. CDM structure suitable for Chinese EMRs under the OMOP system.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-29\u0022\u003EFor structured data, field-specific anonymization is achieved through predefined table schemas. Consequently, sensitive information extraction and protection focus exclusively on unstructured textual fields within Chinese EMRs\u2014primarily the aforementioned 13 documentation types stored in the NOTE master table.\u003C\/p\u003E\u003Cp id=\u0022p-30\u0022\u003EFrom 4 million raw textual records, we randomly sampled 10,000 entries: 8,000 for training\/validation and 2,000 for testing, enabling dual validation of information extraction accuracy and privacy strategy effectiveness.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-9\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ELLM Tools\u003C\/h3\u003E\u003Cp id=\u0022p-31\u0022\u003EThe present study focuses on two primary objectives: first, identifying and marking patient-sensitive information within given long-form texts, and second, categorizing the identified sensitive information by risk severity levels. We have selected the Qwen2.5-72B-Instruct model [\u003Ca id=\u0022xref-ref-11-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-11\u0022\u003E11\u003C\/a\u003E] as our foundational architecture - a 72-billion-parameter open-source LLM developed by Alibaba Cloud. 
This model excels in deep comprehension of multi-step, cross-modal complex instructions, enhanced by ultra-long context processing and RLHF optimization techniques, making it particularly suitable for accurately parsing ambiguous requirements and generating structured outputs that align with our operational goals. \u003Ca id=\u0022xref-fig-2-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFigure 2\u003C\/a\u003E below shows our research roadmap.\u003C\/p\u003E\u003Cdiv id=\u0022F2\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F2.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Research roadmap. The research roadmap illustrates three key steps: data preparation, large model processing and API application.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-2022453215\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Research roadmap. 
The research roadmap illustrates three key steps: data preparation, large model processing and API application.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Figure 2.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022178\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Figure 2.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022178\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F2.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Figure 2.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/07\/28\/2025.07.27.25332177\/F2.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFigure 
2.\u003C\/span\u003E \u003Cp id=\u0022p-32\u0022 class=\u0022first-child\u0022\u003EResearch roadmap. The research roadmap illustrates three key steps: data preparation, large model processing and API application.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-33\u0022\u003ENotably, this research intentionally avoids engineering techniques like model fine-tuning or knowledge distillation, instead accomplishing its objectives solely through Prompt engineering and Retrieval-Augmented Generation (RAG). In LLM systems, Prompts serve as natural language instructions that guide model responses by decoding core human intent and contextual understanding. Their key strength lies in enabling flexible natural language interactions without requiring specialized formatting or programming syntax, thereby directly translating complex semantic content into contextually appropriate outputs. RAG enhances output authenticity and reliability through a two-phase process: first retrieving high-quality information from external knowledge bases, then generating answers based on verified data. This approach leverages dynamic retrieval mechanisms to anchor responses in up-to-date or domain-specific knowledge, effectively mitigating model hallucinations while improving result verifiability and accuracy.\u003C\/p\u003E\u003Cp id=\u0022p-34\u0022\u003EOur Prompt implementation employs specially formatted natural language instructions, while RAG operations are facilitated through the open-source bisheng platform [\u003Ca id=\u0022xref-ref-12-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-12\u0022\u003E12\u003C\/a\u003E]. Bisheng provides a comprehensive LLM application development environment featuring modular tools for knowledge base management, agent orchestration, and skill reuse. 
Its visual interface streamlines model deployment workflows, with core capabilities centered on integrating RAG technology with dynamic knowledge repositories to significantly enhance response precision. The knowledge base employs the BGE-large-zh-v1.5 embedding model [\u003Ca id=\u0022xref-ref-13-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-13\u0022\u003E13\u003C\/a\u003E] from Beijing Academy of Artificial Intelligence, optimized specifically for Chinese semantic similarity calculations and retrieval tasks. Through bisheng\u2019s visual interface, we combine natural language Prompts with our proprietary repository of sensitive information patterns to enable no-code LLM application development - the foundational approach for all subsequent research activities in this project.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-10\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EMarking of Sensitive Information\u003C\/h3\u003E\u003Cp id=\u0022p-35\u0022\u003EOur primary task is to leverage large language models to identify and label sensitive information within EMRs text entries. Based on our previous research, the desensitized information identified in this task is mainly divided into two major categories encompassing seven entity types. The first category pertains to basic privacy information, including names, national ID numbers, contact information, residential addresses, and healthcare facility names. This type of information is not distinguished between patients and medical staff, meaning that the relevant information of physicians will also be recognized. 
The second category relates to patients\u2019 sensitive medical information, including pregnancy and childbirth-related information [\u003Ca id=\u0022xref-ref-14-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-14\u0022\u003E14\u003C\/a\u003E], as well as information related to STDs [\u003Ca id=\u0022xref-ref-15-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-15\u0022\u003E15\u003C\/a\u003E]. The labeling of sensitive information primarily relies on prompt engineering techniques.\u003C\/p\u003E\u003Cp id=\u0022p-36\u0022\u003EAccording to its application domains, prompt engineering can be classified into twelve major categories [\u003Ca id=\u0022xref-ref-16-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-16\u0022\u003E16\u003C\/a\u003E]. For our task, we mainly utilize capabilities such as New Tasks Without Extensive Training, Reasoning and Logic, and Reduce Hallucination. For names, ID numbers, contact information, and addresses, we adopt the Few-shot Prompting approach [\u003Ca id=\u0022xref-ref-17-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E]. For hospital names, pregnancy-related privacy information, and STD-related information, we employ RAG [\u003Ca id=\u0022xref-ref-18-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-18\u0022\u003E18\u003C\/a\u003E]. Finally, we use Chain-of-Thought (CoT) Prompting [\u003Ca id=\u0022xref-ref-19-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-19\u0022\u003E19\u003C\/a\u003E] to complete all entity labeling tasks in a single prompt.\u003C\/p\u003E\u003Cp id=\u0022p-37\u0022\u003EWe begin by individually labeling each entity category for sensitive information and iteratively refine the prompts based on results generated from real training data until the optimal performance is achieved on the validation set. 
Once optimal prompts for all seven entity types have been developed, we then merge them into a single prompt using CoT prompting to enable simultaneous labeling of all sensitive information within a text.\u003C\/p\u003E\u003Cp id=\u0022p-38\u0022\u003EFor the four entity types \u2014 names, ID numbers, contact information, and addresses \u2014 we adopt Few-shot Prompting. This technique guides the large model to accurately identify target entities by designing concise prompt templates and incorporating a few annotated examples. Chinese names, ID numbers, contact information, and addresses generally follow fixed patterns (e.g., 18-digit combinations for ID numbers, administrative divisions in addresses, and numerical patterns for phone numbers), enabling the large model to perform high-precision entity extraction with low complexity.\u003C\/p\u003E\u003Cp id=\u0022p-39\u0022\u003EFor the hospital name entity, we primarily utilize the RAG method. The core advantage of RAG lies in its knowledge augmentation and disambiguation capabilities. By retrieving external medical knowledge bases (e.g., hospital directories and standardized institutional codes) and combining them with model inference, it can accurately identify implicit variants of hospital names in text (such as abbreviations, aliases, or non-standard expressions), while also dynamically verifying the validity of the entities (e.g., matching with official registration information), effectively mitigating the issue of model hallucination. Additionally, the knowledge base can be updated in real time to accommodate newly established institutions, significantly improving the accuracy and reliability of entity extraction.\u003C\/p\u003E\u003Cp id=\u0022p-40\u0022\u003EFor pregnancy-related privacy information and STD-related information, we also employ the RAG approach. Our previous work has already accumulated partial corpora for these two categories. 
Furthermore, we can directly construct a knowledge base using common Chinese ICD-9 and ICD-10 disease descriptions. RAG separates the task into two modules: Retrieval and Generation. The retrieval module, using a lightweight model, quickly matches pregnancy and STD-related structured data in the knowledge base, while the generation module focuses on recognition and generation based on the retrieval results. This modular design allows for the independent optimization of each component, ensuring high-accuracy medical entity extraction while reducing computational load, making it more suitable for deployment in privacy-sensitive scenarios.\u003C\/p\u003E\u003Cp id=\u0022p-41\u0022\u003EFor all seven entity types, we define output formats within the prompts. The large model labels directly on the original text using \u2018\u0026lt;\u0026lt;\u2019 and \u2018\u0026gt;\u0026gt;\u2019 to mark positions, appending the corresponding entity type, and returns the labeled text without any unrelated output. If no sensitive information is detected, the model simply returns the original text. For example, \u201c\u60a3\u8005\u25aa\u4e09\u2026\u2026\u201d would return \u201c\u60a3\u8005\u0026lt;\u0026lt;\u25aa\u4e09: \u59d3\u540d\u0026gt;\u0026gt;\u2026\u2026\u201d, while \u201c\u60a3\u8005\u6027\u25aa\u5973\u2026\u2026\u201d would return \u201c\u60a3\u8005\u6027\u25aa\u5973\u2026\u2026\u201d.\u003C\/p\u003E\u003Cp id=\u0022p-42\u0022\u003EAfter achieving satisfactory extraction performance for all seven entity types on the training set using the individual prompts, we consolidate them into a single unified prompt using CoT prompting to enable simultaneous extraction. This unified prompt is further refined using the validation set to obtain optimal results. 
The final prompt is then evaluated on the test set by calculating precision and recall.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-11\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EProtection of Sensitive Information\u003C\/h3\u003E\u003Cp id=\u0022p-43\u0022\u003EThrough the aforementioned steps, we have successfully extracted sensitive information from the text. Given the importance of sensitive information, our priority is to maximize the recall rate of extraction in order to reduce omissions. However, this approach may affect the usability of the desensitized data, as it may result in the identification of data that is not particularly sensitive but is important for subsequent research purposes. Therefore, we further assess the extracted sensitive information to determine its sensitivity level, and then perform desensitization according to different sensitivity levels based on the data usage requirements of the subsequent research.\u003C\/p\u003E\u003Cp id=\u0022p-44\u0022\u003EThe assessment of sensitivity levels is not applied to all the extracted entity types. For the four types of entities\u2014name, ID number, contact information, and hospital name\u2014the sensitivity level is uniformly set to the highest level, and no further classification is required. 
For address entities, two levels are defined: Level 1 includes specific address information, such as street names, residential community names, or door numbers; Level 2 includes only information up to the district or county level, such as \u201cHaidian District, Beijing.\u201d For sexually transmitted disease (STD) information and pregnancy-related privacy information, three privacy levels are defined: Level 1 is highly sensitive, including information that could lead to serious social discrimination if disclosed, such as HIV\/syphilis infection or miscarriage records; Level 2 is moderately sensitive, which may affect social perception if disclosed, such as mycoplasma\/herpes infections or pregnancy-related surgical records; Level 3 is mildly sensitive, containing indirect descriptions of a patient\u2019s condition, such as hepatitis B or C infections, or marital and childbearing status.\u003C\/p\u003E\u003Cp id=\u0022p-45\u0022\u003EAs seen from the definitions above, our classification of privacy levels adopts a qualitative approach with no clearly defined boundaries, which differs from traditional methods that rely on specific and explicit definitions. Here, we aim to leverage the large model\u2019s inherent comprehension ability to perform this task through natural language descriptions, and evaluate the model\u2019s performance in such tasks, which are common across various aspects of the healthcare domain.\u003C\/p\u003E\u003Cp id=\u0022p-46\u0022\u003EFor the sensitivity classification of location, STD information, and pregnancy-related privacy information, we follow the same methodology as the entity extraction task. 
That is, we first optimize the classification for each entity type on the training set, then combine the tasks and revise them through a second round on the validation set, and finally calculate the micro-average score on the test set.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-12\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EResults\u003C\/h2\u003E\u003Cdiv id=\u0022sec-13\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EUse of LLM\u003C\/h3\u003E\u003Cp id=\u0022p-47\u0022\u003EAll model development tasks in this study were carried out on the Bisheng platform and mainly included the following three types of tasks:\u003C\/p\u003E\u003Cp id=\u0022p-48\u0022\u003EKnowledge Base Construction: We uploaded three types of dictionaries to the platform\u2014hospital name lists, sexually transmitted disease lists, and pregnancy-related privacy description lists. The platform automatically parsed and segmented them to form structured knowledge bases.\u003C\/p\u003E\u003Cp id=\u0022p-49\u0022\u003EPrompt Construction: For all large model tasks, prompts were designed and then uniformly categorized into two types of prompts.\u003C\/p\u003E\u003Cp id=\u0022p-50\u0022\u003EWorkflow Development: Two separate workflows were established, one for information tagging and the other for sensitive information classification. These workflows support end-to-end processing from raw text to final results. 
Additionally, the platform encapsulated the entire process into APIs, allowing real-world environments to directly access the processing via API calls.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-14\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ESensitive Information Tagging Results\u003C\/h3\u003E\u003Cp id=\u0022p-51\u0022\u003E\u003Ca id=\u0022xref-table-wrap-1-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E illustrates the precision and recall performance for the seven types of entities on the test set. Compared with early neural network-based NER systems, achieving over 95% precision and over 90% recall across the seven entity types represents a substantial advancement. For instance, in earlier sequence labeling models, the LSTM-CNN method by Chiu and Nichols (2016) reported an F1 score of only 86.17% [\u003Ca id=\u0022xref-ref-20-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-20\u0022\u003E20\u003C\/a\u003E]. This result fully demonstrates the reliability of using large model techniques for entity recognition tasks.\u003C\/p\u003E\u003Cdiv id=\u0022T1\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/1036217\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca 
href=\u0022\/highwire\/markup\/1036217\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/1036217\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 1.\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EPrecision and Recall for Tagging Seven Types of Entities on the Test Set.\u003C\/span\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-52\u0022\u003ENotably, LLMs exhibit significantly superior performance on basic information entities than on medical information entities. Large language models are typically more adept at extracting basic entities such as names and addresses because their pretraining corpora (e.g., large-scale crawled web texts and books) contain a wealth of such examples. 
In contrast, specialized medical terms\u2014such as those related to sensitive diseases\u2014occur far less frequently, resulting in weaker representation capabilities and vocabulary gaps [\u003Ca id=\u0022xref-ref-21-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-21\u0022\u003E21\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-53\u0022\u003EFurthermore, despite supplementary dictionaries via the RAG approach, this still cannot fully address the limitations in representational capacity. Additionally, medical entities often follow complex naming conventions, with abundant synonyms and abbreviations (e.g., HPV for human papillomavirus), and accurate disambiguation requires precise clinical context. General-domain LLMs, if not fine-tuned for specific fields, often misidentify or overgeneralize these terms [\u003Ca id=\u0022xref-ref-22-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-22\u0022\u003E22\u003C\/a\u003E].\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-15\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EPrompt for Sensitive Information Tagging\u003C\/h3\u003E\u003Cp id=\u0022p-54\u0022\u003ETo effectively identify sensitive entities\u2014specifically hospital names, patient pregnancy-related privacy information, and patient sexually transmitted disease (STD) information\u2014we constructed dedicated retrieval dictionaries for each category, which were used to build corresponding vector databases. 
Full dictionary entries are provided in Appendix 1.\u003C\/p\u003E\u003Cp id=\u0022p-55\u0022\u003EThe hospital name dictionary includes full names, abbreviations, and acronyms (in both Chinese and English) for the majority of hospitals across China.\u003C\/p\u003E\u003Cp id=\u0022p-56\u0022\u003EThe pregnancy-related privacy dictionary comprises descriptive phrases concerning obstetric history, pregnancy-related clinical notes, and fetal condition records.\u003C\/p\u003E\u003Cp id=\u0022p-57\u0022\u003EThe STD-related dictionary contains a list of sensitive diseases with both English and Chinese abbreviations.\u003C\/p\u003E\u003Cp id=\u0022p-58\u0022\u003EFor the complete set of seven entity types, we designed a unified Prompt leveraging a Chain-of-Thought (CoT) reasoning approach. This Prompt guides the model to label entities step-by-step based on their category within the original text and outputs the annotated result in a fixed return format, which ensures the consistency and standardization required for downstream processing.\u003C\/p\u003E\u003Cp id=\u0022p-59\u0022\u003EWe further enhanced model performance by using Few-shot Prompting, providing several clear in-context examples to help the large language model better understand the task and improve recognition accuracy. The complete prompt text can be found in Appendix 2.\u003C\/p\u003E\u003Cp id=\u0022p-60\u0022\u003EBased on the above dictionaries and prompt structure, we utilized the Bisheng platform\u2019s built-in workflow template to construct an automated sensitive data tagging pipeline. This pipeline supports export in JSON format and also offers a packaged API, enabling seamless integration with training and validation workflows. 
The complete data pipeline JSON file is included in Appendix 3.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-16\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ESensitive Information Sensitivity Level Classification Results\u003C\/h3\u003E\u003Cp id=\u0022p-61\u0022\u003EAfter labeling the seven categories of entities, we performed a sensitivity level classification task based on the annotated text. The classification schema is defined as follows:\u003C\/p\u003E\u003Cp id=\u0022p-62\u0022\u003EFor names, ID numbers, contact information, and hospital names, the sensitivity level is uniformly set to Level 1 (Highly Sensitive). No further classification is required.\u003C\/p\u003E\u003Cp id=\u0022p-63\u0022\u003EFor addresses, we define two levels:\u003C\/p\u003E\u003Cp id=\u0022p-64\u0022\u003ELevel 1 (Highly Sensitive): Information that includes specific details such as street names, residential areas, or house numbers.\u003C\/p\u003E\u003Cp id=\u0022p-65\u0022\u003ELevel 2 (Moderately Sensitive): Information limited to county or district-level regions, such as \u201cHaidian District, Beijing\u201d.\u003C\/p\u003E\u003Cp id=\u0022p-66\u0022\u003EFor sexually transmitted disease (STD) information and pregnancy-related privacy information, we define three levels:\u003C\/p\u003E\u003Cp id=\u0022p-67\u0022\u003ELevel 1 (Highly Sensitive): Includes conditions that may result in severe social stigmatization if disclosed, such as HIV or syphilis, or extremely private records such as abortion history.\u003C\/p\u003E\u003Cp id=\u0022p-68\u0022\u003ELevel 2 (Moderately Sensitive): Includes conditions that may affect social perception, such as mycoplasma or herpes, or procedural records related to childbirth.\u003C\/p\u003E\u003Cp id=\u0022p-69\u0022\u003ELevel 3 (Mildly Sensitive): Indirect or less stigmatizing health information, such as hepatitis B\/C or marital and reproductive status.\u003C\/p\u003E\u003Cp id=\u0022p-70\u0022\u003EAs this is a multi-class 
classification task, we used the micro-average score to evaluate model performance in terms of both precision and recall. Given that sensitivity level classification lacks clear quantitative thresholds, correctness is primarily determined by expert judgment. To enhance annotation reliability, we adopted a dual-expert cross-validation approach:\u003C\/p\u003E\u003Cp id=\u0022p-71\u0022\u003EIf the two experts agreed on a classification, that result was taken as the ground truth.\u003C\/p\u003E\u003Cp id=\u0022p-72\u0022\u003EIn cases of disagreement, the lower sensitivity level was chosen as the final label, in order to support broader data usability and avoid over-sanitization.\u003C\/p\u003E\u003Cp id=\u0022p-73\u0022\u003EModel predictions were then compared with these ground truth labels to calculate classification performance. \u003Ca id=\u0022xref-table-wrap-2-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T2\u0022\u003ETable 2\u003C\/a\u003E presents the micro-average precision and recall for the three entity types subject to sensitivity level classification.\u003C\/p\u003E\u003Cdiv id=\u0022T2\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/1036216\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca 
href=\u0022\/highwire\/markup\/1036216\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/1036216\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 2.\u003C\/span\u003E \u003Cspan class=\u0022caption-title\u0022\u003EMicro-average Precision and Recall for Sensitivity Level Classification of Three Entity Types on the Test Set.\u003C\/span\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-17\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EPrompt for Sensitive Information Sensitivity Level Classification\u003C\/h3\u003E\u003Cp id=\u0022p-74\u0022\u003EFor the task of sensitivity level classification, we adopted a Chain-of-Thought (CoT) prompting approach. This method processes the previously labeled text and determines the sensitivity level for each entity. 
The sensitivity level is appended directly after the entity label in the annotated text:\u003C\/p\u003E\u003Cp id=\u0022p-75\u0022\u003ENames, ID numbers, contact information, and hospital names are uniformly labeled as Level 1 (Highly Sensitive).\u003C\/p\u003E\u003Cp id=\u0022p-76\u0022\u003EThe remaining entity types\u2014addresses, STD-related information, and pregnancy-related privacy information\u2014are classified by the model according to predefined criteria.\u003C\/p\u003E\u003Cp id=\u0022p-77\u0022\u003EThe complete prompt used for this task can be found in Appendix File 2.\u003C\/p\u003E\u003Cp id=\u0022p-78\u0022\u003EBased on this prompt, we developed a sensitivity level classification workflow for private data. The full data flow in JSON format is available in Appendix File 3, and the workflow also supports direct API invocation, allowing for streamlined integration into real-world systems.\u003C\/p\u003E\u003Cp id=\u0022p-79\u0022\u003EBy combining the workflows for sensitive information tagging and sensitivity level classification, we enable a fully automated pipeline for sensitive data protection. This setup supports targeted data de-identification based on the required sensitivity level for subsequent research use, allowing researchers to balance privacy and data utility as needed.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-18\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EDiscussion\u003C\/h2\u003E\u003Cdiv id=\u0022sec-19\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EPrincipal Findings\u003C\/h3\u003E\u003Cp id=\u0022p-80\u0022\u003EIn this study, we have innovatively developed a no-code privacy protection solution based on large language models (LLMs), utilizing natural language interactions to intelligently de-identify medical data. 
The solution incorporates a three-tier protection mechanism that dynamically adapts privacy preservation strategies to clinical scenario demands, maximizing data value retention while ensuring data security. This approach provides important insights for applying LLMs in medical data governance and showcases an innovative path for combining AI technology with medical compliance requirements.\u003C\/p\u003E\u003Cp id=\u0022p-81\u0022\u003EBreakthrough Achievements: This research marks a breakthrough in applying large language models in the field of sensitive information protection, particularly within medical research contexts. Compared to traditional AI models that require specialized algorithm engineers, large language models can handle complex data de-identification tasks through natural language interactions. This significantly lowers the AI adoption threshold for healthcare institutions.\u003C\/p\u003E\u003Cp id=\u0022p-82\u0022\u003EUnique Value of Data De-identification: When it comes to privacy protection techniques, data de-identification presents distinct practical value. Compared to the technical complexity of federated learning or the high computational costs associated with data encryption, large model-based intelligent de-identification systems maintain the statistical analysis value of the original data. Additionally, these systems leverage natural language processing capabilities to provide context-sensitive and precise masking of sensitive information within medical records.\u003C\/p\u003E\u003Cp id=\u0022p-83\u0022\u003EWe have used large model technology to address traditional entity recognition and classification tasks, achieving good results overall. This success encourages us to explore other types of tasks in the future. 
As large models continue to evolve, embracing these technologies to better serve our research remains a substantial challenge for healthcare professionals.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-20\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003ELimitations\u003C\/h3\u003E\u003Cp id=\u0022p-84\u0022\u003EDespite the promising outcomes of this study, there are still several limitations that need to be addressed:\u003C\/p\u003E\u003Cp id=\u0022p-85\u0022\u003EData Limitations: The model\u2019s test and validation datasets were sourced exclusively from Beijing Union Medical College Hospital. Although we strictly divided the training, validation, and test sets, the singular data source may limit the model\u2019s generalization ability. Future research should include data from multiple healthcare institutions (e.g., top-tier hospitals, primary care hospitals) and perform multi-center validation to comprehensively assess the model\u2019s practical value.\u003C\/p\u003E\u003Cp id=\u0022p-86\u0022\u003ELack of Privacy Rule Adjustment Mechanisms: The current privacy protection scheme relies mainly on static rules defined by sensitivity level parameters, lacking the capability for dynamic adjustments. Additionally, the instability of large model outputs may trigger systemic risks\u2014any changes in the rules can have cascading effects, requiring a reevaluation of the entire system\u2019s security. This necessitates the development of more stable and adaptive privacy protection frameworks.\u003C\/p\u003E\u003Cp id=\u0022p-87\u0022\u003EDe-identification Precision Issues: Existing de-identification algorithms suffer from both over-de-identification and under-de-identification problems. Over-de-identification can lead to the loss of crucial clinical information, severely affecting data usability, while under-de-identification may result in sensitive information being overlooked. 
More accurate semantic recognition technology is needed to protect privacy while maximizing data value retention. It is recommended to introduce manual review mechanisms as a transitional solution.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-21\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003EFuture Directions\u003C\/h3\u003E\u003Cp id=\u0022p-88\u0022\u003EThe application of large language models in healthcare has already permeated multiple stages of the clinical workflow, ranging from fundamental data cleaning and standardization to complex clinical decision-support systems. The breadth and depth of these applications continue to expand. However, this technological penetration presents dual challenges: on one hand, the accuracy and stability of model outputs directly affect the quality of medical services; on the other hand, the unique nature of medical scenarios demands that algorithms be highly reliable and interpretable. In future research, we will focus on the following:\u003C\/p\u003E\u003Cp id=\u0022p-89\u0022\u003EApplication Expansion: Exploring the potential applications of large models in emerging areas such as personalized treatment plan generation, assistive diagnostics, and healthcare resource optimization.\u003C\/p\u003E\u003Cp id=\u0022p-90\u0022\u003EReliability Validation Framework: Establishing a comprehensive evaluation framework that includes clinical effectiveness, algorithm stability, and ethical compliance.\u003C\/p\u003E\u003Cp id=\u0022p-91\u0022\u003EHuman-AI Collaboration Mechanism: Investigating how to combine the predictive power of large models with clinical experts\u2019 experience and judgment to build more intelligent decision support systems.\u003C\/p\u003E\u003Cp id=\u0022p-92\u0022\u003EThrough continuous technological iterations and rigorous clinical validation, we aim to transform large models into truly reliable research tools\u2014leveraging their strengths in data processing and pattern recognition while 
ensuring their outputs meet the high standards required in medical practice. The deep integration of this technology with medicine will open new possibilities for enhancing healthcare quality and fostering research innovation.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-22\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EConclusions\u003C\/h2\u003E\u003Cp id=\u0022p-93\u0022\u003EWe have innovatively developed a no-code privacy protection solution based on large language models, which enables intelligent de-identification of medical data through natural language interaction. This solution adopts a three-level hierarchical protection mechanism, which can automatically match the appropriate privacy protection strategies according to clinical scenario requirements, ensuring data security while maximizing the retention of the data\u2019s research and application value.\u003C\/p\u003E\u003Cp id=\u0022p-94\u0022\u003EThis practice provides an important reference for the application of large models in medical data governance and demonstrates an innovative path for the integration of AI technology with medical compliance requirements. It not only provides the medical industry with a more convenient privacy protection method but also promotes the further development and application of large language models in the medical field. Through this technology, we can achieve efficient de-identification of data, providing more reliable protection for the secure sharing of medical data and clinical research.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section data-availability\u0022 id=\u0022sec-23\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EData Availability\u003C\/h2\u003E\u003Cp id=\u0022p-95\u0022\u003EThe datasets supporting the findings of this study are available from the corresponding author upon reasonable request. 
The hospital data cannot be made publicly available due to its sensitive nature.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ref-list\u0022 id=\u0022ref-list-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EReference\u003C\/h2\u003E\u003Col class=\u0022cit-list\u0022\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-1-1\u0022 title=\u0022View reference 1 in text\u0022 id=\u0022ref-1\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.07.27.25332177v1.1\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Col class=\u0022duplicate\u0022\u003E\u003Cli\u003E\u003Cspan class=\u0022cit-ed\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EColliot\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO\u003C\/span\u003E\u003C\/span\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWang\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EFerrari\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHaddon-Hill\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-chapter-title\u0022\u003EElectronic Health Records as Source of Research Data. 2023 Jul 23\u003C\/span\u003E. In: \u003Cspan class=\u0022cit-ed\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EColliot\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EO\u003C\/span\u003E\u003C\/span\u003E, editor. 
\u003Cspan class=\u0022cit-source\u0022\u003EMachine Learning for Brain Disorders [Internet]\u003C\/span\u003E. \u003Cspan class=\u0022cit-publ-loc\u0022\u003ENew York, NY\u003C\/span\u003E: \u003Cspan class=\u0022cit-publ-name\u0022\u003EHumana\u003C\/span\u003E; \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E. Chapter 11.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-2-1\u0022 title=\u0022View reference 2 in text\u0022 id=\u0022ref-2\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.07.27.25332177v1.2\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERichesson\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ERL\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPlatt\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESimon\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-chapter-title\u0022\u003EUsing Electronic Health Record Data in Pragmatic Clinical Trials: Introduction\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EIn: Rethinking Clinical Trials: A Living Textbook of Pragmatic Clinical Trials\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-publ-loc\u0022\u003EBethesda, MD\u003C\/span\u003E: \u003Cspan class=\u0022cit-publ-name\u0022\u003ENIH Pragmatic Trials Collaboratory\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-3-1\u0022 title=\u0022View reference 3 in text\u0022 id=\u0022ref-3\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.3\u0022 data-doi=\u002210.1186\/s12874-022-01768-6\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELiu\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EF.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPanagiotakos\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EReal-world data: a brief review of the methods, applications, challenges and opportunities\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EBMC Med Res Methodol\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E22\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E287\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2022\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DBMC%2BMed%2BRes%2BMethodol%26rft.volume%253D22%26rft.spage%253D287%26rft_id%253Dinfo%253Adoi%252F10.1186%252Fs12874-022-01768-6%26rft_id%253Dinfo%253Apmid%252F36335315%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1186\/s12874-022-01768-6\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=36335315\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-4-1\u0022 title=\u0022View reference 4 in text\u0022 id=\u0022ref-4\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.4\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan 
class=\u0022cit-name-given-names\u0022\u003EJavad\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EPool\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ESaeed\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAkhlaghpour\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EFarhad\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFatehi\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EAndrew\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBurton-Jones\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-article-title\u0022\u003EA systematic analysis of failures in protecting personal health data: A scoping review\u003C\/span\u003E, \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EInternational Journal of Information Management\u003C\/abbr\u003E, Volume \u003Cspan class=\u0022cit-vol\u0022\u003E74\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E102719\u003C\/span\u003E, ISSN \u003Cspan class=\u0022cit-issn\u0022\u003E0268-4012\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DInternational%2BJournal%2Bof%2BInformation%2BManagement%26rft.volume%253D74%26rft.spage%253D102719%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl 
cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-5-1\u0022 title=\u0022View reference 5 in text\u0022 id=\u0022ref-5\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.07.27.25332177v1.5\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ESamuel\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EWairimu\u003C\/span\u003E\u003C\/span\u003E and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ELothar\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EFritsch\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-pub-date\u0022\u003E2022\u003C\/span\u003E. \u003Cspan class=\u0022cit-chapter-title\u0022\u003EModelling privacy harms of compromised personal medical data - beyond data breach\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EIn The 17th International Conference on Availability, Reliability and Security (ARES 2022), August 23\u201326, 2022\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003EVienna, Austria. 
ACM, New York, NY, USA\u003C\/span\u003E 9 Pages.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-6-1\u0022 title=\u0022View reference 6 in text\u0022 id=\u0022ref-6\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.6\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ERobert M A\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003Evan der Boon\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA John\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ECamm\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EAguiar\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBiasin\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBreithardt\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBueno\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI\u003C\/span\u003E \u003Cspan 
class=\u0022cit-name-surname\u0022\u003EDrossart\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EHoppe\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EKamenjasevic\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELadeiras-Lopes\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EPaul\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EMcGreavy\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ELanzer\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EVidal-Perez\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003ENico\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EBruining\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-article-title\u0022\u003ERisks and benefits of sharing patient information on social media: a digital dilemma\u003C\/span\u003E, \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EEuropean Heart Journal - Digital Health\u003C\/abbr\u003E, Volume \u003Cspan class=\u0022cit-vol\u0022\u003E5\u003C\/span\u003E, Issue \u003Cspan class=\u0022cit-issue\u0022\u003E3\u003C\/span\u003E, May \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E, Pages \u003Cspan class=\u0022cit-fpage\u0022\u003E199\u003C\/span\u003E\u2013\u003Cspan 
class=\u0022cit-lpage\u0022\u003E207\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DEuropean%2BHeart%2BJournal%2B-%2BDigital%2BHealth%26rft.volume%253D5%26rft.spage%253D199%26rft_id%253Dinfo%253Apmid%252F38774369%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=38774369\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-7-1\u0022 title=\u0022View reference 7 in text\u0022 id=\u0022ref-7\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.7\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELiu\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJiao\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESu\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan 
class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELiu\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EW\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhang\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ENie\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGong\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-article-title\u0022\u003EEffective Privacy Protection Strategies for Pregnancy and Gestation Information From Electronic Medical Records: Retrospective Study in a National Health Care Data Network in China\u003C\/span\u003E, \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ Med Internet Res\u003C\/abbr\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E;\u003Cspan class=\u0022cit-vol\u0022\u003E26\u003C\/span\u003E:\u003Cspan class=\u0022cit-fpage\u0022\u003Ee46455\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJ%2BMed%2BInternet%2BRes%26rft.volume%253D26%26rft.spage%253De46455%26rft_id%253Dinfo%253Apmid%252F39163593%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca 
href=\u0022\/lookup\/external-ref?access_num=39163593\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-8-1\u0022 title=\u0022View reference 8 in text\u0022 id=\u0022ref-8\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.8\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGong\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYu\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EOuyang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EZ.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EPrivacy protection of sexually transmitted infections information from Chinese electronic medical records\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ESci Rep\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E15\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E1296\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DSci%2BRep%26rft.volume%253D15%26rft.spage%253D1296%26rft_id%253Dinfo%253Apmid%252F39779720%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=39779720\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-9-1\u0022 title=\u0022View reference 9 in text\u0022 id=\u0022ref-9\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.9\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EVaswani\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EShazeer\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan 
class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EParmar\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EAttention is all you need[J]\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAdvances in neural information processing systems\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2017\u003C\/span\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E30\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-10-1\u0022 title=\u0022View reference 10 in text\u0022 id=\u0022ref-10\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.10\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBrown\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMann\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERyder\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ELanguage models are few-shot learners[J]\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAdvances in neural information processing systems\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E33\u003C\/span\u003E: \u003Cspan class=\u0022cit-fpage\u0022\u003E1877\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E1901\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAdvances%2Bin%2Bneural%2Binformation%2Bprocessing%2Bsystems%26rft.volume%253D33%26rft.spage%253D1877%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-11-1\u0022 title=\u0022View reference 11 in text\u0022 id=\u0022ref-11\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2025.07.27.25332177v1.11\u0022\u003E\u003Cdiv class=\u0022cit-metadata unstructured\u0022\u003E\u003Ca href=\u0022https:\/\/huggingface.co\/Qwen\/Qwen2.5-72B-Instruct\u0022\u003Ehttps:\/\/huggingface.co\/Qwen\/Qwen2.5-72B-Instruct\u003C\/a\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-12-1\u0022 title=\u0022View reference 12 in text\u0022 id=\u0022ref-12\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2025.07.27.25332177v1.12\u0022\u003E\u003Cdiv class=\u0022cit-metadata unstructured\u0022\u003E\u003Ca 
href=\u0022https:\/\/github.com\/dataelement\/bisheng\u0022\u003Ehttps:\/\/github.com\/dataelement\/bisheng\u003C\/a\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-13-1\u0022 title=\u0022View reference 13 in text\u0022 id=\u0022ref-13\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2025.07.27.25332177v1.13\u0022\u003E\u003Cdiv class=\u0022cit-metadata unstructured\u0022\u003E\u003Ca href=\u0022https:\/\/huggingface.co\/BAAI\/bge-large-zh-v1.5\u0022\u003Ehttps:\/\/huggingface.co\/BAAI\/bge-large-zh-v1.5\u003C\/a\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-14-1\u0022 title=\u0022View reference 14 in text\u0022 id=\u0022ref-14\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.14\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELiu\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJiao\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESu\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELiu\u003C\/span\u003E  \u003Cspan 
class=\u0022cit-name-given-names\u0022\u003EW\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EZhang\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ENie\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGong\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EEffective Privacy Protection Strategies for Pregnancy and Gestation Information From Electronic Medical Records: Retrospective Study in a National Health Care Data Network in China\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ Med Internet Res\u003C\/abbr\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E;\u003Cspan class=\u0022cit-vol\u0022\u003E26\u003C\/span\u003E:\u003Cspan class=\u0022cit-fpage\u0022\u003Ee46455\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJ%2BMed%2BInternet%2BRes%26rft.volume%253D26%26rft.spage%253De46455%26rft_id%253Dinfo%253Apmid%252F39163593%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca 
href=\u0022\/lookup\/external-ref?access_num=39163593\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-15-1\u0022 title=\u0022View reference 15 in text\u0022 id=\u0022ref-15\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.15\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGong\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EM.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EYu\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EY.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EOuyang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EZ.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EPrivacy protection of sexually transmitted infections information from Chinese electronic medical records\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ESci Rep\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E15\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E1296\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DSci%2BRep%26rft.volume%253D15%26rft.spage%253D1296%26rft_id%253Dinfo%253Apmid%252F39779720%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=39779720\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-16-1\u0022 title=\u0022View reference 16 in text\u0022 id=\u0022ref-16\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.16\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESahoo\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESingh\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA K\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan 
class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESaha\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EA systematic survey of prompt engineering in large language models: Techniques and applications[J]\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv preprint\u003C\/abbr\u003E\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-arxiv-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-arxiv\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-arxiv\u0022\u003Earxiv:\u003C\/span\u003E2402.07927\u003C\/span\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-17-1\u0022 title=\u0022View reference 17 in text\u0022 id=\u0022ref-17\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.17\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBrown\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMann\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EB\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERyder\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EN\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan 
class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ELanguage models are few-shot learners[J]\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAdvances in neural information processing systems\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E33\u003C\/span\u003E: \u003Cspan class=\u0022cit-fpage\u0022\u003E1877\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E1901\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAdvances%2Bin%2Bneural%2Binformation%2Bprocessing%2Bsystems%26rft.volume%253D33%26rft.spage%253D1877%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-18-1\u0022 title=\u0022View reference 18 in text\u0022 id=\u0022ref-18\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.18\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELewis\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPerez\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan 
class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EPiktus\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ERetrieval-augmented generation for knowledge-intensive nlp tasks[J]\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAdvances in neural information processing systems\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2020\u003C\/span\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E33\u003C\/span\u003E: \u003Cspan class=\u0022cit-fpage\u0022\u003E9459\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E9474\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAdvances%2Bin%2Bneural%2Binformation%2Bprocessing%2Bsystems%26rft.volume%253D33%26rft.spage%253D9459%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-19-1\u0022 title=\u0022View reference 19 in text\u0022 id=\u0022ref-19\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.19\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWei\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ\u003C\/span\u003E\u003C\/span\u003E, 
\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWang\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EX\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESchuurmans\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EChain-of-thought prompting elicits reasoning in large language models[J]\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EAdvances in neural information processing systems\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-pub-date\u0022\u003E2022\u003C\/span\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E35\u003C\/span\u003E: \u003Cspan class=\u0022cit-fpage\u0022\u003E24824\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E24837\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAdvances%2Bin%2Bneural%2Binformation%2Bprocessing%2Bsystems%26rft.volume%253D35%26rft.spage%253D24824%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-20-1\u0022 title=\u0022View reference 20 in text\u0022 id=\u0022ref-20\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.20\u0022\u003E\u003Cdiv 
class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJason P.C.\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003EChiu\u003C\/span\u003E\u003C\/span\u003E and \u003Cspan class=\u0022cit-auth\u0022\u003E \u003Cspan class=\u0022cit-name-given-names\u0022\u003EEric\u003C\/span\u003E \u003Cspan class=\u0022cit-name-surname\u0022\u003ENichols\u003C\/span\u003E\u003C\/span\u003E. \u003Cspan class=\u0022cit-pub-date\u0022\u003E2016\u003C\/span\u003E. \u003Cspan class=\u0022cit-article-title\u0022\u003ENamed Entity Recognition with Bidirectional LSTM-CNNs\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ETransactions of the Association for Computational Linguistics\u003C\/abbr\u003E, \u003Cspan class=\u0022cit-vol\u0022\u003E4\u003C\/span\u003E:\u003Cspan class=\u0022cit-fpage\u0022\u003E357\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E370\u003C\/span\u003E.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DTransactions%2Bof%2Bthe%2BAssociation%2Bfor%2BComputational%2BLinguistics%26rft.volume%253D4%26rft.spage%253D357%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-21-1\u0022 title=\u0022View reference 21 in text\u0022 id=\u0022ref-21\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.21\u0022\u003E\u003Cdiv 
class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EXie\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EChen\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EChen\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003EMedical foundation large language models for comprehensive text analysis and beyond\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003Enpj Digit. Med\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E8\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003E141\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253Dnpj%2BDigit.%2BMed%26rft.volume%253D8%26rft.spage%253D141%26rft_id%253Dinfo%253Apmid%252F40044845%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=40044845\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F07%2F28%2F2025.07.27.25332177.atom\u0022 class=\u0022cit-ref-sprinkles 
cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-22-1\u0022 title=\u0022View reference 22 in text\u0022 id=\u0022ref-22\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.07.27.25332177v1.22\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELu\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EQ\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ELi\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWen\u003C\/span\u003E  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E \u003Cspan class=\u0022cit-article-title\u0022\u003ELarge language models struggle in token-level clinical named entity recognition[J]\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EarXiv preprint\u003C\/abbr\u003E\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-arxiv-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-arxiv\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-arxiv\u0022\u003Earxiv:\u003C\/span\u003E2407.00731\u003C\/span\u003E, 2024.\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003C\/div\u003E\u003Cspan class=\u0022highwire-journal-article-marker-end\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003Cspan class=\u0022related-urls\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E  \u003C\/div\u003E\n\n  \n  \u003C\/div\u003E\n\u003C\/div\u003E\n  \u003C\/div\u003E\n\u003C\/div\u003E\n\u003C\/div\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_zP7WWIfzbyzvaM63L39cNV2juU_1XVH7wduFK9gcMNI.js\u0022\u003E\u003C\/script\u003E\n\u003C\/body\u003E\u003C\/html\u003E"}