{"markup":"\u003C?xml version=\u00221.0\u0022 encoding=\u0022UTF-8\u0022 ?\u003E\n    \u003Chtml version=\u0022HTML+RDFa+MathML 1.1\u0022\n    xmlns:content=\u0022http:\/\/purl.org\/rss\/1.0\/modules\/content\/\u0022\n    xmlns:dc=\u0022http:\/\/purl.org\/dc\/terms\/\u0022\n    xmlns:foaf=\u0022http:\/\/xmlns.com\/foaf\/0.1\/\u0022\n    xmlns:og=\u0022http:\/\/ogp.me\/ns#\u0022\n    xmlns:rdfs=\u0022http:\/\/www.w3.org\/2000\/01\/rdf-schema#\u0022\n    xmlns:sioc=\u0022http:\/\/rdfs.org\/sioc\/ns#\u0022\n    xmlns:sioct=\u0022http:\/\/rdfs.org\/sioc\/types#\u0022\n    xmlns:skos=\u0022http:\/\/www.w3.org\/2004\/02\/skos\/core#\u0022\n    xmlns:xsd=\u0022http:\/\/www.w3.org\/2001\/XMLSchema#\u0022\n    xmlns:mml=\u0022http:\/\/www.w3.org\/1998\/Math\/MathML\u0022\u003E\n  \u003Chead\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_YjAJQgxDlFX6S-O02jj9jCrVbrwlY3CGgCg1FzPlvBs.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nif(typeof window.MathJax === \u0022undefined\u0022) window.MathJax = { menuSettings: { zoom: \u0022Click\u0022 } };\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_waP91NpgGpectm_6Y2XDEauLJ8WCSCBKmmA87unpp2E.js\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.googletagmanager.com\/gtag\/js?id=G-0K57TCX5BY\u0022\u003E\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\nwindow.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments)};gtag(\u0022js\u0022, new 
Date());gtag(\u0022set\u0022, \u0022developer_id.dMDhkMT\u0022, true);gtag(\u0022config\u0022, \u0022G-0K57TCX5BY\u0022, {\u0022groups\u0022:\u0022default\u0022,\u0022anonymize_ip\u0022:true});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Cscript type=\u0022text\/javascript\u0022\u003E\n\u003C!--\/\/--\u003E\u003C![CDATA[\/\/\u003E\u003C!--\njQuery.extend(Drupal.settings, {\u0022basePath\u0022:\u0022\\\/\u0022,\u0022pathPrefix\u0022:\u0022\u0022,\u0022highwire\u0022:{\u0022ac\u0022:{\u0022medrxiv;2025.04.07.25325392v1\u0022:{\u0022access\u0022:{\u0022full\u0022:true},\u0022pisa_id\u0022:\u0022medrxiv;2025.04.07.25325392v1\u0022,\u0022apath\u0022:\u0022\u0022,\u0022jcode\u0022:\u0022medrxiv\u0022}},\u0022processed\u0022:[\u0022highwire_math\u0022],\u0022markup\u0022:[{\u0022requested\u0022:\u0022full-text\u0022,\u0022variant\u0022:\u0022full-text\u0022,\u0022view\u0022:\u0022full\u0022,\u0022pisa\u0022:\u0022medrxiv;2025.04.07.25325392v1\u0022}]},\u0022instances\u0022:\u0022{\\u0022highwire_abstract_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:20,\\u0022height\\u0022:20,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-abstract-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-abstract-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022right center\\u0022,\\u0022my\\u0022:\\u0022left center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022shift\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter click \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_author_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-author-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-author-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022top center\\u0022,\\u0022my\\u0022:\\u0022bottom center\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave \\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}},\\u0022highwire_reflinks_tooltip\\u0022:{\\u0022content\\u0022:{\\u0022text\\u0022:\\u0022\\u0022},\\u0022style\\u0022:{\\u0022tip\\u0022:{\\u0022width\\u0022:15,\\u0022height\\u0022:15,\\u0022border\\u0022:1,\\u0022mimic\\u0022:\\u0022top center\\u0022,\\u0022offset\\u0022:0,\\u0022corner\\u0022:true},\\u0022classes\\u0022:\\u0022qtip-custom hw-tooltip hw-ref-link-tooltip qtip-shadow qtip-rounded\\u0022,\\u0022classes_custom\\u0022:\\u0022hw-tooltip hw-ref-link-tooltip\\u0022},\\u0022position\\u0022:{\\u0022at\\u0022:\\u0022bottom left\\u0022,\\u0022my\\u0022:\\u0022top left\\u0022,\\u0022viewport\\u0022:true,\\u0022adjust\\u0022:{\\u0022method\\u0022:\\u0022flip\\u0022}},\\u0022show\\u0022:{\\u0022event\\u0022:\\u0022mouseenter \\u0022,\\u0022solo\\u0022:true},\\u0022hide\\u0022:{\\u0022event\\u0022:\\u0022mouseleave 
\\u0022,\\u0022fixed\\u0022:1,\\u0022delay\\u0022:\\u0022100\\u0022}}}\u0022,\u0022qtipDebug\u0022:\u0022{\\u0022leaveElement\\u0022:0}\u0022,\u0022googleanalytics\u0022:{\u0022account\u0022:[\u0022G-0K57TCX5BY\u0022],\u0022trackOutbound\u0022:1,\u0022trackMailto\u0022:1,\u0022trackDownload\u0022:1,\u0022trackDownloadExtensions\u0022:\u00227z|aac|arc|arj|asf|asx|avi|bin|csv|doc(x|m)?|dot(x|m)?|exe|flv|gif|gz|gzip|hqx|jar|jpe?g|js|mp(2|3|4|e?g)|mov(ie)?|msi|msp|pdf|phps|png|ppt(x|m)?|pot(x|m)?|pps(x|m)?|ppam|sld(x|m)?|thmx|qtm?|ra(m|r)?|sea|sit|tar|tgz|torrent|txt|wav|wma|wmv|wpd|xls(x|m|b)?|xlt(x|m)|xlam|xml|z|zip\u0022,\u0022trackColorbox\u0022:1},\u0022ajaxPageState\u0022:{\u0022js\u0022:{\u0022\\\/\\\/cdn.jsdelivr.net\\\/qtip2\\\/2.2.1\\\/jquery.qtip.min.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_article_reference_popup.js\u0022:1,\u0022sites\\\/all\\\/modules\\\/highwire\\\/highwire\\\/plugins\\\/highwire_markup_process\\\/js\\\/highwire_at_symbol.js\u0022:1,\u00220\u0022:1,\u0022sites\\\/all\\\/modules\\\/contrib\\\/google_analytics\\\/googleanalytics.js\u0022:1,\u0022https:\\\/\\\/www.googletagmanager.com\\\/gtag\\\/js?id=G-0K57TCX5BY\u0022:1,\u00221\u0022:1}}});\n\/\/--\u003E\u003C!]]\u003E\n\u003C\/script\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__dn-cpI1YtkU_iLHgA5WhlkxgYWyat_IxjF_B-WSYrpE__a9hIbt0eaZ7d5nhwnm2weG8R_2eXK4EvoOx9dOxouHE__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 href=\u0022\/\/cdn.jsdelivr.net\/qtip2\/2.2.1\/jquery.qtip.min.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink type=\u0022text\/css\u0022 rel=\u0022stylesheet\u0022 
href=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/advagg_css\/css__HGACIFBlu2o05y3afvqlt5wrE_5Dn6MXsexfuEpeIwg__t4SOPxucAPoV3Os7g8dXqyMB1HRXQridRJ82X7nE33E__QrrGUc7CpljPR5Aph-ukPbcwtK4AWrHGwCEXJ_k1V_c.css\u0022 media=\u0022all\u0022 \/\u003E\n\u003Clink rel=\u0027stylesheet\u0027 type=\u0027text\/css\u0027 href=\u0027\/sites\/all\/modules\/contrib\/panels\/plugins\/layouts\/onecol\/onecol.css\u0027 \/\u003E\u003C\/head\u003E\u003Cbody\u003E\u003Cdiv class=\u0022panels-ajax-tab-panel panels-ajax-tab-panel-article-tab-full-text\u0022\u003E\u003Cdiv class=\u0022panel-display panel-1col clearfix\u0022 \u003E\n  \u003Cdiv class=\u0022panel-panel panel-col\u0022\u003E\n    \u003Cdiv\u003E\u003Cdiv class=\u0022panel-pane pane-highwire-markup\u0022 \u003E\n  \n      \n  \n  \u003Cdiv class=\u0022pane-content\u0022\u003E\n    \u003Cdiv class=\u0022highwire-markup\u0022\u003E\u003Cdiv xmlns=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022 data-highwire-cite-ref-tooltip-instance=\u0022highwire_reflinks_tooltip\u0022 class=\u0022content-block-markup\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cdiv class=\u0022article fulltext-view \u0022\u003E\u003Cspan class=\u0022highwire-journal-article-marker-start\u0022\u003E\u003C\/span\u003E\u003Cdiv class=\u0022section abstract\u0022 id=\u0022abstract-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EAbstract\u003C\/h2\u003E\u003Cp id=\u0022p-3\u0022\u003ERecent work on computer vision and image processing has relied substantially on open datasets, which allow for an objective comparison of techniques and methodologies. In the area of computational pathology and, more specifically, on colorectal cancer, the dataset NCT-CRC-HE-100K, which consists of 100,000 patches of human tissue stained with Haematoxylin and Eosin has been widely used as a training set for deep learning studies. 
The patches are grouped into 9 classes of tissue (adipose, background, debris, lymphocytes, mucus, smooth muscle, normal colon mucosa, cancer-associated stroma, colorectal adenocarcinoma epithelium). The set is released with a separate set (CRC-VAL-HE-7K) of 7,180 patches that is commonly used for testing. In this work, features were extracted from both sets first with Persistent Homology, then, with Gabor filters to reveal that the training set presents a rather different distribution from the testing set. Namely, the distribution of features in the 7K-set presents a much higher class overlap than those in the 100K-set, which would imply a much higher separability in the testing set than in the training set.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E1 Introduction\u003C\/h2\u003E\u003Cp id=\u0022p-16\u0022\u003EThe development and success of deep learning has relied on the existence of large sets of labelled data, in addition to high computational power provided by hardware like graphical processing units. Medical data usually requires labelling by experts, like radiologists, cytologists and pathologists, all of which have experienced shortages in recent years [\u003Ca id=\u0022xref-ref-1-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-1\u0022\u003E1\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-16-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-16\u0022\u003E16\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-18-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-18\u0022\u003E18\u003C\/a\u003E]. Thus, open datasets, such as those provided in \u003Cem\u003Echallenges\u003C\/em\u003E through websites such as Grand-Challenge (\u003Ca href=\u0022https:\/\/grand-challenge.org\/\u0022\u003Ehttps:\/\/grand-challenge.org\/\u003C\/a\u003E), which, in some cases, are related to conferences like ISBI (IEEE 
International Symposium on Biomedical Imaging) or MICCAI (Medical Image Computing and Computer-Assisted Intervention) are welcomed by the community. In many cases, the datasets are linked to a competition where algorithms or results are evaluated with certain criteria, e.g. accuracy or Jaccard index, and then ranked and placed in leaderboards. The reproducibility, interpretation and ranking aspects of some challenges have been scrutinised as rankings can vary depending on certain factors, e.g., rank and aggregate v. aggregate and rank, mean v. median, rank with Hausdorff distance (HD) v. rank with HD95, etc. [\u003Ca id=\u0022xref-ref-14-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-14\u0022\u003E14\u003C\/a\u003E]. Another important factor is the test data used for validation.\u003C\/p\u003E\u003Cp id=\u0022p-17\u0022\u003EIn this paper, a commonly used dataset (NCT-CRC-HE-100K [\u003Ca id=\u0022xref-ref-11-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-11\u0022\u003E11\u003C\/a\u003E,\u003Ca id=\u0022xref-ref-12-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-12\u0022\u003E12\u003C\/a\u003E]) is scrutinised. The dataset is commonly used to train deep learning architectures for the classification of histological images of colorectal cancer stained with Haematoxylin and Eosin (H\u0026amp;E). 
The dataset consists of 100,000 patches of human tissue stained with H\u0026amp;E of healthy and cancerous tissues grouped into 9 categories: adipose, background, debris, lymphocytes, mucus, smooth muscle, normal colon mucosa, cancer-associated stroma, colorectal adenocarcinoma epithelium, which are normally used for training, and a separate set (CRC-VAL-HE-7K) of 7,180 patches is commonly used for testing [\u003Ca id=\u0022xref-ref-13-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-13\u0022\u003E13\u003C\/a\u003E, \u003Ca id=\u0022xref-ref-19-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-19\u0022\u003E19\u003C\/a\u003E, \u003Ca id=\u0022xref-ref-21-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-21\u0022\u003E21\u003C\/a\u003E]. Concerns have already been raised about these datasets. In [\u003Ca id=\u0022xref-ref-9-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E], the authors identified biases in the classes by performing classification experiments using simple models that focused solely on the colour of the images and obtaining satisfactory results. The authors also managed to identify compression artifacts that show up in different concentrations by class, leading to high classification accuracies.\u003C\/p\u003E\u003Cp id=\u0022p-18\u0022\u003EThis paper focuses on features of the patches that are not related to colour, namely texture and structure. By using Persistent Homology and Gabor filters it was observed that these two datasets were not equivalent in the separability of the classes, revealing further issues with the datasets. A visual analysis of the patches was also performed and the effects of normalisation on them were shown. Using these analyses, light is shed on the difference in qualities of the datasets, with CRC-VAL-HE-7K (7K-set) having a higher separability than NCT-CRC-HE-100K (100K-set). 
Additionally, an experiment was carried out in which a random forest was trained on the 100K-set and tested on the 7K-set before reversing the roles of the sets to train on the 7K-set and test on the 100K-set. This helped to confirm the differences between the sets and show the implications of training and testing on different-quality datasets.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-2\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E2 Background: Persistent Homology\u003C\/h2\u003E\u003Cp id=\u0022p-19\u0022\u003EPersistent Homology (PH) is an important tool that belongs to the mathematical field of Topological Data Analysis (TDA), which allows one to extract topological features from datasets and create statistical models. This tool has already seen applications in biomedical imaging [\u003Ca id=\u0022xref-ref-6-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-6\u0022\u003E6\u003C\/a\u003E]. Since PH and TDA are not commonly used in Computer Vision and Image Processing areas, a short review will be presented. For a comprehensive introduction to PH, the reader is referred to [\u003Ca id=\u0022xref-ref-8-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-8\u0022\u003E8\u003C\/a\u003E].\u003C\/p\u003E\u003Cdiv id=\u0022sec-3\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E2.1 Components and Holes\u003C\/h3\u003E\u003Cp id=\u0022p-20\u0022\u003EPH analyses an image or a space to find its \u003Cem\u003Ecomponents\u003C\/em\u003E and \u003Cem\u003Eholes\u003C\/em\u003E. A (connected) \u003Cem\u003Ecomponent\u003C\/em\u003E is a subset of a space whose elements are connected to each other. In the case of an image, a component would correspond to a group of pixels that are adjacent to each other. A \u003Cem\u003Ehole\u003C\/em\u003E corresponds to a region that is completely surrounded by a single component and does not belong to that component. 
PH essentially tracks how the numbers of components and holes change as the conditions on the space or image change. \u003Ca id=\u0022xref-fig-1-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1 (a)\u003C\/a\u003E illustrates six 2D spaces (binary images) with different numbers of components (1,2,3,3,2,1) and holes (0,0,0,1,1,0). These cases can also be understood as a process from left to right where components and holes are born (appear) and die (disappear). This is illustrated in \u003Ca id=\u0022xref-fig-1-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1 (b)\u003C\/a\u003E, (c) where the birth of each is indicated with a vertical black line, and the death is indicated with a vertical grey line with a red cross.\u003C\/p\u003E\u003Cdiv id=\u0022F1\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F1.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Illustration of the main concepts of Persistent Homology: births, deaths, holes and components. (a) A sequence of six binary images. (b) A black bar corresponds to the birth of a component, and a red cross and a grey bar indicate a death. (c) The birth and death of holes. (d) Representation of the components as a simplicial complex: A point corresponds to a component, except if there is a hole, in which case a cycle of 3 points and 3 edges is added. The cycle is filled with a triangle when the hole dies. 
(e) Step-by-step formation of the persistence diagram.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Illustration of the main concepts of Persistent Homology: births, deaths, holes and components. (a) A sequence of six binary images. (b) A black bar corresponds to the birth of a component, and a red cross and a grey bar indicate a death. (c) The birth and death of holes. (d) Representation of the components as a simplicial complex: A point corresponds to a component, except if there is a hole, in which case a cycle of 3 points and 3 edges is added. The cycle is filled with a triangle when the hole dies. (e) Step-by-step formation of the persistence diagram.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 1.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022357\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 
1.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F1.medium.gif\u0022 width=\u0022440\u0022 height=\u0022357\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F1.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 1.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F1.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022 xmlns:xhtml=\u0022http:\/\/www.w3.org\/1999\/xhtml\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 1.\u003C\/span\u003E \u003Cp id=\u0022p-21\u0022 class=\u0022first-child\u0022\u003EIllustration of the main concepts of Persistent Homology: births, deaths, holes and components. (a) A sequence of six binary images. (b) A black bar corresponds to the birth of a component, and a red cross and a grey bar indicate a death. (c) The birth and death of holes. (d) Representation of the components as a simplicial complex: A point corresponds to a component, except if there is a hole, in which case a cycle of 3 points and 3 edges is added. The cycle is filled with a triangle when the hole dies. 
(e) Step-by-step formation of the persistence diagram.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-4\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E2.2 Simplicial Complexes\u003C\/h3\u003E\u003Cp id=\u0022p-22\u0022\u003ESimplicial complexes are an abstraction from the components and holes previously described. Specifically, an \u003Cem\u003En-simplex\u003C\/em\u003E is an \u003Cem\u003En\u003C\/em\u003E-dimensional generalisation of a triangle. A 0-simplex is a point, a 1-simplex is an edge that connects 2 points, a 2-simplex is a triangle that connects 3 points, a 3-simplex is a tetrahedron, etc. A simplicial complex \u003Cem\u003ES\u003C\/em\u003E is a collection of \u003Cem\u003En\u003C\/em\u003E-simplices such that their (\u003Cem\u003En\u003C\/em\u003E-1)-dimensional faces are also in \u003Cem\u003ES\u003C\/em\u003E. This means, for example, that if there is an edge \u003Cem\u003Ee\u003C\/em\u003E in \u003Cem\u003ES\u003C\/em\u003E, then the two vertices at the ends of \u003Cem\u003Ee\u003C\/em\u003E also have to be present in \u003Cem\u003ES\u003C\/em\u003E. In the context of PH, simplicial complexes are used as a way to represent a more complex object topologically as illustrated in \u003Ca id=\u0022xref-fig-1-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1 (d)\u003C\/a\u003E. The topological information of the holes and components can be represented simply as a collection of vertices, edges and triangles. From left to right: A vertex appears when the first white component appears, a second vertex is added in the second column, 
a third vertex is added in the third column, a hole appears which is represented by a cycle of edges and their vertices, two components merge into one represented by an edge in row (d) (the component that was born last has died at this point), the hole is filled up, represented by adding a blue triangle in row (d) (the hole has died at this point).\u003C\/p\u003E\u003Cp id=\u0022p-23\u0022\u003EIt is important to highlight two points. First, a cycle of edges is different from a triangle. In the first case, the cycle has a hole, whereas a triangle has no hole. Second, a component in a simplicial complex is any union of vertices, edges, triangles and tetrahedra that are touching. A sequence of \u003Cem\u003En\u003C\/em\u003E simplicial complexes \u003Cem\u003ES\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003Ei\u003C\/em\u003E\u003C\/sub\u003E is called a \u003Cem\u003Efiltration\u003C\/em\u003E if \u003Cem\u003ES\u003C\/em\u003E\u003Csub\u003E1\u003C\/sub\u003E \u2282 \u003Cem\u003ES\u003C\/em\u003E\u003Csub\u003E2\u003C\/sub\u003E \u2282 \u003Cem\u003ES\u003C\/em\u003E\u003Csub\u003E3\u003C\/sub\u003E \u2282 \u2026 \u2282 \u003Cem\u003ES\u003C\/em\u003E\u003Csub\u003E\u003Cem\u003En\u003C\/em\u003E\u003C\/sub\u003E. In \u003Ca id=\u0022xref-fig-1-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1\u003C\/a\u003E the sequence of simplicial complexes in row (d) makes a filtration. It is also worth mentioning that, in the literature on PH, the formal definition of filtrations applies to sequences of topological spaces, not just simplicial complexes.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-5\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E2.3 Birth, Death, and Persistence Diagrams\u003C\/h3\u003E\u003Cp id=\u0022p-24\u0022\u003E\u003Cem\u003EPersistence Diagrams\u003C\/em\u003E are a way to encode invariants about a filtration, particularly the points when a new topological feature appears, as well as when it disappears in the filtration. 
As alluded to before, at the step when a component or hole first appears, it is said to be \u201cborn\u201d. Analogously, at the step when a component or hole disappears from the filtration, it is said that it \u201cdies\u201d. This information can be encoded in a scatter plot where the horizontal coordinate shows the birth of the component or hole, and the vertical coordinate shows the death of the component or hole as illustrated in \u003Ca id=\u0022xref-fig-1-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F1\u0022\u003EFig. 1 (e)\u003C\/a\u003E.\u003C\/p\u003E\u003Cp id=\u0022p-25\u0022\u003EIt is important to note that holes die by being filled in, but components die by merging into each other. When two components merge, there is a choice to be made as to which component dies. Typically, the eldest component of the merger will survive.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-6\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E2.4 Level-Set Filtration\u003C\/h3\u003E\u003Cp id=\u0022p-26\u0022\u003EA very common way to get a filtration from a greyscale image is through the level-set filtration, which is illustrated in \u003Ca id=\u0022xref-fig-2-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2\u003C\/a\u003E. First, a greyscale image (\u003Ca id=\u0022xref-fig-2-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2 (a)\u003C\/a\u003E) is thresholded at decreasing intensities to produce a series of binary images (\u003Ca id=\u0022xref-fig-2-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2 (d)\u003C\/a\u003E). Second, a vertex is added at every white pixel, an edge between the vertices if the corresponding pixels neighbour each other, and a triangle if three edges form a cycle (\u003Ca id=\u0022xref-fig-2-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2 (e)\u003C\/a\u003E and \u003Ca id=\u0022xref-fig-2-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 
2 (f)\u003C\/a\u003E). For completeness, \u003Ca id=\u0022xref-fig-2-6\u0022 class=\u0022xref-fig\u0022 href=\u0022#F2\u0022\u003EFig. 2 (b)\u003C\/a\u003E shows the persistence diagram computed from the filtration of simplicial complexes. There are other methodologies of filtration, which will not be covered in this paper and the reader is referred to [\u003Ca id=\u0022xref-ref-7-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-7\u0022\u003E7\u003C\/a\u003E].\u003C\/p\u003E\u003Cdiv id=\u0022F2\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F2.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Illustration of the filtration and PH calculation.(a) Original greyscale image in the range [0, 255]. (b) Persistence diagram. (c) 3D representation of the greyscale and a threshold. (d) Binary images of pixels above the threshold. (e) Filtration overlaid on the binary images. A vertex is placed at each white pixel. Edges are added between neighbouring pixels. A triangle is added when cycles of three edges are formed. (f) Filtration with pixels removed. (g) Betti numbers.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Illustration of the filtration and PH calculation.(a) Original greyscale image in the range [0, 255]. (b) Persistence diagram. (c) 3D representation of the greyscale and a threshold. (d) Binary images of pixels above the threshold. (e) Filtration overlaid on the binary images. A vertex is placed at each white pixel. Edges are added between neighbouring pixels. 
A triangle is added when cycles of three edges are formed. (f) Filtration with pixels removed. (g) Betti numbers.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 2.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022259\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 2.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F2.medium.gif\u0022 width=\u0022440\u0022 height=\u0022259\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F2.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 2.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F2.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 
2.\u003C\/span\u003E \u003Cp id=\u0022p-27\u0022 class=\u0022first-child\u0022\u003EIllustration of the filtration and PH calculation.(a) Original greyscale image in the range [0, 255]. (b) Persistence diagram. (c) 3D representation of the greyscale and a threshold. (d) Binary images of pixels above the threshold. (e) Filtration overlaid on the binary images. A vertex is placed at each white pixel. Edges are added between neighbouring pixels. A triangle is added when cycles of three edges are formed. (f) Filtration with pixels removed. (g) Betti numbers.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-7\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E3 Materials\u003C\/h2\u003E\u003Cp id=\u0022p-28\u0022\u003EIn this work, two datasets of colorectal cancer slides stained with H\u0026amp;E were used: 100K-set and 7K-set, which contain 100,000 and 7180 images, respectively [\u003Ca id=\u0022xref-ref-11-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-11\u0022\u003E11\u003C\/a\u003E]. Both sets contain images of nine different classes: ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium (\u003Ca id=\u0022xref-fig-3-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 
3\u003C\/a\u003E).\u003C\/p\u003E\u003Cdiv id=\u0022F3\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F3.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Illustration of the datasets with 100 sample patches per class from each set. By class: NCT-CRC-HE-100K images are shown above, normalized CRC-VAL-HE-7K images are shown below. ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Illustration of the datasets with 100 sample patches per class from each set. By class: NCT-CRC-HE-100K images are shown above, normalized CRC-VAL-HE-7K images are shown below. ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 
3.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F3.medium.gif\u0022 width=\u0022239\u0022 height=\u0022440\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 3.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F3.medium.gif\u0022 width=\u0022239\u0022 height=\u0022440\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F3.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 3.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F3.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 3.\u003C\/span\u003E \u003Cp id=\u0022p-29\u0022 class=\u0022first-child\u0022\u003EIllustration of the datasets with 100 sample patches per class from each set. By class: NCT-CRC-HE-100K images are shown above, normalized CRC-VAL-HE-7K images are shown below. 
ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-30\u0022\u003EThe 100K-set\u2019s images were explicitly stated to be normalised using Macenko\u2019s method in the paper where the set is introduced [\u003Ca id=\u0022xref-ref-12-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-12\u0022\u003E12\u003C\/a\u003E], but the 7K-set was not explicitly stated to be normalised. Upon closer inspection, it was found that the 7K-set had not been normalised. With the same reference image as the 100K-set, the 7K-set was normalised using Macenko\u2019s method on a per-patch basis before any further analysis was performed (\u003Ca id=\u0022xref-fig-4-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFig. 4\u003C\/a\u003E).\u003C\/p\u003E\u003Cdiv id=\u0022F4\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F4.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Effects of normalisation. Left: two BACK patches from the 7K-set; one presents a large dark spot causing the rest of the normalised image to become extremely bright, while the other stays relatively uniform. 
Right: two very purple patches (from DEB and MUS classes), when normalised look visually more like the rest of the set.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Effects of normalisation. Left: two BACK patches from the 7K-set; one presents a large dark spot causing the rest of the normalised image to become extremely bright, while the other stays relatively uniform. Right: two very purple patches (from DEB and MUS classes), when normalised look visually more like the rest of the set.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 4.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022197\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 4.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F4.medium.gif\u0022 width=\u0022440\u0022 height=\u0022197\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F4.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 
4.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F4.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 4.\u003C\/span\u003E \u003Cp id=\u0022p-31\u0022 class=\u0022first-child\u0022\u003EEffects of normalisation. Left: two BACK patches from the 7K-set; one presents a large dark spot causing the rest of the normalised image to become extremely bright, while the other stays relatively uniform. Right: two very purple patches (from DEB and MUS classes), when normalised look visually more like the rest of the set.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-8\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E4 Methods\u003C\/h2\u003E\u003Cp id=\u0022p-32\u0022\u003ETwo types of features were computed from the images from the train and test datasets. Topological features were obtained from the level-set filtration previously introduced. For comparison purposes, features using Gabor frequency filtering were also calculated. All the features were normalised to a range of 0-100. These methodologies are described below.\u003C\/p\u003E\u003Cdiv id=\u0022sec-9\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E4.1 Topological Features\u003C\/h3\u003E\u003Cp id=\u0022p-33\u0022\u003ETopological features were calculated from images by the following process. First, the colour image was converted to greyscale. 
Then, a 5 \u00d7 5 median filter was applied to the greyscale image to reduce noise and make regions of similar intensities smoother. Next, the persistence diagram was calculated using level-set thresholds in the range [0,255]. \u003Ca id=\u0022xref-fig-5-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F5\u0022\u003EFig. 5\u003C\/a\u003E shows some example patches from the 100K-set together with the persistence diagram generated through this process. There is always exactly one component that makes it to the end of the filtration and, in strict mathematical notation, its death is given as \u221e. This was also the value returned by the GUDHI [\u003Ca id=\u0022xref-ref-15-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-15\u0022\u003E15\u003C\/a\u003E] package used to compute the diagram. For simplicity, the point corresponding to this component was discarded from the persistence diagram before calculating the following features: number of components\/holes, mean birth of components\/holes, mean death of components\/holes, standard deviation of the births of components\/holes, standard deviation of the deaths of components\/holes, mean persistence of components\/holes, median persistence of components\/holes, standard deviation of the persistences of components\/holes, minimum birth of components\/holes, maximum birth of components\/holes, minimum death of components\/holes, maximum death of components\/holes, range of births of components\/holes, range of deaths of components\/holes, 1\u003Csup\u003Est\u003C\/sup\u003E, 5\u003Csup\u003Eth\u003C\/sup\u003E, 25\u003Csup\u003Eth\u003C\/sup\u003E, 50\u003Csup\u003Eth\u003C\/sup\u003E (median), 75\u003Csup\u003Eth\u003C\/sup\u003E, 95\u003Csup\u003Eth\u003C\/sup\u003E, 99\u003Csup\u003Eth\u003C\/sup\u003E percentiles of births of components\/holes, 1\u003Csup\u003Est\u003C\/sup\u003E, 5\u003Csup\u003Eth\u003C\/sup\u003E, 25\u003Csup\u003Eth\u003C\/sup\u003E, 50\u003Csup\u003Eth\u003C\/sup\u003E (median), 
75\u003Csup\u003Eth\u003C\/sup\u003E, 95\u003Csup\u003Eth\u003C\/sup\u003E, 99\u003Csup\u003Eth\u003C\/sup\u003E percentiles of deaths of components\/holes. The ratio between the number of holes and the number of components was also calculated and added to the list of features, for a total of 57 topological features from each persistence diagram.\u003C\/p\u003E\u003Cdiv id=\u0022F5\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F5.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Illustration of the persistence diagrams of the histological tissues from the 100K-set. One representative patch from each of the classes (ADI, BACK, DEB, LYM, MUC, MUS, NORM, STR, TUM) is converted to greyscale and inverted. Noise is removed on the greyscale image by applying a 5 \u0026#xD7; 5 median filter. A persistence diagram is calculated from the smoothed greyscale image where blue circles are components and red triangles are holes. The distribution of the scatterplots in the persistence diagram capture differences in the textures of different tissues. ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Illustration of the persistence diagrams of the histological tissues from the 100K-set. 
One representative patch from each of the classes (ADI, BACK, DEB, LYM, MUC, MUS, NORM, STR, TUM) is converted to greyscale and inverted. Noise is removed on the greyscale image by applying a 5 \u0026#xD7; 5 median filter. A persistence diagram is calculated from the smoothed greyscale image where blue circles are components and red triangles are holes. The distribution of the scatterplots in the persistence diagram capture differences in the textures of different tissues. ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 5.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022330\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 5.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F5.medium.gif\u0022 width=\u0022440\u0022 height=\u0022330\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F5.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 
5.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F5.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 5.\u003C\/span\u003E \u003Cp id=\u0022p-34\u0022 class=\u0022first-child\u0022\u003EIllustration of the persistence diagrams of the histological tissues from the 100K-set. One representative patch from each of the classes (ADI, BACK, DEB, LYM, MUC, MUS, NORM, STR, TUM) is converted to greyscale and inverted. Noise is removed on the greyscale image by applying a 5 \u00d7 5 median filter. A persistence diagram is calculated from the smoothed greyscale image where blue circles are components and red triangles are holes. The distribution of the scatterplots in the persistence diagram capture differences in the textures of different tissues. ADI: adipose tissue; BACK: background; CRC: colorectal cancer; DEB: debris; LYM: lymphocytes; MUC: mucus; MUS: smooth muscle; NORM: normal colon mucosa; STR: cancer-associated stroma; TUM: colorectal adenocarcinoma epithelium.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-10\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E4.2 Gabor Features\u003C\/h3\u003E\u003Cp id=\u0022p-35\u0022\u003EGabor filters are filters which operate on an image through convolution. They can be described as made up of a spatial frequency and orientation within a two-dimensional Gaussian envelope. 
For an in-depth explanation of how Gabor filters are used, the reader is referred to [\u003Ca id=\u0022xref-ref-17-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-17\u0022\u003E17\u003C\/a\u003E]. Each Gabor filter is defined uniquely by a direction, a frequency, and the standard deviations in the horizontal and vertical coordinates. 36 different Gabor filters of varying directions, frequencies and standard deviations were generated [\u003Ca id=\u0022xref-ref-20-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-20\u0022\u003E20\u003C\/a\u003E]. To compute features using Gabor filters, each image was converted to greyscale and then convolved with the Gabor filters. This yields a filtered greyscale image, from which the mean pixel intensity and variance of pixel intensities were computed. The effects that different directions and frequencies for the Gabor filters have on the filtered image are shown in \u003Ca id=\u0022xref-fig-6-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F6\u0022\u003EFig. 6\u003C\/a\u003E.\u003C\/p\u003E\u003Cdiv id=\u0022F6\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F6.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Illustration of Gabor filters. (a) Gabor filters (b) Sample patches converted to greyscales (c) Filtered results.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Illustration of Gabor filters. 
(a) Gabor filters (b) Sample patches converted to greyscales (c) Filtered results.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 6.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F6.medium.gif\u0022 width=\u0022440\u0022 height=\u0022257\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 6.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F6.medium.gif\u0022 width=\u0022440\u0022 height=\u0022257\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F6.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 6.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F6.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 
6.\u003C\/span\u003E \u003Cp id=\u0022p-36\u0022 class=\u0022first-child\u0022\u003EIllustration of Gabor filters. (a) Gabor filters (b) Sample patches converted to greyscales (c) Filtered results.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv id=\u0022sec-11\u0022 class=\u0022subsection\u0022\u003E\u003Ch3\u003E4.3 Random Forest\u003C\/h3\u003E\u003Cp id=\u0022p-37\u0022\u003ERandom forests are a supervised classification model introduced in 2001 by L. Breiman [\u003Ca id=\u0022xref-ref-4-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-4\u0022\u003E4\u003C\/a\u003E]. It is based on the much older decision tree classifiers [\u003Ca id=\u0022xref-ref-5-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-5\u0022\u003E5\u003C\/a\u003E]. In broad terms, a random forest classifier is created by building multiple decision trees and combining their outputs: the algorithm creates many subsets of the training data by randomly sampling with replacement (this is called \u201cbootstrapping\u201d), then each subset (bootstrapped set) is used to train a single decision tree. Additionally, not all features are considered for each tree - only a random subset of them. For classification, the label assigned to a new sample is given by majority voting from all the decision trees in the forest. A comprehensive overview on random forests can be found in [\u003Ca id=\u0022xref-ref-2-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-2\u0022\u003E2\u003C\/a\u003E].\u003C\/p\u003E\u003Cp id=\u0022p-38\u0022\u003EA random forest model was trained on subsets of different sizes (\u003Cem\u003En\u003C\/em\u003E =100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000, 20000) of the 100K-set and tested on the complete 7K-set, using only topological features, only Gabor features, and the combination of both (Combined features). 
The model was fit using 100 estimators (decision trees) and a maximum depth of 100 nodes for each estimator. The criterion used to build the decision trees was the Gini index.\u003C\/p\u003E\u003Cp id=\u0022p-39\u0022\u003EThe accuracy on the 7K-set was calculated by taking the ratio of correctly classified samples to the total number of samples in the set. The \u003Cem\u003Eout-of-bag\u003C\/em\u003E (OOB) score of each model was also calculated [\u003Ca id=\u0022xref-ref-3-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-3\u0022\u003E3\u003C\/a\u003E]. As mentioned previously, the model works by bootstrapping the dataset many times; i.e. randomly sampling with replacement many times. Then, decision trees are built using the bootstrapped sets. The OOB score is calculated by classifying the samples which are not included in the bootstrapped sets for each tree. In other words, if a sample is not part of the bootstrapped set used to build a particular tree, the sample is labelled by that tree, and it is checked whether or not the assigned label is correct. The OOB score is a popular way to estimate how well a random forest will generalise, as for large samples it approximates a \u003Cem\u003Ek\u003C\/em\u003E-fold cross-validation estimation [\u003Ca id=\u0022xref-ref-10-1\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-10\u0022\u003E10\u003C\/a\u003E]. For both the accuracy and OOB score, a correct classification was considered any sample that was assigned the correct tissue label. 
When a sample was assigned a different class, it was considered a misclassification.\u003C\/p\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-12\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E5 Results\u003C\/h2\u003E\u003Cp id=\u0022p-40\u0022\u003EThe t-SNE visualisations reveal clearly that the separability of the classes is greater in the 7K-set than in the 100K-set for the topological features (\u003Ca id=\u0022xref-fig-7-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F7\u0022\u003EFig. 7(a)\u003C\/a\u003E), Gabor features (\u003Ca id=\u0022xref-fig-7-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F7\u0022\u003EFig. 7(b)\u003C\/a\u003E) and Combined features (\u003Ca id=\u0022xref-fig-7-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F7\u0022\u003EFig. 7(c)\u003C\/a\u003E). To emphasise the separability, 2D Gaussian distributions were fitted to the distributions of the points per class, and the equation of the ellipse containing the area 1.5 standard deviations away from the mean was calculated. Whilst the ellipses for 4 classes overlap substantially in the 100K, MUS and MUC (red and purple) are quite separate from NORM and TUM (green and yellow) in the 7K.\u003C\/p\u003E\u003Cdiv id=\u0022F7\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F7.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022t-SNE visualisation of the samples by feature type and set they belong to. (a) Visualisations created from topological features. (b) Visualisations created from Gabor features. 
(c) Visualisations created from Combined features.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;t-SNE visualisation of the samples by feature type and set they belong to. (a) Visualisations created from topological features. (b) Visualisations created from Gabor features. (c) Visualisations created from Combined features.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 7.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F7.medium.gif\u0022 width=\u0022312\u0022 height=\u0022440\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 7.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F7.medium.gif\u0022 width=\u0022312\u0022 height=\u0022440\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F7.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 
7.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F7.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 7.\u003C\/span\u003E \u003Cp id=\u0022p-41\u0022 class=\u0022first-child\u0022\u003Et-SNE visualisation of the samples by feature type and set they belong to. (a) Visualisations created from topological features. (b) Visualisations created from Gabor features. (c) Visualisations created from Combined features.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-42\u0022\u003ETo confirm the separability of classes between sets, Random Forests were trained with an increasing number of topological, Gabor and Combined features and then tested on the 7K-set (\u003Ca id=\u0022xref-fig-8-1\u0022 class=\u0022xref-fig\u0022 href=\u0022#F8\u0022\u003EFig. 8\u003C\/a\u003E). 
The results of the OOB score and accuracy follow similar patterns and stabilised between 5,000 and 10,000 samples, which suggested that around 7,000 samples were sufficient to obtain good results.\u003C\/p\u003E\u003Cdiv id=\u0022F8\u0022 class=\u0022fig pos-float type-figure  odd\u0022\u003E\u003Cdiv class=\u0022highwire-figure\u0022\u003E\u003Cdiv class=\u0022fig-inline-img-wrapper\u0022\u003E\u003Cdiv class=\u0022fig-inline-img\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F8.large.jpg?width=800\u0026amp;height=600\u0026amp;carousel=1\u0022 title=\u0022Accuracy on the 7K-set and the OOB score when a Random Forest model is trained on random samples of differing sizes from the 100K-set and using topological, Gabor or combined features.\u0022 class=\u0022highwire-fragment fragment-images colorbox-load\u0022 rel=\u0022gallery-fragment-images-851399613\u0022 data-figure-caption=\u0022\u0026lt;div class=\u0026quot;highwire-markup\u0026quot;\u0026gt;Accuracy on the 7K-set and the OOB score when a Random Forest model is trained on random samples of differing sizes from the 100K-set and using topological, Gabor or combined features.\u0026lt;\/div\u0026gt;\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003E\u003Cspan class=\u0022hw-responsive-img\u0022\u003E\u003Cimg class=\u0022highwire-fragment fragment-image lazyload\u0022 alt=\u0022Fig. 8.\u0022 src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F8.medium.gif\u0022 width=\u0022440\u0022 height=\u0022148\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-fragment fragment-image\u0022 alt=\u0022Fig. 
8.\u0022 src=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F8.medium.gif\u0022 width=\u0022440\u0022 height=\u0022148\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cul class=\u0022highwire-figure-links inline\u0022\u003E\u003Cli class=\u0022download-fig first\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F8.large.jpg?download=true\u0022 class=\u0022highwire-figure-link highwire-figure-link-download\u0022 title=\u0022Download Fig. 8.\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload figure\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022new-tab last\u0022\u003E\u003Ca href=\u0022https:\/\/www.medrxiv.org\/content\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/F8.large.jpg\u0022 class=\u0022highwire-figure-link highwire-figure-link-newtab\u0022 target=\u0022_blank\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EOpen in new tab\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022fig-caption\u0022\u003E\u003Cspan class=\u0022fig-label\u0022\u003EFig. 8.\u003C\/span\u003E \u003Cp id=\u0022p-43\u0022 class=\u0022first-child\u0022\u003EAccuracy on the 7K-set and the OOB score when a Random Forest model is trained on random samples of differing sizes from the 100K-set and using topological, Gabor or combined features.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cp id=\u0022p-44\u0022\u003ENext, the number of samples of the 100K-set was restricted to 7,180 to perform a reverse experiment: train on the reduced 100K-set and classify the 7K-set, and then train on the 7K-set and classify the reduced 100K-set. 
The results confirmed the higher separability of the 7K-set, reaching 0.96 OOB-Score but only a 0.55 when the 100K-set was classified with training on the 7K-set (\u003Ca id=\u0022xref-table-wrap-1-1\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E). On the other hand, when training on the 100K-set, the OOB-Score was lower (0.88) and the classification of the 7K-set was 0.74.\u003C\/p\u003E\u003Cdiv id=\u0022T1\u0022 class=\u0022table pos-float\u0022\u003E\u003Cdiv class=\u0022table-inline table-callout-links\u0022\u003E\u003Cdiv class=\u0022callout\u0022\u003E\u003Cspan\u003EView this table:\u003C\/span\u003E\u003Cul class=\u0022callout-links\u0022\u003E\u003Cli class=\u0022view-inline first\u0022\u003E\u003Ca href=\u0022\u0022 class=\u0022table-expand-inline\u0022 data-table-url=\u0022\/highwire\/markup\/974856\/expansion?postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0026amp;table-expand-inline=1\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView inline\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022view-popup\u0022\u003E\u003Ca href=\u0022\/highwire\/markup\/974856\/expansion?width=1000\u0026amp;height=500\u0026amp;iframe=true\u0026amp;postprocessors=highwire_tables%2Chighwire_reclass%2Chighwire_figures%2Chighwire_math%2Chighwire_inline_linked_media%2Chighwire_embed\u0022 class=\u0022colorbox colorbox-load table-expand-popup\u0022 rel=\u0022gallery-fragment-tables\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EView popup\u003C\/a\u003E\u003C\/li\u003E\u003Cli class=\u0022download-ppt last\u0022\u003E\u003Ca href=\u0022\/highwire\/powerpoint\/974856\u0022 class=\u0022highwire-figure-link highwire-figure-link-ppt\u0022 data-icon-position=\u0022\u0022 data-hide-link-title=\u00220\u0022\u003EDownload 
powerpoint\u003C\/a\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022table-caption\u0022\u003E\u003Cspan class=\u0022table-label\u0022\u003ETable 1.\u003C\/span\u003E \u003Cp id=\u0022p-45\u0022 class=\u0022first-child\u0022\u003EEffects of training on the different sets. The model was trained on the combined features using 7180 samples from the 100K-set and tested on the 7K-set, then trained on the 7K-set and tested on the 7180 samples from the 100K-set. Note that training on 100K-set and testing on 7K-set yields a much higher Test Accuracy but lower OOB-Score than the reverse case.\u003C\/p\u003E\u003Cdiv class=\u0022sb-div caption-clear\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section\u0022 id=\u0022sec-13\u0022\u003E\u003Ch2 class=\u0022\u0022\u003E6 Discussion\u003C\/h2\u003E\u003Cp id=\u0022p-46\u0022\u003EConcerns about the bias in the colour profile of the classes and improper handling of the images in the 100K-set and the 7K-set had been highlighted [\u003Ca id=\u0022xref-ref-9-2\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E]. However, to the best of the authors\u2019 knowledge, the differences in separability of classes of these important datasets had not been discussed. These were demonstrated with topological and Gabor features, which were used to extract textural and structural properties from the data, that is, properties unrelated to colour.\u003C\/p\u003E\u003Cp id=\u0022p-47\u0022\u003EFirst, the problems related to \u003Cstrong\u003Ebrightness\u003C\/strong\u003E previously noted in [\u003Ca id=\u0022xref-ref-9-3\u0022 class=\u0022xref-bibr\u0022 href=\u0022#ref-9\u0022\u003E9\u003C\/a\u003E] were confirmed. The intensity profiles of the BACK class in the 100K-set go to the extremes; very bright or very dark (\u003Ca id=\u0022xref-fig-3-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3\u003C\/a\u003E). 
In contrast, the patches in the BACK class in the 7K-set have more uniform intensities. The few patches that are brighter almost always contain a very dark region; these are most likely due to issues of \u003Cstrong\u003Enormalisation\u003C\/strong\u003E. Macenko\u2019s normalisation seems to have this effect when there is a dark region in the image.\u003C\/p\u003E\u003Cp id=\u0022p-48\u0022\u003ESecond, there were problems of \u003Cstrong\u003Ehue\u003C\/strong\u003E, which are visible in the DEB and MUS classes of the 100K-set. In the DEB class, there are four patches that are visually more purple\/violet (hue values around 270-280) than the majority, which are visually closer to pink\/magenta (hue values around 300-320). In fact, it was found that the average hue of the four DEB patches was 277, while the average hue of the non-faulty patches was 312. These variations were also present in the MUS class. These differences in colour seem to suggest that the 100K-set has not been properly normalised using Macenko\u2019s method.\u003C\/p\u003E\u003Cp id=\u0022p-49\u0022\u003EThird, issues of \u003Cstrong\u003Ecuration and labelling\u003C\/strong\u003E were detected. The 100K-set shows signs of less consistent curation and labelling when compared to the 7K-set. For example, MUS patches of the 100K-set show white areas (possibly background). This will be further analysed below.\u003C\/p\u003E\u003Cp id=\u0022p-50\u0022\u003EFourth, there may be differences in \u003Cstrong\u003Ecell populations\u003C\/strong\u003E, specifically there seem to exist two different types of LYM patches in the 7K-set. An initial observation of these patches suggests that the difference arises from the brightness of the patches. However, a closer inspection suggests that there is also a difference in the sparseness and size of the lymphocytes (\u003Ca id=\u0022xref-fig-3-3\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 
3\u003C\/a\u003E).\u003C\/p\u003E\u003Cp id=\u0022p-51\u0022\u003EFifth, the \u003Cstrong\u003Edifferences in separability of classes\u003C\/strong\u003E between the 100K-set and the 7K-set became evident with the extraction of topological and Gabor features and the visualisation with t-SNE (\u003Ca id=\u0022xref-fig-7-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F7\u0022\u003EFig. 7\u003C\/a\u003E), and the previous visual observations were confirmed. Four classes \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-1\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/embed\/inline-graphic-1.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E were highlighted with ellipses as previously described. Whilst in the 100K-set, these four classes partially overlap in the topological, Gabor and combined features, in the 7K-set, these appear comparatively separated, especially MUC in the topological and TUM and NORM from MUS and MUC in Gabor and combined. 
Similarly, the ADI \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-2\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/embed\/inline-graphic-2.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/embed\/inline-graphic-2.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E and the BACK \u003Cspan class=\u0022inline-formula\u0022 id=\u0022inline-formula-3\u0022\u003E\u003Cspan class=\u0022highwire-responsive-lazyload\u0022\u003E\u003Cimg src=\u0022data:image\/gif;base64,R0lGODlhAQABAIAAAAAAAP\/\/\/yH5BAEAAAAALAAAAAABAAEAAAIBRAA7\u0022 class=\u0022highwire-embed lazyload\u0022 alt=\u0022Embedded Image\u0022 data-src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/embed\/inline-graphic-3.gif\u0022\/\u003E\u003Cnoscript\u003E\u003Cimg class=\u0022highwire-embed\u0022 alt=\u0022Embedded Image\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/highwire\/medrxiv\/early\/2025\/04\/08\/2025.04.07.25325392\/embed\/inline-graphic-3.gif\u0022\/\u003E\u003C\/noscript\u003E\u003C\/span\u003E\u003C\/span\u003E are better separated in the 7K-set. For the topological features, the class BACK is strongly clustered in the 7K-set with just a few elements close to the ADI class, whilst in the 100K-set there are several clusters and the ADI class is surrounded by elements of BACK. 
For Gabor features, the BACK class in the 7K-set is totally separated from the ADI class, but in the 100K-set many elements again overlap and it presents more clusters.\u003C\/p\u003E\u003Cp id=\u0022p-52\u0022\u003EAs noted previously, the LYM class in the 100K-set always appears as a single cluster (pink crosses), whilst in the 7K-set it always appears as two or even three large and distinctly located clusters (\u003Ca id=\u0022xref-fig-7-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F7\u0022\u003EFig. 7(a-c)\u003C\/a\u003E). A smaller group is also visible and close to the other classes suggesting that there may be a third type of LYM patches not immediately distinguishable in \u003Ca id=\u0022xref-fig-3-4\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3\u003C\/a\u003E.\u003C\/p\u003E\u003Cp id=\u0022p-53\u0022\u003ETo quantify the effects of the separability of classes, the following experiments were conducted: (1) Train on 7,180 samples from the 100K-set and classify the 7K-set, (2) train on the 7K-set and classify 7,180 samples from the 100K-set (\u003Ca id=\u0022xref-table-wrap-1-2\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E). When trained on the 7K-set, a random forest model obtained a higher OOB-Score (0.9642) than when trained on 7,180 samples of the 100K-set (0.8840). Yet, when the opposite set was classified, the results of the model trained on the 7K-set were far lower (0.5618) than those trained on the 100K-set (0.6926). Both of these results imply a higher separability of classes in the 7K-set.\u003C\/p\u003E\u003Cp id=\u0022p-54\u0022\u003ESeparability of classes can come from many factors: intrinsic differences in the classes (i.e. different textures, colours, or shape). However, it is to be expected that two sets of images treated in a similar manner should show the same degree of separation between their classes. 
Some prominent factors can potentially influence the observed differences in separability between the sets, mainly differences in preprocessing and handling of the images. In the case of these two data sets, the most probable causes are problems of normalisation and problems of curation and labelling. In the case of normalisation, some patches show evidence of not having been properly normalised when compiling the dataset. This is especially noticeable in \u003Ca id=\u0022xref-fig-3-5\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3\u003C\/a\u003E when looking at the DEB and MUS classes and the effects shown in \u003Ca id=\u0022xref-fig-4-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F4\u0022\u003EFig. 4\u003C\/a\u003E. An example of problems with curation and labelling is that MUS patches in the 7K-set have a seemingly more even texture than the patches in the 100K-set: some of these patches seem to show more white areas (possibly background) than actual tissue (\u003Ca id=\u0022xref-fig-3-6\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3\u003C\/a\u003E). However, other factors such as the probable existence of three different populations of lymphocytes could suggest that there may be intrinsic differences related to the nature of the tissue (healthy\/diseased) in one particular patient.\u003C\/p\u003E\u003Cp id=\u0022p-55\u0022\u003ETo summarise, greyscale versions of images from two datasets (100K-set and 7K-set) were used to extract features from the corresponding persistence diagrams and the Gabor-filtered versions of the images. The features were used to train random forest models (\u003Ca id=\u0022xref-fig-8-2\u0022 class=\u0022xref-fig\u0022 href=\u0022#F8\u0022\u003EFig. 8\u003C\/a\u003E) and compare the effects of training on one of the sets and testing on the other, then reversing the roles (\u003Ca id=\u0022xref-table-wrap-1-3\u0022 class=\u0022xref-table\u0022 href=\u0022#T1\u0022\u003ETable 1\u003C\/a\u003E). 
The difference in accuracies and OOB-Score of the experiment, together with a visual analysis of the images (\u003Ca id=\u0022xref-fig-3-7\u0022 class=\u0022xref-fig\u0022 href=\u0022#F3\u0022\u003EFig. 3\u003C\/a\u003E) and a t-SNE visualisation of the classes reveal inconsistencies between the datasets. These inconsistencies go beyond what is expected between two different datasets of supposedly similar images, leading to very different generalisation scores of the random forest model.\u003C\/p\u003E\u003Cp id=\u0022p-56\u0022\u003ETo conclude, the final point is expanded upon. It is reasonable to expect that the datasets that one chooses to train and test on will impart some differences in the scores. However, in the case of these two datasets, it has been shown that the quality of the sets has a very large effect on the precision scores achieved by the machine learning model and researchers should be careful about the datasets used to train and test models. Even though NCT-CRC-HE-100K and CRC-VAL-HE-7K are popular datasets used to train and test machine learning and deep learning models, it seems the quality of the training set is lower than that of the test set, and researchers using these sets to train models should be wary of this fact.\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section data-availability\u0022 id=\u0022sec-14\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EData Availability\u003C\/h2\u003E\u003Cp id=\u0022p-57\u0022\u003EAll data produced are available online at\nhttps:\/\/zenodo.org\/records\/1214456\u003C\/p\u003E\u003Cp id=\u0022p-58\u0022\u003E\n\u003Ca href=\u0022https:\/\/zenodo.org\/records\/1214456\u0022\u003Ehttps:\/\/zenodo.org\/records\/1214456\u003C\/a\u003E\n\u003C\/p\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section fn-group\u0022 id=\u0022fn-group-1\u0022\u003E\u003Ch2\u003EFootnotes\u003C\/h2\u003E\u003Cul\u003E\u003Cli class=\u0022fn-others\u0022 id=\u0022fn-1\u0022\u003E\u003Cp id=\u0022p-1\u0022\u003E\u003Cspan 
class=\u0022em-link\u0022\u003E\u003Cspan class=\u0022em-addr\u0022\u003Edaniel.brito{at}citystgeorges.ac.uk\u003C\/span\u003E\u003C\/span\u003E\u003C\/p\u003E\u003C\/li\u003E\u003C\/ul\u003E\u003C\/div\u003E\u003Cdiv class=\u0022section ref-list\u0022 id=\u0022ref-list-1\u0022\u003E\u003Ch2 class=\u0022\u0022\u003EReferences\u003C\/h2\u003E\u003Col class=\u0022cit-list ref-use-labels\u0022\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E1.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-1-1\u0022 title=\u0022View reference 1. in text\u0022 id=\u0022ref-1\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.1\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EAfshari Mirak\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al.\u003C\/span\u003E, N., \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMohamed\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EI.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EThe growing nationwide radiologist shortage: Current opportunities and ongoing challenges for international medical graduate radiologists\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ERadiology\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E314\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E3\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003Ee232625\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EMar\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E)\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DRadiology%26rft.volume%253D314%26rft.spage%253De232625%26rft_id%253Dinfo%253Apmid%252F40035678%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=40035678\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E2.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-2-1\u0022 title=\u0022View reference 2. 
in text\u0022 id=\u0022ref-2\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.2\u0022 data-doi=\u002210.1007\/s11749-016-0481-7\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBiau\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EScornet\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EE.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EA random forest guided tour\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ETEST\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E25\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E2\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E197\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E227\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EJun\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2016\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1007\/s11749-016-0481-7\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DTEST%26rft.volume%253D25%26rft.spage%253D197%26rft_id%253Dinfo%253Adoi%252F10.1007%252Fs11749-016-0481-7%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 
class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1007\/s11749-016-0481-7\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E3.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-3-1\u0022 title=\u0022View reference 3. in text\u0022 id=\u0022ref-3\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.04.07.25325392v1.3\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBreiman\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-source\u0022\u003EOut-of-bag estimation. Tech. rep\u003C\/span\u003E., \u003Cspan class=\u0022cit-publ-name\u0022\u003EUniversity of California Berkeley\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E1996\u003C\/span\u003E)\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E4.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-4-1\u0022 title=\u0022View reference 4. 
in text\u0022 id=\u0022ref-4\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.4\u0022 data-doi=\u002210.1023\/A:1010933404324\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBreiman\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003ERandom forests\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EMachine Learning\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E45\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E5\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E32\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EOct\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2001\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1023\/A:1010933404324\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DMachine%2BLearning%26rft.volume%253D45%26rft.spage%253D5%26rft_id%253Dinfo%253Adoi%252F10.1023%252FA%253A1010933404324%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca 
href=\u0022\/lookup\/external-ref?access_num=10.1023\/A:1010933404324\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E5.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-5-1\u0022 title=\u0022View reference 5. in text\u0022 id=\u0022ref-5\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.04.07.25325392v1.5\u0022 data-doi=\u002210.1201\/9781315139470\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBreiman\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-source\u0022\u003EClassification and Regression Trees\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-publ-name\u0022\u003EChapman and Hall\/CRC\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003ENew York\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EOct\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2017\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1201\/9781315139470\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DClassification%2Band%2BRegression%2BTrees%26rft_id%253Dinfo%253Adoi%252F10.1201%252F9781315139470%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1201\/9781315139470\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E6.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-6-1\u0022 title=\u0022View reference 6. 
in text\u0022 id=\u0022ref-6\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.6\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBrito-Pacheco\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003ERelationship between irregularities of the nuclear envelope and mitochondria in hela cells observed with electron microscopy\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EIn: 2024 IEEE ISBI\u003C\/abbr\u003E. p. \u003Cspan class=\u0022cit-fpage\u0022\u003E1\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E5\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EMay\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E)\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E7.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-7-1\u0022 title=\u0022View reference 7. 
in text\u0022 id=\u0022ref-7\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.7\u0022 data-doi=\u002210.1101\/2025.02.21.25322669\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EBrito-Pacheco\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EGiannopoulos\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EReyes-Aldasoro\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.C.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EPersistent homology in medical image processing: A literature review\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EmedRxiv\u003C\/abbr\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EFeb\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1101\/2025.02.21.25322669\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DmedRxiv%26rft_id%253Dinfo%253Adoi%252F10.1101%252F2025.02.21.25322669%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/ijlink\/YTozOntzOjQ6InBhdGgiO3M6MTQ6Ii9sb29rdXAvaWpsaW5rIjtzOjU6InF1ZXJ5IjthOjQ6e3M6ODoibGlua1R5cGUiO3M6NDoiQUJTVCI7czoxMToiam91cm5hbENvZGUiO3M6NzoibWVkcnhpdiI7czo1OiJyZXNpZCI7czoyMToiMjAyNS4wMi4yMS4yNTMyMjY2OXYxIjtzOjQ6ImF0b20iO3M6NTA6Ii9tZWRyeGl2L2Vhcmx5LzIwMjUvMDQvMDgvMjAyNS4wNC4wNy4yNTMyNTM5Mi5hdG9tIjt9czo4OiJmcmFnbWVudCI7czowOiIiO30=\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-ijlink\u0022\u003E\u003Cspan\u003E\u003Cspan class=\u0022cit-reflinks-abstract\u0022\u003EAbstract\u003C\/span\u003E\u003Cspan class=\u0022cit-sep cit-reflinks-variant-name-sep\u0022\u003E\/\u003C\/span\u003E\u003Cspan class=\u0022cit-reflinks-full-text\u0022\u003E\u003Cspan class=\u0022free-full-text\u0022\u003EFREE \u003C\/span\u003EFull Text\u003C\/span\u003E\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan 
class=\u0022ref-label\u0022\u003E8.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-8-1\u0022 title=\u0022View reference 8. in text\u0022 id=\u0022ref-8\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.04.07.25325392v1.8\u0022 data-doi=\u002210.1090\/conm\/453\/08802\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EEdelsbrunner\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EH.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHarer\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-chapter-title\u0022\u003EPersistent homology\u2014a survey\u003C\/span\u003E, vol. \u003Cspan class=\u0022cit-vol\u0022\u003E453\u003C\/span\u003E, p. \u003Cspan class=\u0022cit-fpage\u0022\u003E257\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E282\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-source\u0022\u003EAmerican Mathematical Society, Providence\u003C\/span\u003E, \u003Cspan class=\u0022cit-publ-loc\u0022\u003ERhode Island\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2008\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1090\/conm\/453\/08802\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DAmerican%2BMathematical%2BSociety%252C%2BProvidence%26rft.volume%253D453%26rft.spage%253D257%26rft_id%253Dinfo%253Adoi%252F10.1090%252Fconm%252F453%252F08802%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1090\/conm\/453\/08802\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E9.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-9-1\u0022 title=\u0022View reference 9. 
in text\u0022 id=\u0022ref-9\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.9\u0022 data-doi=\u002210.48550\/arXiv.2409.11546\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EIgnatov\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMalivenko\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EG.\u003C\/span\u003E\u003C\/span\u003E: \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENct-crc-he: Not all histopathological datasets are equally useful\u003C\/abbr\u003E (\u003Cspan class=\u0022cit-month\u0022\u003ESep\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.48550\/arXiv.2409.11546\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNct-crc-he%253A%2BNot%2Ball%2Bhistopathological%2Bdatasets%2Bare%2Bequally%2Buseful%26rft_id%253Dinfo%253Adoi%252F10.48550%252FarXiv.2409.11546%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.48550\/arXiv.2409.11546\u0026amp;link_type=DOI\u0022 
class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E10.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-10-1\u0022 title=\u0022View reference 10. in text\u0022 id=\u0022ref-10\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.10\u0022 data-doi=\u002210.1371\/journal.pone.0201904\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EJanitza\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EHornung\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ER.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EOn the overestimation of random forest\u2019s out-of-bag error\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EPLoS ONE\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E13\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E8\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003Ee0201904\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EAug\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1371\/journal.pone.0201904\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DPLoS%2BONE%26rft.volume%253D13%26rft.spage%253De0201904%26rft_id%253Dinfo%253Adoi%252F10.1371%252Fjournal.pone.0201904%26rft_id%253Dinfo%253Apmid%252F30080866%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1371\/journal.pone.0201904\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=30080866\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan 
class=\u0022ref-label\u0022\u003E11.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-11-1\u0022 title=\u0022View reference 11. in text\u0022 id=\u0022ref-11\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.11\u0022 data-doi=\u002210.5281\/zenodo.1214456\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKather\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.N.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003E100,000 histological images of human colorectal cancer and healthy tissue\u003C\/abbr\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EApr\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.5281\/zenodo.1214456\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253D100%252C000%2Bhistological%2Bimages%2Bof%2Bhuman%2Bcolorectal%2Bcancer%2Band%2Bhealthy%2Btissue%26rft_id%253Dinfo%253Adoi%252F10.5281%252Fzenodo.1214456%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca 
href=\u0022\/lookup\/external-ref?access_num=10.5281\/zenodo.1214456\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E12.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-12-1\u0022 title=\u0022View reference 12. in text\u0022 id=\u0022ref-12\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.12\u0022 data-doi=\u002210.1371\/journal.pmed.1002730\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKather\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.N.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EPredicting survival from colorectal cancer histology slides using deep learning: A retrospective multicenter study\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EPLOS Medicine\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E16\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003Ee1002730\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EJan\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2019\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1371\/journal.pmed.1002730\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DPLOS%2BMedicine%26rft.volume%253D16%26rft.spage%253De1002730%26rft_id%253Dinfo%253Adoi%252F10.1371%252Fjournal.pmed.1002730%26rft_id%253Dinfo%253Apmid%252F30677016%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1371\/journal.pmed.1002730\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=30677016\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan 
class=\u0022ref-label\u0022\u003E13.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-13-1\u0022 title=\u0022View reference 13. in text\u0022 id=\u0022ref-13\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.04.07.25325392v1.13\u0022 data-doi=\u002210.20948\/graphicon-2021-3027-496-507\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EKhvostikov\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EA.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-chapter-title\u0022\u003ETissue type recognition in whole slide histological images\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EIn: Proceedings of the 31th International Conference on Computer Graphics and Vision\u003C\/span\u003E. Volume \u003Cspan class=\u0022cit-vol\u0022\u003E2\u003C\/span\u003E. p. \u003Cspan class=\u0022cit-fpage\u0022\u003E496\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E507\u003C\/span\u003E. 
\u003Cspan class=\u0022cit-publ-name\u0022\u003EKeldysh Institute of Applied Mathematics\u003C\/span\u003E (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2021\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.20948\/graphicon-2021-3027-496-507\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DIn%253A%2BProceedings%2Bof%2Bthe%2B31th%2BInternational%2BConference%2Bon%2BComputer%2BGraphics%2Band%2BVision%26rft.volume%253D2%26rft.spage%253D496%26rft_id%253Dinfo%253Adoi%252F10.20948%252Fgraphicon-2021-3027-496-507%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.20948\/graphicon-2021-3027-496-507\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E14.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-14-1\u0022 title=\u0022View reference 14. 
in text\u0022 id=\u0022ref-14\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.14\u0022 data-doi=\u002210.1038\/s41467-018-07619-7\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EMaier-Hein\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EL.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EWhy rankings of biomedical image analysis competitions should be interpreted with care\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003ENat. Commun\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E9\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E5217\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EDec\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2018\u003C\/span\u003E)\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DNat.%2BCommun%26rft.volume%253D9%26rft.spage%253D5217%26rft_id%253Dinfo%253Adoi%252F10.1038%252Fs41467-018-07619-7%26rft_id%253Dinfo%253Apmid%252F30523263%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1038\/s41467-018-07619-7\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi 
cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=30523263\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E15.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-15-1\u0022 title=\u0022View reference 15. in text\u0022 id=\u0022ref-15\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-web\u0022 id=\u0022cit-2025.04.07.25325392v1.15\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EProject\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ET.G.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EGUDHI User and Reference Manual\u003C\/span\u003E. \u003Cspan class=\u0022cit-source\u0022\u003EGUDHI Editorial Board, 3.11.0 edn\u003C\/span\u003E. (\u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E), \u003Ca href=\u0022https:\/\/gudhi.inria.fr\/doc\/3.11.0\/\u0022\u003Ehttps:\/\/gudhi.inria.fr\/doc\/3.11.0\/\u003C\/a\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E16.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-16-1\u0022 title=\u0022View reference 16. 
in text\u0022 id=\u0022ref-16\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.16\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERamos\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EJ.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EAung\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EP.P.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EInternational medical graduates and the shortage of US pathologists: Challenges and opportunities\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EArch. Pathol. Lab. Med\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E148\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E6\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E735\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E738\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EJun\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2024\u003C\/span\u003E)\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DArch.%2BPathol.%2BLab.%2BMed%26rft.volume%253D148%26rft.spage%253D735%26rft_id%253Dinfo%253Apmid%252F37787415%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca 
href=\u0022\/lookup\/external-ref?access_num=37787415\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E17.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-17-1\u0022 title=\u0022View reference 17. in text\u0022 id=\u0022ref-17\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-book\u0022 id=\u0022cit-2025.04.07.25325392v1.17\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EReyes-Aldasoro\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EC.C.\u003C\/span\u003E\u003C\/span\u003E: \u003Cspan class=\u0022cit-source\u0022\u003EMultiresolution Volumetric Texture Segmentation\u003C\/span\u003E. \u003Cspan class=\u0022cit-publ-name\u0022\u003Edoctoral, University of Warwick\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003ENov\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2004\u003C\/span\u003E), \u003Ca href=\u0022http:\/\/wrap.warwick.ac.uk\/67756\/\u0022\u003Ehttp:\/\/wrap.warwick.ac.uk\/67756\/\u003C\/a\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E18.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-18-1\u0022 title=\u0022View reference 18. 
in text\u0022 id=\u0022ref-18\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.18\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ERussell\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ED.K.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EAnalysis of 2023 cytologists employment survey\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EJ. Am. Soc. Cytopathol\u003C\/abbr\u003E. \u003Cspan class=\u0022cit-vol\u0022\u003E14\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E2\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E78\u003C\/span\u003E\u2013\u003Cspan class=\u0022cit-lpage\u0022\u003E85\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EMar\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2025\u003C\/span\u003E)\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DJ.%2BAm.%2BSoc.%2BCytopathol%26rft.volume%253D14%26rft.spage%253D78%26rft_id%253Dinfo%253Apmid%252F39894740%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=39894740\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles 
cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E19.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-19-1\u0022 title=\u0022View reference 19. in text\u0022 id=\u0022ref-19\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.19\u0022 data-doi=\u002210.3390\/diagnostics13071277\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003ESun\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EAutomatic classification of histopathology images across multiple cancers based on heterogeneous transfer learning\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EDiagnostics\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E13\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E77\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E1277\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EJan\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2023\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.3390\/diagnostics13071277\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DDiagnostics%26rft.volume%253D13%26rft.spage%253D1277%26rft_id%253Dinfo%253Adoi%252F10.3390%252Fdiagnostics13071277%26rft_id%253Dinfo%253Apmid%252F37046497%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.3390\/diagnostics13071277\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=37046497\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan 
class=\u0022ref-label\u0022\u003E20.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-20-1\u0022 title=\u0022View reference 20. in text\u0022 id=\u0022ref-20\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.20\u0022 data-doi=\u002210.7717\/peerj.453\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003Evan der Walt\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003ES.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003Escikit-image: image processing in python\u003C\/span\u003E. \u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EPeerJ\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E2\u003C\/span\u003E, \u003Cspan class=\u0022cit-fpage\u0022\u003Ee453\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EJun\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2014\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.7717\/peerj.453\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DPeerJ%26rft.volume%253D2%26rft.spage%253De453%26rft_id%253Dinfo%253Adoi%252F10.7717%252Fpeerj.453%26rft_id%253Dinfo%253Apmid%252F25024921%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl 
cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.7717\/peerj.453\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=25024921\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003Cli\u003E\u003Cspan class=\u0022ref-label\u0022\u003E21.\u003C\/span\u003E\u003Ca class=\u0022rev-xref-ref\u0022 href=\u0022#xref-ref-21-1\u0022 title=\u0022View reference 21. in text\u0022 id=\u0022ref-21\u0022\u003E\u21b5\u003C\/a\u003E\u003Cdiv class=\u0022cit ref-cit ref-journal\u0022 id=\u0022cit-2025.04.07.25325392v1.21\u0022 data-doi=\u002210.1186\/s12916-021-01942-5\u0022\u003E\u003Cdiv class=\u0022cit-metadata\u0022\u003E\u003Ccite\u003E\u003Cspan class=\u0022cit-auth\u0022\u003E\u003Cspan class=\u0022cit-name-surname\u0022\u003EWang\u003C\/span\u003E,  \u003Cspan class=\u0022cit-name-given-names\u0022\u003EK.S.\u003C\/span\u003E\u003C\/span\u003E, \u003Cspan class=\u0022cit-etal\u0022\u003Eet al\u003C\/span\u003E: \u003Cspan class=\u0022cit-article-title\u0022\u003EAccurate diagnosis of colorectal cancer based on histopathology images using artificial intelligence\u003C\/span\u003E. 
\u003Cabbr class=\u0022cit-jnl-abbrev\u0022\u003EBMC medicine\u003C\/abbr\u003E \u003Cspan class=\u0022cit-vol\u0022\u003E19\u003C\/span\u003E(\u003Cspan class=\u0022cit-issue\u0022\u003E1\u003C\/span\u003E), \u003Cspan class=\u0022cit-fpage\u0022\u003E76\u003C\/span\u003E (\u003Cspan class=\u0022cit-month\u0022\u003EMar\u003C\/span\u003E \u003Cspan class=\u0022cit-pub-date\u0022\u003E2021\u003C\/span\u003E).\u003Cspan class=\u0022cit-pub-id-sep cit-pub-id-doi-sep\u0022\u003E \u003C\/span\u003E\u003Cspan class=\u0022cit-pub-id cit-pub-id-doi\u0022\u003E\u003Cspan class=\u0022cit-pub-id-scheme-doi\u0022\u003Edoi:\u003C\/span\u003E10.1186\/s12916-021-01942-5\u003C\/span\u003E\u003C\/cite\u003E\u003C\/div\u003E\u003Cdiv class=\u0022cit-extra\u0022\u003E\u003Ca href=\u0022{openurl}?query=rft.jtitle%253DBMC%2Bmedicine%26rft.volume%253D19%26rft.spage%253D76%26rft_id%253Dinfo%253Adoi%252F10.1186%252Fs12916-021-01942-5%26rft_id%253Dinfo%253Apmid%252F33752648%26rft.genre%253Darticle%26rft_val_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Ajournal%26ctx_ver%253DZ39.88-2004%26url_ver%253DZ39.88-2004%26url_ctx_fmt%253Dinfo%253Aofi%252Ffmt%253Akev%253Amtx%253Actx\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-openurl cit-ref-sprinkles-open-url\u0022\u003E\u003Cspan\u003EOpenUrl\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=10.1186\/s12916-021-01942-5\u0026amp;link_type=DOI\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-doi cit-ref-sprinkles-crossref\u0022\u003E\u003Cspan\u003ECrossRef\u003C\/span\u003E\u003C\/a\u003E\u003Ca href=\u0022\/lookup\/external-ref?access_num=33752648\u0026amp;link_type=MED\u0026amp;atom=%2Fmedrxiv%2Fearly%2F2025%2F04%2F08%2F2025.04.07.25325392.atom\u0022 class=\u0022cit-ref-sprinkles cit-ref-sprinkles-medline\u0022\u003E\u003Cspan\u003EPubMed\u003C\/span\u003E\u003C\/a\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/li\u003E\u003C\/ol\u003E\u003C\/div\u003E\u003Cspan 
class=\u0022highwire-journal-article-marker-end\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003Cspan class=\u0022related-urls\u0022\u003E\u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E  \u003C\/div\u003E\n\n  \n  \u003C\/div\u003E\n\u003C\/div\u003E\n  \u003C\/div\u003E\n\u003C\/div\u003E\n\u003C\/div\u003E\u003Cscript type=\u0022text\/javascript\u0022 src=\u0022https:\/\/www.medrxiv.org\/sites\/default\/files\/js\/js_zP7WWIfzbyzvaM63L39cNV2juU_1XVH7wduFK9gcMNI.js\u0022\u003E\u003C\/script\u003E\n\u003C\/body\u003E\u003C\/html\u003E"}