% medRxiv preprint on the ATHLOS hybrid clustering-for-prediction framework (citation key unchanged).
% The abstract field holds only the paper abstract; the medRxiv submission-form statements that the
% export had fused onto it are preserved in medrxiv-declarations, a field name standard styles do not print.
@article{Barmpas2021.01.23.21250355,
  author               = {Barmpas, Petros and Tasoulis, Sotiris and Vrahatis, Aristidis G. and
                          Prina, Matthew and Ayuso-Mateos, Jos{\'e} Luis and Bickenbach, Jerome and
                          Bayes, Ivet and Bobak, Martin and Caballero, Francisco F{\'e}lix and
                          Chatterji, Somnath and Egea-Cort{\'e}s, Laia and
                          Garc{\'\i}a-Esquinas, Esther and Leonardi, Matilde and Koskinen, Seppo and
                          Koupil, Ilona and Paj{\k a}k, Andrzej and Prince, Martin and
                          Sanderson, Warren and Scherbov, Sergei and Tamosiunas, Abdonas and
                          Galas, Aleksander and Haro, Josep Maria and Sanchez-Niubo, Albert and
                          Plagianakos, Vassilis and Panagiotakos, Demosthenes},
  title                = {A Hybrid Machine Learning Framework for Enhancing the Prediction Power in Large Scale Population Studies: The {ATHLOS} Project},
  journal              = {medRxiv},
  publisher            = {Cold Spring Harbor Laboratory Press},
  year                 = {2021},
  elocation-id         = {2021.01.23.21250355},
  doi                  = {10.1101/2021.01.23.21250355},
  url                  = {https://www.medrxiv.org/content/early/2021/01/28/2021.01.23.21250355},
  eprint               = {https://www.medrxiv.org/content/early/2021/01/28/2021.01.23.21250355.full.pdf},
  abstract             = {The ATHLOS cohort is composed of several harmonized datasets of international
                          cohorts related to health and aging. The healthy aging scale has been constructed
                          based on a selection of particular variables from 16 individual studies. In this
                          paper, we consider a selection of additional variables found in ATHLOS and
                          investigate their utilization for predicting the healthy aging. For this purpose
                          motivated by the dataset{\textquoteright}s volume and diversity we focus our
                          attention upon the clustering for prediction scheme, where unsupervised learning
                          is utilized to enhance prediction power, showing the predictive utility of
                          exploiting structure in the data by clustering. We show that imposed computation
                          bottlenecks can be surpassed when using appropriate hierarchical clustering within
                          a clustering for ensemble classification scheme while retaining prediction
                          benefits. We propose a complete methodology which is evaluated against baseline
                          methods and the original concept. The results are very encouraging suggesting
                          further developments in this direction along with applications in tasks with
                          similar characteristics. A straightforward open source implementation is provided
                          for the R project.},
  medrxiv-declarations = {Competing Interest Statement: The authors have declared no competing interest.
                          Funding Statement: This work is supported by the ATHLOS (Aging Trajectories of
                          Health: Longitudinal Opportunities and Synergies) project, funded by the European
                          Union{\textquoteright}s Horizon 2020 Research and Innovation Program under grant
                          agreement number 635316.
                          Author Declarations: I confirm all relevant ethical guidelines have been followed,
                          and any necessary IRB and/or ethics committee approvals have been obtained. Yes.
                          The details of the IRB/oversight body that provided approval or exemption for the
                          research described are given below: Does not apply in our work.
                          All necessary patient/participant consent has been obtained and the appropriate
                          institutional forms have been archived. Yes.
                          I understand that all clinical trials and any other prospective interventional
                          studies must be registered with an ICMJE-approved registry, such as
                          ClinicalTrials.gov. I confirm that any such study reported in the manuscript has
                          been registered and the trial registration ID is provided (note: if posting a
                          prospective study registered retrospectively, please provide a statement in the
                          trial ID field explaining why the study was not registered in advance). Yes.
                          I have followed all appropriate research reporting guidelines and uploaded the
                          relevant EQUATOR Network research reporting checklist(s) and other pertinent
                          material as supplementary files, if applicable. Yes.
                          The data were obtained from the official partners of the ATHLOS project.
                          http://athlosproject.eu/},
}