% medRxiv preprint (exported from medrxiv.org). The `abstract` field holds only
% the abstract text. The submission-form statements that the export had fused
% onto the end of the abstract are kept in separate fields
% (competing-interests, funding, author-declarations, data-availability, pdf),
% which are not standard BibTeX fields, so they serve as annotations only.
@article{Yang2020.05.08.20095810,
  author       = {Yang, Wenmian and Zeng, Guangtao and Tan, Bowen and Ju, Zeqian
                  and Chakravorty, Subrato and He, Xuehai and Chen, Shu
                  and Yang, Xingyi and Wu, Qingyang and Yu, Zhou and Xing, Eric
                  and Xie, Pengtao},
  title        = {On the Generation of Medical Dialogues for {COVID-19}},
  journal      = {{medRxiv}},
  publisher    = {Cold Spring Harbor Laboratory Press},
  year         = {2020},
  elocation-id = {2020.05.08.20095810},
  doi          = {10.1101/2020.05.08.20095810},
  url          = {https://www.medrxiv.org/content/early/2020/05/15/2020.05.08.20095810},
  pdf          = {https://www.medrxiv.org/content/early/2020/05/15/2020.05.08.20095810.full.pdf},
  abstract     = {Under the pandemic of COVID-19, people experiencing
                  COVID19-related symptoms or exposed to risk factors have a
                  pressing need to consult doctors. Due to hospital closure, a
                  lot of consulting services have been moved online. Because of
                  the shortage of medical professionals, many people cannot
                  receive online consultations timely. To address this problem,
                  we aim to develop a medical dialogue system that can provide
                  COVID19-related consultations. We collected two dialogue
                  datasets {\textendash} CovidDialog {\textendash} (in English
                  and Chinese respectively) containing conversations between
                  doctors and patients about COVID-19. On these two datasets, we
                  train several dialogue generation models based on Transformer,
                  GPT, and BERT-GPT. Since the two COVID-19 dialogue datasets
                  are small in size, which bear high risk of overfitting, we
                  leverage transfer learning to mitigate data deficiency.
                  Specifically, we take the pretrained models of Transformer,
                  GPT, and BERT-GPT on dialog datasets and other large-scale
                  texts, then finetune them on our CovidDialog datasets.
                  Experiments demonstrate that these approaches are promising in
                  generating meaningful medical dialogues about COVID-19. But
                  more advanced approaches are needed to build a fully useful
                  dialogue system that can offer accurate COVID-related
                  consultations. The data and code are available at
                  https://github.com/UCSD-AI4H/COVID-Dialogue},
  competing-interests = {The authors have declared no competing interest.},
  funding      = {No},
  author-declarations = {All relevant ethical guidelines have been followed; any
                  necessary IRB and/or ethics committee approvals have been
                  obtained and details of the IRB/oversight body are included in
                  the manuscript: Yes. All necessary patient/participant consent
                  has been obtained and the appropriate institutional forms have
                  been archived: Yes. I understand that all clinical trials and
                  any other prospective interventional studies must be
                  registered with an ICMJE-approved registry, such as
                  ClinicalTrials.gov. I confirm that any such study reported in
                  the manuscript has been registered and the trial registration
                  ID is provided (note: if posting a prospective study
                  registered retrospectively, please provide a statement in the
                  trial ID field explaining why the study was not registered in
                  advance): Yes. I have followed all appropriate research
                  reporting guidelines and uploaded the relevant EQUATOR Network
                  research reporting checklist(s) and other pertinent material
                  as supplementary files, if applicable: Yes.},
  data-availability = {COVID-Dialogue-Dataset-English is an English medical
                  dialogue dataset about COVID-19 and other types of pneumonia.
                  Patients who are concerned that they may be infected by
                  COVID-19 or other pneumonia consult doctors and doctors
                  provide advice. There are 603 consultations.
                  COVID-Dialogue-Dataset-Chinese is a Chinese medical dialogue
                  dataset about COVID-19 and other types of pneumonia. Patients
                  who are concerned that they may be infected by COVID-19 or
                  other pneumonia consult doctors and doctors provide advice.
                  There are 1393 consultations.
                  https://github.com/UCSD-AI4H/COVID-Dialogue},
}