@inproceedings{nishino-etal-2020-reinforcement,
title = "Reinforcement Learning with Imbalanced Dataset for Data-to-Text Medical Report Generation",
author = "Nishino, Toru and
Ozaki, Ryota and
Momoki, Yohei and
Taniguchi, Tomoki and
Kano, Ryuji and
Nakano, Norihisa and
Tagawa, Yuki and
Taniguchi, Motoki and
Ohkuma, Tomoko and
Nakamura, Keigo",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.202",
doi = "10.18653/v1/2020.findings-emnlp.202",
pages = "2223--2236",
abstract = "Automated generation of medical reports that describe the findings in the medical images helps radiologists by alleviating their workload. Medical report generation system should generate correct and concise reports. However, data imbalance makes it difficult to train models accurately. Medical datasets are commonly imbalanced in their finding labels because incidence rates differ among diseases; moreover, the ratios of abnormalities to normalities are significantly imbalanced. We propose a novel reinforcement learning method with a reconstructor to improve the clinical correctness of generated reports to train the data-to-text module with a highly imbalanced dataset. Moreover, we introduce a novel data augmentation strategy for reinforcement learning to additionally train the model on infrequent findings. From the perspective of a practical use, we employ a Two-Stage Medical Report Generator (TS-MRGen) for controllable report generation from input images. TS-MRGen consists of two separated stages: an image diagnosis module and a data-to-text module. Radiologists can modify the image diagnosis module results to control the reports that the data-to-text module generates. We conduct an experiment with two medical datasets to assess the data-to-text module and the entire two-stage model. Results demonstrate that the reports generated by our model describe the findings in the input image more correctly.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nishino-etal-2020-reinforcement">
<titleInfo>
<title>Reinforcement Learning with Imbalanced Dataset for Data-to-Text Medical Report Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toru</namePart>
<namePart type="family">Nishino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryota</namePart>
<namePart type="family">Ozaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Momoki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoki</namePart>
<namePart type="family">Taniguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryuji</namePart>
<namePart type="family">Kano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Norihisa</namePart>
<namePart type="family">Nakano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Tagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Motoki</namePart>
<namePart type="family">Taniguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoko</namePart>
<namePart type="family">Ohkuma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keigo</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automated generation of medical reports that describe the findings in the medical images helps radiologists by alleviating their workload. Medical report generation system should generate correct and concise reports. However, data imbalance makes it difficult to train models accurately. Medical datasets are commonly imbalanced in their finding labels because incidence rates differ among diseases; moreover, the ratios of abnormalities to normalities are significantly imbalanced. We propose a novel reinforcement learning method with a reconstructor to improve the clinical correctness of generated reports to train the data-to-text module with a highly imbalanced dataset. Moreover, we introduce a novel data augmentation strategy for reinforcement learning to additionally train the model on infrequent findings. From the perspective of a practical use, we employ a Two-Stage Medical Report Generator (TS-MRGen) for controllable report generation from input images. TS-MRGen consists of two separated stages: an image diagnosis module and a data-to-text module. Radiologists can modify the image diagnosis module results to control the reports that the data-to-text module generates. We conduct an experiment with two medical datasets to assess the data-to-text module and the entire two-stage model. Results demonstrate that the reports generated by our model describe the findings in the input image more correctly.</abstract>
<identifier type="citekey">nishino-etal-2020-reinforcement</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.202</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.202</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>2223</start>
<end>2236</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reinforcement Learning with Imbalanced Dataset for Data-to-Text Medical Report Generation
%A Nishino, Toru
%A Ozaki, Ryota
%A Momoki, Yohei
%A Taniguchi, Tomoki
%A Kano, Ryuji
%A Nakano, Norihisa
%A Tagawa, Yuki
%A Taniguchi, Motoki
%A Ohkuma, Tomoko
%A Nakamura, Keigo
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F nishino-etal-2020-reinforcement
%X Automated generation of medical reports that describe the findings in the medical images helps radiologists by alleviating their workload. Medical report generation system should generate correct and concise reports. However, data imbalance makes it difficult to train models accurately. Medical datasets are commonly imbalanced in their finding labels because incidence rates differ among diseases; moreover, the ratios of abnormalities to normalities are significantly imbalanced. We propose a novel reinforcement learning method with a reconstructor to improve the clinical correctness of generated reports to train the data-to-text module with a highly imbalanced dataset. Moreover, we introduce a novel data augmentation strategy for reinforcement learning to additionally train the model on infrequent findings. From the perspective of a practical use, we employ a Two-Stage Medical Report Generator (TS-MRGen) for controllable report generation from input images. TS-MRGen consists of two separated stages: an image diagnosis module and a data-to-text module. Radiologists can modify the image diagnosis module results to control the reports that the data-to-text module generates. We conduct an experiment with two medical datasets to assess the data-to-text module and the entire two-stage model. Results demonstrate that the reports generated by our model describe the findings in the input image more correctly.
%R 10.18653/v1/2020.findings-emnlp.202
%U https://aclanthology.org/2020.findings-emnlp.202
%U https://doi.org/10.18653/v1/2020.findings-emnlp.202
%P 2223-2236
Markdown (Informal)
[Reinforcement Learning with Imbalanced Dataset for Data-to-Text Medical Report Generation](https://aclanthology.org/2020.findings-emnlp.202) (Nishino et al., Findings 2020)
ACL
- Toru Nishino, Ryota Ozaki, Yohei Momoki, Tomoki Taniguchi, Ryuji Kano, Norihisa Nakano, Yuki Tagawa, Motoki Taniguchi, Tomoko Ohkuma, and Keigo Nakamura. 2020. Reinforcement Learning with Imbalanced Dataset for Data-to-Text Medical Report Generation. In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 2223–2236, Online. Association for Computational Linguistics.