@inproceedings{ishigaki-etal-2021-generating,
title = "Generating Racing Game Commentary from Vision, Language, and Structured Data",
author = "Ishigaki, Tatsuya and
Topic, Goran and
Hamazono, Yumi and
Noji, Hiroshi and
Kobayashi, Ichiro and
Miyao, Yusuke and
Takamura, Hiroya",
editor = "Belz, Anya and
Fan, Angela and
Reiter, Ehud and
Sripada, Yaji",
booktitle = "Proceedings of the 14th International Conference on Natural Language Generation",
month = aug,
year = "2021",
address = "Aberdeen, Scotland, UK",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.inlg-1.11/",
doi = "10.18653/v1/2021.inlg-1.11",
pages = "103--113",
abstract = "We propose the task of automatically generating commentaries for races in a motor racing game, from vision, structured numerical, and textual data. Commentaries provide information to support spectators in understanding events in races. Commentary generation models need to interpret the race situation and generate the correct content at the right moment. We divide the task into two subtasks: utterance timing identification and utterance generation. Because existing datasets do not have such alignments of data in multiple modalities, this setting has not been explored in depth. In this study, we introduce a new large-scale dataset that contains aligned video data, structured numerical data, and transcribed commentaries that consist of 129,226 utterances in 1,389 races in a game. Our analysis reveals that the characteristics of commentaries change over time or from viewpoints. Our experiments on the subtasks show that it is still challenging for a state-of-the-art vision encoder to capture useful information from videos to generate accurate commentaries. We make the dataset and baseline implementation publicly available for further research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ishigaki-etal-2021-generating">
<titleInfo>
<title>Generating Racing Game Commentary from Vision, Language, and Structured Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tatsuya</namePart>
<namePart type="family">Ishigaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Goran</namePart>
<namePart type="family">Topic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yumi</namePart>
<namePart type="family">Hamazono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroshi</namePart>
<namePart type="family">Noji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ichiro</namePart>
<namePart type="family">Kobayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaji</namePart>
<namePart type="family">Sripada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Aberdeen, Scotland, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose the task of automatically generating commentaries for races in a motor racing game, from vision, structured numerical, and textual data. Commentaries provide information to support spectators in understanding events in races. Commentary generation models need to interpret the race situation and generate the correct content at the right moment. We divide the task into two subtasks: utterance timing identification and utterance generation. Because existing datasets do not have such alignments of data in multiple modalities, this setting has not been explored in depth. In this study, we introduce a new large-scale dataset that contains aligned video data, structured numerical data, and transcribed commentaries that consist of 129,226 utterances in 1,389 races in a game. Our analysis reveals that the characteristics of commentaries change over time or from viewpoints. Our experiments on the subtasks show that it is still challenging for a state-of-the-art vision encoder to capture useful information from videos to generate accurate commentaries. We make the dataset and baseline implementation publicly available for further research.</abstract>
<identifier type="citekey">ishigaki-etal-2021-generating</identifier>
<identifier type="doi">10.18653/v1/2021.inlg-1.11</identifier>
<location>
<url>https://aclanthology.org/2021.inlg-1.11/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>103</start>
<end>113</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Generating Racing Game Commentary from Vision, Language, and Structured Data
%A Ishigaki, Tatsuya
%A Topic, Goran
%A Hamazono, Yumi
%A Noji, Hiroshi
%A Kobayashi, Ichiro
%A Miyao, Yusuke
%A Takamura, Hiroya
%Y Belz, Anya
%Y Fan, Angela
%Y Reiter, Ehud
%Y Sripada, Yaji
%S Proceedings of the 14th International Conference on Natural Language Generation
%D 2021
%8 August
%I Association for Computational Linguistics
%C Aberdeen, Scotland, UK
%F ishigaki-etal-2021-generating
%X We propose the task of automatically generating commentaries for races in a motor racing game, from vision, structured numerical, and textual data. Commentaries provide information to support spectators in understanding events in races. Commentary generation models need to interpret the race situation and generate the correct content at the right moment. We divide the task into two subtasks: utterance timing identification and utterance generation. Because existing datasets do not have such alignments of data in multiple modalities, this setting has not been explored in depth. In this study, we introduce a new large-scale dataset that contains aligned video data, structured numerical data, and transcribed commentaries that consist of 129,226 utterances in 1,389 races in a game. Our analysis reveals that the characteristics of commentaries change over time or from viewpoints. Our experiments on the subtasks show that it is still challenging for a state-of-the-art vision encoder to capture useful information from videos to generate accurate commentaries. We make the dataset and baseline implementation publicly available for further research.
%R 10.18653/v1/2021.inlg-1.11
%U https://aclanthology.org/2021.inlg-1.11/
%U https://doi.org/10.18653/v1/2021.inlg-1.11
%P 103-113
Markdown (Informal)
[Generating Racing Game Commentary from Vision, Language, and Structured Data](https://aclanthology.org/2021.inlg-1.11/) (Ishigaki et al., INLG 2021)
ACL
- Tatsuya Ishigaki, Goran Topic, Yumi Hamazono, Hiroshi Noji, Ichiro Kobayashi, Yusuke Miyao, and Hiroya Takamura. 2021. Generating Racing Game Commentary from Vision, Language, and Structured Data. In Proceedings of the 14th International Conference on Natural Language Generation, pages 103–113, Aberdeen, Scotland, UK. Association for Computational Linguistics.