@inproceedings{cui-swayamdipta-2024-annotating,
title = "Annotating {F}rame{N}et via Structure-Conditioned Language Generation",
author = "Cui, Xinyue and
Swayamdipta, Swabha",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-short.63",
doi = "10.18653/v1/2024.acl-short.63",
pages = "681--692",
abstract = "Despite the remarkable generative capabilities of language models in producing naturalistic language, their effectiveness on explicit manipulation and generation of linguistic structures remain understudied. In this paper, we investigate the task of generating new sentences preserving a given semantic structure, following the FrameNet formalism. We propose a framework to produce novel frame-semantically annotated sentences following an overgenerate-and-filter approach. Our results show that conditioning on rich, explicit semantic information tends to produce generations with high human acceptance, under both prompting and finetuning. Our generated frame-semantic structured annotations are effective at training data augmentation for frame-semantic role labeling in low-resource settings; however, we do not see benefits under higher resource settings. Our study concludes that while generating high-quality, semantically rich data might be within reach, the downstream utility of such generations remains to be seen, highlighting the outstanding challenges with automating linguistic annotation tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cui-swayamdipta-2024-annotating">
<titleInfo>
<title>Annotating FrameNet via Structure-Conditioned Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinyue</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swabha</namePart>
<namePart type="family">Swayamdipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the remarkable generative capabilities of language models in producing naturalistic language, their effectiveness on explicit manipulation and generation of linguistic structures remain understudied. In this paper, we investigate the task of generating new sentences preserving a given semantic structure, following the FrameNet formalism. We propose a framework to produce novel frame-semantically annotated sentences following an overgenerate-and-filter approach. Our results show that conditioning on rich, explicit semantic information tends to produce generations with high human acceptance, under both prompting and finetuning. Our generated frame-semantic structured annotations are effective at training data augmentation for frame-semantic role labeling in low-resource settings; however, we do not see benefits under higher resource settings. Our study concludes that while generating high-quality, semantically rich data might be within reach, the downstream utility of such generations remains to be seen, highlighting the outstanding challenges with automating linguistic annotation tasks.</abstract>
<identifier type="citekey">cui-swayamdipta-2024-annotating</identifier>
<identifier type="doi">10.18653/v1/2024.acl-short.63</identifier>
<location>
<url>https://aclanthology.org/2024.acl-short.63</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>681</start>
<end>692</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Annotating FrameNet via Structure-Conditioned Language Generation
%A Cui, Xinyue
%A Swayamdipta, Swabha
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F cui-swayamdipta-2024-annotating
%X Despite the remarkable generative capabilities of language models in producing naturalistic language, their effectiveness on explicit manipulation and generation of linguistic structures remain understudied. In this paper, we investigate the task of generating new sentences preserving a given semantic structure, following the FrameNet formalism. We propose a framework to produce novel frame-semantically annotated sentences following an overgenerate-and-filter approach. Our results show that conditioning on rich, explicit semantic information tends to produce generations with high human acceptance, under both prompting and finetuning. Our generated frame-semantic structured annotations are effective at training data augmentation for frame-semantic role labeling in low-resource settings; however, we do not see benefits under higher resource settings. Our study concludes that while generating high-quality, semantically rich data might be within reach, the downstream utility of such generations remains to be seen, highlighting the outstanding challenges with automating linguistic annotation tasks.
%R 10.18653/v1/2024.acl-short.63
%U https://aclanthology.org/2024.acl-short.63
%U https://doi.org/10.18653/v1/2024.acl-short.63
%P 681-692
Markdown (Informal)
[Annotating FrameNet via Structure-Conditioned Language Generation](https://aclanthology.org/2024.acl-short.63) (Cui & Swayamdipta, ACL 2024)
ACL