@inproceedings{dainese-etal-2024-docstring,
title = "Can docstring reformulation with an {LLM} improve code generation?",
author = "Dainese, Nicola and
Ilin, Alexander and
Marttinen, Pekka",
editor = "Falk, Neele and
Papi, Sara and
Zhang, Mike",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-srw.24",
pages = "296--312",
abstract = "Generating code is an important application of Large Language Models (LLMs) and the task of function completion is one of the core open challenges in this context. Existing approaches focus on either training, fine-tuning or prompting LLMs to generate better outputs given the same input. We propose a novel and complementary approach: to optimize part of the input, the docstring (summary of a function{'}s purpose and usage), via reformulation with an LLM, in order to improve code generation. We develop two baseline methods for optimizing code generation via docstring reformulation and test them on the original HumanEval benchmark and multiple curated variants which are made more challenging by realistically worsening the docstrings. Our results show that, when operating on docstrings reformulated by an LLM instead of the original (or worsened) inputs, the performance of a number of open-source LLMs does not change significantlyThis finding demonstrates an unexpected robustness of current open-source LLMs to the details of the docstrings. We conclude by examining a series of questions, accompanied by in-depth analyses, pertaining to the sensitivity of current open-source LLMs to the details in the docstrings, the potential for improvement via docstring reformulation and the limitations of the methods employed in this work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dainese-etal-2024-docstring">
<titleInfo>
<title>Can docstring reformulation with an LLM improve code generation?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Dainese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Ilin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pekka</namePart>
<namePart type="family">Marttinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Neele</namePart>
<namePart type="family">Falk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Papi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating code is an important application of Large Language Models (LLMs) and the task of function completion is one of the core open challenges in this context. Existing approaches focus on either training, fine-tuning or prompting LLMs to generate better outputs given the same input. We propose a novel and complementary approach: to optimize part of the input, the docstring (summary of a function’s purpose and usage), via reformulation with an LLM, in order to improve code generation. We develop two baseline methods for optimizing code generation via docstring reformulation and test them on the original HumanEval benchmark and multiple curated variants which are made more challenging by realistically worsening the docstrings. Our results show that, when operating on docstrings reformulated by an LLM instead of the original (or worsened) inputs, the performance of a number of open-source LLMs does not change significantlyThis finding demonstrates an unexpected robustness of current open-source LLMs to the details of the docstrings. We conclude by examining a series of questions, accompanied by in-depth analyses, pertaining to the sensitivity of current open-source LLMs to the details in the docstrings, the potential for improvement via docstring reformulation and the limitations of the methods employed in this work.</abstract>
<identifier type="citekey">dainese-etal-2024-docstring</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-srw.24</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>296</start>
<end>312</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can docstring reformulation with an LLM improve code generation?
%A Dainese, Nicola
%A Ilin, Alexander
%A Marttinen, Pekka
%Y Falk, Neele
%Y Papi, Sara
%Y Zhang, Mike
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F dainese-etal-2024-docstring
%X Generating code is an important application of Large Language Models (LLMs) and the task of function completion is one of the core open challenges in this context. Existing approaches focus on either training, fine-tuning or prompting LLMs to generate better outputs given the same input. We propose a novel and complementary approach: to optimize part of the input, the docstring (summary of a function’s purpose and usage), via reformulation with an LLM, in order to improve code generation. We develop two baseline methods for optimizing code generation via docstring reformulation and test them on the original HumanEval benchmark and multiple curated variants which are made more challenging by realistically worsening the docstrings. Our results show that, when operating on docstrings reformulated by an LLM instead of the original (or worsened) inputs, the performance of a number of open-source LLMs does not change significantlyThis finding demonstrates an unexpected robustness of current open-source LLMs to the details of the docstrings. We conclude by examining a series of questions, accompanied by in-depth analyses, pertaining to the sensitivity of current open-source LLMs to the details in the docstrings, the potential for improvement via docstring reformulation and the limitations of the methods employed in this work.
%U https://aclanthology.org/2024.eacl-srw.24
%P 296-312
Markdown (Informal)
[Can docstring reformulation with an LLM improve code generation?](https://aclanthology.org/2024.eacl-srw.24) (Dainese et al., EACL 2024)
ACL
- Nicola Dainese, Alexander Ilin, and Pekka Marttinen. 2024. Can docstring reformulation with an LLM improve code generation?. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 296–312, St. Julian’s, Malta. Association for Computational Linguistics.