@inproceedings{bhushan-etal-2024-unveiling,
title = "Unveiling the Power of Integration: Block Diagram Summarization through Local-Global Fusion",
author = "Bhushan, Shreyanshu and
Jung, Eun-Soo and
Lee, Minho",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.822",
doi = "10.18653/v1/2024.findings-acl.822",
pages = "13837--13856",
abstract = "Block Diagrams play an essential role in visualizing the relationships between components or systems. Generating summaries of block diagrams is important for document understanding or question answering (QA) tasks by providing concise overviews of complex systems. However, it{'}s a challenging task as it requires compressing complex relationships into informative descriptions. In this paper, we present {``}BlockNet{''}, a fusion framework that summarizes block diagrams by integrating local and global information, catering to both English and Korean languages. Additionally, we introduce a new multilingual method to produce block diagram data, resulting in a high-quality dataset called {``}BD-EnKo{''}. In BlockNet, we develop {``}BlockSplit{''}, an Optical Character Recognition (OCR) based algorithm employing the divide-and-conquer principle for local information extraction. We train an OCR-free transformer architecture for global information extraction using BD-EnKo and public data. To assess the effectiveness of our model, we conduct thorough experiments on different datasets. The assessment shows that BlockNet surpasses all previous methods and models, including GPT-4V, for block diagram summarization.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhushan-etal-2024-unveiling">
<titleInfo>
<title>Unveiling the Power of Integration: Block Diagram Summarization through Local-Global Fusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shreyanshu</namePart>
<namePart type="family">Bhushan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eun-Soo</namePart>
<namePart type="family">Jung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minho</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Block Diagrams play an essential role in visualizing the relationships between components or systems. Generating summaries of block diagrams is important for document understanding or question answering (QA) tasks by providing concise overviews of complex systems. However, it’s a challenging task as it requires compressing complex relationships into informative descriptions. In this paper, we present “BlockNet”, a fusion framework that summarizes block diagrams by integrating local and global information, catering to both English and Korean languages. Additionally, we introduce a new multilingual method to produce block diagram data, resulting in a high-quality dataset called “BD-EnKo”. In BlockNet, we develop “BlockSplit”, an Optical Character Recognition (OCR) based algorithm employing the divide-and-conquer principle for local information extraction. We train an OCR-free transformer architecture for global information extraction using BD-EnKo and public data. To assess the effectiveness of our model, we conduct thorough experiments on different datasets. The assessment shows that BlockNet surpasses all previous methods and models, including GPT-4V, for block diagram summarization.</abstract>
<identifier type="citekey">bhushan-etal-2024-unveiling</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.822</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.822</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>13837</start>
<end>13856</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unveiling the Power of Integration: Block Diagram Summarization through Local-Global Fusion
%A Bhushan, Shreyanshu
%A Jung, Eun-Soo
%A Lee, Minho
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F bhushan-etal-2024-unveiling
%X Block Diagrams play an essential role in visualizing the relationships between components or systems. Generating summaries of block diagrams is important for document understanding or question answering (QA) tasks by providing concise overviews of complex systems. However, it’s a challenging task as it requires compressing complex relationships into informative descriptions. In this paper, we present “BlockNet”, a fusion framework that summarizes block diagrams by integrating local and global information, catering to both English and Korean languages. Additionally, we introduce a new multilingual method to produce block diagram data, resulting in a high-quality dataset called “BD-EnKo”. In BlockNet, we develop “BlockSplit”, an Optical Character Recognition (OCR) based algorithm employing the divide-and-conquer principle for local information extraction. We train an OCR-free transformer architecture for global information extraction using BD-EnKo and public data. To assess the effectiveness of our model, we conduct thorough experiments on different datasets. The assessment shows that BlockNet surpasses all previous methods and models, including GPT-4V, for block diagram summarization.
%R 10.18653/v1/2024.findings-acl.822
%U https://aclanthology.org/2024.findings-acl.822
%U https://doi.org/10.18653/v1/2024.findings-acl.822
%P 13837-13856
Markdown (Informal)
[Unveiling the Power of Integration: Block Diagram Summarization through Local-Global Fusion](https://aclanthology.org/2024.findings-acl.822) (Bhushan et al., Findings 2024)
ACL