{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2017-03-02T12:30:40.873430+00:00", "custom_fields": { "meeting:meeting": { "acronym": "MMM2017", "dates": "04-06 January 2017", "place": "Reykjavik, Iceland", "session": "2A", "title": "23rd International Conference on Multimedia Modeling", "url": "http://mmm2017.ru.is/" } }, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 1, "enabled": true, "entries": { "2017-MMM-BoeschenScherp-TX.pdf": { "checksum": "md5:491fe99e667082902c708de885759075", "ext": "pdf", "id": "a45e76f8-9bd7-4bba-bac5-9c9973c36061", "key": "2017-MMM-BoeschenScherp-TX.pdf", "metadata": null, "mimetype": "application/pdf", "size": 268796 } }, "order": [], "total_bytes": 268796 }, "id": "345104", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/345104/access", "access_links": "https://zenodo.org/api/records/345104/access/links", "access_request": "https://zenodo.org/api/records/345104/access/request", "access_users": "https://zenodo.org/api/records/345104/access/users", "archive": "https://zenodo.org/api/records/345104/files-archive", "archive_media": "https://zenodo.org/api/records/345104/media-files-archive", "communities": "https://zenodo.org/api/records/345104/communities", "communities-suggestions": "https://zenodo.org/api/records/345104/communities-suggestions", "doi": "https://doi.org/10.1007/978-3-319-51811-4_2", "draft": "https://zenodo.org/api/records/345104/draft", "files": "https://zenodo.org/api/records/345104/files", "latest": "https://zenodo.org/api/records/345104/versions/latest", "latest_html": "https://zenodo.org/records/345104/latest", "media_files": "https://zenodo.org/api/records/345104/media-files", "parent": "https://zenodo.org/api/records/780168", "parent_doi": "https://zenodo.org/doi/", "parent_html": "https://zenodo.org/records/780168", "requests": 
"https://zenodo.org/api/records/345104/requests", "reserve_doi": "https://zenodo.org/api/records/345104/draft/pids/doi", "self": "https://zenodo.org/api/records/345104", "self_doi": "https://zenodo.org/doi/10.1007/978-3-319-51811-4_2", "self_html": "https://zenodo.org/records/345104", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:345104/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:345104/sequence/default", "versions": "https://zenodo.org/api/records/345104/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Kiel University" } ], "person_or_org": { "family_name": "B\u00f6schen", "given_name": "Falk", "name": "B\u00f6schen, Falk", "type": "personal" } }, { "affiliations": [ { "name": "Kiel University and Leibniz Information Centre for Economics (ZBW)" } ], "person_or_org": { "family_name": "Scherp", "given_name": "Ansgar", "name": "Scherp, Ansgar", "type": "personal" } } ], "description": "
So far, there has not been a comparative evaluation of different approaches for text extraction from scholarly figures. In order to fill this gap, we have defined a generic pipeline for text extraction that abstracts from the existing approaches as documented in the literature. In this paper, we use this generic pipeline to systematically evaluate and compare 32 configurations for text extraction over four datasets of scholarly figures of different origin and characteristics. In total, our experiments have been run over more than 400 manually labeled figures. The experimental results show that the approach BS-4OS results in the best F-measure of 0.67 for the Text Location Detection and the best average Levenshtein Distance of 4.71 between the recognized text and the gold standard on all four datasets using the Ocropy OCR engine.
", "funding": [ { "award": { "acronym": "MOVING", "id": "00k4n6c32::693092", "identifiers": [ { "identifier": "https://cordis.europa.eu/projects/693092", "scheme": "url" } ], "number": "693092", "program": "H2020", "title": { "en": "Training towards a society of data-savvy information professionals to enable open leadership innovation" } }, "funder": { "id": "00k4n6c32", "name": "European Commission" } } ], "publication_date": "2016-12-31", "publisher": "Zenodo", "resource_type": { "id": "publication-conferencepaper", "title": { "de": "Konferenzbeitrag", "en": "Conference paper" } }, "rights": [ { "description": { "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited." }, "icon": "cc-by-icon", "id": "cc-by-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution 4.0 International" } } ], "subjects": [ { "subject": "Scholarly Figures" }, { "subject": "Text Extraction" }, { "subject": "Comparison" } ], "title": "A Comparison of Approaches for Automated Text Extraction from Scholarly Figures" }, "parent": { "access": { "owned_by": { "user": 29108 } }, "communities": { "default": "021e015b-fbc8-4aea-a67d-405fb1e9dfbd", "entries": [ { "access": { "member_policy": "open", "members_visibility": "public", "record_policy": "open", "review_policy": "open", "visibility": "public" }, "children": { "allow": false }, "created": "2016-06-17T11:12:23+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "id": "021e015b-fbc8-4aea-a67d-405fb1e9dfbd", "links": {}, "metadata": { "curation_policy": "", "description": "This is the collection of materials created within the \"MOVING: Training towards a society of data-savvy information professionals to enable open leadership innovation\", H2020 Research and Innovation Action (Grant Agreement 693092).", "page": 
"This is the collection of materials created within the \"MOVING: Training towards a society of data-savvy information professionals to enable open leadership innovation\", H2020 Research and Innovation Action (Grant Agreement 693092). http://www.moving-project.eu/
\r\n", "title": "MOVING H2020 Project" }, "revision_id": 0, "slug": "moving-h2020", "updated": "2018-01-03T12:53:34.056569+00:00" } ], "ids": [ "021e015b-fbc8-4aea-a67d-405fb1e9dfbd" ] }, "id": "780168", "pids": {} }, "pids": { "doi": { "identifier": "10.1007/978-3-319-51811-4_2", "provider": "external" }, "oai": { "identifier": "oai:zenodo.org:345104", "provider": "oai" } }, "revision_id": 15, "stats": { "all_versions": { "data_volume": 62629468.0, "downloads": 233, "unique_downloads": 229, "unique_views": 134, "views": 145 }, "this_version": { "data_volume": 62629468.0, "downloads": 233, "unique_downloads": 229, "unique_views": 134, "views": 145 } }, "status": "published", "updated": "2020-01-20T15:57:03.138208+00:00", "versions": { "index": 1, "is_latest": true } }