{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2022-01-16T10:52:41.198622+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 2, "enabled": true, "entries": { "Common Phone.pdf": { "checksum": "md5:c7c99eb0c18696acf7f4d7ef6e811a25", "ext": "pdf", "id": "7ad4757e-b5bc-48ca-8f4a-5b42f5baa548", "key": "Common Phone.pdf", "metadata": null, "mimetype": "application/pdf", "size": 138517 }, "cp-1-0.tgz": { "checksum": "md5:2022f16ef7296b9141b275e2288280b9", "ext": "tgz", "id": "7aba9dbf-ba49-430a-b980-6328faf535dd", "key": "cp-1-0.tgz", "metadata": null, "mimetype": "application/gzip", "size": 13268029941 } }, "order": [], "total_bytes": 13268168458 }, "id": "5846137", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/5846137/access", "access_links": "https://zenodo.org/api/records/5846137/access/links", "access_request": "https://zenodo.org/api/records/5846137/access/request", "access_users": "https://zenodo.org/api/records/5846137/access/users", "archive": "https://zenodo.org/api/records/5846137/files-archive", "archive_media": "https://zenodo.org/api/records/5846137/media-files-archive", "communities": "https://zenodo.org/api/records/5846137/communities", "communities-suggestions": "https://zenodo.org/api/records/5846137/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.5846137", "draft": "https://zenodo.org/api/records/5846137/draft", "files": "https://zenodo.org/api/records/5846137/files", "latest": "https://zenodo.org/api/records/5846137/versions/latest", "latest_html": "https://zenodo.org/records/5846137/latest", "media_files": "https://zenodo.org/api/records/5846137/media-files", "parent": "https://zenodo.org/api/records/5846136", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.5846136", "parent_html": "https://zenodo.org/records/5846136", "requests": 
"https://zenodo.org/api/records/5846137/requests", "reserve_doi": "https://zenodo.org/api/records/5846137/draft/pids/doi", "self": "https://zenodo.org/api/records/5846137", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.5846137", "self_html": "https://zenodo.org/records/5846137", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:5846137/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:5846137/sequence/default", "versions": "https://zenodo.org/api/records/5846137/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg" } ], "person_or_org": { "family_name": "Klumpp", "given_name": "Philipp", "identifiers": [ { "identifier": "0000-0002-7531-1693", "scheme": "orcid" } ], "name": "Klumpp, Philipp", "type": "personal" } }, { "affiliations": [ { "name": "Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg" } ], "person_or_org": { "family_name": "Arias-Vergara", "given_name": "Tom\u00e1s", "name": "Arias-Vergara, Tom\u00e1s", "type": "personal" } }, { "affiliations": [ { "name": "Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg" } ], "person_or_org": { "family_name": "P\u00e9rez-Toro", "given_name": "Paula Andrea", "name": "P\u00e9rez-Toro, Paula Andrea", "type": "personal" } }, { "affiliations": [ { "name": "Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg" } ], "person_or_org": { "family_name": "N\u00f6th", "given_name": "Elmar", "name": "N\u00f6th, Elmar", "type": "personal" } }, { "affiliations": [ { "name": "Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg" } ], "person_or_org": { "family_name": "Orozco-Arroyave", "given_name": "Juan Rafael", "name": "Orozco-Arroyave, Juan Rafael", "type": "personal" } } ], "description": "
Release Date: 17.01.22
\n\nWelcome to Common Phone 1.0
\n\nLegal Information
\n\nCommon Phone is a subset of the Common Voice corpus collected by Mozilla Corporation. By using Common Phone, you agree to the Common Voice Legal Terms. Common Phone is maintained and distributed by speech researchers at the Pattern Recognition Lab of Friedrich-Alexander-University Erlangen-Nuremberg (FAU) under the CC0 license.
\n\nLike for Common Voice, you must not make any attempt to identify speakers that contributed to Common Phone.
\n\nAbout Common Phone
\n\nThis corpus aims to provide a basis for Machine Learning (ML) researchers and enthusiasts to train and test their models against a wide variety of speakers, hardware/software ecosystems and acoustic conditions to improve generalization and availability of ML in real-world speech applications.
\nThe current version of Common Phone comprises 116.5 hours of speech samples, collected from 11,246 speakers in 6 languages:
\n\n| Language | Speakers (train / dev / test) | Hours (train / dev / test) |
\n|---|---|---|
\n| English | 4716 / 771 / 774 | 14.1 / 2.3 / 2.3 |
\n| French | 796 / 138 / 135 | 13.6 / 2.3 / 2.2 |
\n| German | 1176 / 202 / 206 | 14.5 / 2.5 / 2.6 |
\n| Italian | 1031 / 176 / 178 | 14.6 / 2.5 / 2.5 |
\n| Spanish | 508 / 88 / 91 | 16.5 / 3.0 / 3.1 |
\n| Russian | 190 / 34 / 36 | 12.7 / 2.6 / 2.8 |
\n| Total | 8417 / 1409 / 1420 | 85.8 / 15.2 / 15.5 |
\n\n
Presented train, dev and test splits are not identical to those shipped with Common Voice. Speaker separation among splits was realized by only using those speakers that had provided age and gender information. This information can only be provided as a registered user on the website. When logged in, the session ID of contributed recordings is always linked to your user, thus we could easily link recordings to individual speakers. Keep in mind this would not be possible for unregistered users, as their session ID changes if they decide to contribute more than once.
\nDuring speaker selection, we considered that some speakers had contributed to more than one of the six Common Voice datasets (one for each language). In Common Phone, a speaker will only appear in one language.
\nThe dataset is structured as follows:
Where does the phonetic annotation come from?
\n\nPhonetic annotation was computed via BAS Web Services. We used the regular Pipeline (G2P-MAUS) without ASR to create an alignment of text transcripts with audio signals. We chose International Phonetic Alphabet (IPA) output symbols as they work well even in a multi-lingual setup. Common Phone annotation comprises 101 phonetic symbols, including silence.
\n\nWhy Common Phone?
\n\nIs there any publication available?
\n\nYes, a paper describing Common Phone in detail is currently under revision for LREC 2022. You can access a pre-print version on arXiv entitled “Common Phone: A Multilingual Dataset for Robust Acoustic Modelling”.
", "languages": [ { "id": "eng", "title": { "en": "English" } } ], "publication_date": "2022-01-17", "publisher": "Zenodo", "references": [ { "reference": "Klumpp, Philipp et al. (2022); \"Common Phone: A Multilingual Dataset for Robust Acoustic Modelling\" https://arxiv.org/abs/2201.05912" } ], "resource_type": { "id": "dataset", "title": { "de": "Datensatz", "en": "Dataset" } }, "rights": [ { "description": { "en": "CC0 waives copyright interest in a work you've created and dedicates it to the world-wide public domain. Use CC0 to opt out of copyright entirely and ensure your work has the widest reach." }, "icon": "cc-cc0-icon", "id": "cc0-1.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" }, "title": { "en": "Creative Commons Zero v1.0 Universal" } } ], "subjects": [ { "subject": "Multilingual" }, { "subject": "Phoneme Recognition" }, { "subject": "Phonetic Annotation" }, { "subject": "Machine Learning" }, { "subject": "ASR" }, { "subject": "Speech" }, { "subject": "Speech Processing" } ], "title": "Common Phone: A Multilingual Dataset for Robust Acoustic Modelling", "version": "1.0" }, "parent": { "access": { "owned_by": { "user": 286183 } }, "communities": { "default": "0ddda3ee-61ac-4ac0-846e-343c54ac5a19", "entries": [ { "access": { "member_policy": "open", "members_visibility": "public", "record_policy": "open", "review_policy": "open", "visibility": "public" }, "children": { "allow": false }, "created": "2015-03-09T12:07:58+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "id": "0ddda3ee-61ac-4ac0-846e-343c54ac5a19", "links": {}, "metadata": { "curation_policy": "Article videos, tables and images to a maximum of 30MB in size can be published using the open access model in OPUS FAU (http://opus4.kobv.de/opus4-fau/home/). The corresponding publication must also be made available in OPUS FAU. 
For research data which must meet more strict requirements in terms of memory capacity, documentation or compliance with research funding organisations' requirements, feel free to use this ZENODO community collection. This also applies to stand-alone or raw data sets.
\r\n\r\nSince this community is operated by the 'Referat Open Access', opening and making the data available (i.e., maximum restricted access, please no closed access) is desired.
\r\n", "page": "Zenodo collection of the Friedrich-Alexander University Erlangen-Nürnberg (FAU) can be used as a community platform for research data of all members of the FAU. The FAU is a strong research university and one of the largest universities in Germany: www.fau.de
\r\n\r\nThe Friedrich-Alexander University Erlangen-Nürnberg (FAU) is a strong research university with an international perspective and one of the largest universities in Germany, with around 40,000 students, 260 degree programmes, 4,000 academic staff (including over 580 professors), 200 million euros third-party funding, and 500 partnerships with universities all over the world. Teaching at the University is closely linked to research and focuses on training students in both theory and practice to enable them to think critically and work independently. The research itself also strikes the perfect balance between a theoretical approach and practical application. Only fast, direct and ideally free access to academic publications and primary research data unlocks the full potential of this research environment. Visit https://www.fau.de/ to learn more about the FAU.
\r\n\r\nZenodo FAU community collection
\r\n\r\nFeel free to use this Zenodo collection FAU as a central electronic archiving and publication platform for research data which can't be archived or published on OPUS FAU (http://opus4.kobv.de/opus4-fau/home), RRZE basis storage (https://www.rrze.fau.de/files/2017/06/Betreuungsvereinbarung_Basis-Storage.pdf) or institutional websites. Qualified scientific research data of all members of FAU may be published here free of charge. Corresponding full texts to the research data shall be published on OPUS FAU or disciplinary repositories. Thus all publications and research data sets are permanently available to the global public and are searchable and citable via catalogues and search engines.
\r\n\r\nFor the creation of data management plans the tools RDMO (https://rdmorganiser.github.io/) or DMPonline (https://dmponline.dcc.ac.uk/) can be useful. For further information visit e.g. the Digital Curation Centre (DCC) (http://www.dcc.ac.uk/resources/data-management-plans). FAU also offers advice on legal and organizational matters to all researchers who would like to publish their research in open access media, via its Open Access Policy (urn:nbn:de:bvb:29-opus4-68651).
\r\n\r\nOnly free and direct access to academic publications and primary research data unlocks the full potential of a digital research environment, academic networks and integrated research databases. The University thus wants to promote open access in the long term and has established an open access publishing fund to cover these costs (https://ub.fau.de/en/writing-publishing/open-access/).
\r\n\r\nSince this community is operated by the 'Referat Open Access', opening and making the data available (i.e., maximum restricted access, please no closed access) is desired.
\r\n\r\nSubject-based research data repositories
\r\n\r\nOther research data repositories can be found sorted by subjects on the web-based directory of the project re3data.org (Registry of Research Data Repositories, http://www.re3data.org/browse/by-subject/).
\r\n\r\nData citation standards
\r\n\r\nUsing the citation standard offers proper recognition to authors as well as permanent identification through the use of global, persistent identifiers in place of URLs. Use of universal numerical fingerprints (UNFs) guarantees to the scholarly community that future researchers will be able to verify that data retrieved is identical to that used in a publication decades earlier, even if it has changed storage media, operating systems, hardware, and statistical program format.
\r\n\r\nFollowing are two authentic examples of replication data citations:
\r\n\r\nFrom International Studies Quarterly, King and Zeng, 2006, p. 209:
\r\n\r\n\r\n\r\n\r\nGary King; Langche Zeng, 2006, “Replication data for: When Can History be Our Guide? The Pitfalls of Counterfactual Inference”, Harvard Dataverse, V2, http://hdl.handle.net/1902.1/DXRXCFAWPK UNF:3:DaYlT6QSX9r0D50ye+tXpA==
\r\n
From Political Analysis, Hanmer, Banks, and White, 2013:
\r\n\r\n\r\n\r\n\r\nHanmer, Michael J.; Banks, Antoine J., White, Ismail K., 2013, “Replication data for: Experiments to Reduce the Over-reporting of Voting: A Pipeline to the Truth”, Harvard Dataverse, V1, http://dx.doi.org/10.7910/DVN/22893 UNF:5:eJOVAjDU0E0jzSQ2bRCg9g==
\r\n
For more information, please click here to read: http://best-practices.dataverse.org/data-citation/
\r\n\r\nContact
\r\n\r\nContact for research data management: https://www.cdi.fau.de/kontakt/
\r\nContact person for open access: Mr Markus Putnings
\r\nContact person for publication in OPUS: https://opus4.kobv.de/opus4-fau/home/index/contact
\r\nContact person for legal questions about open access: Ms Petra Heermann
\r\nContact Section F - Research and Young Scientists: https://www.fau.de/universitaet/leitung-und-struktur/geschaeftsverteilungsplan-der-verwaltung/abteilung-f/
\r\n\r\n
Exclusion of liability
\r\n\r\nThis community website was created by Markus Putnings (hereinafter: author) in March 2015. The author has compiled and verified all information provided with great care. However, he cannot assume liability or furnish a guarantee that the data is correct, complete or up-to-date, or regarding the quality or constant availability of the information provided. He won't be liable for any damages arising from the installation and use of software or data when accessing or downloading contents from this website. This community website reflects the opinions and insights of the author. The author expressly reserves the right to change, amend or delete website contents.
\r\n\r\nLinks and references (disclaimer)
\r\n\r\nThe author is only responsible for the original content provided in accordance with the applicable laws. This original content is to be distinguished from links to other websites. The author is not responsible for this third-party content as the operator does not initiate the data transmission, does not choose the recipient of the information and does not select or have any influence on the information transmitted. The methods used to provide access and link to this third-party information also do not involve any automatic short-term storage, resulting in a full exclusion of any liability for third-party content on the author's part. Should the author become aware or be made aware by others that the content of a website linked from this site could constitute a civil or criminal law violation, then the link will be immediately removed as long as this is technically feasible and within reasonable expectation. Liability for illegal, inaccurate or incomplete content and for damages resulting from the use or non-use of information provided by third parties shall lie exclusively with the respective operators of the linked sites.
", "title": "Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg - FAU" }, "revision_id": 0, "slug": "fau", "updated": "2023-05-12T10:50:19.495994+00:00" } ], "ids": [ "0ddda3ee-61ac-4ac0-846e-343c54ac5a19" ] }, "id": "5846136", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.5846136", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.5846137", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:5846137", "provider": "oai" } }, "revision_id": 6, "stats": { "all_versions": { "data_volume": 3569277078855.0, "downloads": 1547, "unique_downloads": 1301, "unique_views": 1646, "views": 1746 }, "this_version": { "data_volume": 3556008217812.0, "downloads": 1540, "unique_downloads": 1295, "unique_views": 1639, "views": 1739 } }, "status": "published", "updated": "2022-01-23T17:53:29.189905+00:00", "versions": { "index": 1, "is_latest": true } }