{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2025-08-25T07:57:59.490404+00:00",
  "custom_fields": {
    "code:developmentStatus": {
      "id": "active",
      "title": {
        "en": "Active"
      }
    },
    "meeting:meeting": {
      "acronym": "IS2025",
      "dates": "17-21 August 2025",
      "place": "Rotterdam",
      "session": "Special Session - Challenges in Speech Data  Collection, Curation and Annotation",
      "session_part": "2",
      "title": "Interspeech 2025",
      "url": "https://www.interspeech2025.org/home"
    }
  },
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 6,
    "enabled": true,
    "entries": {
      "emilia_file_map.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:9bc8c456b92f29c9412278fbd5cc081d",
        "ext": "csv",
        "id": "3d4663fa-24bc-48e2-bb31-fdfae77cfcf0",
        "key": "emilia_file_map.csv",
        "links": {
          "content": "https://zenodo.org/api/records/16937418/files/emilia_file_map.csv/content",
          "self": "https://zenodo.org/api/records/16937418/files/emilia_file_map.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 1176647,
        "storage_class": "L"
      },
      "finetuning_set.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:3be4b058df8a01e111126d35c4b79e1f",
        "ext": "csv",
        "id": "28013811-7d75-47f6-88a7-8fbbb6f9b63b",
        "key": "finetuning_set.csv",
        "links": {
          "content": "https://zenodo.org/api/records/16937418/files/finetuning_set.csv/content",
          "self": "https://zenodo.org/api/records/16937418/files/finetuning_set.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 1296844,
        "storage_class": "L"
      },
      "label-studio-template.xml": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:348936f0957eb978a863fc86aba1ec39",
        "ext": "xml",
        "id": "d3e492c0-59fc-480e-a28f-51899b222ce5",
        "key": "label-studio-template.xml",
        "links": {
          "content": "https://zenodo.org/api/records/16937418/files/label-studio-template.xml/content",
          "self": "https://zenodo.org/api/records/16937418/files/label-studio-template.xml"
        },
        "metadata": {},
        "mimetype": "text/xml",
        "size": 1494,
        "storage_class": "L"
      },
      "test_set.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:7d5f855091f9a33726ed31361a63a62e",
        "ext": "csv",
        "id": "9674ea88-d934-465f-be54-611ac969f951",
        "key": "test_set.csv",
        "links": {
          "content": "https://zenodo.org/api/records/16937418/files/test_set.csv/content",
          "self": "https://zenodo.org/api/records/16937418/files/test_set.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 83802,
        "storage_class": "L"
      },
      "validation_set.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:b03d03adba58c4f498744bef5c405d0e",
        "ext": "csv",
        "id": "2567f6fa-faf2-4a92-b10e-57ef56b7cd38",
        "key": "validation_set.csv",
        "links": {
          "content": "https://zenodo.org/api/records/16937418/files/validation_set.csv/content",
          "self": "https://zenodo.org/api/records/16937418/files/validation_set.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 62513,
        "storage_class": "L"
      },
      "yodas_file_map.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:0b3a98e0ec014ee2d264d42d18b42f29",
        "ext": "csv",
        "id": "e45cdee7-24aa-40ae-bde6-ea57fa5705a1",
        "key": "yodas_file_map.csv",
        "links": {
          "content": "https://zenodo.org/api/records/16937418/files/yodas_file_map.csv/content",
          "self": "https://zenodo.org/api/records/16937418/files/yodas_file_map.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 1823674,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 4444974
  },
  "id": "16937418",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/16937418/access",
    "access_grants": "https://zenodo.org/api/records/16937418/access/grants",
    "access_links": "https://zenodo.org/api/records/16937418/access/links",
    "access_request": "https://zenodo.org/api/records/16937418/access/request",
    "access_users": "https://zenodo.org/api/records/16937418/access/users",
    "archive": "https://zenodo.org/api/records/16937418/files-archive",
    "archive_media": "https://zenodo.org/api/records/16937418/media-files-archive",
    "communities": "https://zenodo.org/api/records/16937418/communities",
    "communities-suggestions": "https://zenodo.org/api/records/16937418/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.16937418",
    "draft": "https://zenodo.org/api/records/16937418/draft",
    "file_modification": "https://zenodo.org/api/records/16937418/file-modification",
    "files": "https://zenodo.org/api/records/16937418/files",
    "latest": "https://zenodo.org/api/records/16937418/versions/latest",
    "latest_html": "https://zenodo.org/records/16937418/latest",
    "media_files": "https://zenodo.org/api/records/16937418/media-files",
    "parent": "https://zenodo.org/api/records/15534661",
    "parent_doi": "https://doi.org/10.5281/zenodo.15534661",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.15534661",
    "parent_html": "https://zenodo.org/records/15534661",
    "preview_html": "https://zenodo.org/records/16937418?preview=1",
    "request_deletion": "https://zenodo.org/api/records/16937418/request-deletion",
    "requests": "https://zenodo.org/api/records/16937418/requests",
    "reserve_doi": "https://zenodo.org/api/records/16937418/draft/pids/doi",
    "self": "https://zenodo.org/api/records/16937418",
    "self_doi": "https://doi.org/10.5281/zenodo.16937418",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.16937418",
    "self_html": "https://zenodo.org/records/16937418",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:16937418/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:16937418/sequence/default",
    "versions": "https://zenodo.org/api/records/16937418/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "copyright": "Copyright (C) 2025 ConnexAI",
    "creators": [
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "family_name": "Ravenscroft",
          "given_name": "William",
          "identifiers": [
            {
              "identifier": "0000-0002-0780-3303",
              "scheme": "orcid"
            }
          ],
          "name": "Ravenscroft, William",
          "type": "personal"
        },
        "role": {
          "id": "contactperson",
          "title": {
            "de": "Kontaktperson",
            "en": "Contact person"
          }
        }
      },
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "family_name": "Close",
          "given_name": "George",
          "identifiers": [
            {
              "identifier": "0000-0002-9478-5421",
              "scheme": "orcid"
            }
          ],
          "name": "Close, George",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "family_name": "Stacey",
          "given_name": "Jamie",
          "name": "Stacey, Jamie",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "family_name": "Kit",
          "given_name": "Bower-Morris",
          "name": "Kit, Bower-Morris",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "family_name": "Sityaev",
          "given_name": "Dmitry",
          "name": "Sityaev, Dmitry",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "family_name": "Hong",
          "given_name": "Kris Y.",
          "name": "Hong, Kris Y.",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "ConnexAI"
          }
        ],
        "person_or_org": {
          "name": "ConnexAI",
          "type": "organizational"
        }
      }
    ],
    "dates": [
      {
        "date": "2025-05-19",
        "description": "Interspeech 2025",
        "type": {
          "id": "accepted",
          "title": {
            "de": "Angenommen",
            "en": "Accepted"
          }
        }
      }
    ],
    "description": "<h2>The Annotated In-The-Wild (AITW) Dataset for Filtering of In-the-Wild Speech Data (v1.1)</h2>\n<h3><em>Version 1.1: Some missing entries in the Emilia file map have been updated and a number of YODAS archive URLs have been corrected.</em></h3>\n<p>The Annotated In-The-Wild (AITW) dataset accompanies the paper &ldquo;Whilter: A Whisper-based Data Filter for &lsquo;in-the-wild&rsquo; Speech Corpora Using Utterance-level Multi-Task Classification&rdquo;, accepted at Interspeech 2025 in Rotterdam. This dataset supports research into automated filtering of noisy or undesirable audio segments in large-scale, real-world speech corpora, particularly for training high-quality English TTS and ASR models.</p>\n<p>AITW includes over 21,000 manually labeled audio samples (&asymp;64 hours) from two popular in-the-wild speech datasets (Emilia and YODAS). Each audio clip is annotated at the utterance level with binary or numerical labels for five key properties.</p>\n<p><strong>Numerical labels:</strong></p>\n<ul>\n<li>Speaker count</li>\n</ul>\n<p><strong>Binary labels:</strong></p>\n<ul>\n<li>Non-English (foreign) language</li>\n<li>Background music</li>\n<li>Noisy or poor-quality speech</li>\n<li>Synthetic (spoofed) speech</li>\n</ul>\n<p>Annotations were performed by expert annotators using a custom Label Studio interface, with consistent guidelines applied across all tasks. This dataset enables the benchmarking of multi-task classification models like Whilter and comparison with single-task baselines.</p>\n<p>AITW is designed to foster further research in scalable speech data curation and low-resource dataset bootstrapping. We encourage contributions and improvements through the included Label Studio GUI.</p>\n<p><strong>Files include:</strong></p>\n<ul>\n<li>Labeled audio metadata along with file maps which map back to the data in YODAS and Emilia (.csv or .json)</li>\n<li>Interface config for Label Studio (.xml)</li>\n</ul>\n<p><strong>If you use this dataset, please cite:</strong></p>\n<blockquote>\n<p><em>W. Ravenscroft, G. Close, K. Bower-Morris, J. Stacey, D. Sityaev, K. Hong. &ldquo;Whilter: A Whisper-based Data Filter for &lsquo;in-the-wild&rsquo; Speech Corpora Using Utterance-level Multi-Task Classification,&rdquo; Interspeech 2025.</em></p>\n</blockquote>\n<p><strong>License</strong>:</p>\n<p>Creative Commons Attribution 4.0 International (CC BY 4.0)</p>\n<p>&nbsp;</p>",
    "languages": [
      {
        "id": "eng",
        "title": {
          "en": "English"
        }
      },
      {
        "id": "mul",
        "title": {
          "en": "Multiple languages"
        }
      }
    ],
    "publication_date": "2025-08-17",
    "publisher": "Zenodo",
    "references": [
      {
        "reference": "X. Li, S. Takamichi, T. Saeki, W. Chen, S. Shiota and S. Watanabe, \"Yodas: Youtube-Oriented Dataset for Audio and Speech,\" 2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), Taipei, Taiwan, 2023, pp. 1-8, doi: 10.1109/ASRU57964.2023.10389689. keywords: {Training;Video on demand;Conferences;Pipelines;Self-supervised learning;Manuals;Data collection;multilingual speech processing;speech recognition;large-scale speech dataset},"
      },
      {
        "reference": "H. He et al., \"Emilia: An Extensive, Multilingual, and Diverse Speech Dataset For Large-Scale Speech Generation,\" 2024 IEEE Spoken Language Technology Workshop (SLT), Macao, 2024, pp. 885-890, doi: 10.1109/SLT61566.2024.10832365. keywords: {Training;Technological innovation;Annotations;Conferences;Pipelines;Training data;Transforms;Speech;Data models;Multilingual;Extensive Multilingual and Diverse Dataset;Large-scale Speech Generation},"
      }
    ],
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "Audio Classification"
      },
      {
        "subject": "In-The-Wild Data"
      },
      {
        "subject": "Speech Datasets"
      }
    ],
    "title": "AITW: The Annotated In-the-Wild Dataset for Filtering of In-the-Wild Speech Data",
    "version": "1.1"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "1330242"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {},
    "id": "15534661",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.15534661",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.16937418",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:16937418",
      "provider": "oai"
    }
  },
  "revision_id": 4,
  "stats": {
    "all_versions": {
      "data_volume": 379812275.0,
      "downloads": 315,
      "unique_downloads": 266,
      "unique_views": 217,
      "views": 258
    },
    "this_version": {
      "data_volume": 135972517.0,
      "downloads": 139,
      "unique_downloads": 122,
      "unique_views": 67,
      "views": 82
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2025-08-26T08:01:20.952435+00:00",
  "versions": {
    "index": 2,
    "is_latest": true
  }
}