{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2024-04-06T14:47:44.398555+00:00",
  "custom_fields": {
    "code:codeRepository": "https://github.com/partha2409/DCASE2024_seld_baseline"
  },
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 5,
    "enabled": true,
    "entries": {
      "DCASE Task 3 synthetic dataset 2024.zip.001": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:c651f4a9670326097361763015d955f1",
        "ext": "001",
        "id": "4b92aa31-31c8-414b-8486-f1b86647b4e8",
        "key": "DCASE Task 3 synthetic dataset 2024.zip.001",
        "links": {
          "content": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.001/content",
          "self": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.001"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "size": 4697620480,
        "storage_class": "L"
      },
      "DCASE Task 3 synthetic dataset 2024.zip.002": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:3199250c0c7fa718888f74a7d3325400",
        "ext": "002",
        "id": "4261079f-1d6b-450c-8085-b83861530c33",
        "key": "DCASE Task 3 synthetic dataset 2024.zip.002",
        "links": {
          "content": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.002/content",
          "self": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.002"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "size": 4697620480,
        "storage_class": "L"
      },
      "DCASE Task 3 synthetic dataset 2024.zip.003": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:203df900f0275d8c34f04fa508b60a85",
        "ext": "003",
        "id": "6702cba5-6833-4fc9-bc8d-f176a6c7fa52",
        "key": "DCASE Task 3 synthetic dataset 2024.zip.003",
        "links": {
          "content": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.003/content",
          "self": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.003"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "size": 4697620480,
        "storage_class": "L"
      },
      "DCASE Task 3 synthetic dataset 2024.zip.004": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:12306d0d398327847734896eae3986c1",
        "ext": "004",
        "id": "25aa976b-80d6-4e2a-acc5-bb67808e11ef",
        "key": "DCASE Task 3 synthetic dataset 2024.zip.004",
        "links": {
          "content": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.004/content",
          "self": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.004"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "size": 4697620480,
        "storage_class": "L"
      },
      "DCASE Task 3 synthetic dataset 2024.zip.005": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:d1e53c4c21dab44a1a6a6c150bc90e3f",
        "ext": "005",
        "id": "35a8a6e2-7a0f-42be-b354-7ea4f18b7850",
        "key": "DCASE Task 3 synthetic dataset 2024.zip.005",
        "links": {
          "content": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.005/content",
          "self": "https://zenodo.org/api/records/10932241/files/DCASE%20Task%203%20synthetic%20dataset%202024.zip.005"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "size": 140056763,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 18930538683
  },
  "id": "10932241",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/10932241/access",
    "access_grants": "https://zenodo.org/api/records/10932241/access/grants",
    "access_links": "https://zenodo.org/api/records/10932241/access/links",
    "access_request": "https://zenodo.org/api/records/10932241/access/request",
    "access_users": "https://zenodo.org/api/records/10932241/access/users",
    "archive": "https://zenodo.org/api/records/10932241/files-archive",
    "archive_media": "https://zenodo.org/api/records/10932241/media-files-archive",
    "communities": "https://zenodo.org/api/records/10932241/communities",
    "communities-suggestions": "https://zenodo.org/api/records/10932241/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.10932241",
    "draft": "https://zenodo.org/api/records/10932241/draft",
    "file_modification": "https://zenodo.org/api/records/10932241/file-modification",
    "files": "https://zenodo.org/api/records/10932241/files",
    "latest": "https://zenodo.org/api/records/10932241/versions/latest",
    "latest_html": "https://zenodo.org/records/10932241/latest",
    "media_files": "https://zenodo.org/api/records/10932241/media-files",
    "parent": "https://zenodo.org/api/records/10932240",
    "parent_doi": "https://doi.org/10.5281/zenodo.10932240",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.10932240",
    "parent_html": "https://zenodo.org/records/10932240",
    "preview_html": "https://zenodo.org/records/10932241?preview=1",
    "quota_increase": "https://zenodo.org/api/records/10932241/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/10932241/request-deletion",
    "requests": "https://zenodo.org/api/records/10932241/requests",
    "reserve_doi": "https://zenodo.org/api/records/10932241/draft/pids/doi",
    "self": "https://zenodo.org/api/records/10932241",
    "self_doi": "https://doi.org/10.5281/zenodo.10932241",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.10932241",
    "self_html": "https://zenodo.org/records/10932241",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:10932241/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:10932241/sequence/default",
    "versions": "https://zenodo.org/api/records/10932241/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "creators": [
      {
        "person_or_org": {
          "family_name": "Krause",
          "given_name": "Daniel Aleksander",
          "identifiers": [
            {
              "identifier": "0000-0003-2704-636X",
              "scheme": "orcid"
            }
          ],
          "name": "Krause, Daniel Aleksander",
          "type": "personal"
        }
      },
      {
        "person_or_org": {
          "family_name": "Politis",
          "given_name": "Archontis",
          "identifiers": [
            {
              "identifier": "0000-0002-0595-2356",
              "scheme": "orcid"
            }
          ],
          "name": "Politis, Archontis",
          "type": "personal"
        }
      }
    ],
    "description": "<p><strong>DESCRIPTION:</strong><br><br>This audio dataset serves serves as supplementary material for the&nbsp;<a href=\"https://dcase.community/challenge2024/task-audio-and-audiovisual-sound-event-localization-and-detection-with-source-distance-estimation\">DCASE2024 Challenge Task 3: Audio and Audiovisual Sound Event Localization and Detection with Distance Estimation</a>. The dataset consists of synthetic spatial audio mixtures of sound events spatialized for two different spatial formats using real measured room impulse responses (RIRs) measured in various spaces of Tampere University (TAU). The mixtures are generated using the same process as the one used to generate the recordings of the <a href=\"../record/5476980\">TAU-NIGENS Spatial Sound Scenes 2021</a>&nbsp;dataset for the&nbsp;<a href=\"https://dcase.community/challenge2021/task-sound-event-localization-and-detection-results\">DCASE2021 Challenge Task 3</a>.&nbsp;</p>\n<p>The SELD task setup in DCASE2024 is based on spatial recordings of real scenes, captured in the <a href=\"../records/7880637\">STARS23</a> dataset. Since the task setup allows use of external data, these synthetic mixtures serve as additional training material for the&nbsp;<a href=\"https://github.com/partha2409/DCASE2024_seld_baseline\">baseline model</a>. For more details on the task setup, please refer to the&nbsp;<a href=\"https://dcase.community/challenge2024/task-audio-and-audiovisual-sound-event-localization-and-detection-with-source-distance-estimation\">task description</a>.</p>\n<p>Note that the generator code and the collection of room responses used to spatialize sound samples will be also be made available soon. 
For more details on the recording of RIRs, spatialization, and generation, see:</p>\n<ul>\n<li>Archontis Politis, Sharath Adavanne, Daniel Krause, Antoine Deleforge, Prerak Srivastava, Tuomas Virtanen (2021).&nbsp;A Dataset of Dynamic Reverberant Sound Scenes with Directional Interferers for Sound Event Localization and Detection.&nbsp;In&nbsp;<em>Proceedings of the Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)</em>, Barcelona, Spain.</li>\n</ul>\n<p>available&nbsp;<a href=\"https://dcase.community/documents/workshop2021/proceedings/DCASE2021Workshop_Politis_43.pdf\">here</a>.</p>\n<p><strong>SPECIFICATIONS:</strong></p>\n<ul>\n<li><strong>13 target sound classes</strong> (see task description for details)</li>\n<li>The sound event samples are sourced from the&nbsp;<strong><a href=\"../record/4060432\">FSD50K</a></strong>&nbsp;dataset, based on affinity of the labels in that dataset to the target classes. The selection was based on distinguishing which labels in FSD50K corresponded to the target ones, then selecting samples that were tagged with only those labels, and additionally that they had annotator rating of Present and Predominant (see FSD50K for more details). 
The list of the selected files is included here.</li>\n<li><strong>1200</strong> 1-minute long spatial recordings</li>\n<li>Sampling rate of<strong> 24kHz</strong></li>\n<li>Two 4-channel recording formats, first-order Ambisonics (<strong>FOA</strong>) and tetrahedral microphone array (<strong>MIC</strong>)</li>\n<li>Spatial events spatialized in <strong>9 unique rooms</strong>, using measured RIRs for the two formats</li>\n<li>Maximum <strong>polyphony of 3</strong> (with possible same-class events overlapping)</li>\n<li>Even though the whole set is used for training of the baseline without distinction between the mixtures, we have included a <strong>separation into a training and testing split</strong>, in case one needs to&nbsp;test&nbsp;the performance purely on those&nbsp;synthetic conditions (for example for comparisons with training on mixed synthetic-real data, fine-tuning on real data, or training on real data only).</li>\n<li>The training split is indicated as <strong>fold1</strong>&nbsp;in the dataset, contains 900 recordings spatialized on 6 rooms (150 recordings/room) and it is based on samples from the development set of FSD50K.</li>\n<li>The testing split is indicated as <strong>fold2</strong>&nbsp;in the dataset, contains 300 recordings spatialized on 3 rooms (100 recordings/room) and it is based on samples from the evaluation set of FSD50K.</li>\n<li>Common metadata files for both formats are provided. For the file naming and the metadata format, refer to the task setup.</li>\n</ul>\n<p>&nbsp;</p>\n<p><strong>DOWNLOAD INSTRUCTIONS:</strong></p>\n<p>Download the zip files and use your preferred compression tool to unzip these split zip files. 
To extract a split zip archive (named as zip, z01, z02, ...), you could use, for example, the following syntax in Linux or OSX terminal:</p>\n<ol>\n<li>Combine the split archive to a single archive:\n<pre>zip -s 0 split.zip --out single.zip</pre>\n</li>\n<li>Extract the single archive using unzip:\n<pre>unzip single.zip</pre>\n</li>\n</ol>",
    "publication_date": "2024-04-05",
    "publisher": "Zenodo",
    "references": [
      {
        "reference": "Archontis Politis, Sharath Adavanne, Daniel Krause, Antoine Deleforge, Prerak Srivastava, Tuomas Virtanen (2021). A Dataset of Dynamic Reverberant Sound Scenes with Directional Interferers for Sound Event Localization and Detection. In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2021), Barcelona, Spain."
      }
    ],
    "related_identifiers": [
      {
        "identifier": "https://doi.org/10.5281/zenodo.6387880",
        "relation_type": {
          "id": "requires",
          "title": {
            "de": "Setzt voraus",
            "en": "Requires"
          }
        },
        "resource_type": {
          "id": "dataset",
          "title": {
            "de": "Datensatz",
            "en": "Dataset"
          }
        },
        "scheme": "other"
      },
      {
        "identifier": "https://zenodo.org/records/6406873",
        "relation_type": {
          "id": "isnewversionof",
          "title": {
            "de": "Ist eine neue Version von",
            "en": "Is new version of"
          }
        },
        "resource_type": {
          "id": "dataset",
          "title": {
            "de": "Datensatz",
            "en": "Dataset"
          }
        },
        "scheme": "other"
      }
    ],
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "sound event detection"
      },
      {
        "subject": "sound source localization"
      },
      {
        "subject": "spatial audio"
      },
      {
        "subject": "Ambisonics"
      },
      {
        "subject": "microphone array"
      },
      {
        "subject": "machine listening"
      },
      {
        "subject": "acoustic scene analysis"
      },
      {
        "subject": "sound distance estimation"
      }
    ],
    "title": "[DCASE2024 Task 3] Synthetic SELD mixtures for baseline training"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "239209"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {},
    "id": "10932240",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.10932240",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.10932241",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:10932241",
      "provider": "oai"
    }
  },
  "revision_id": 4,
  "stats": {
    "all_versions": {
      "data_volume": 25701623412332.0,
      "downloads": 6096,
      "unique_downloads": 2660,
      "unique_views": 1578,
      "views": 1675
    },
    "this_version": {
      "data_volume": 25701623412332.0,
      "downloads": 6096,
      "unique_downloads": 2660,
      "unique_views": 1578,
      "views": 1675
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2024-04-06T14:47:44.721214+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}