{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2026-04-01T09:57:57.764624+00:00",
  "custom_fields": {},
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 3,
    "enabled": true,
    "entries": {
      "32ch_audio_dev.zip": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:6de3a12ddec8d344b4b599b413e4061e",
        "ext": "zip",
        "id": "82c01ae8-0694-428e-b526-ef5094de1098",
        "key": "32ch_audio_dev.zip",
        "links": {
          "content": "https://zenodo.org/api/records/18171005/files/32ch_audio_dev.zip/content",
          "self": "https://zenodo.org/api/records/18171005/files/32ch_audio_dev.zip"
        },
        "metadata": {},
        "mimetype": "application/zip",
        "size": 25040345245,
        "storage_class": "L"
      },
      "LICENSE": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:78b83d885f9055bf6e383bba6d7eb596",
        "ext": "bin",
        "id": "6c0fe44f-92a0-456b-90b5-5f5937fb6747",
        "key": "LICENSE",
        "links": {
          "content": "https://zenodo.org/api/records/18171005/files/LICENSE/content",
          "self": "https://zenodo.org/api/records/18171005/files/LICENSE"
        },
        "metadata": {},
        "mimetype": "application/octet-stream",
        "size": 1195,
        "storage_class": "L"
      },
      "labels_dev.zip": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:2f6f453b62c905a33518adfa0cc809cf",
        "ext": "zip",
        "id": "ec7c53d5-5c4f-4848-bfaf-ee1dafc0740e",
        "key": "labels_dev.zip",
        "links": {
          "content": "https://zenodo.org/api/records/18171005/files/labels_dev.zip/content",
          "self": "https://zenodo.org/api/records/18171005/files/labels_dev.zip"
        },
        "metadata": {},
        "mimetype": "application/zip",
        "size": 3643241017,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 28683587457
  },
  "id": "18171005",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/18171005/access",
    "access_grants": "https://zenodo.org/api/records/18171005/access/grants",
    "access_links": "https://zenodo.org/api/records/18171005/access/links",
    "access_request": "https://zenodo.org/api/records/18171005/access/request",
    "access_users": "https://zenodo.org/api/records/18171005/access/users",
    "archive": "https://zenodo.org/api/records/18171005/files-archive",
    "archive_media": "https://zenodo.org/api/records/18171005/media-files-archive",
    "communities": "https://zenodo.org/api/records/18171005/communities",
    "communities-suggestions": "https://zenodo.org/api/records/18171005/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.18171005",
    "draft": "https://zenodo.org/api/records/18171005/draft",
    "file_modification": "https://zenodo.org/api/records/18171005/file-modification",
    "files": "https://zenodo.org/api/records/18171005/files",
    "latest": "https://zenodo.org/api/records/18171005/versions/latest",
    "latest_html": "https://zenodo.org/records/18171005/latest",
    "media_files": "https://zenodo.org/api/records/18171005/media-files",
    "parent": "https://zenodo.org/api/records/18171004",
    "parent_doi": "https://doi.org/10.5281/zenodo.18171004",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.18171004",
    "parent_html": "https://zenodo.org/records/18171004",
    "preview_html": "https://zenodo.org/records/18171005?preview=1",
    "quota_increase": "https://zenodo.org/api/records/18171005/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/18171005/request-deletion",
    "requests": "https://zenodo.org/api/records/18171005/requests",
    "reserve_doi": "https://zenodo.org/api/records/18171005/draft/pids/doi",
    "self": "https://zenodo.org/api/records/18171005",
    "self_doi": "https://doi.org/10.5281/zenodo.18171005",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.18171005",
    "self_html": "https://zenodo.org/records/18171005",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:18171005/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:18171005/sequence/default",
    "versions": "https://zenodo.org/api/records/18171005/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "creators": [
      {
        "person_or_org": {
          "family_name": "Roman",
          "given_name": "Iran R.",
          "identifiers": [
            {
              "identifier": "0000-0003-3781-7244",
              "scheme": "orcid"
            }
          ],
          "name": "Roman, Iran R.",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "id": "033003e23",
            "identifiers": [
              {
                "identifier": "033003e23",
                "scheme": "ror"
              },
              {
                "identifier": "grid.502801.e",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2314 6254",
                "scheme": "isni"
              }
            ],
            "name": "Tampere University"
          }
        ],
        "person_or_org": {
          "family_name": "Politis",
          "given_name": "Archontis",
          "identifiers": [
            {
              "identifier": "0000-0002-0595-2356",
              "scheme": "orcid"
            }
          ],
          "name": "Politis, Archontis",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "id": "04wzv3n59",
            "identifiers": [
              {
                "identifier": "04wzv3n59",
                "scheme": "ror"
              },
              {
                "identifier": "grid.410792.9",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0004 1763 5918",
                "scheme": "isni"
              }
            ],
            "name": "Sony (Japan)"
          }
        ],
        "person_or_org": {
          "family_name": "Shimada",
          "given_name": "Kazuki",
          "identifiers": [
            {
              "identifier": "0000-0001-5389-2346",
              "scheme": "orcid"
            }
          ],
          "name": "Shimada, Kazuki",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "person_or_org": {
          "family_name": "Cheston",
          "given_name": "Huw",
          "identifiers": [
            {
              "identifier": "0000-0001-7891-5532",
              "scheme": "orcid"
            }
          ],
          "name": "Cheston, Huw",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "id": "033003e23",
            "identifiers": [
              {
                "identifier": "033003e23",
                "scheme": "ror"
              },
              {
                "identifier": "grid.502801.e",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2314 6254",
                "scheme": "isni"
              }
            ],
            "name": "Tampere University"
          }
        ],
        "person_or_org": {
          "family_name": "Sudarsanam",
          "given_name": "Parthasaarathy",
          "identifiers": [
            {
              "identifier": "0009-0009-3751-6469",
              "scheme": "orcid"
            }
          ],
          "name": "Sudarsanam, Parthasaarathy",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "person_or_org": {
          "family_name": "D\u00cdAZ-GUERRA APARICIO",
          "given_name": "DAVID",
          "identifiers": [
            {
              "identifier": "0000-0002-1041-0498",
              "scheme": "orcid"
            }
          ],
          "name": "D\u00cdAZ-GUERRA APARICIO, DAVID",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "id": "033003e23",
            "identifiers": [
              {
                "identifier": "033003e23",
                "scheme": "ror"
              },
              {
                "identifier": "grid.502801.e",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2314 6254",
                "scheme": "isni"
              }
            ],
            "name": "Tampere University"
          }
        ],
        "person_or_org": {
          "family_name": "Sun",
          "given_name": "Yifu",
          "name": "Sun, Yifu",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "id": "04wzv3n59",
            "identifiers": [
              {
                "identifier": "04wzv3n59",
                "scheme": "ror"
              },
              {
                "identifier": "grid.410792.9",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0004 1763 5918",
                "scheme": "isni"
              }
            ],
            "name": "Sony (Japan)"
          }
        ],
        "person_or_org": {
          "family_name": "Shibuya",
          "given_name": "Takashi",
          "identifiers": [
            {
              "identifier": "0000-0002-4277-0164",
              "scheme": "orcid"
            }
          ],
          "name": "Shibuya, Takashi",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "name": "Sony Group Corporation"
          }
        ],
        "person_or_org": {
          "family_name": "Takahashi",
          "given_name": "Shusuke",
          "name": "Takahashi, Shusuke",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      },
      {
        "affiliations": [
          {
            "id": "04wzv3n59",
            "identifiers": [
              {
                "identifier": "04wzv3n59",
                "scheme": "ror"
              },
              {
                "identifier": "grid.410792.9",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0004 1763 5918",
                "scheme": "isni"
              }
            ],
            "name": "Sony (Japan)"
          },
          {
            "id": "0112mx960",
            "identifiers": [
              {
                "identifier": "0112mx960",
                "scheme": "ror"
              },
              {
                "identifier": "grid.32197.3e",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2179 2105",
                "scheme": "isni"
              }
            ],
            "name": "Tokyo Institute of Technology"
          }
        ],
        "person_or_org": {
          "family_name": "MITSUFUJI",
          "given_name": "YUKI",
          "identifiers": [
            {
              "identifier": "0000-0002-6806-6140",
              "scheme": "orcid"
            }
          ],
          "name": "MITSUFUJI, YUKI",
          "type": "personal"
        },
        "role": {
          "id": "researcher",
          "title": {
            "de": "WissenschaftlerIn",
            "en": "Researcher"
          }
        }
      }
    ],
    "description": "<div dir=\"ltr\">\n<h3>Description</h3>\n<p><strong>STAIRS26</strong> (Sony-Tau Acoustic Images of Real-World Scapes) is a spatial audio dataset designed to benchmark <strong>Semantic Acoustic Imaging: </strong>the task of visualizing sound energy and identifying semantic sounding objects in space. This release serves as the development set for Task 3 of the DCASE 2026 Challenge.</p>\n<p>STAIRS26 fundamentally extends the legacy <strong>STARSS23</strong> dataset, shifting the paradigm from sparse point-based localization to dense acoustic field estimation. It upgrades the original real-world recordings (captured in Finland and Japan) with two critical features:</p>\n<ul>\n<li>\n<p><strong>32-Channel Raw Audio:</strong> Full microphone array signals enabling high-resolution beamforming and acoustic super-resolution.</p>\n</li>\n<li>\n<p><strong>Acoustic Radiance Maps:</strong> High-definition energy acoustic images that serve as ground-truth labels for training models to visually reconstruct acoustic fields.</p>\n</li>\n</ul>\n<p><em>(Note: For details on the physical recording setup, hardware specifications, and scene scripting, please refer to the original <a href=\"https://zenodo.org/records/7880637\"><strong>STARSS23 Dataset</strong></a>).</em></p>\n<h3>Aim</h3>\n<p>The primary goal of STAIRS26 is to train and evaluate models on acoustic super-resolution: reconstructing high-fidelity, class-aware energy maps from standard 4-channel inputs. 
By providing full 32-channel recordings and ground-truth images, the dataset enables researchers to:</p>\n<ul>\n<li>\n<p>Develop deep learning architectures that output dense polygon masks encoding event class, spatial location, and acoustic energy intensity.</p>\n</li>\n<li>\n<p>Evaluate high-resolution Direction-of-Arrival (DOA) estimation and multi-source tracking algorithms.</p>\n</li>\n<li>\n<p>Bridge audio signal processing with computer-vision-based semantic segmentation.</p>\n</li>\n</ul>\n<h3>Specifications</h3>\n<p><strong>Volume and Data Split</strong></p>\n<ul>\n<li>\n<p><strong>Size:</strong> ~7.5 hours of recordings across 168 development clips.</p>\n</li>\n<li>\n<p><strong>Scope:</strong> This release contains only the development data (audio and labels) used for training and validation.</p>\n</li>\n<li>\n<p><strong>Compatibility:</strong> File naming and splits are identical to <strong>STARSS23</strong>. To utilize the full multimodal suite, users should pair this dataset with the STARSS23 audio and video files.</p>\n</li>\n</ul>\n<p><strong>Audio Format</strong></p>\n<ul>\n<li>\n<p><strong>Sampling rate:</strong> 24 kHz</p>\n</li>\n<li>\n<p><strong>Bit depth:</strong> 16-bit</p>\n</li>\n<li>\n<p><strong>Format:</strong> 32-channel (raw Eigenmike recordings)</p>\n</li>\n</ul>\n<p><strong>Acoustic Maps (Labels)</strong></p>\n<p>High-definition acoustic images, generated via <em>proximal gradient descent</em> from the 32-channel recordings, are provided as individual <code>.json</code> files (one per recording).</p>\n<ul>\n<li>\n<p><strong>Structure:</strong> The <code>annotations</code> key contains a list of dictionaries. Each dictionary represents a single active sound object at a specific frame (10 FPS temporal resolution).</p>\n</li>\n<li>\n<p><strong>Multi-source Frames:</strong> If a frame contains multiple sources, multiple dictionaries are present. 
Silent frames have no annotations.</p>\n</li>\n<li>\n<p><strong>Metadata:</strong> Inherits frame indices and the 13 source classes from the DCASE2023/STARSS23 metadata <code>.csv</code> files.</p>\n</li>\n<li>\n<p><strong>Segmentation:</strong> Polygon masks are stored as an array of shape <code>(n_pixels, 3)</code>. Each row represents <code>[x, y, amplitude]</code>:</p>\n<ul>\n<li>\n<p><code>x</code> and <code>y</code>: Integer spatial coordinates corresponding to a 1-pixel-per-degree angular grid (<code>x &isin; [0, 359]</code>, <code>y &isin; [0, 179]</code>).</p>\n</li>\n<li>\n<p><code>amplitude</code>: Standardized acoustic energy intensity (<code>[0.0, 1.0]</code>), where <code>1.0</code> represents the loudest pixel within the entire training dataset.</p>\n</li>\n</ul>\n</li>\n</ul>\n<h3>File Downloads</h3>\n<ul>\n<li>\n<p><code>32ch_audio_dev.zip</code>: Development audio in the raw 32-channel Eigenmike format.</p>\n</li>\n<li>\n<p><code>labels_dev.zip</code>: Generated acoustic-image labels in <code>.json</code> format.</p>\n</li>\n</ul>\n<p><em>(Download and extract using standard compression utilities).</em></p>\n<h3>Citation</h3>\n<p>If you use this dataset, please cite the following:</p>\n<blockquote>\n<p>Roman, I. R., Politis, A., Shimada, K., Cheston, H., Sudarsanam, P., D&iacute;az-Guerra, D., Sun, Y., Shibuya, T., Takahashi, S., &amp; Mitsufuji, Y. (2026). <em>STAIRS26: Sony-Tau Acoustic Images of Real-World Scapes</em> [Data set]. Zenodo. <a href=\"https://doi.org/10.5281/zenodo.18171005\" target=\"_blank\" rel=\"noopener\">https://doi.org/10.5281/zenodo.18171005</a></p>\n</blockquote>\n<blockquote>\n<p>Shimada, K., et al. (2023). <em>STARSS23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events</em>. Advances in Neural Information Processing Systems 36 (NeurIPS 2023).</p>\n</blockquote>\n</div>",
    "publication_date": "2026-04-01",
    "publisher": "Zenodo",
    "references": [
      {
        "reference": "Archontis Politis, Sharath Adavanne, Daniel Krause, Antoine Deleforge, Prerak Srivastava, Tuomas Virtanen (2021). A Dataset of Dynamic Reverberant Sound Scenes with Directional Interferers for Sound Event Localization and Detection. In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021), Barcelona, Spain."
      },
      {
        "reference": "Archontis Politis, Sharath Adavanne, and Tuomas Virtanen (2020). A Dataset of Reverberant Spatial Sound Scenes with Moving Sources for Sound Event Localization and Detection. In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2020), Tokyo, Japan."
      },
      {
        "reference": "Sharath Adavanne, Archontis Politis, and Tuomas Virtanen (2019). A Multi-room reverberant dataset for sound event localization and detection. Proceedings of the Detection and Classification of Acoustic Scenes and Events 2019 Workshop (DCASE2019), New York, NY, USA."
      },
      {
        "reference": "Archontis Politis, Kazuki Shimada, Parthasaarathy Sudarsanam, Sharath Adavanne, Daniel Krause, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, Yuki Mitsufuji, Tuomas Virtanen (2022). STARSS22: A dataset of spatial recordings of real scenes with spatiotemporal annotations of sound events. In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2022 Workshop (DCASE2022), Nancy, France."
      },
      {
        "reference": "Kazuki Shimada, Archontis Politis, Parthasaarathy Sudarsanam, Daniel Krause, Kengo Uchida, Sharath Adavanne, Aapo Hakala, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, Tuomas Virtanen, Yuki Mitsufuji (2023). STARSS23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events.\u00a0In\u00a0Advances in Neural Information Processing Systems 36 (NeurIPS 2023), New Orleans, LA."
      }
    ],
    "related_identifiers": [
      {
        "identifier": "10.5281/zenodo.7709052",
        "relation_type": {
          "id": "isnewversionof",
          "title": {
            "de": "Ist eine neue Version von",
            "en": "Is new version of"
          }
        },
        "resource_type": {
          "id": "dataset",
          "title": {
            "de": "Datensatz",
            "en": "Dataset"
          }
        },
        "scheme": "doi"
      },
      {
        "identifier": "10.5281/zenodo.6387880",
        "relation_type": {
          "id": "isnewversionof",
          "title": {
            "de": "Ist eine neue Version von",
            "en": "Is new version of"
          }
        },
        "resource_type": {
          "id": "dataset",
          "title": {
            "de": "Datensatz",
            "en": "Dataset"
          }
        },
        "scheme": "doi"
      }
    ],
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "A short and simple permissive license with conditions only requiring preservation of copyright and license notices. Licensed works, modifications, and larger works may be distributed under different terms and without source code."
        },
        "id": "mit",
        "props": {
          "scheme": "spdx",
          "url": "https://opensource.org/licenses/MIT"
        },
        "title": {
          "en": "MIT License"
        }
      }
    ],
    "subjects": [
      {
        "subject": "sound event detection"
      },
      {
        "subject": "sound event localization"
      },
      {
        "subject": "spatial audio"
      },
      {
        "subject": "acoustic scene analysis"
      },
      {
        "subject": "machine listening"
      },
      {
        "subject": "Ambisonics"
      },
      {
        "subject": "acoustic imaging"
      }
    ],
    "title": "STAIRS26: Sony-Tau Acoustic Images of Real-World Scapes 2026"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "523206"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {
      "default": "2d7be671-0ce3-4c1e-9ed3-cfc92571ad1a",
      "entries": [
        {
          "access": {
            "member_policy": "open",
            "members_visibility": "public",
            "record_submission_policy": "open",
            "review_policy": "open",
            "visibility": "public"
          },
          "children": {
            "allow": false
          },
          "created": "2019-11-25T09:04:41.826171+00:00",
          "custom_fields": {},
          "deletion_status": {
            "is_deleted": false,
            "status": "P"
          },
          "id": "2d7be671-0ce3-4c1e-9ed3-cfc92571ad1a",
          "links": {},
          "metadata": {
            "curation_policy": "",
            "description": "Datasets related to the DCASE challenges.",
            "page": "<p>The goal of this community is to gather datasets related to the DCASE challenge series. More information available on <a href=\"http://dcase.community/\">DCASE website</a>.</p>\r\n",
            "title": "Detection and Classification of Acoustic Scenes and Events"
          },
          "revision_id": 0,
          "slug": "dcase",
          "updated": "2019-11-25T09:06:23.594750+00:00"
        }
      ],
      "ids": [
        "2d7be671-0ce3-4c1e-9ed3-cfc92571ad1a"
      ]
    },
    "id": "18171004",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.18171004",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.18171005",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:18171005",
      "provider": "oai"
    }
  },
  "revision_id": 4,
  "stats": {
    "all_versions": {
      "data_volume": 3638066944314.0,
      "downloads": 321,
      "unique_downloads": 207,
      "unique_views": 165,
      "views": 195
    },
    "this_version": {
      "data_volume": 3638066944314.0,
      "downloads": 321,
      "unique_downloads": 207,
      "unique_views": 165,
      "views": 195
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2026-04-01T09:57:58.073449+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}