Dataset Open Access

Top Quark Tagging Reference Dataset

Kasieczka, Gregor; Plehn, Tilman; Thompson, Jennifer; Russel, Michael


JSON Export

{
  "files": [
    {
      "links": {
        "self": "https://zenodo.org/api/files/5cb4ec3f-5a6b-4674-ad4a-978e46d02736/test.h5"
      }, 
      "checksum": "md5:13163479dee30a5fe546e4536cc3d04d", 
      "bucket": "5cb4ec3f-5a6b-4674-ad4a-978e46d02736", 
      "key": "test.h5", 
      "type": "h5", 
      "size": 347849376
    }, 
    {
      "links": {
        "self": "https://zenodo.org/api/files/5cb4ec3f-5a6b-4674-ad4a-978e46d02736/train.h5"
      }, 
      "checksum": "md5:45663819f47c13724f67eb0fd80bfa5c", 
      "bucket": "5cb4ec3f-5a6b-4674-ad4a-978e46d02736", 
      "key": "train.h5", 
      "type": "h5", 
      "size": 1038496555
    }, 
    {
      "links": {
        "self": "https://zenodo.org/api/files/5cb4ec3f-5a6b-4674-ad4a-978e46d02736/val.h5"
      }, 
      "checksum": "md5:dca4b7248027618f041f9baa86d360fc", 
      "bucket": "5cb4ec3f-5a6b-4674-ad4a-978e46d02736", 
      "key": "val.h5", 
      "type": "h5", 
      "size": 347378076
    }
  ], 
  "owners": [
    63414
  ], 
  "doi": "10.5281/zenodo.2603256", 
  "stats": {
    "version_unique_downloads": 1526.0, 
    "unique_views": 2365.0, 
    "views": 2652.0, 
    "version_views": 2652.0, 
    "unique_downloads": 1526.0, 
    "version_unique_views": 2365.0, 
    "volume": 1766894573145.0, 
    "version_downloads": 3145.0, 
    "downloads": 3145.0, 
    "version_volume": 1766894573145.0
  }, 
  "links": {
    "doi": "https://doi.org/10.5281/zenodo.2603256", 
    "conceptdoi": "https://doi.org/10.5281/zenodo.2603255", 
    "bucket": "https://zenodo.org/api/files/5cb4ec3f-5a6b-4674-ad4a-978e46d02736", 
    "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.2603255.svg", 
    "html": "https://zenodo.org/record/2603256", 
    "latest_html": "https://zenodo.org/record/2603256", 
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.2603256.svg", 
    "latest": "https://zenodo.org/api/records/2603256"
  }, 
  "conceptdoi": "10.5281/zenodo.2603255", 
  "created": "2019-03-22T20:44:04.822901+00:00", 
  "updated": "2020-01-24T19:24:56.491003+00:00", 
  "conceptrecid": "2603255", 
  "revision": 4, 
  "id": 2603256, 
  "metadata": {
    "access_right_category": "success", 
    "doi": "10.5281/zenodo.2603256", 
    "description": "<p>A set of MC simulated training/testing events for the evaluation of top quark tagging architectures.</p>\n\n<p>In total 1.2M training events, 400k validation events and 400k test events. Use &ldquo;train&rdquo; for training, &ldquo;val&rdquo; for validation during the training and &ldquo;test&rdquo; for final testing and reporting results.</p>\n\n<p><strong>Description</strong></p>\n\n<ul>\n\t<li>\n\t<p>14 TeV, hadronic tops for signal, QCD dijets background, Delphes ATLAS detector card with Pythia8</p>\n\t</li>\n\t<li>\n\t<p>No MPI/pile-up included</p>\n\t</li>\n\t<li>\n\t<p>Clustering of&nbsp; particle-flow entries (produced by Delphes E-flow) into anti-kT 0.8 jets in the pT range [550,650] GeV</p>\n\t</li>\n\t<li>\n\t<p>All top jets are matched to a parton-level top within \u2206R = 0.8, and to all top decay partons within 0.8</p>\n\t</li>\n\t<li>\n\t<p>Jets are required to have |eta| &lt; 2</p>\n\t</li>\n\t<li>\n\t<p>The leading 200 jet constituent four-momenta are stored, with zero-padding for jets with fewer than 200</p>\n\t</li>\n\t<li>\n\t<p>Constituents are sorted by pT, with the highest pT one first</p>\n\t</li>\n\t<li>\n\t<p>The truth top four-momentum is stored as truth_px etc.</p>\n\t</li>\n\t<li>\n\t<p>A flag (1 for top, 0 for QCD) is kept for each jet. It is called is_signal_new</p>\n\t</li>\n\t<li>\n\t<p>The variable &quot;ttv&quot; (= test/train/validation) is kept for each jet. It indicates to which dataset the jet belongs. It is redundant as the different sets are already distributed as different files.</p>\n\t</li>\n</ul>", 
    "license": {
      "id": "CC-BY-4.0"
    }, 
    "title": "Top Quark Tagging Reference Dataset", 
    "relations": {
      "version": [
        {
          "count": 1, 
          "index": 0, 
          "parent": {
            "pid_type": "recid", 
            "pid_value": "2603255"
          }, 
          "is_last": true, 
          "last_child": {
            "pid_type": "recid", 
            "pid_value": "2603256"
          }
        }
      ]
    }, 
    "version": "v0 (2018_03_27)", 
    "references": [
      "Butter, Anja; Kasieczka, Gregor; Plehn, Tilman and Russell, Michael (2017). Based on data from 10.21468/SciPostPhys.5.3.028 (1707.08966)", 
      "Kasieczka, Gregor et al (2019). Dataset used for arXiv:1902.09914 (The Machine Learning Landscape of Top Taggers)"
    ], 
    "publication_date": "2019-03-22", 
    "creators": [
      {
        "affiliation": "Institut f\u00fcr Experimentalphysik, Universit\u00e4t Hamburg, Germany", 
        "name": "Kasieczka, Gregor"
      }, 
      {
        "affiliation": "Institut f\u00fcr Theoretische Physik, Universit\u00e4t Heidelberg, Germany", 
        "name": "Plehn, Tilman"
      }, 
      {
        "affiliation": "Institut f\u00fcr Theoretische Physik, Universit\u00e4t Heidelberg, Germany", 
        "name": "Thompson, Jennifer"
      }, 
      {
        "affiliation": "Institut f\u00fcr Theoretische Physik, Universit\u00e4t Heidelberg, Germany", 
        "name": "Russel, Michael"
      }
    ], 
    "access_right": "open", 
    "resource_type": {
      "type": "dataset", 
      "title": "Dataset"
    }, 
    "related_identifiers": [
      {
        "scheme": "doi", 
        "identifier": "10.5281/zenodo.2603255", 
        "relation": "isVersionOf"
      }
    ]
  }
}
2,652 views
3,145 downloads
| | All versions | This version |
|---|---|---|
| Views | 2,652 | 2,652 |
| Downloads | 3,145 | 3,145 |
| Data volume | 1.8 TB | 1.8 TB |
| Unique views | 2,365 | 2,365 |
| Unique downloads | 1,526 | 1,526 |

Share

Cite as