
ITOP Dataset

Haque, Albert; Peng, Boya; Luo, Zelun; Alahi, Alexandre; Yeung, Serena; Fei-Fei, Li


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:cnt="http://www.w3.org/2011/content#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3932973">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3932973</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3932973"/>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0001-6769-6370">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Haque, Albert</foaf:name>
        <foaf:givenName>Albert</foaf:givenName>
        <foaf:familyName>Haque</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Peng, Boya</foaf:name>
        <foaf:givenName>Boya</foaf:givenName>
        <foaf:familyName>Peng</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Luo, Zelun</foaf:name>
        <foaf:givenName>Zelun</foaf:givenName>
        <foaf:familyName>Luo</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Alahi, Alexandre</foaf:name>
        <foaf:givenName>Alexandre</foaf:givenName>
        <foaf:familyName>Alahi</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-0529-0628">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Yeung, Serena</foaf:name>
        <foaf:givenName>Serena</foaf:givenName>
        <foaf:familyName>Yeung</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-7481-0810">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Fei-Fei, Li</foaf:name>
        <foaf:givenName>Li</foaf:givenName>
        <foaf:familyName>Fei-Fei</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>ITOP Dataset</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2016</dct:issued>
    <dcat:keyword>depth sensor</dcat:keyword>
    <dcat:keyword>human pose estimation</dcat:keyword>
    <dcat:keyword>computer vision</dcat:keyword>
    <dcat:keyword>3D vision</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2016-10-08</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/3932973"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3932973</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <dct:relation rdf:resource="http://arxiv.org/abs/1603.07076"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3932972"/>
    <owl:versionInfo>1.0</owl:versionInfo>
    <dct:description>&lt;p&gt;&lt;strong&gt;Summary&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;The ITOP dataset (Invariant Top View) contains 100K depth images from side and top views of a person in a scene. For each image, the locations of 15 human body parts are labeled with 3-dimensional (x,y,z) coordinates relative to the sensor&amp;#39;s position. Read the full paper for more context [&lt;a href="https://arxiv.org/pdf/1603.07076.pdf"&gt;pdf&lt;/a&gt;].&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Getting Started&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;Download and then decompress the h5.gz file:&lt;/p&gt;
&lt;pre&gt;&lt;code class="language-bash"&gt;gunzip ITOP_side_test_depth_map.h5.gz&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;Using Python and &lt;a href="https://www.h5py.org/"&gt;h5py&lt;/a&gt; (&lt;em&gt;pip install h5py&lt;/em&gt; or &lt;em&gt;conda install h5py&lt;/em&gt;), we can load the contents:&lt;/p&gt;
&lt;pre&gt;&lt;code class="language-python"&gt;import h5py
import numpy as np

f = h5py.File('ITOP_side_test_depth_map.h5', 'r')
data, ids = f.get('data'), f.get('id')
data, ids = np.asarray(data), np.asarray(ids)
print(data.shape, ids.shape)  # (10501, 240, 320) (10501,)&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;strong&gt;Note:&lt;/strong&gt; For any of the &lt;em&gt;*_images.h5.gz&lt;/em&gt; files, the underlying file is a tar file, not an h5 file. Rename the file extension from &lt;em&gt;h5.gz&lt;/em&gt; to &lt;em&gt;tar.gz&lt;/em&gt; before opening. The following commands will work:&lt;/p&gt;
&lt;pre&gt;&lt;code class="language-bash"&gt;mv ITOP_side_test_images.h5.gz ITOP_side_test_images.tar.gz
tar xf ITOP_side_test_images.tar.gz&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;strong&gt;Metadata&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;File sizes for images, depth maps, point clouds, and labels refer to the uncompressed size.&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;+-------+--------+---------+---------+----------+------------+--------------+---------+
| View  | Split  | Frames  | People  | Images   | Depth Map  | Point Cloud  | Labels  |
+-------+--------+---------+---------+----------+------------+--------------+---------+
| Side  | Train  | 39,795  | 16      | 1.1 GiB  | 5.7 GiB    | 18 GiB       | 2.9 GiB |
| Side  | Test   | 10,501  | 4       | 276 MiB  | 1.6 GiB    | 4.6 GiB      | 771 MiB |
| Top   | Train  | 39,795  | 16      | 974 MiB  | 5.7 GiB    | 18 GiB       | 2.9 GiB |
| Top   | Test   | 10,501  | 4       | 261 MiB  | 1.6 GiB    | 4.6 GiB      | 771 MiB |
+-------+--------+---------+---------+----------+------------+--------------+---------+&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;strong&gt;Data Schema&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;Each file contains several HDF5 datasets at the root level. Dimensions, attributes, and data types are listed below. The key refers to the (HDF5) dataset name. Let &lt;span class="math-tex"&gt;\(n\)&lt;/span&gt; denote the number of images.&lt;/p&gt;
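&lt;p&gt;For orientation, a quick way to see which datasets a given file exposes (a minimal sketch; any of the h5 files can be substituted):&lt;/p&gt;
&lt;pre&gt;&lt;code class="language-python"&gt;import h5py

# Print every root-level HDF5 dataset with its shape and dtype.
with h5py.File('ITOP_side_test_depth_map.h5', 'r') as f:
    for key in f.keys():
        print(key, f[key].shape, f[key].dtype)&lt;/code&gt;&lt;/pre&gt;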
&lt;p&gt;&lt;strong&gt;Transformation&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;To convert from point clouds to a&amp;nbsp;&lt;span class="math-tex"&gt;\(240 \times 320\)&lt;/span&gt; image, the following transformations were used. Let&amp;nbsp;&lt;span class="math-tex"&gt;\(x_{\textrm{img}}\)&lt;/span&gt; and&amp;nbsp;&lt;span class="math-tex"&gt;\(y_{\textrm{img}}\)&lt;/span&gt; denote the&amp;nbsp;&lt;span class="math-tex"&gt;\((x,y)\)&lt;/span&gt; coordinate in the image plane. Using the raw point cloud&amp;nbsp;&lt;span class="math-tex"&gt;\((x,y,z)\)&lt;/span&gt; real world coordinates, we compute the depth map as follows:&amp;nbsp;&lt;span class="math-tex"&gt;\(x_{\textrm{img}} = \frac{x}{Cz} + 160\)&lt;/span&gt; and&amp;nbsp;&lt;span class="math-tex"&gt;\(y_{\textrm{img}} = -\frac{y}{Cz} + 120\)&lt;/span&gt;, where &lt;span class="math-tex"&gt;\(C \approx 3.50 \times 10^{-3} = 0.0035\)&lt;/span&gt; is the intrinsic camera calibration parameter. This results in the depth map:&amp;nbsp;&lt;span class="math-tex"&gt;\((x_{\textrm{img}}, y_{\textrm{img}}, z)\)&lt;/span&gt;.&lt;/p&gt;
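&lt;p&gt;As an illustration, the projection can be written as a short NumPy routine. This is a minimal sketch based on the formulas above; the rounding to integer pixel indices and the guard against non-positive depth are our assumptions, not part of the official pipeline:&lt;/p&gt;
&lt;pre&gt;&lt;code class="language-python"&gt;import numpy as np

C = 0.0035  # intrinsic calibration constant from the formulas above

def point_cloud_to_depth_map(points):
    # points: (N, 3) array of real-world (x, y, z) coordinates in meters.
    x, y, z = points[:, 0], points[:, 1], points[:, 2]
    ok = z &gt; 0  # assumption: skip empty/invalid points to avoid dividing by zero
    x_img = np.round(x[ok] / (C * z[ok]) + 160).astype(int)
    y_img = np.round(-y[ok] / (C * z[ok]) + 120).astype(int)
    # Scatter the depth values onto the 240 x 320 image plane.
    depth_map = np.zeros((240, 320), dtype=np.float32)
    in_frame = (x_img &gt;= 0) &amp; (x_img &lt; 320) &amp; (y_img &gt;= 0) &amp; (y_img &lt; 240)
    depth_map[y_img[in_frame], x_img[in_frame]] = z[ok][in_frame]
    return depth_map&lt;/code&gt;&lt;/pre&gt;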
&lt;p&gt;&lt;strong&gt;Joint ID (Index) Mapping&lt;/strong&gt;&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;joint_id_to_name = {
  0: 'Head',        8: 'Torso',
  1: 'Neck',        9: 'R Hip',
  2: 'R Shoulder', 10: 'L Hip',
  3: 'L Shoulder', 11: 'R Knee',
  4: 'R Elbow',    12: 'L Knee',
  5: 'L Elbow',    13: 'R Foot',
  6: 'R Hand',     14: 'L Foot',
  7: 'L Hand',
}&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;strong&gt;Depth Maps&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; id
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; uint8&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Frame identifier in the form XX_YYYYY, where XX is the person&amp;#39;s ID number and YYYYY is the frame number.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; data
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,240,320)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; float16&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Depth map (i.e. mesh) corresponding to a single frame. Depth values are in real world meters (m).&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;strong&gt;Point Clouds&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; id
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; uint8&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Frame identifier in the form XX_YYYYY, where XX is the person&amp;#39;s ID number and YYYYY is the frame number.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; data
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,76800,3)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; float16&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Point cloud containing 76,800 points (240x320). Each point is represented by a 3D tuple measured in real world meters (m).&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;strong&gt;Labels&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; id
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; uint8&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Frame identifier in the form XX_YYYYY, where XX is the person&amp;#39;s ID number and YYYYY is the frame number.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; is_valid
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; uint8&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Flag corresponding to the result of the human labeling effort. This is a boolean value (represented by an integer) where a one (1) denotes clean, human-approved data and a zero (0) denotes noisy human body part labels. If is_valid is equal to zero, you should not use any of the provided human joint locations for that frame.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; visible_joints
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,15)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; int16&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Binary mask indicating whether each human joint is visible or occluded. This is denoted by&amp;nbsp;&lt;span class="math-tex"&gt;\(\alpha\)&lt;/span&gt; in the paper. If&amp;nbsp;&lt;span class="math-tex"&gt;\(\alpha_j=1\)&lt;/span&gt; then the&amp;nbsp;&lt;span class="math-tex"&gt;\(j^{th}\)&lt;/span&gt; joint is visible (i.e. not occluded); if&amp;nbsp;&lt;span class="math-tex"&gt;\(\alpha_j = 0\)&lt;/span&gt; then the &lt;span class="math-tex"&gt;\(j^{th}\)&lt;/span&gt; joint is occluded.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; image_coordinates
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,15,2)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; int16&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Two-dimensional&amp;nbsp;&lt;span class="math-tex"&gt;\((x,y)\)&lt;/span&gt; points corresponding to the location of each joint in the depth image or depth map.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; real_world_coordinates
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,15,3)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; float16&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Three-dimensional&amp;nbsp;&lt;span class="math-tex"&gt;\((x,y,z)\)&lt;/span&gt; points corresponding to the location of each joint in real world meters (m).&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Key:&lt;/em&gt; segmentation
&lt;ul&gt;
&lt;li&gt;&lt;em&gt;Dimensions:&lt;/em&gt; &lt;span class="math-tex"&gt;\((n,240,320)\)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Data Type:&lt;/em&gt; int8&lt;/li&gt;
&lt;li&gt;&lt;em&gt;Description:&lt;/em&gt; Pixel-wise assignment of body part labels. The background class (i.e. no body part) is denoted by &amp;minus;1.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
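&lt;p&gt;Putting the label fields together: a minimal sketch that keeps only clean frames and reads one joint position (the labels file name is an assumption following the naming pattern above):&lt;/p&gt;
&lt;pre&gt;&lt;code class="language-python"&gt;import h5py
import numpy as np

# Assumption: the labels file follows the naming pattern of the other files.
labels = h5py.File('ITOP_side_test_labels.h5', 'r')
is_valid = np.asarray(labels['is_valid']).astype(bool)  # (n,)
visible = np.asarray(labels['visible_joints'])          # (n, 15)
coords = np.asarray(labels['real_world_coordinates'])   # (n, 15, 3)

# Discard frames whose annotations were not human-approved.
coords, visible = coords[is_valid], visible[is_valid]

# Real-world (x, y, z) of the head (joint 0) in the first clean frame,
# plus whether it was visible (1) or occluded (0).
print(coords[0, 0], visible[0, 0])&lt;/code&gt;&lt;/pre&gt;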
&lt;p&gt;&lt;strong&gt;Citation&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;If you would like to cite our work, please use the following.&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Haque A, Peng B, Luo Z, Alahi A, Yeung S, Fei-Fei L. (2016). Towards Viewpoint Invariant 3D Human Pose Estimation. European Conference on Computer Vision. Amsterdam, Netherlands. Springer.&lt;/strong&gt;&lt;/p&gt;
&lt;pre&gt;@inproceedings{haque2016viewpoint,
  title     = {Towards Viewpoint Invariant 3D Human Pose Estimation},
  author    = {Haque, Albert and Peng, Boya and Luo, Zelun and Alahi, Alexandre and Yeung, Serena and Fei-Fei, Li},
  booktitle = {European Conference on Computer Vision},
  month     = {October},
  year      = {2016}
}&lt;/pre&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dcat:distribution>
      <dcat:Distribution>
        <dct:rights>
          <dct:RightsStatement rdf:about="https://creativecommons.org/licenses/by/4.0/legalcode">
            <rdfs:label>Creative Commons Attribution 4.0 International</rdfs:label>
          </dct:RightsStatement>
        </dct:rights>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3932973"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>