Dataset Open Access

im2latex-100k , arXiv:1609.04938

Kanervisto, Anssi


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.56198">
    <!-- Core identity of the record: it is typed as a dcat:Dataset and given the
         DCMI type "Dataset". The DOI URL appears twice more, as dct:identifier
         (typed xsd:anyURI) and as foaf:page. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.56198</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
    <!-- Creator of the dataset: an anonymous foaf:Agent carrying the full name
         plus separate given and family names, linked through org:memberOf to
         a foaf:Organization that is identified by name only. The agent is
         written as a typed node, which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Kanervisto, Anssi</foaf:name>
        <foaf:givenName>Anssi</foaf:givenName>
        <foaf:familyName>Kanervisto</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Eastern Finland</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Human-readable title of the record. -->
    <dct:title>im2latex-100k , arXiv:1609.04938</dct:title>
    <!-- Publisher, modelled as an anonymous foaf:Agent identified by name only. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Release date of the dataset. Exactly one dct:issued value is emitted,
         at full xsd:date precision: DCAT-AP allows at most one release date
         per dcat:Dataset, and a separate year-only (xsd:gYear) value would add
         no information beyond what this date already states. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2016-06-21</dct:issued>
    <!-- Free-text keywords, one dcat:keyword element per term. -->
    <dcat:keyword>im2latex</dcat:keyword>
    <dcat:keyword>latex</dcat:keyword>
    <dcat:keyword>tex</dcat:keyword>
    <dcat:keyword>formula</dcat:keyword>
    <dcat:keyword>openai</dcat:keyword>
    <!-- Alternative identity: the Zenodo record URL is asserted as owl:sameAs
         and repeated as an ADMS identifier whose notation is that URL (typed
         xsd:anyURI) and whose scheme agency is the literal "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/56198"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/56198</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Related resource: the record is declared to be part of the arXiv
         entry 1609.04938. -->
    <dct:isPartOf rdf:resource="http://arxiv.org/abs/1609.04938"/>
    <!-- Free-text description of the dataset. The value is HTML carried as
         escaped text (paragraph, line-break, strong and em tags), kept on one
         line because whitespace inside the element is part of the value.
         NOTE(review): the text refers to "formulas_im2latex.lst", whereas the
         distribution list in this record names the file
         "im2latex_formulas.lst"; the wording is left as the author wrote it. -->
    <dct:description>&lt;p&gt;A prebuilt dataset for OpenAI's task for image-2-latex system. Includes total of ~100k formulas and images splitted into train, validation and test sets. Formulas were parsed from LaTeX sources provided here: http://www.cs.cornell.edu/projects/kddcup/datasets.html(originally from arXiv)&lt;/p&gt; &lt;p&gt;Each image is a PNG image of fixed size. Formula is in black and rest of the image is transparent.&lt;/p&gt; &lt;p&gt;For related tools (eg. tokenizer) check out this repository: https://github.com/Miffyli/im2latex-dataset&lt;br&gt; For pre-made evaluation scripts and built im2latex system check this repository: https://github.com/harvardnlp/im2markup&lt;/p&gt; &lt;p&gt;&lt;strong&gt;Newlines used in &lt;em&gt;formulas_im2latex.lst &lt;/em&gt;are UNIX-style newlines (\n). Reading file with other type of newlines results to slightly wrong amount of lines (104563 instead of 103558), and thus breaks the structure used by this dataset. Python 3.x reads files using newlines of the running system by default, and to avoid this file must be opened with newlines&lt;/strong&gt;&lt;strong&gt;="\n" (eg. open("formulas_im2latex.lst", newline="\n")).&lt;/strong&gt;&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once by reference to the
         publications.europa.eu access-right authority value PUBLIC, and once
         as an info:eu-repo rights statement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distributions of the dataset. The first entry carries only the licence
         (CC0 1.0 legal code) and the DOI access URL; every following entry
         describes one downloadable file with its size and download URL.
         dcat:byteSize is typed as xsd:nonNegativeInteger so that RDF consumers
         read the sizes as numbers instead of plain string literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/publicdomain/zero/1.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">237383</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/56198/files/im2latex_test.lst"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">1923255</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/56198/files/im2latex_train.lst"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">12323106</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/56198/files/im2latex_formulas.lst"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">292150187</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/56198/files/formula_images.tar.gz"/>
        <!-- NOTE(review): the file name ends in .tar.gz while the declared
             media type is application/x-tar; confirm whether application/gzip
             (or an additional dcat:compressFormat) is intended. -->
        <dcat:mediaType>application/x-tar</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">213665</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/56198/files/im2latex_validate.lst"/>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.56198"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">924</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/56198/files/readme.txt"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
23,042
28,822
views
downloads
Metric: All versions | This version
Views: 23,042 | 23,066
Downloads: 28,822 | 28,821
Data volume: 3.7 TB | 3.7 TB
Unique views: 20,104 | 20,124
Unique downloads: 8,377 | 8,376

Share

Cite as