Dataset Open Access

Sentiment analysis in Galaxy with IMDB movie review dataset

Kaivan Kamali


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<!-- DCAT export (RDF/XML) describing a single dataset record and its file distributions. -->
<!-- NOTE(review): of the prefixes declared on the root element, only rdf, adms, dct, dcat, foaf, org, owl, rdfs and skos are used by elements in this document; the remaining declarations are unused here. -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <!-- The dataset resource; its subject IRI is the DOI URL, which is repeated below as dct:identifier and foaf:page. -->
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4477881">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4477881</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4477881"/>
    <!-- Creator: a blank-node foaf:Agent carrying a name and an organization membership. -->
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Kaivan Kamali</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Penn State University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>Sentiment analysis in Galaxy with IMDB movie review dataset</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued appears twice in this record: here as an xsd:gYear and again further down as a full xsd:date. -->
    <!-- NOTE(review): consumers that expect a single issued value need to pick one of the two; confirm which is preferred. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <dcat:keyword>IMDB</dcat:keyword>
    <dcat:keyword>Sentiment Analysis</dcat:keyword>
    <dcat:keyword>Movie reviews</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-01-28</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- Alternate identifier: the zenodo.org record URL, given both as owl:sameAs and as an adms:Identifier. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/4477881"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4477881</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Versioning: dct:isVersionOf targets a different DOI (suffix 4477880) than this record (suffix 4477881). -->
    <!-- NOTE(review): presumably the target is the DOI that groups all versions of the record; confirm against the publisher's documentation. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4477880"/>
    <owl:versionInfo>1.0</owl:versionInfo>
    <!-- Two dct:description literals follow. The first holds XML-escaped HTML markup (p and br tags plus nbsp entity references) as plain text; the second is a plain-text citation. -->
    <!-- NOTE(review): a consumer that displays the first literal verbatim will show raw HTML tags; confirm whether descriptions are expected to be rendered as HTML. -->
    <dct:description>&lt;p&gt;IMDB movie review sentiment classification dataset (Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011).&amp;nbsp;Learning Word Vectors for Sentiment Analysis.&amp;nbsp;The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011)). For more information&amp;nbsp;please refer to:&amp;nbsp;https://ai.stanford.edu/~amaas/data/sentiment/&lt;br&gt; &lt;br&gt; The IMDB dataset was modified as follows to prepare it for use in a Galaxy Training Tutorial (https://training.galaxyproject.org/):&lt;br&gt; &lt;br&gt; The top 50 words are excluded (mostly stop words). Included&amp;nbsp;the next 10,000 top words. Reviews are limited to&amp;nbsp;500 words max (Longer reviews trimmed and shorter reviews are padded). 25,000 reviews are used for training and testing each. Files are&amp;nbsp;in tsv (tab separated value) format to be consumed by Galaxy (www.usegalaxy.org).&amp;nbsp;&lt;/p&gt;</dct:description>
    <dct:description>Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011). For more information please refer to: https://ai.stanford.edu/~amaas/data/sentiment/</dct:description>
    <!-- Access rights are stated twice: once as a reference to the EU Publications Office PUBLIC authority entry, once as an inline dct:RightsStatement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- First distribution: carries only the license and the access URL; the license is not repeated on the per-file distributions below. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4477881"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Per-file distributions: each of the four entries below gives an access URL, a byte size, a download URL and a media type for one TSV file. -->
    <!-- NOTE(review): the dcat:byteSize and dcat:mediaType values are plain literals with no rdf:datatype or IRI; confirm this matches what the consuming DCAT profile expects. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4477881"/>
        <dcat:byteSize>118634408</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4477881/files/X_test.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4477881"/>
        <dcat:byteSize>118817859</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4477881/files/X_train.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4477881"/>
        <dcat:byteSize>225000</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4477881/files/y_test.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4477881"/>
        <dcat:byteSize>225000</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4477881/files/y_train.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
146
166
views
downloads
                 All versions | This version
Views            146 | 146
Downloads        166 | 166
Data volume      11.3 GB | 11.3 GB
Unique views     121 | 121
Unique downloads 63 | 63

Share

Cite as