Dataset Open Access

Russian Distributional Thesaurus (RDT): Word Embeddings

Alexander Panchenko; Nikolay Arefyev; Dmitry Ustalov; Natalia Loukachevitch; Denis Paperno; Chris Biemann; Natalia Konstantinova


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<!-- DCAT description (RDF/XML) of the dataset identified by https://doi.org/10.5281/zenodo.400631. -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:adms="http://www.w3.org/ns/adms#"
         xmlns:cnt="http://www.w3.org/2011/content#"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dctype="http://purl.org/dc/dcmitype/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:duv="http://www.w3.org/ns/duv#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:frapo="http://purl.org/cerif/frapo/"
         xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         xmlns:gsp="http://www.opengis.net/ont/geosparql#"
         xmlns:locn="http://www.w3.org/ns/locn#"
         xmlns:org="http://www.w3.org/ns/org#"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:prov="http://www.w3.org/ns/prov#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns:schema="http://schema.org/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
         xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.400631">
    <!-- Resource typing and primary identifier (the DOI, also used as the landing page). -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.400631</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.400631"/>
    <!-- Creators: one blank node per person, typed foaf:Agent, with name and affiliation. -->
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Alexander Panchenko</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Hamburg</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Nikolay Arefyev</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Moscow State University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Dmitry Ustalov</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Ural Federal University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Natalia Loukachevitch</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Moscow State University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Denis Paperno</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Trento</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Chris Biemann</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Hamburg</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Natalia Konstantinova</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Wolverhampton</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Title, publisher and publication year. -->
    <dct:title>Russian Distributional Thesaurus (RDT): Word Embeddings</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2017</dct:issued>
    <!-- Free-text keywords. -->
    <dcat:keyword>word embeddings</dcat:keyword>
    <dcat:keyword>distributional semantics</dcat:keyword>
    <dcat:keyword>Russian</dcat:keyword>
    <dcat:keyword>Russian language</dcat:keyword>
    <dcat:keyword>word vectors</dcat:keyword>
    <dcat:keyword>word2vec</dcat:keyword>
    <dcat:keyword>SGNS</dcat:keyword>
    <!-- Full issue date; complements the xsd:gYear value of dct:issued given earlier in this description. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-03-18</dct:issued>
    <!-- Alternate identifier: the Zenodo record URL for the same resource. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/400631"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/400631</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <!-- Description: HTML markup carried as escaped character data; whitespace inside it is part of the literal. -->
    <dct:description>&lt;p&gt;This resource is a part of the Russian Distributional Thesaurus (RDT): see http://russe.nlpub.ru/downloads and http://nlpub.ru/RDT. &lt;/p&gt; &lt;p&gt;This dataset contains a large scale word embeddings model for Russian trained using the SGNS model (Mikolov et al., 2013) on a 12.9 billion word collection of books in Russian. According to the results of our participation in the shared task on Russian semantic similarity (Panchenko et al., 2015), this approach scored in the top 5 among 105 submissions (Arefyev et al., 2015). Following our prior experiments (Arefyev et al., 2015) we have selected the following parameters for the model: minimal word frequency – 5, number of dimensions in a word vector – 500, three or five iterations of the learning algorithm over the input corpus, context window size of 1, 2, 3, 5, 7 and 10 words. Parameters of the model are listed below:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;Model: skip-gram&lt;/li&gt; &lt;li&gt;Corpus: a 150Gb sample of the lib.rus.ec book collection.&lt;/li&gt; &lt;li&gt;Context window size: 10 words&lt;/li&gt; &lt;li&gt;Number of dimensions: 500&lt;/li&gt; &lt;li&gt;Number of iterations: 3&lt;/li&gt; &lt;li&gt;Minimal word frequency: 5&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;References:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;Panchenko A., Ustalov D., Arefyev N., Paperno D., Konstantinova N., Loukachevitch N. and Biemann C. (2016): Human and Machine Judgements about Russian Semantic Relatedness. In Proceedings of the 5th Conference on Analysis of Images, Social Networks, and Texts (AIST'2016). Communications in Computer and Information Science (CCIS). Springer-Verlag Berlin Heidelberg&lt;/li&gt; &lt;/ul&gt; &lt;ul&gt; &lt;li&gt;Panchenko A., Loukachevitch N. V., Ustalov D., Paperno D., Meyer C. M., Konstantinova N. (2015): RUSSE: The First International Workshop on Russian Semantic Similarity. 
In Proceedings of the 21st International Conference on Computational Linguistics and Intellectual Technologies (Dialogue'2015). Moscow, Russia. RGGU&lt;/li&gt; &lt;/ul&gt; &lt;ul&gt; &lt;li&gt;Arefyev N., Panchenko A., Lukanin A., Lesota O., Romanov P. (2015): Evaluating Three Corpus-Based Semantic Similarity Systems for Russian. In Proceedings of the 21st International Conference on Computational Linguistics and Intellectual Technologies (Dialogue'2015). Moscow, Russia. RGGU&lt;/li&gt; &lt;/ul&gt;</dct:description>
    <!-- Access rights: stated both as an EU authority-table URI and as an info:eu-repo rights statement. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution: licence and access URL (the DOI). -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="http://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.400631"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
228 views
42 downloads
Statistic | All versions | This version
Views | 228 | 228
Downloads | 42 | 42
Data volume | 625.6 GB | 625.6 GB
Unique views | 216 | 216
Unique downloads | 36 | 36

Share

Cite as