There is a newer version of this record available.

Dataset Open Access

LatinISE test data for SemEval 2020 task 1

McGillivray, Barbara; Schlechtweg, Dominik; Dubossarsky, Haim; Tahmasebi, Nina; Hengchen, Simon


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3734089">
    <!-- Core typing and identification: the resource is declared both a dcat:Dataset and a DCMI-type Dataset,
         and the same DOI URL is reused as the subject URI, the dct:identifier value and the foaf:page target. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3734089</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3734089"/>
    <!-- Creators, in the order given by the record. Each creator is a blank-node foaf:Agent written as a
         typed node element, carrying the full name, its given/family parts and one affiliation. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>McGillivray, Barbara</foaf:name>
        <foaf:givenName>Barbara</foaf:givenName>
        <foaf:familyName>McGillivray</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Cambridge</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Schlechtweg, Dominik</foaf:name>
        <foaf:givenName>Dominik</foaf:givenName>
        <foaf:familyName>Schlechtweg</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>IMS, University of Stuttgart</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Dubossarsky, Haim</foaf:name>
        <foaf:givenName>Haim</foaf:givenName>
        <foaf:familyName>Dubossarsky</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Cambridge</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Tahmasebi, Nina</foaf:name>
        <foaf:givenName>Nina</foaf:givenName>
        <foaf:familyName>Tahmasebi</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Gothenburg</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Hengchen, Simon</foaf:name>
        <foaf:givenName>Simon</foaf:givenName>
        <foaf:familyName>Hengchen</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Helsinki</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Human-readable title and publishing agent of the record. -->
    <dct:title>LatinISE test data for SemEval 2020 task 1</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Publication year only; a full xsd:date value for dct:issued is also stated further down in this description. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <!-- One dcat:keyword per tag, so that consumers can index and facet each term individually
         instead of receiving the comma-joined string "Latin, corpus". -->
    <dcat:keyword>Latin</dcat:keyword>
    <dcat:keyword>corpus</dcat:keyword>
    <!-- Funding: frapo:isFundedBy points at the grant resource (an info:eu-repo grantAgreement URI with the
         grant number percent-encoded), and schema:funder names the funding organization inline. -->
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/RCUK/EPSRC/EP%2FN510129%2F1/"/>
    <schema:funder>
      <foaf:Organization>
        <!-- NOTE(review): presumably a Crossref Funder Registry DOI stored without its resolver prefix; confirm. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000690</dct:identifier>
        <foaf:name>Research Councils UK</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <!-- Full publication date of this record. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-03-31</dct:issued>
    <!-- The EU Publications Office language authority table keys its concepts by three-letter codes,
         so Latin is .../language/LAT; the two-letter form "LA" does not identify a concept there. -->
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/LAT"/>
    <!-- Alternate identification: the Zenodo landing-page URL is asserted both as owl:sameAs and as an
         adms:Identifier whose scheme agency is "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/3734089"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3734089</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Versioning: this record is version 2 of the resource identified by the DOI below. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3674098"/>
    <owl:versionInfo>2</owl:versionInfo>
    <dct:description>&lt;p&gt;This data collection contains the Latin test data for &lt;a href="https://competitions.codalab.org/competitions/20948"&gt;SemEval 2020 Task 1: Unsupervised Lexical Semantic Change Detection&lt;/a&gt;:&amp;nbsp;&lt;/p&gt; &lt;ul&gt; &lt;li&gt;a Latin text corpus pair (`corpus1/lemma`, `corpus2/lemma`)&lt;/li&gt; &lt;li&gt;40 lemmas which have been annotated for their lexical semantic change between the two corpora (`targets.txt`)&lt;/li&gt; &lt;li&gt;the annotated binary change scores of the targets for subtask 1, and their annotated graded change scores for subtask 2 (`truth/`)&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;The corpus data have been automatically lemmatized and part-of-speech tagged, and have been partially corrected by hand. For homonyms, the lemmas are followed by the &amp;#39;\#&amp;#39; symbol and the number of the homonym according to the Lewis-Short dictionary of Latin when this number is greater than 1. For example, the lemma &amp;#39;dico&amp;#39; corresponds to the first homonym in the Lewis-Short dictionary and &amp;#39;dico\#2&amp;#39; corresponds to the second homonym, cf. 
Lewis-Short dictionary.&lt;/p&gt; &lt;p&gt;__Corpus 1__&lt;/p&gt; &lt;ul&gt; &lt;li&gt;based on: &lt;a href="http://hdl.handle.net/11372/LRT-3170"&gt;LatinISE&lt;/a&gt;&amp;nbsp;(McGillivray and Kilgarriff 2013), &lt;a href="https://app.sketchengine.eu/#dashboard?corpname=preloaded/latinise_4"&gt;version on Sketch Engine&lt;/a&gt;&lt;/li&gt; &lt;li&gt;language: Latin&lt;/li&gt; &lt;li&gt;time covered: from the beginning of the second century before Christ (BC) to the end of the first century BC&lt;/li&gt; &lt;li&gt;size: ~1.7 million tokens&lt;/li&gt; &lt;li&gt;format: lemmatized, sentence length &amp;gt;= 2, no punctuation, sentences randomly shuffled&lt;/li&gt; &lt;li&gt;encoding: UTF-8&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;__Corpus 2__&lt;/p&gt; &lt;ul&gt; &lt;li&gt;based on: &lt;a href="http://hdl.handle.net/11372/LRT-3170"&gt;LatinISE&lt;/a&gt;&amp;nbsp;(McGillivray and Kilgarriff 2013) , &lt;a href="https://app.sketchengine.eu/#dashboard?corpname=preloaded/latinise_4"&gt;version on Sketch Engine&lt;/a&gt;&lt;/li&gt; &lt;li&gt;language: Latin&lt;/li&gt; &lt;li&gt;time covered: from the beginning of the first century after Christ (AD) to the end of the twenty-first century AD&lt;/li&gt; &lt;li&gt;size: ~9.4 million tokens&lt;/li&gt; &lt;li&gt;format: lemmatized, sentence length &amp;gt;= 2, no punctuation, sentences randomly shuffled&lt;/li&gt; &lt;li&gt;encoding: UTF-8&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;Find more information on the data in the papers referenced below.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;References&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;Dominik Schlechtweg, Barbara McGillivray, Simon Hengchen, Haim Dubossarsky and Nina Tahmasebi &lt;a href="https://competitions.codalab.org/competitions/20948"&gt;SemEval 2020 Task 1: Unsupervised Lexical Semantic Change Detection&lt;/a&gt;. To appear in SemEval@COLING2020.&lt;/p&gt; &lt;p&gt;McGillivray, B. and Kilgarriff, A. (2013). 
&lt;a href="https://www.sketchengine.co.uk/wp-content/uploads/2015/05/Latin_historical_corpus_2013.pdf"&gt;Tools for historical corpus research, and a corpus of Latin&lt;/a&gt;. In Paul Bennett, Martin Durrell, Silke Scheible, Richard J. Whitt (eds.), New Methods in Historical Corpus Linguistics, T&amp;uuml;bingen: Narr.&lt;br&gt; &amp;nbsp;&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as the EU Publications Office access-right concept PUBLIC,
         and once as an info:eu-repo rights statement labelled "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution carrying the licence, reachable through the DOI landing page. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3734089"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution for the downloadable ZIP archive. DCAT defines accessURL and downloadURL as resources
         and mediaType as a dct:MediaType, so they are given as IRIs (matching the accessURL form used in the
         distribution above) rather than plain string literals; byteSize is typed so it is read as a number. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3734089"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">28445562</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3734089/files/semeval2020_ulscd_lat.zip"/>
        <dcat:mediaType rdf:resource="https://www.iana.org/assignments/media-types/application/zip"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <!-- Grant resource referenced by frapo:isFundedBy on the dataset description (same info:eu-repo URI).
       Carries the plain grant number, the project title and the awarding organization. -->
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/RCUK/EPSRC/EP%2FN510129%2F1/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">EP/N510129/1</dct:identifier>
    <dct:title>The Alan Turing Institute</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <!-- NOTE(review): presumably a Crossref Funder Registry DOI stored without its resolver prefix; confirm. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000690</dct:identifier>
        <foaf:name>Research Councils UK</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
737 views
615 downloads
Statistic | All versions | This version
Views | 737 | 131
Downloads | 615 | 63
Data volume | 18.2 GB | 1.8 GB
Unique views | 619 | 114
Unique downloads | 541 | 57

Share

Cite as