Dataset Open Access

TweetsCOV19 - A Semantically Annotated Corpus of Tweets About the COVID-19 Pandemic (Part 2, May 2020)

Baran, Erdal; Dimitrov, Dimitar


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4593502">
    <!-- Types the resource as a dcat:Dataset and as the DCMI "Dataset" type. The DOI URL is
         repeated as the dct:identifier literal (typed xsd:anyURI) and as a foaf:page. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4593502</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4593502"/>
    <!-- Creator as a blank-node foaf:Agent. The typed-node element below is RDF/XML
         shorthand for rdf:Description plus an rdf:type of foaf:Agent (same triples). -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Baran, Erdal</foaf:name>
        <foaf:givenName>Erdal</foaf:givenName>
        <foaf:familyName>Baran</foaf:familyName>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator as a blank-node foaf:Agent. The typed-node element below is RDF/XML
         shorthand for rdf:Description plus an rdf:type of foaf:Agent (same triples). -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Dimitrov, Dimitar</foaf:name>
        <foaf:givenName>Dimitar</foaf:givenName>
        <foaf:familyName>Dimitrov</foaf:familyName>
      </foaf:Agent>
    </dct:creator>
    <!-- Human-readable title of this dataset part; no xml:lang tag is set. -->
    <dct:title>TweetsCOV19 - A Semantically Annotated Corpus of Tweets About the COVID-19 Pandemic (Part 2, May 2020)</dct:title>
    <!-- Publisher is a blank-node foaf:Agent that carries only a name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Year-only issue date (xsd:gYear). NOTE(review): a second dct:issued with the full
         xsd:date 2021-03-10 follows the keyword list, so this resource has two issue
         dates; confirm that consumers expecting a single value cope with both. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <!-- Free-text keywords, one dcat:keyword literal per term. Casing is mixed ("RDF" vs
         "csv") and no xml:lang tag is set, so matching should be case-insensitive. -->
    <dcat:keyword>twitter</dcat:keyword>
    <dcat:keyword>tweets</dcat:keyword>
    <dcat:keyword>linked data</dcat:keyword>
    <dcat:keyword>microblogging</dcat:keyword>
    <dcat:keyword>RDF</dcat:keyword>
    <dcat:keyword>csv</dcat:keyword>
    <dcat:keyword>covid-19</dcat:keyword>
    <dcat:keyword>coronavirus</dcat:keyword>
    <!-- Full issue date (xsd:date). NOTE(review): a year-only dct:issued (xsd:gYear 2021)
         is also present earlier in this description. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-03-10</dct:issued>
    <!-- The Zenodo record URL is stated twice: as an owl:sameAs alias of the DOI resource
         and as an adms:Identifier whose notation is typed xsd:anyURI with scheme agency "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/4593502"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4593502</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Second foaf:page: the data.gesis.org page that the description links as TweetsCOV19. -->
    <foaf:page rdf:resource="https://data.gesis.org/tweetscov19/"/>
    <!-- NOTE(review): the target DOI differs from this record's only in the last digit;
         presumably the all-versions (concept) DOI; confirm. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4593501"/>
    <!-- Zenodo community collections this record is listed in. -->
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/covid-19"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/twitter-datasets"/>
    <!-- Description literal holding HTML that has been entity-escaped for XML, so an XML
         parser sees plain text rather than child elements. Entities written as &amp;quot;
         and &amp;nbsp; are escaped twice: after XML parsing the text still contains the HTML
         references &quot; and &nbsp;, which a consumer must decode as HTML. The literal also
         contains one raw line break (just before the SentiStrength link). -->
    <dct:description>&lt;p&gt;&lt;strong&gt;&lt;a href="https://data.gesis.org/tweetscov19/"&gt;TweetsCOV19&lt;/a&gt;&lt;/strong&gt;&lt;strong&gt; &lt;/strong&gt;is a semantically annotated corpus of Tweets about the COVID-19 pandemic. It is a subset of &lt;a href="https://data.gesis.org/tweetskb"&gt;TweetsKB&lt;/a&gt; and aims at capturing online discourse about various aspects of the pandemic and its societal impact. &lt;strong&gt;Metadata&lt;/strong&gt; information about the tweets as well as extracted &lt;strong&gt;entities&lt;/strong&gt;, &lt;strong&gt;sentiments&lt;/strong&gt;, &lt;strong&gt;hashtags&lt;/strong&gt;, &lt;strong&gt;user mentions&lt;/strong&gt;, and &lt;strong&gt;resolved URLs &lt;/strong&gt;are exposed in RDF using established RDF/S vocabularies*.&lt;/p&gt; &lt;p&gt;We also provide a &lt;em&gt;&lt;strong&gt;tab-separated values (tsv)&lt;/strong&gt;&lt;/em&gt; version of the dataset. Each line contains features of a tweet instance. Features are separated by tab character (&amp;quot;\t&amp;quot;). The following list indicate the feature indices:&lt;/p&gt; &lt;ol&gt; &lt;li&gt;Tweet Id: Long.&lt;/li&gt; &lt;li&gt;Username: String. Encrypted for privacy issues*.&lt;/li&gt; &lt;li&gt;Timestamp: Format ( &amp;quot;EEE MMM dd HH:mm:ss Z yyyy&amp;quot; ).&lt;/li&gt; &lt;li&gt;#Followers: Integer.&lt;/li&gt; &lt;li&gt;#Friends: Integer.&lt;/li&gt; &lt;li&gt;#Retweets: Integer.&lt;/li&gt; &lt;li&gt;#Favorites: Integer.&lt;/li&gt; &lt;li&gt;Entities: String. For each entity, we aggregated the original text, the annotated entity and the produced score from &lt;a href="https://github.com/yahoo/FEL"&gt;FEL&lt;/a&gt; library. Each entity is separated from another entity by char &amp;quot;;&amp;quot;. Also, each entity is separated by char &amp;quot;:&amp;quot; in order to store &amp;quot;original_text:annotated_entity:score;&amp;quot;. If FEL did not find any entities, we have stored &amp;quot;null;&amp;quot;.&lt;/li&gt; &lt;li&gt;Sentiment: String. 
&lt;a href="http://sentistrength.wlv.ac.uk/"&gt;SentiStrength&lt;/a&gt; produces a score for positive (1 to 5) and negative (-1 to -5) sentiment. We splitted these two numbers by whitespace char &amp;quot; &amp;quot;. Positive sentiment was stored first and then negative sentiment (i.e. &amp;quot;2 -1&amp;quot;).&lt;/li&gt; &lt;li&gt;Mentions: String. If the tweet contains mentions, we remove the char &amp;quot;@&amp;quot; and concatenate the mentions with whitespace char &amp;quot; &amp;quot;. If no mentions appear, we have stored &amp;quot;null;&amp;quot;.&lt;/li&gt; &lt;li&gt;Hashtags: String. If the tweet contains hashtags, we remove the char &amp;quot;#&amp;quot; and concatenate the hashtags with whitespace char &amp;quot; &amp;quot;. If no hashtags appear, we have stored &amp;quot;null;&amp;quot;.&lt;/li&gt; &lt;li&gt;URLs: String: If the tweet contains URLs, we concatenate the URLs using &amp;quot;:-: &amp;quot;. If no URLs appear, we have stored &amp;quot;null;&amp;quot;&lt;/li&gt; &lt;/ol&gt; &lt;p&gt;To extract the dataset from &lt;a href="https://data.gesis.org/tweetskb"&gt;TweetsKB&lt;/a&gt;, we compiled a seed list of 268 COVID-19-related &lt;a href="https://data.gesis.org/tweetscov19/keywords.txt"&gt;keywords&lt;/a&gt;.&lt;/p&gt; &lt;p&gt;&lt;em&gt;* For the sake of privacy, we anonymize&amp;nbsp;user IDs&amp;nbsp;and we do not provide the text of the tweets.&lt;/em&gt;&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as the publications.europa.eu access-right
         authority value PUBLIC, and once as a dct:RightsStatement identified by
         info:eu-repo/semantics/openAccess with the label "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution node that carries only the licence (CC BY 4.0 legal code URI) and the
         DOI as access URL. The per-file distributions of this dataset are separate blank
         nodes and do not repeat the dct:license. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4593502"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: gzip-compressed N3 serialisation (per the download file name).
         accessURL is the dataset DOI; downloadURL points at the file itself. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4593502"/>
        <!-- Size in bytes. Typed explicitly because DCAT defines dcat:byteSize as a typed
             numeric literal; an untyped literal is read as a plain string. -->
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">404722462</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4593502/files/TweetsCOV19_052020.n3.gz"/>
        <dcat:mediaType>text/n3</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: gzip-compressed tab-separated-values file (per the download file
         name). accessURL is the dataset DOI; downloadURL points at the file itself. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4593502"/>
        <!-- Size in bytes. Typed explicitly because DCAT defines dcat:byteSize as a typed
             numeric literal; an untyped literal is read as a plain string. -->
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">197659685</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4593502/files/TweetsCOV19_052020.tsv.gz"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
1,078 views
353 downloads
All versions This version
Views 1,078 1,078
Downloads 353 353
Data volume 88.2 GB 88.2 GB
Unique views 1,028 1,028
Unique downloads 273 273

Share

Cite as