Dataset Open Access

URLs from tweets for a 2014 sample of Twitter users and for a set of computer scientists

Robert Jäschke


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.580587">
    <!-- Resource typing and identification: the node is typed as a DCAT dataset
         and as a DCMI Dataset; the DOI IRI is repeated as the dct:identifier
         literal (xsd:anyURI) and as the foaf:page of the resource. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.580587</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
    <!-- Creator of the dataset: an anonymous foaf:Agent with a name and the
         organisation it is a member of. Written as a typed node element,
         which yields the same triples as rdf:Description plus rdf:type. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Robert Jäschke</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Sheffield</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Descriptive metadata: title, publisher, issue dates and keywords. -->
    <dct:title>URLs from tweets for a 2014 sample of Twitter users and for a set of computer scientists</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- The issue date is stated twice: once at year precision (xsd:gYear)
         and once at day precision (xsd:date). -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2017</dct:issued>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-05-17</dct:issued>
    <dcat:keyword>Twitter</dcat:keyword>
    <dcat:keyword>tweets</dcat:keyword>
    <!-- Alternate identity: the Zenodo record URL is declared owl:sameAs the
         DOI resource and is also carried as an adms:Identifier whose notation
         is that URL and whose scheme agency is the string "url". -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/580587"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/580587</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Links to other resources: the record this one is a version of
         (10.5281/zenodo.154583), followed by two related resources. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.154583"/>
    <dct:relation rdf:resource="https://doi.org/10.5281/zenodo.12942"/>
    <dct:relation rdf:resource="https://doi.org/10.1371/journal.pone.0179630"/>
    <!-- Two description literals. The first carries entity-escaped HTML markup
         (paragraphs, lists, emphasis) listing the files in the dataset; the
         second is plain text describing how this version differs from
         10.5281/zenodo.154583. Consumers receive the markup as literal text. -->
    <dct:description>&lt;p&gt;The files in this dataset are used to analyse the tweeting behaviour of computer scientists on Twitter. They comprise&lt;/p&gt; &lt;ul&gt; &lt;li&gt;a set of 989,529 tweet-URL pairs (&lt;em&gt;tweets_2014_researcher.tsv.bz2&lt;/em&gt;) from 2014 from 6,271 users of the computer scientists sample in https://zenodo.org/record/12942 specified by time, tweet id, user id, and URL,&lt;/li&gt; &lt;li&gt;a set of 300,053,850 tweet ids (&lt;em&gt;tweets_2014_sample.tsv.bz2&lt;/em&gt;) from the 1% Twitter stream sample from 2014,&lt;/li&gt; &lt;li&gt;a set of 605,080 tweet-URL pairs (&lt;em&gt;tweets_2014_sample_6694_users.tsv.bz2&lt;/em&gt;) from the 1% Twitter stream sample from 2014 for 6,694 users specified by time, tweet id, user id, and URL,&lt;/li&gt; &lt;li&gt;a set of the top 10,000 host names (&lt;em&gt;MAG_hosts_10000.tsv&lt;/em&gt;) from the Microsoft Academic Graph data (http://blogs.msdn.com/b/msr_er/archive/2015/06/26/announcing-the-microsoft-academic-graph-let-the-research-begin.aspx), specified by rank, URL count, and host name, and&lt;/li&gt; &lt;li&gt;a set of 340 host names of URL shortening services (&lt;em&gt;url_shortening_services.tsv&lt;/em&gt;).&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;In addition, the following rankings (based on the odds ratio) of domains, hosts, and URLs that appear in both the researcher dataset and the sample are included:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;&lt;em&gt;domains_by_odds_ratio.tsv.bz2&lt;/em&gt; - a ranking of 61,860 domains,&lt;/li&gt; &lt;li&gt;&lt;em&gt;hosts_by_odds_ratio.tsv.bz2&lt;/em&gt; - a ranking of 80,384 hosts,&lt;/li&gt; &lt;li&gt;&lt;em&gt;publisher_domains_by_odds_ratio.tsv.bz2&lt;/em&gt; - a ranking of 924 publisher domains,&lt;/li&gt; &lt;li&gt;&lt;em&gt;publisher_urls_by_odds_ratio.tsv.bz2&lt;/em&gt; - a ranking of 4,227 publisher URLs.&lt;/li&gt; &lt;/ul&gt;</dct:description>
    <dct:description>This is an updated and extended version of 10.5281/zenodo.154583 where a new sample of users has been used, resulting in an updated file tweets_2014_sample_6694_users.tsv.bz2. In addition, domain, host, and URL rankings have been added.</dct:description>
    <!-- Access rights are stated twice: as a reference to the PUBLIC entry of
         the publications.europa.eu access-right authority, and as a
         dct:RightsStatement identified by info:eu-repo/semantics/openAccess
         with the label "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries the licence (CC BY-SA 4.0 legal code) and
         uses the dataset DOI as its access URL; it names no individual file. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by-sa/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: domains_by_odds_ratio.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">444954</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/domains_by_odds_ratio.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: hosts_by_odds_ratio.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">619682</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/hosts_by_odds_ratio.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: MAG_hosts_10000.tsv.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes). -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">298167</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/MAG_hosts_10000.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: publisher_domains_by_odds_ratio.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">8120</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/publisher_domains_by_odds_ratio.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: publisher_urls_by_odds_ratio.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">84262</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/publisher_urls_by_odds_ratio.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: tweets_2014_researcher.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">31993560</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/tweets_2014_researcher.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: tweets_2014_sample_6694_users.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">12227572</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/tweets_2014_sample_6694_users.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: tweets_2014_sample.tsv.bz2.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes).
         NOTE(review): the file name ends in .bz2 while mediaType names the
         TSV type; confirm whether dcat:compressFormat should be added. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">2295230252</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/tweets_2014_sample.tsv.bz2"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: url_shortening_services.tsv.
         accessURL and downloadURL are IRI references (rdf:resource), not
         string literals, because DCAT defines both as resource-valued;
         byteSize is a typed literal (size in bytes). -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.580587"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">2967</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/580587/files/url_shortening_services.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
750
95
views
downloads
All versions | This version
Views: 750 | 752
Downloads: 95 | 95
Data volume: 39.8 GB | 39.8 GB
Unique views: 738 | 740
Unique downloads: 67 | 67

Share

Cite as