Dataset Open Access

URLs from tweets for a 2014 sample of Twitter users and for a set of computer scientists

Robert Jäschke


JSON Export

{
  "conceptrecid": "785910", 
  "created": "2017-05-17T14:04:47.973057+00:00", 
  "doi": "10.5281/zenodo.580587", 
  "files": [
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:299e3ec2469d3a91582e592a2fc0aa1e", 
      "key": "domains_by_odds_ratio.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/domains_by_odds_ratio.tsv.bz2"
      }, 
      "size": 444954, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:bd959f2b67bc50e746a4740d8969f18c", 
      "key": "hosts_by_odds_ratio.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/hosts_by_odds_ratio.tsv.bz2"
      }, 
      "size": 619682, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:bf92fe9d92a45949d44037a81356b82b", 
      "key": "MAG_hosts_10000.tsv", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/MAG_hosts_10000.tsv"
      }, 
      "size": 298167, 
      "type": "tsv"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:10e489478e9076e76d158c18e95f51bc", 
      "key": "publisher_domains_by_odds_ratio.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/publisher_domains_by_odds_ratio.tsv.bz2"
      }, 
      "size": 8120, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:e5f563f85a2ea56fac3b20109e1c2402", 
      "key": "publisher_urls_by_odds_ratio.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/publisher_urls_by_odds_ratio.tsv.bz2"
      }, 
      "size": 84262, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:6c466537064b5a5574734f418893b199", 
      "key": "tweets_2014_researcher.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/tweets_2014_researcher.tsv.bz2"
      }, 
      "size": 31993560, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:2dff10a6301cb97c53a653a65019199c", 
      "key": "tweets_2014_sample_6694_users.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/tweets_2014_sample_6694_users.tsv.bz2"
      }, 
      "size": 12227572, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:d0ea5705cb86480a0f22a1c7439533b4", 
      "key": "tweets_2014_sample.tsv.bz2", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/tweets_2014_sample.tsv.bz2"
      }, 
      "size": 2295230252, 
      "type": "bz2"
    }, 
    {
      "bucket": "1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
      "checksum": "md5:1f040245142c7309b9c46f897f79f7ce", 
      "key": "url_shortening_services.tsv", 
      "links": {
        "self": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c/url_shortening_services.tsv"
      }, 
      "size": 2967, 
      "type": "tsv"
    }
  ], 
  "id": 580587, 
  "links": {
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.580587.svg", 
    "bucket": "https://zenodo.org/api/files/1fdd3abb-448e-4b42-a9c8-627e58c7823c", 
    "doi": "https://doi.org/10.5281/zenodo.580587", 
    "html": "https://zenodo.org/record/580587", 
    "latest": "https://zenodo.org/api/records/580587", 
    "latest_html": "https://zenodo.org/record/580587"
  }, 
  "metadata": {
    "access_right": "open", 
    "access_right_category": "success", 
    "contributors": [
      {
        "affiliation": "L3S Research Center", 
        "name": "Asmelash, Teka Hadgu", 
        "type": "DataCollector"
      }
    ], 
    "creators": [
      {
        "affiliation": "University of Sheffield", 
        "name": "Robert J\u00e4schke"
      }
    ], 
    "description": "<p>The files in this dataset are used to analyse the tweeting behaviour of computer scientists on Twitter. They comprise</p>\n\n<ul>\n\t<li>a set of 989,529 tweet-URL pairs (<em>tweets_2014_researcher.tsv.bz2</em>) from 2014 from 6,271 users of the computer scientists sample in https://zenodo.org/record/12942 specified by time, tweet id, user id, and URL,</li>\n\t<li>a set of 300,053,850 tweet ids (<em>tweets_2014_sample.tsv.bz2</em>) from the 1% Twitter stream sample from 2014,</li>\n\t<li>a set of 605,080 tweet-URL pairs (<em>tweets_2014_sample_6694_users.tsv.bz2</em>) from the 1% Twitter stream sample from 2014 for 6,694 users specified by time, tweet id, user id, and URL,</li>\n\t<li>a set of the top 10,000 host names (<em>MAG_hosts_10000.tsv</em>) from the Microsoft Academic Graph data (http://blogs.msdn.com/b/msr_er/archive/2015/06/26/announcing-the-microsoft-academic-graph-let-the-research-begin.aspx), specified by rank, URL count, and host name, and</li>\n\t<li>a set of 340 host names of URL shortening services (<em>url_shortening_services.tsv</em>).</li>\n</ul>\n\n<p>In addition, the following rankings (based on the odds ratio) of domains, hosts, and URLs that appear in both the researcher dataset and the sample are included:</p>\n\n<ul>\n\t<li><em>domains_by_odds_ratio.tsv.bz2</em> - a ranking of 61,860 domains,</li>\n\t<li><em>hosts_by_odds_ratio.tsv.bz2</em> - a ranking of 80,384 hosts,</li>\n\t<li><em>publisher_domains_by_odds_ratio.tsv.bz2</em> - a ranking of 924 publisher domains,</li>\n\t<li><em>publisher_urls_by_odds_ratio.tsv.bz2</em> - a ranking of 4,227 publisher URLs.</li>\n</ul>", 
    "doi": "10.5281/zenodo.580587", 
    "journal": {
      "issue": "6", 
      "title": "PLoS ONE", 
      "volume": "12"
    }, 
    "keywords": [
      "Twitter", 
      "tweets"
    ], 
    "license": {
      "id": "CC-BY-SA-4.0"
    }, 
    "notes": "This is an updated and extended version of 10.5281/zenodo.154583 where a new sample of users has been used, resulting in an updated file tweets_2014_sample_6694_users.tsv.bz2. In addition, domain, host, and URL rankings have been added.", 
    "publication_date": "2017-05-17", 
    "related_identifiers": [
      {
        "identifier": "10.5281/zenodo.154583", 
        "relation": "isNewVersionOf", 
        "scheme": "doi"
      }, 
      {
        "identifier": "10.5281/zenodo.12942", 
        "relation": "isSupplementTo", 
        "scheme": "doi"
      }, 
      {
        "identifier": "10.1371/journal.pone.0179630", 
        "relation": "isSupplementTo", 
        "scheme": "doi"
      }
    ], 
    "relations": {
      "version": [
        {
          "count": 1, 
          "index": 0, 
          "is_last": true, 
          "last_child": {
            "pid_type": "recid", 
            "pid_value": "580587"
          }, 
          "parent": {
            "pid_type": "recid", 
            "pid_value": "785910"
          }
        }
      ]
    }, 
    "resource_type": {
      "title": "Dataset", 
      "type": "dataset"
    }, 
    "title": "URLs from tweets for a 2014 sample of Twitter users and for a set of computer scientists"
  }, 
  "owners": [
    7442
  ], 
  "revision": 6, 
  "stats": {
    "downloads": 0.0, 
    "unique_downloads": 0.0, 
    "unique_views": 27.0, 
    "version_downloads": 0.0, 
    "version_unique_downloads": 0.0, 
    "version_unique_views": 27.0, 
    "version_views": 27.0, 
    "version_volume": 0.0, 
    "views": 27.0, 
    "volume": 0.0
  }, 
  "updated": "2017-11-02T08:15:27.976364+00:00"
}
27 views
0 downloads
| Statistic | All versions | This version |
| --- | --- | --- |
| Views | 27 | 27 |
| Downloads | 0 | 0 |
| Data volume | 0 Bytes | 0 Bytes |
| Unique views | 27 | 27 |
| Unique downloads | 0 | 0 |

Share

Cite as