Dataset Open Access

All Your Script Are Belong to Us: Collecting and Analyzing JavaScript Code from 10K Sites for 9 Months

Dimitris Mitropoulos; Panos Louridas; Vitalis Salis; Diomidis Spinellis


JSON Export

{
  "files": [
    {
      "links": {
        "self": "https://zenodo.org/api/files/7274a32b-0fee-46af-878a-f25d9f59a2f3/defects-jshint.json"
      }, 
      "checksum": "md5:a342f46c1cd00d322d00db54995f096b", 
      "bucket": "7274a32b-0fee-46af-878a-f25d9f59a2f3", 
      "key": "defects-jshint.json", 
      "type": "json", 
      "size": 4989467
    }, 
    {
      "links": {
        "self": "https://zenodo.org/api/files/7274a32b-0fee-46af-878a-f25d9f59a2f3/defects-retire.json"
      }, 
      "checksum": "md5:fb9aa18838570dcf633f81f5b4c54b37", 
      "bucket": "7274a32b-0fee-46af-878a-f25d9f59a2f3", 
      "key": "defects-retire.json", 
      "type": "json", 
      "size": 358634433
    }, 
    {
      "links": {
        "self": "https://zenodo.org/api/files/7274a32b-0fee-46af-878a-f25d9f59a2f3/hashes.tar.gz"
      }, 
      "checksum": "md5:0af19c80475a5397dbfe585f24d113c5", 
      "bucket": "7274a32b-0fee-46af-878a-f25d9f59a2f3", 
      "key": "hashes.tar.gz", 
      "type": "gz", 
      "size": 536530756
    }, 
    {
      "links": {
        "self": "https://zenodo.org/api/files/7274a32b-0fee-46af-878a-f25d9f59a2f3/js_evolution_data.tar.gz"
      }, 
      "checksum": "md5:6e17d9020e0b4f0f41fd5007f9d3d2e3", 
      "bucket": "7274a32b-0fee-46af-878a-f25d9f59a2f3", 
      "key": "js_evolution_data.tar.gz", 
      "type": "gz", 
      "size": 60427767991
    }
  ], 
  "owners": [
    62566
  ], 
  "doi": "10.5281/zenodo.2593266", 
  "stats": {
    "version_unique_downloads": 6.0, 
    "unique_views": 73.0, 
    "views": 82.0, 
    "downloads": 10.0, 
    "unique_downloads": 6.0, 
    "version_unique_views": 73.0, 
    "volume": 363471752069.0, 
    "version_downloads": 10.0, 
    "version_views": 82.0, 
    "version_volume": 363471752069.0
  }, 
  "links": {
    "doi": "https://doi.org/10.5281/zenodo.2593266", 
    "conceptdoi": "https://doi.org/10.5281/zenodo.2593265", 
    "bucket": "https://zenodo.org/api/files/7274a32b-0fee-46af-878a-f25d9f59a2f3", 
    "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.2593265.svg", 
    "html": "https://zenodo.org/record/2593266", 
    "latest_html": "https://zenodo.org/record/2593266", 
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.2593266.svg", 
    "latest": "https://zenodo.org/api/records/2593266"
  }, 
  "conceptdoi": "10.5281/zenodo.2593265", 
  "created": "2019-03-15T10:35:20.980587+00:00", 
  "updated": "2019-11-03T17:30:32.286585+00:00", 
  "conceptrecid": "2593265", 
  "revision": 5, 
  "id": 2593266, 
  "metadata": {
    "access_right_category": "success", 
    "doi": "10.5281/zenodo.2593266", 
    "description": "<p>We present a massive dataset (~2 TB) of client-side JavaScript code. Specifically, we have collected and stored on a daily basis JavaScript code from Alexa&#39;s Top 10000 web sites (~7.5 GB per day) for nine consecutive months. Our collection involved both inline scripts extracted from each web site&#39;s main page and external scripts linked from it. In order to aid researchers identify similar scripts and examine their popularity and evolution, we have produced hashes that represent the scripts&#39; logical structure. Furthermore, we have analyzed the resulting dataset with well-established static analysis tools, generating additional metadata including reports with quality bugs and vulnerable libraries.</p>", 
    "license": {
      "id": "CC-BY-4.0"
    }, 
    "title": "All Your Script Are Belong to Us: Collecting and Analyzing JavaScript Code from 10K Sites for 9 Months", 
    "relations": {
      "version": [
        {
          "count": 1, 
          "index": 0, 
          "parent": {
            "pid_type": "recid", 
            "pid_value": "2593265"
          }, 
          "is_last": true, 
          "last_child": {
            "pid_type": "recid", 
            "pid_value": "2593266"
          }
        }
      ]
    }, 
    "publication_date": "2019-03-14", 
    "creators": [
      {
        "orcid": "0000-0002-5061-9018", 
        "affiliation": "Athens University of Economics and Business", 
        "name": "Dimitris Mitropoulos"
      }, 
      {
        "orcid": "0000-0002-3971-4612", 
        "affiliation": "Athens University of Economics and Business", 
        "name": "Panos Louridas"
      }, 
      {
        "affiliation": "Greek Research and Technology Network", 
        "name": "Vitalis Salis"
      }, 
      {
        "orcid": "0000-0003-4231-1897", 
        "affiliation": "Athens University of Economics and Business", 
        "name": "Diomidis Spinellis"
      }
    ], 
    "meeting": {
      "acronym": "MSR '19", 
      "url": "https://2019.msrconf.org/home", 
      "dates": "26-27 May", 
      "place": "Montreal, QC, Canada", 
      "title": "16th International Conference on Mining Software Repositories"
    }, 
    "access_right": "open", 
    "resource_type": {
      "type": "dataset", 
      "title": "Dataset"
    }, 
    "related_identifiers": [
      {
        "scheme": "doi", 
        "identifier": "10.5281/zenodo.2593265", 
        "relation": "isVersionOf"
      }
    ]
  }
}
82
10
views
downloads
All versions This version
Views 82 82
Downloads 10 10
Data volume 363.5 GB 363.5 GB
Unique views 73 73
Unique downloads 6 6

Share

Cite as