{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2018-04-11T14:42:25.726865+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 25, "enabled": true, "entries": { "README.txt": { "checksum": "md5:3a6fa77c0d5c6a63a71236a24a6eede8", "ext": "txt", "id": "81682fbd-b3dd-44b4-b730-14d71d0d100c", "key": "README.txt", "metadata": null, "mimetype": "text/plain", "size": 3043 }, "page-quality-interface.png": { "checksum": "md5:8ad67239dde6f7d20369d428ba284e06", "ext": "png", "id": "f7b4e1b6-af9f-4a83-9828-a5217653107c", "key": "page-quality-interface.png", "metadata": null, "mimetype": "image/png", "size": 271407 }, "page-quality-scores.txt": { "checksum": "md5:f58fa9ad1ee5fd4c8539b01b2b5295fc", "ext": "txt", "id": "08345023-3ccb-42b6-8b4c-bf31da1bc83c", "key": "page-quality-scores.txt", "metadata": null, "mimetype": "text/plain", "size": 180161 }, "sites-and-pages.txt": { "checksum": "md5:a5cfd4b545772ec3bf46f845fc7696b1", "ext": "txt", "id": "5283099e-66e0-4a7f-89b1-d6992ca4f6f7", "key": "sites-and-pages.txt", "metadata": null, "mimetype": "text/plain", "size": 1042424 }, "webis-web-archive-17-archives-part0.zip": { "checksum": "md5:2ec3e59b0c8f19a07318397fca1e9a1f", "ext": "zip", "id": "fc4b7fd3-4912-4117-b98b-9dfbc704ce46", "key": "webis-web-archive-17-archives-part0.zip", "metadata": null, "mimetype": "application/zip", "size": 3070458110 }, "webis-web-archive-17-archives-part1.zip": { "checksum": "md5:208d356268f62174ccb969632ddfb404", "ext": "zip", "id": "1e5a9a9a-f816-4aaf-a899-c73ae4647c0e", "key": "webis-web-archive-17-archives-part1.zip", "metadata": null, "mimetype": "application/zip", "size": 4516632497 }, "webis-web-archive-17-archives-part2.zip": { "checksum": "md5:470913f265a5423e53d5707a32c65086", "ext": "zip", "id": "289b6050-c833-404a-980b-93a74c38c8b5", "key": "webis-web-archive-17-archives-part2.zip", "metadata": 
null, "mimetype": "application/zip", "size": 2996706003 }, "webis-web-archive-17-archives-part3.zip": { "checksum": "md5:29898c5b1a9fea42aa45e77cbab23ad9", "ext": "zip", "id": "99d2c820-8d1d-4d94-a749-6e11b80d73ed", "key": "webis-web-archive-17-archives-part3.zip", "metadata": null, "mimetype": "application/zip", "size": 2032650542 }, "webis-web-archive-17-archives-part4.zip": { "checksum": "md5:6c76d9791d86cc2810d2c9a1676df7ba", "ext": "zip", "id": "b1b62eb7-7b90-4603-8415-7d97b2fe98ba", "key": "webis-web-archive-17-archives-part4.zip", "metadata": null, "mimetype": "application/zip", "size": 3899862639 }, "webis-web-archive-17-archives-part5.zip": { "checksum": "md5:c1e0e9548e615a655d8e23b3c51b771c", "ext": "zip", "id": "8400fa68-b264-4b1d-a49a-92c249f086d4", "key": "webis-web-archive-17-archives-part5.zip", "metadata": null, "mimetype": "application/zip", "size": 5313174827 }, "webis-web-archive-17-archives-part6.zip": { "checksum": "md5:fbca883e22ed8cb00d6be8e24477a47a", "ext": "zip", "id": "8ee096b2-6e6c-4250-9e92-7dca29331426", "key": "webis-web-archive-17-archives-part6.zip", "metadata": null, "mimetype": "application/zip", "size": 5398352852 }, "webis-web-archive-17-archives-part7.zip": { "checksum": "md5:3931e329b8f9e1e8bd4f3527a4551730", "ext": "zip", "id": "67e26d69-67ce-432c-91ef-477353a2b2bd", "key": "webis-web-archive-17-archives-part7.zip", "metadata": null, "mimetype": "application/zip", "size": 4220696958 }, "webis-web-archive-17-archives-part8.zip": { "checksum": "md5:edc24c195a50e43241ad269fc0f9eec2", "ext": "zip", "id": "437b252a-52ea-4834-b93d-9f0050db5d43", "key": "webis-web-archive-17-archives-part8.zip", "metadata": null, "mimetype": "application/zip", "size": 7444114251 }, "webis-web-archive-17-archives-part9.zip": { "checksum": "md5:1cec18ac83f6ebd38e3cb4bb6061481e", "ext": "zip", "id": "e897fbb5-7618-4ac6-b748-85d629eaec47", "key": "webis-web-archive-17-archives-part9.zip", "metadata": null, "mimetype": "application/zip", "size": 
4862162835 }, "webis-web-archive-17-dom-snapshots.zip": { "checksum": "md5:de36dc487398e153551b83647a801562", "ext": "zip", "id": "d324578d-355d-4d5d-ba45-4a873166ae88", "key": "webis-web-archive-17-dom-snapshots.zip", "metadata": null, "mimetype": "application/zip", "size": 1265691826 }, "webis-web-archive-17-screenshots-part0.zip": { "checksum": "md5:21c9a6db2d739c52263cd2f565cfb6e5", "ext": "zip", "id": "d42fc633-d716-4a4d-89f4-133167215e81", "key": "webis-web-archive-17-screenshots-part0.zip", "metadata": null, "mimetype": "application/zip", "size": 5724356432 }, "webis-web-archive-17-screenshots-part1.zip": { "checksum": "md5:523707ee732e3502b670b37a38908faf", "ext": "zip", "id": "e31cf544-c380-49e5-8ff2-697f1d01e4d0", "key": "webis-web-archive-17-screenshots-part1.zip", "metadata": null, "mimetype": "application/zip", "size": 6231683755 }, "webis-web-archive-17-screenshots-part2.zip": { "checksum": "md5:41076c729087e061aa2f125db643317f", "ext": "zip", "id": "298f4f48-08c9-40eb-93fc-b2a5fbd0a759", "key": "webis-web-archive-17-screenshots-part2.zip", "metadata": null, "mimetype": "application/zip", "size": 5249581760 }, "webis-web-archive-17-screenshots-part3.zip": { "checksum": "md5:65fb839103987758a67693047524ee9c", "ext": "zip", "id": "2462289d-b6e5-4f40-8d03-064509da5d94", "key": "webis-web-archive-17-screenshots-part3.zip", "metadata": null, "mimetype": "application/zip", "size": 4580065196 }, "webis-web-archive-17-screenshots-part4.zip": { "checksum": "md5:262a90e4cd847a48b980376b03aad0f8", "ext": "zip", "id": "014229f7-2471-49f2-abbf-55ae711fc44a", "key": "webis-web-archive-17-screenshots-part4.zip", "metadata": null, "mimetype": "application/zip", "size": 5064595999 }, "webis-web-archive-17-screenshots-part5.zip": { "checksum": "md5:15eb19b14ecd6ed8d16d544a749eb1ff", "ext": "zip", "id": "dfcd8781-7c8a-4f80-a761-631efb940db4", "key": "webis-web-archive-17-screenshots-part5.zip", "metadata": null, "mimetype": "application/zip", "size": 4286165720 }, 
"webis-web-archive-17-screenshots-part6.zip": { "checksum": "md5:f4b185bf714afd6de6f2c56c9bb59f47", "ext": "zip", "id": "639ff3f7-ec5e-4028-a164-4e549e51b276", "key": "webis-web-archive-17-screenshots-part6.zip", "metadata": null, "mimetype": "application/zip", "size": 5323192784 }, "webis-web-archive-17-screenshots-part7.zip": { "checksum": "md5:0838751d6765630030a3208ebb7ddef7", "ext": "zip", "id": "c43f739b-d37a-47ca-b891-fa4686c8b05d", "key": "webis-web-archive-17-screenshots-part7.zip", "metadata": null, "mimetype": "application/zip", "size": 5756463424 }, "webis-web-archive-17-screenshots-part8.zip": { "checksum": "md5:6d31ccce3043b80a5c2d14bd76e02a2d", "ext": "zip", "id": "f465481e-190b-437d-8a0c-5dd095be2253", "key": "webis-web-archive-17-screenshots-part8.zip", "metadata": null, "mimetype": "application/zip", "size": 6037756761 }, "webis-web-archive-17-screenshots-part9.zip": { "checksum": "md5:c87e29c8731f9d19760183236232d019", "ext": "zip", "id": "f4f08ea1-cf51-4510-956e-bccebe4f1545", "key": "webis-web-archive-17-screenshots-part9.zip", "metadata": null, "mimetype": "application/zip", "size": 6187749812 } }, "order": [], "total_bytes": 99463612018 }, "id": "1002204", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/1002204/access", "access_links": "https://zenodo.org/api/records/1002204/access/links", "access_request": "https://zenodo.org/api/records/1002204/access/request", "access_users": "https://zenodo.org/api/records/1002204/access/users", "archive": "https://zenodo.org/api/records/1002204/files-archive", "archive_media": "https://zenodo.org/api/records/1002204/media-files-archive", "communities": "https://zenodo.org/api/records/1002204/communities", "communities-suggestions": "https://zenodo.org/api/records/1002204/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.1002204", "draft": "https://zenodo.org/api/records/1002204/draft", "files": "https://zenodo.org/api/records/1002204/files", 
"latest": "https://zenodo.org/api/records/1002204/versions/latest", "latest_html": "https://zenodo.org/records/1002204/latest", "media_files": "https://zenodo.org/api/records/1002204/media-files", "parent": "https://zenodo.org/api/records/1002203", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.1002203", "parent_html": "https://zenodo.org/records/1002203", "requests": "https://zenodo.org/api/records/1002204/requests", "reserve_doi": "https://zenodo.org/api/records/1002204/draft/pids/doi", "self": "https://zenodo.org/api/records/1002204", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.1002204", "self_html": "https://zenodo.org/records/1002204", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:1002204/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:1002204/sequence/default", "versions": "https://zenodo.org/api/records/1002204/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Bauhaus-Universit\u00e4t Weimar" } ], "person_or_org": { "family_name": "Kiesel", "given_name": "Johannes", "identifiers": [ { "identifier": "0000-0002-1617-6508", "scheme": "orcid" } ], "name": "Kiesel, Johannes", "type": "personal" } }, { "affiliations": [ { "name": "Leipzig University" } ], "person_or_org": { "family_name": "Potthast", "given_name": "Martin", "identifiers": [ { "identifier": "0000-0003-2451-0665", "scheme": "orcid" } ], "name": "Potthast, Martin", "type": "personal" } }, { "affiliations": [ { "name": "Halle University" } ], "person_or_org": { "family_name": "Hagen", "given_name": "Matthias", "identifiers": [ { "identifier": "0000-0002-9733-2890", "scheme": "orcid" } ], "name": "Hagen, Matthias", "type": "personal" } }, { "affiliations": [ { "name": "Ulm University" } ], "person_or_org": { "family_name": "Kneist", "given_name": "Florian", "name": "Kneist, Florian", "type": "personal" } }, { "affiliations": [ { "name": 
"Bauhaus-Universit\u00e4t Weimar" } ], "person_or_org": { "family_name": "Stein", "given_name": "Benno", "identifiers": [ { "identifier": "0000-0001-9033-2217", "scheme": "orcid" } ], "name": "Stein, Benno", "type": "personal" } } ], "description": "
The Webis-Web-Archive-17 comprises a total of 10,000 web page archives from mid-2017 that were carefully sampled from the Common Crawl to involve a mixture of high-ranking and low-ranking web pages. The dataset contains the web archive files, HTML DOM, and screenshots of each web page, as well as per-page annotations of visual web archive quality. See this overview for all datasets that built upon this one. If you use this dataset in your research, please cite it using this paper.
", "publication_date": "2017-10-04", "publisher": "Zenodo", "related_identifiers": [ { "identifier": "https://github.com/webis-de/webis-web-archiver", "relation_type": { "id": "issupplementedby", "title": { "de": "Wird erg\u00e4nzt durch", "en": "Is supplemented by" } }, "resource_type": { "id": "software", "title": { "de": "Software", "en": "Software" } }, "scheme": "url" }, { "identifier": "https://zenodo.org/record/2549837", "relation_type": { "id": "issupplementedby", "title": { "de": "Wird erg\u00e4nzt durch", "en": "Is supplemented by" } }, "resource_type": { "id": "dataset", "title": { "de": "Datensatz", "en": "Dataset" } }, "scheme": "url" }, { "identifier": "https://zenodo.org/record/3354902", "relation_type": { "id": "issupplementedby", "title": { "de": "Wird erg\u00e4nzt durch", "en": "Is supplemented by" } }, "resource_type": { "id": "dataset", "title": { "de": "Datensatz", "en": "Dataset" } }, "scheme": "url" }, { "identifier": "https://webis.de/publications.html?q=10.1145%2F3239574", "relation_type": { "id": "isdocumentedby", "title": { "de": "Wird dokumentiert von", "en": "Is documented by" } }, "resource_type": { "id": "publication-article", "title": { "de": "Zeitschriftenartikel", "en": "Journal article" } }, "scheme": "url" } ], "resource_type": { "id": "dataset", "title": { "de": "Datensatz", "en": "Dataset" } }, "rights": [ { "description": { "en": "Permits almost any use subject to providing credit and license notice. Frequently used for media assets and educational materials. The most common license for Open Access scientific publications. Not recommended for software." 
}, "icon": "cc-by-sa-icon", "id": "cc-by-sa-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by-sa/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution Share Alike 4.0 International" } } ], "subjects": [ { "subject": "web" }, { "subject": "web page" }, { "subject": "web archive" }, { "subject": "web archive quality" } ], "title": "Webis-Web-Archive-17", "version": "1.0.0" }, "parent": { "access": { "owned_by": { "user": 65747 } }, "communities": { "default": "32031607-a92a-4ccf-9ca7-6cc3a8a3426f", "entries": [ { "access": { "member_policy": "open", "members_visibility": "public", "record_policy": "open", "review_policy": "open", "visibility": "public" }, "children": { "allow": false }, "created": "2018-06-09T20:15:32.768721+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "id": "32031607-a92a-4ccf-9ca7-6cc3a8a3426f", "links": {}, "metadata": { "description": "The Webis Group addresses challenges of the information society by conducting basic research, developing technology, and implementing and evaluating prototypes for future information systems.", "page": "Data curated by the Webis Group (webis.de)\n",
"title": "Webis", "type": { "id": "organization" }, "website": "https://webis.de" }, "revision_id": 1, "slug": "webis", "updated": "2023-10-16T06:20:44.424792+00:00" } ], "ids": [ "32031607-a92a-4ccf-9ca7-6cc3a8a3426f" ] }, "id": "1002203", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.1002203", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.1002204", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:1002204", "provider": "oai" } }, "revision_id": 22, "stats": { "all_versions": { "data_volume": 29989829419986.0, "downloads": 698836, "unique_downloads": 342153, "unique_views": 1887, "views": 2057 }, "this_version": { "data_volume": 3150715463296.0, "downloads": 687198, "unique_downloads": 338308, "unique_views": 1093, "views": 1186 } }, "status": "published", "updated": "2021-01-21T11:50:36.448720+00:00", "versions": { "index": 1, "is_latest": false } }