{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2020-01-15T21:41:46.791837+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 2, "enabled": true, "entries": { "RC_2019-04.zst": { "checksum": "md5:5651d5fc9ab9577a56be33e8f52c2bdf", "ext": "zst", "id": "5d508371-2e5c-48ba-aeb7-2dd4bc78b288", "key": "RC_2019-04.zst", "metadata": null, "mimetype": "application/octet-stream", "size": 15531201485 }, "RS_2019-04.zst": { "checksum": "md5:e24ecb20e08751f0bf3b9189860d7ac9", "ext": "zst", "id": "856efb5d-1879-4b8b-90a7-24d9d77723a7", "key": "RS_2019-04.zst", "metadata": null, "mimetype": "application/octet-stream", "size": 5587265157 } }, "order": [], "total_bytes": 21118466642 }, "id": "3608135", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/3608135/access", "access_links": "https://zenodo.org/api/records/3608135/access/links", "access_request": "https://zenodo.org/api/records/3608135/access/request", "access_users": "https://zenodo.org/api/records/3608135/access/users", "archive": "https://zenodo.org/api/records/3608135/files-archive", "archive_media": "https://zenodo.org/api/records/3608135/media-files-archive", "communities": "https://zenodo.org/api/records/3608135/communities", "communities-suggestions": "https://zenodo.org/api/records/3608135/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.3608135", "draft": "https://zenodo.org/api/records/3608135/draft", "files": "https://zenodo.org/api/records/3608135/files", "latest": "https://zenodo.org/api/records/3608135/versions/latest", "latest_html": "https://zenodo.org/records/3608135/latest", "media_files": "https://zenodo.org/api/records/3608135/media-files", "parent": "https://zenodo.org/api/records/3608134", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.3608134", "parent_html": 
"https://zenodo.org/records/3608134", "requests": "https://zenodo.org/api/records/3608135/requests", "reserve_doi": "https://zenodo.org/api/records/3608135/draft/pids/doi", "self": "https://zenodo.org/api/records/3608135", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.3608135", "self_html": "https://zenodo.org/records/3608135", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:3608135/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:3608135/sequence/default", "versions": "https://zenodo.org/api/records/3608135/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "creators": [ { "affiliations": [ { "name": "Pushshift.io" } ], "person_or_org": { "family_name": "Baumgartner", "given_name": "Jason", "name": "Baumgartner, Jason", "type": "personal" } }, { "affiliations": [ { "name": "Max Planck Institute" } ], "person_or_org": { "family_name": "Zannettou", "given_name": "Savvas", "name": "Zannettou, Savvas", "type": "personal" } }, { "affiliations": [ { "name": "University of Colorado Boulder" } ], "person_or_org": { "family_name": "Keegan", "given_name": "Brian", "name": "Keegan, Brian", "type": "personal" } }, { "affiliations": [ { "name": "Elon University" } ], "person_or_org": { "family_name": "Squire", "given_name": "Megan", "name": "Squire, Megan", "type": "personal" } }, { "affiliations": [ { "name": "Binghamton University" } ], "person_or_org": { "family_name": "Blackburn", "given_name": "Jeremy", "name": "Blackburn, Jeremy", "type": "personal" } } ], "description": "
The Pushshift Reddit Dataset
\n\nWe provide a small sample of the Pushshift Reddit dataset. The sample consists of two files:
\n\nRS_2019-04.zst: All Reddit submissions that were posted during April 2019.
\n\nRC_2019-04.zst: All Reddit comments that were posted during April 2019.
\n\nThe full dataset can be downloaded from: https://files.pushshift.io/reddit/submissions/ for submissions and https://files.pushshift.io/reddit/comments/ for comments. On the website, you can find a file for each month of our data collection. Each file is a newline-delimited JSON (ndjson) file, where each line contains the JSON object of a submission or a comment.
\n\n\n\n
\n\n
", "publication_date": "2020-01-14", "publisher": "Zenodo", "resource_type": { "id": "publication-conferencepaper", "title": { "de": "Konferenzbeitrag", "en": "Conference paper" } }, "rights": [ { "description": { "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited." }, "icon": "cc-by-icon", "id": "cc-by-4.0", "props": { "scheme": "spdx", "url": "https://creativecommons.org/licenses/by/4.0/legalcode" }, "title": { "en": "Creative Commons Attribution 4.0 International" } } ], "subjects": [ { "subject": "reddit" }, { "subject": "pushshift" } ], "title": "The Pushshift Reddit Dataset" }, "parent": { "access": { "owned_by": { "user": 87836 } }, "communities": {}, "id": "3608134", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.3608134", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.3608135", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:3608135", "provider": "oai" } }, "revision_id": 2, "stats": { "all_versions": { "data_volume": 13537998744495.0, "downloads": 1211, "unique_downloads": 733, "unique_views": 6255, "views": 6761 }, "this_version": { "data_volume": 13249158070650.0, "downloads": 1186, "unique_downloads": 715, "unique_views": 6187, "views": 6678 } }, "status": "published", "updated": "2020-01-20T17:09:13.725459+00:00", "versions": { "index": 1, "is_latest": true } }