Dataset Open Access
Al-Khatib, Khalid;
Völske, Michael;
Syed, Shahbaz;
Kolyada, Nikolay;
Stein, Benno
{ "inLanguage": { "alternateName": "eng", "@type": "Language", "name": "English" }, "description": "<p>The Webis-CMV-20 dataset comprises all available posts and comments in the <a href=\"https://reddit.com/r/changemyview\">ChangeMyView</a> subreddit from the foundation of the subreddit in 2005, until September 2017. From these, we have derived two sub-datasets for the tasks of persuasiveness prediction, and opinion malleability prediction. In addition, the corpus comprises historical posts by CMV authors, and derived personal characteristics.</p>\n\n<p><strong>Dataset specification</strong></p>\n\n<p>All files are in bzip2-compressed <a href=\"http://jsonlines.org/\">JSON Lines</a> format.</p>\n\n<ul>\n\t<li><strong>threads.jsonl:</strong> contains all the selected discussion threads from CMV</li>\n\t<li><strong>pairs.jsonl:</strong> each record contains submission, delta-comment and nondelta-comment and the comments' similarity score</li>\n\t<li><strong>posts_malleability.jsonl:</strong> contains posts for opinion malleability prediction, in the format provided in the original <a href=\"https://files.pushshift.io/reddit/\">Reddit Crawl</a> dataset</li>\n\t<li><strong>author_entity_category.jsonl:</strong> each record contains the author and a list of Wikipedia entities mentioned by the author in the messages across all subreddits. \nFor each mentioned entity we provide the following data: </li>\n</ul>\n\n<pre><code class=\"language-json\">[title, wikidata_id, wikipedia_page_id, mentioned_entity_title, wikifier_score, subreddit_name, subreddit_id, subreddit_category_name, subreddit_topcategory_name]</code></pre>\n\n<ul>\n\t<li><strong>author_liwc.jsonl:</strong> personality trait features computed with <a href=\"https://liwc.wpengine.com/\">LIWC</a> for the authors from the pairs.jsonl and posts_malleability.jsonl datasets</li>\n\t<li><strong>author_subreddit.jsonl:</strong> for each author, statistics on the number of posts (submissions/comments) across all subreddits are provided</li>\n\t<li><strong>author_subreddit_category.jsonl:</strong> similar to author_subreddit.jsonl, the statistics of all author posts are grouped by top-categories and categories according to <a href=\"https://snoopsnoo.com/subreddits/\">snoopsnoo.com</a><br>\n\t </li>\n</ul>", "license": "https://creativecommons.org/licenses/by/4.0/legalcode", "creator": [ { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@type": "Person", "name": "Al-Khatib, Khalid" }, { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@id": "https://orcid.org/0000-0002-9283-6846", "@type": "Person", "name": "V\u00f6lske, Michael" }, { "affiliation": "Leipzig University", "@id": "https://orcid.org/0000-0002-4821-1507", "@type": "Person", "name": "Syed, Shahbaz" }, { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@id": "https://orcid.org/0000-0002-6493-9557", "@type": "Person", "name": "Kolyada, Nikolay" }, { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@id": "https://orcid.org/0000-0001-9033-2217", "@type": "Person", "name": "Stein, Benno" } ], "url": "https://zenodo.org/record/3778298", "datePublished": "2020-04-30", "keywords": [ "social media", "argumentation", "persuasiveness" ], "@context": "https://schema.org/", "distribution": [ { "contentUrl": 
"https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/author_entity_category.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" }, { "contentUrl": "https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/author_liwc.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" }, { "contentUrl": "https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/author_subreddit_category.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" }, { "contentUrl": "https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/author_subreddit.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" }, { "contentUrl": "https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/pairs.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" }, { "contentUrl": "https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/posts_malleability.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" }, { "contentUrl": "https://zenodo.org/api/files/b82010cf-0103-4216-8474-fa6fe0252f25/threads.jsonl.bz2", "encodingFormat": "bz2", "@type": "DataDownload" } ], "identifier": "https://doi.org/10.5281/zenodo.3778298", "@id": "https://doi.org/10.5281/zenodo.3778298", "@type": "Dataset", "name": "Webis ChangeMyView Corpus 2020 (Webis-CMV-20)" }
| | All versions | This version |
---|---|---|
Views | 713 | 713 |
Downloads | 333 | 333 |
Data volume | 148.5 GB | 148.5 GB |
Unique views | 600 | 600 |
Unique downloads | 140 | 140 |