Dataset Open Access
Potthast, Martin;
Stein, Benno;
Eiselt, Andreas;
Barrón-Cedeño, Alberto;
Rosso, Paolo
{ "inLanguage": { "alternateName": "eng", "@type": "Language", "name": "English" }, "description": "<p>This corpus is outdated. Please use its successor PAN-PC-11: https://doi.org/10.5281/zenodo.3250095</p>\n\n<p>The PAN plagiarism corpus 2010 (PAN-PC-10) is a corpus for the evaluation of automatic plagiarism detection algorithms. For research purposes the corpus can be used free of charge.</p>\n\n<p>The PAN-PC-10 contains documents in which artificial plagiarism has been inserted automatically as well as documents in which simulated plagiarism has been inserted manually. The former have been constructed using a so-called random plagiarist, a computer program which constructs plagiarism according to a number of parameters, while the latter have been obtained with crowdsourcing via Amazon's Mechanical Turk.</p>", "license": "https://creativecommons.org/licenses/by/4.0/legalcode", "creator": [ { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@id": "https://orcid.org/0000-0003-2451-0665", "@type": "Person", "name": "Potthast, Martin" }, { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@id": "https://orcid.org/0000-0001-9033-2217", "@type": "Person", "name": "Stein, Benno" }, { "affiliation": "Bauhaus-Universit\u00e4t Weimar", "@type": "Person", "name": "Eiselt, Andreas" }, { "affiliation": "Universidad Polit\u00e9cnica de Valencia", "@type": "Person", "name": "Barr\u00f3n-Cede\u00f1o, Alberto" }, { "affiliation": "Universidad Polit\u00e9cnica de Valencia", "@type": "Person", "name": "Rosso, Paolo" } ], "url": "https://zenodo.org/record/3250123", "datePublished": "2010-05-01", "keywords": [ "plagiarism", "plagiarism detection", "documents", "PAN", "2010" ], "@context": "https://schema.org/", "distribution": [ { "contentUrl": 
"https://zenodo.org/api/files/e3a35c8d-4d87-4f55-9f53-91c4d8165758/pan-plagiarism-corpus-2010.part2.rar", "encodingFormat": "rar", "@type": "DataDownload" } ], "identifier": "https://doi.org/10.5281/zenodo.3250123", "@id": "https://doi.org/10.5281/zenodo.3250123", "@type": "Dataset", "name": "PAN Plagiarism Corpus 2010 (PAN-PC-10)" }
| | All versions | This version |
---|---|---|
Views | 624 | 625 |
Downloads | 426 | 426 |
Data volume | 388.3 GB | 388.3 GB |
Unique views | 570 | 571 |
Unique downloads | 191 | 191 |