{
  "DOI": "10.5281/zenodo.4742068",
  "abstract": "The data set consists of a METS file for each of the PDFs that were used for transcription and a directory data/page_xml that contains the transcriptions of the ground truth in PAGE-XML format. In parallel to the data set publication, a data paper will be published that contains a detailed description of the data set. As soon as it is published, we will link to it. The corresponding source code can be found here\u00a0https://github.com/millawell/ocr-data/tree/1.1",
  "author": [
    {
      "family": "David Lassner"
    },
    {
      "family": "Julius Coburger"
    },
    {
      "family": "Clemens Neudecker"
    },
    {
      "family": "Anne Baillot"
    }
  ],
  "id": "4742068",
  "issued": {
    "date-parts": [
      [
        "2021",
        "05",
        "07"
      ]
    ]
  },
  "publisher": "Zenodo",
  "title": "Data set of the paper \"Publishing an OCR ground truth data set for reuse in an unclear copyright setting\"",
  "type": "dataset",
  "version": "1.1"
}