Video/Audio Open Access

CORPUS17: a philological French corpus for 17th century

Simon Gabay; Alexandre Bartz; Yohann Deguin


DataCite XML Export

<?xml version='1.0' encoding='utf-8'?>
<!--
  DataCite (kernel-4) metadata record for DOI 10.5281/zenodo.4088669,
  published by Zenodo. Element order follows the original export.
-->
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns="http://datacite.org/schema/kernel-4"
          xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
  <identifier identifierType="DOI">10.5281/zenodo.4088669</identifier>
  <creators>
    <creator>
      <creatorName>Simon Gabay</creatorName>
      <affiliation>Universités de Neuchâtel et de Genève, Neuchâtel and Genève, Switzerland</affiliation>
    </creator>
    <creator>
      <creatorName>Alexandre Bartz</creatorName>
      <affiliation>École des Chartes, Paris, France</affiliation>
    </creator>
    <creator>
      <creatorName>Yohann Deguin</creatorName>
      <affiliation>Université de Rennes, Rennes, France</affiliation>
    </creator>
  </creators>
  <titles>
    <title>CORPUS17: a philological French corpus for 17th century</title>
  </titles>
  <publisher>Zenodo</publisher>
  <publicationYear>2020</publicationYear>
  <!-- One keyword per subject element so each term is individually addressable. -->
  <subjects>
    <subject>17th c. French</subject>
    <subject>OCR</subject>
    <subject>normalisation</subject>
    <subject>lemmatisation</subject>
    <subject>POS-tagging</subject>
    <subject>named entities</subject>
    <subject>digital humanities</subject>
    <subject>XML-TEI</subject>
  </subjects>
  <dates>
    <date dateType="Issued">2020-10-14</date>
  </dates>
  <language>fr</language>
  <resourceType resourceTypeGeneral="Audiovisual"/>
  <alternateIdentifiers>
    <alternateIdentifier alternateIdentifierType="url">https://zenodo.org/record/4088669</alternateIdentifier>
  </alternateIdentifiers>
  <relatedIdentifiers>
    <relatedIdentifier relatedIdentifierType="DOI" relationType="Continues" resourceTypeGeneral="ConferencePaper">10.1145/3423603.3424002</relatedIdentifier>
    <relatedIdentifier relatedIdentifierType="DOI" relationType="IsVersionOf">10.5281/zenodo.4088668</relatedIdentifier>
  </relatedIdentifiers>
  <rightsList>
    <rights rightsURI="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International</rights>
    <rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
  </rightsList>
  <!--
    The abstract text is an HTML fragment carried as escaped character data
    (a paragraph tag and an ndash entity reference); consumers that want
    plain text must unescape and strip the HTML themselves.
  -->
  <descriptions>
    <description descriptionType="Abstract">&lt;p&gt;We investigate the creation of a 17th c. French literary corpus. We present the main options regarding available standards, the training data we created and the efficiency of the models produced for OCR, spelling normalization, and lemmatization &amp;ndash; always with open-source solutions. We also present our encoding choices and the global logic of a corpus designed as a virtuous circle, enhancing automatically the tools that are used for its construction.&lt;/p&gt;</description>
  </descriptions>
</resource>
25
717
views
downloads
All versions / This version
Views: 25 / 25
Downloads: 717 / 717
Data volume: 22.1 GB / 22.1 GB
Unique views: 23 / 23
Unique downloads: 606 / 606

Share

Cite as