Dataset Open Access

ChroniclItaly 3.0. A deep-learning, contextually enriched digital heritage collection of Italian immigrant newspapers published in the USA, 1898-1936.

Lorella Viola; Antonio Maria Fiscarelli


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4596345">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4596345</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
    <!-- Creator: a foaf:Agent identified by its ORCID IRI, carrying the bare
         ORCID iD as dct:identifier, a name, and one organizational membership.
         Written as a typed node; this yields the same triples as an
         rdf:Description with an explicit rdf:type of foaf:Agent. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-9994-0841">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-9994-0841</dct:identifier>
        <foaf:name>Lorella Viola</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Centre for Contemporary and Digital History (C2DH) - University of Luxembourg</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator: a foaf:Agent identified by its ORCID IRI, carrying the bare
         ORCID iD as dct:identifier and a name; no affiliation is recorded.
         Written as a typed node; this yields the same triples as an
         rdf:Description with an explicit rdf:type of foaf:Agent. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-0287-4388">
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-0287-4388</dct:identifier>
        <foaf:name>Antonio Maria Fiscarelli</foaf:name>
      </foaf:Agent>
    </dct:creator>
    <dct:title>ChroniclItaly 3.0. A deep-learning, contextually enriched digital heritage collection of Italian immigrant newspapers published in the USA, 1898-1936.</dct:title>
    <!-- Publisher of the dataset: a foaf:Agent without an rdf:about IRI
         (a blank node), described only by its foaf:name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <dcat:keyword>Italian diaspora</dcat:keyword>
    <dcat:keyword>Digital heritage</dcat:keyword>
    <dcat:keyword>Contextual enrichment</dcat:keyword>
    <dcat:keyword>AI for digital heritage</dcat:keyword>
    <dcat:keyword>American ethnic press</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-03-11</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ITA"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/4596345"/>
    <!-- Alternate identifier: the Zenodo record URL, given as a skos:notation
         typed xsd:anyURI.
         NOTE(review): the schemeAgency value "url" reads like an identifier
         scheme name rather than an issuing agency; confirm intended usage. -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4596345</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf rdf:resource="https://doi.org/10.24416/UU01-T4YMOW"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.24416/UU01-4MECRO"/>
    <foaf:page rdf:resource="http://issn.org/resource/ISSN/1613-0073"/>
    <dct:relation rdf:resource="https://c2dh.shinyapps.io/dexter/"/>
    <foaf:page rdf:resource="https://github.com/lorellav/DeXTER-DeepTextMiner"/>
    <dct:source rdf:resource="https://chroniclingamerica.loc.gov/"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4596344"/>
    <owl:versionInfo>v3.0.0</owl:versionInfo>
    <dct:description>&lt;p&gt;This&amp;nbsp;open access collection includes the digitized front pages of 10 Italian language newspapers published in California, Connecticut, Pennsylvania, Vermont, and West Virginia. It totals 8,653 issues and contains 21,454,455 words. The&amp;nbsp;titles are:&amp;nbsp;&lt;em&gt;L&amp;rsquo;Italia&lt;/em&gt;,&amp;nbsp;&lt;em&gt;Cronaca sovversiva&lt;/em&gt;,&amp;nbsp;&lt;em&gt;La libera parola&lt;/em&gt;,&amp;nbsp;&lt;em&gt;The patriot&lt;/em&gt;,&amp;nbsp;&lt;em&gt;La ragione&lt;/em&gt;,&amp;nbsp;&lt;em&gt;La rassegna&lt;/em&gt;,&amp;nbsp;&lt;em&gt;La sentinella del West Virginia&lt;/em&gt;,&amp;nbsp;&lt;em&gt;L&amp;rsquo;Indipendente&lt;/em&gt;,&amp;nbsp;&lt;em&gt;La Sentinella&lt;/em&gt;, and&amp;nbsp;&lt;em&gt;La Tribuna del Connecticut&lt;/em&gt;. The material was collected from&amp;nbsp;&lt;a href="https://chroniclingamerica.loc.gov/newspapers/"&gt;&lt;em&gt;Chronicling America&lt;/em&gt;&lt;/a&gt;, an Internet-based, searchable database of U.S. newspapers published in the United States from 1789 to 1963 made available by the Library of Congress. The corpus features mainstream (&lt;em&gt;prominenti&lt;/em&gt;), anarchic (&lt;em&gt;sovversivi&lt;/em&gt;), and independent newspapers thus providing a very nuanced picture of the Italian immigrant community in the United States at the turn of the twentieth century. To promote transparency, the collection includes two versions of &lt;em&gt;ChroniclItaly 3.0&lt;/em&gt;: unprocessed (as it was collected from &lt;em&gt;Chronicling America&lt;/em&gt;) and processed (with pre-processing interventions). 
Users can also find the data-sets including all the outputs from all the enrichment steps and&amp;nbsp;post-intervention: named entity recognition (NER), geo-coding, sentiment analysis, and network analysis in addition to&amp;nbsp;the readme.txt file that helps users navigate the folders&amp;nbsp;and the metadata&amp;nbsp;file containing relevant information.&amp;nbsp;The code used to perform all the interventions is available at this GitHub repository&amp;nbsp;https://github.com/lorellav/DeXTER-DeepTextMiner. Finally, all the enrichment outputs&amp;nbsp;can be explored in the&amp;nbsp;interactive app &lt;strong&gt;DeXTER&lt;/strong&gt; available at&amp;nbsp;https://c2dh.shinyapps.io/dexter/.&lt;/p&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <!-- Access rights expressed as a dct:RightsStatement identified by the
         URI info:eu-repo/semantics/openAccess and labelled "Open Access". -->
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries the license (Creative Commons BY 4.0 legal
         code) and the dataset DOI as its access URL; it lists no file-level
         details such as size, download URL or media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: ChroniclItaly_3.0_original.zip.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>96839242</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/ChroniclItaly_3.0_original.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: ChroniclItaly_3.0_processed.zip.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>84883881</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/ChroniclItaly_3.0_processed.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: DeXTER_readme.txt.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>749</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/DeXTER_readme.txt"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: geocoding.rar (no media type is recorded).
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>20766</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/geocoding.rar"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: the metadata spreadsheet "Metadata_ChroniclItaly 3.0.xlsx".
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals.
         The space in the file name is percent-encoded (%20) so that the
         download URL is a valid URI. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>28119</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/Metadata_ChroniclItaly%203.0.xlsx"/>
        <dcat:mediaType>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: NER_corpora_tagged.zip.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>228707170</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/NER_corpora_tagged.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: NER_dataframes_intervention.zip.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>57819697</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/NER_dataframes_intervention.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: NER_dataframes.zip.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>98909806</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/NER_dataframes.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: NER_sentiment.zip.
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>19043721</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/NER_sentiment.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution: SNA.rar (no media type is recorded).
         accessURL and downloadURL are IRI references (rdf:resource) because
         DCAT defines both properties as pointing to resources, not literals. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4596345"/>
        <dcat:byteSize>6022380</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4596345/files/SNA.rar"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
117 views
17 downloads
All versions This version
Views 117 117
Downloads 17 17
Data volume 801.2 MB 801.2 MB
Unique views 95 95
Unique downloads 6 6

Share

Cite as