Project deliverable Open Access

BigDataGrapes D3.4 - Linguistic Pipelines for Semantic Enrichment

Todor Primov; Andrey Avramov; Nikola Rusinov; Vladimir Alexiev


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.4546049">
    <!-- The record is typed as a dcat:Dataset whose DCMI type is Text. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Text"/>
    <!-- The DOI IRI of the subject is repeated as an xsd:anyURI identifier literal and as the foaf:page. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.4546049</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.4546049"/>
    <!-- Authors: each creator is an anonymous foaf:Agent that is a member of its own anonymous organization node named "Sirma AI". -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Todor Primov</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Sirma AI</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Andrey Avramov</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Sirma AI</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Nikola Rusinov</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Sirma AI</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Vladimir Alexiev</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Sirma AI</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:title>BigDataGrapes D3.4 - Linguistic Pipelines for Semantic Enrichment</dct:title>
    <!-- Publisher is an anonymous foaf:Agent named "Zenodo". -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Issue date at year precision (xsd:gYear); a second dct:issued with a full xsd:date follows the funding statements. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <!-- Funding: the grant is referenced by IRI, the funder is an anonymous organization with an identifier and a name. -->
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/EC/H2020/780751/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <!-- Issue date at day precision (xsd:date). -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-11-27</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- The Zenodo record URL is stated both as owl:sameAs and as an adms:Identifier notation. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/4546049"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/4546049</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Versioning and collection membership: links to another DOI, a Zenodo community IRI and a version label. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.4546048"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/bigdatagrapes"/>
    <owl:versionInfo>3.0</owl:versionInfo>
    <dct:description>&lt;p&gt;This deliverable is the third report on the progress of T3.4 Semantic Enrichment. It aims to describe the practical application of advanced text analytics pipelines used to extract and semantically annotate information from unstructured textual data sources from the Big Data Grapes (BDG) data pool. The report describes the practical approach of designing a source knowledge graph for wine and wine review related information; semantic data fusion with basic ontologies and thesauri of relevant terminologies from the BigDataGrapes data pool; designing named entity recognition pipelines for data extraction public wine reviews and configuration of semantic search on top of the annotated content. The demonstrated approach is generic and can be applied on any type of unstructured content (research publications, news articles, patent data, trials reports, food quality reports, etc) using any of the available in the BDG data pool terminologies (sensor data, wine varieties, etc) or any other data set available in the linked open data (LOD) cloud.&lt;/p&gt; &lt;p&gt;The work reported in the first version of the deliverable (Version 1 of &lt;strong&gt;D3.4 - Linguistic Pipelines for Semantic Enrichment&lt;/strong&gt;, reported in M12 of BDG project) was focused mostly on setting up the overall semantic enrichment workflow that must be followed, covering domain modeling; building a core knowledge graph to support the semantic enrichment; development and customization of NLP pipeline components; post-processing of the annotation schema into a corresponding RDF representation.&lt;/p&gt; &lt;p&gt;The second reported period (Version 2 of &lt;strong&gt;D3.4 - Linguistic Pipelines for Semantic Enrichment&lt;/strong&gt;, in M24 of BDG project) was planned to apply the generic semantic enrichment approach on a concrete use case and to demonstrate how end users can benefit of using semantic enrichment to navigate and browse through large sample linked data 
set (described in Version 2 of &lt;strong&gt;D4.3 - Models and Tools for Predictive Analy&lt;/strong&gt;tics over Extremely Large Datasets reported in M15 of BDG project).&lt;/p&gt; &lt;p&gt;The current work describes improvements implemented in the semantic enrichment of the data set used in Version 2 of &lt;strong&gt;D3.4 - Linguistic Pipelines for Semantic Enrichment&lt;/strong&gt; including 1) extraction and filtering of grape, wine and food concepts from the data set; 2) semantic enrichment of wine reviews textual fields with these concepts and 3) improvement of the semantic search building new search indices over the semantically enriched wine reviews.&lt;/p&gt; &lt;p&gt;In addition to the work related to the Wine Search demonstrator was developed a PubMed Central web crawler that can be configured to download fresh relevant content for research related to wine, antioxidants and other relevant bioactive compounds. The content is then processed by a text analysis pipeline which identifies instances of organic compounds of interest for the project and classify them to functional groups of compounds (e.g. flavonoids, glycosides, etc).&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice: once as an EU Publications Office authority IRI, once as a labelled info:eu-repo rights statement. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- First distribution: carries the licence and points at the DOI as its access URL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4546049"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Second distribution: the downloadable file, with its size and media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- rdf:resource alone carries the IRI; in RDF/XML a property element that has rdf:resource must be empty. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.4546049"/>
        <dcat:byteSize>2965288</dcat:byteSize>
        <!-- Spaces in the file name are percent-encoded so that the value is a valid IRI. -->
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/4546049/files/D3.4%20Linguistic%20Pipelines%20for%20Semantic%20Enrichment%20v.3%20(Submitted%20to%20EC).pdf"/>
        <!-- NOTE(review): DCAT describes dcat:byteSize as a typed numeric literal and dcat:mediaType as an IANA media type resource; both are left as plain literals here so existing consumers keep working. Confirm before tightening. -->
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <!-- Funding project node. Its IRI is the same grant IRI that the record references through frapo:isFundedBy. -->
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/EC/H2020/780751/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">780751</dct:identifier>
    <dct:title>Big Data to Enable Global Disruption of the Grapevine-powered Industries</dct:title>
    <!-- The awarding body is an anonymous organization node carrying an identifier and a name as literals. -->
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
16 views
6 downloads
| | All versions | This version |
| Views | 16 | 16 |
| Downloads | 6 | 6 |
| Data volume | 17.8 MB | 17.8 MB |
| Unique views | 11 | 11 |
| Unique downloads | 5 | 5 |

Share

Cite as