There is a newer version of this record available.

Dataset Open Access

CancerMine

Jake Lever


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.5157772">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.5157772</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.5157772"/>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Jake Lever</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Stanford University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>CancerMine</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2021</dct:issued>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-08-04</dct:issued>
    <owl:sameAs rdf:resource="https://zenodo.org/record/5157772"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/5157772</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.1156241"/>
    <dct:description>&lt;p&gt;This describes the output files for the &lt;a href="https://github.com/jakelever/cancermine"&gt;CancerMine&lt;/a&gt; project. These files are loaded directly by the &lt;a href="http://bionlp.bcgsc.ca/cancermine/"&gt;CancerMine viewer&lt;/a&gt;. The code for this viewer is available in the CancerMine Github repo if you want to run it independently. Each file is a tab-delimited file with a header, no comments and no quoting.&lt;/p&gt; &lt;p&gt;You likely want &lt;strong&gt;cancermine_collated.tsv&lt;/strong&gt; if you just want the list of cancer gene roles. If you want the supporting sentences, look at &lt;strong&gt;cancermine_sentences.tsv&lt;/strong&gt;. You can use the &lt;em&gt;matching_id&lt;/em&gt; column to connect the two files. If you want to dig further and are okay with a higher false positive rate, look at &lt;strong&gt;cancermine_unfiltered.tsv&lt;/strong&gt;.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;cancermine_collated.tsv:&lt;/strong&gt; This contains the cancer gene roles with citation counts supporting them. It contains the normalized cancer and gene names along with IDs for HUGO, Entrez Gene and the Disease Ontology.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;cancermine_sentences.tsv:&lt;/strong&gt; This contains the supporting sentences for the cancer gene roles in the collated file. Each row is a single supporting sentence for one cancer gene role. This file contains information on the source publication (e.g. journal, publication date, etc), the actual sentence and the cancer gene role extracted.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;cancermine_unfiltered.tsv:&lt;/strong&gt; This is the raw output of the applyModelsToSentences.py script across all of PubMed, PubMed Central Open Access and PubMed Central Author Manuscript Collection. It contains every predicted relation with a prediction score above 0.5. So this may contain many false positives. Each row contains information on the publication (e.g. journal, publication date, etc) along with the sentence and the specific cancer gene role extracted (with HUGO, Entrez Gene and Disease Ontology IDs). This file is further processed to create the other two.&lt;/p&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/publicdomain/zero/1.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5157772"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution entry for the file cancermine_collated.tsv. -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- accessURL/downloadURL are IRI references: a property element that
             carries rdf:resource must be empty in RDF/XML, so the URL is not
             repeated as text content. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5157772"/>
        <dcat:byteSize>2317223</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5157772/files/cancermine_collated.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution entry for the file cancermine_sentences.tsv. -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- accessURL/downloadURL are IRI references: a property element that
             carries rdf:resource must be empty in RDF/XML, so the URL is not
             repeated as text content. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5157772"/>
        <dcat:byteSize>49185687</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5157772/files/cancermine_sentences.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution entry for the file cancermine_unfiltered.tsv. -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- accessURL/downloadURL are IRI references: a property element that
             carries rdf:resource must be empty in RDF/XML, so the URL is not
             repeated as text content. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.5157772"/>
        <dcat:byteSize>154458314</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/5157772/files/cancermine_unfiltered.tsv"/>
        <dcat:mediaType>text/tab-separated-values</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
4,086
2,546
views
downloads
All versions This version
Views 4,086 40
Downloads 2,546 27
Data volume 69.5 GB 952.3 MB
Unique views 3,319 40
Unique downloads 1,119 20

Share

Cite as