Dataset Open Access

Relate-estimated coalescence rates, allele ages, and selection p-values for the 1000 Genomes Project

Speidel, Leo; Forest, Marie; Shi, Sinan; Myers, Simon R.


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:cnt="http://www.w3.org/2011/content#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3234689">
    <!-- Classifies this record as a dataset in both DCAT (rdf:type) and DCMI type (dct:type) vocabularies. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <!-- The DOI URL is given twice: as an xsd:anyURI identifier literal and as the foaf:page link. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3234689</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3234689"/>
    <!-- Creator 1 of 4: a foaf:Agent identified by an orcid.org URI in rdf:about. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-4644-8033">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <!-- foaf:name is the "Family, Given" display form; the parts are repeated separately below. -->
        <foaf:name>Speidel, Leo</foaf:name>
        <foaf:givenName>Leo</foaf:givenName>
        <foaf:familyName>Speidel</foaf:familyName>
        <!-- Affiliation: an organization node that has only a name (no rdf:about, so it is a blank node). -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Department of Statistics, University of Oxford</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Creator 2 of 4: rdf:Description has no rdf:about, so this agent is a blank node (no identifier URI is supplied for it here). -->
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <!-- foaf:name is the "Family, Given" display form; the parts are repeated separately below. -->
        <foaf:name>Forest, Marie</foaf:name>
        <foaf:givenName>Marie</foaf:givenName>
        <foaf:familyName>Forest</foaf:familyName>
        <!-- Affiliation: an organization node that has only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Université du Québec à Montréal, Montréal, Canada</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Creator 3 of 4: rdf:Description has no rdf:about, so this agent is a blank node (no identifier URI is supplied for it here). -->
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <!-- foaf:name is the "Family, Given" display form; the parts are repeated separately below. -->
        <foaf:name>Shi, Sinan</foaf:name>
        <foaf:givenName>Sinan</foaf:givenName>
        <foaf:familyName>Shi</foaf:familyName>
        <!-- Affiliation: an organization node that has only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Department of Statistics, University of Oxford</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Creator 4 of 4: a foaf:Agent identified by an orcid.org URI in rdf:about. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-2585-9626">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <!-- foaf:name is the "Family, Given" display form; the given name here includes the middle initial. -->
        <foaf:name>Myers, Simon R.</foaf:name>
        <foaf:givenName>Simon R.</foaf:givenName>
        <foaf:familyName>Myers</foaf:familyName>
        <!-- Affiliation: an organization node that has only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Department of Statistics, University of Oxford</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Dataset title as a plain literal (no xml:lang or datatype). -->
    <dct:title>Relate-estimated coalescence rates, allele ages, and selection p-values for the 1000 Genomes Project</dct:title>
    <!-- Publisher is written as a typed node element (foaf:Agent) carrying only a name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued occurs twice in this block: here as a year (xsd:gYear) and further down as a full date (xsd:date). -->
    <!-- NOTE(review): DCAT-AP appears to allow at most one dct:issued per dataset; confirm that consumers accept both values. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2019</dct:issued>
    <!-- Free-text keywords, one dcat:keyword element per term. -->
    <dcat:keyword>Genetics</dcat:keyword>
    <dcat:keyword>Genealogy</dcat:keyword>
    <dcat:keyword>Population size</dcat:keyword>
    <dcat:keyword>Allele age</dcat:keyword>
    <dcat:keyword>Positive selection</dcat:keyword>
    <dcat:keyword>1000 Genomes Project</dcat:keyword>
    <!-- Full issue date; same year as the xsd:gYear value above. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2019-05-29</dct:issued>
    <!-- Language is referenced by URI from the EU Publications Office language authority (ENG). -->
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- Declares the zenodo.org record URL to denote the same resource as the DOI-identified dataset. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/3234689"/>
    <!-- Secondary identifier: the zenodo.org record URL, carried as an xsd:anyURI skos:notation. -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3234689</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <!-- Points to a different DOI (suffix 3234688); presumably the DOI covering all versions of this record (TODO confirm). -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3234688"/>
    <!-- Version label as a plain literal. -->
    <owl:versionInfo>v1.0.0</owl:versionInfo>
    <dct:description>&lt;p&gt;&lt;strong&gt;Overview&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;Coalescence rates, allele ages, and p-values for evidence of positive selection calculated for 2478&amp;nbsp;samples of the&amp;nbsp;1000 Genomes Project&amp;nbsp;using Relate.&lt;/p&gt; &lt;p&gt;We estimated the joint genealogy of all 1000 GP populations and then extracted the embedded genealogy for each population.&lt;br&gt; For the genealogy of each population, we jointly estimated the population size history and branch lengths.&amp;nbsp;&lt;br&gt; Variants segregating in more than one&amp;nbsp;population&amp;nbsp;therefore have&amp;nbsp;correlated but different allele ages in each population.&lt;/p&gt; &lt;p&gt;Please refer to&amp;nbsp;&lt;a href="https://www.nature.com/articles/s41588-019-0484-x"&gt;Speidel et al.&amp;nbsp;Nature Genetics (2019)&lt;/a&gt;&amp;nbsp;for more details or email leo.speidel@outlook.com for any queries.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;Coalescence rates&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;The zipped directory&amp;nbsp;coalescence_rates.zip&amp;nbsp;contains coalescence rates for 26 populations in the 1000 Genomes Project data set.&lt;/p&gt; &lt;ul&gt; &lt;li&gt;The .coal files show the haploid coalescence rates, please refer to the&amp;nbsp;&lt;a href="https://myersgroup.github.io/relate/modules.html#PopulationSizeScript_FileFormats"&gt;Relate documentation&lt;/a&gt;&amp;nbsp;for the file format.&lt;/li&gt; &lt;li&gt;The popsize.RData file is an R data frame storing the diploid population sizes (0.5/coalescence rate) calculated using the .coal files. The columns of this data frame, named &amp;quot;pop_size&amp;quot;,&amp;nbsp;are &lt;ul&gt; &lt;li&gt;gens_ago: Time in generations at which epoch starts. 
(To get years from generations, we multiply by 28.)&lt;/li&gt; &lt;li&gt;population_size: Diploid population size in this epoch.&lt;/li&gt; &lt;li&gt;population: Name of population&amp;nbsp;&lt;/li&gt; &lt;li&gt;region: Name of region (AFR, AMR, EAS, EUR, SAS)&lt;/li&gt; &lt;/ul&gt; &lt;/li&gt; &lt;/ul&gt; &lt;p&gt;&lt;strong&gt;Allele ages and selection p-values&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;The zipped directories&amp;nbsp;allele_ages_*.zip&amp;nbsp;contain&amp;nbsp;R&amp;nbsp;data frames for each 1000GP population storing allele ages and selection p-values.&lt;br&gt; Please note that only mutations that segregate in the population and map to a unique branch in the Relate-estimated marginal trees are included. Selection p-values are only provided for mutations of DAF &amp;gt; 2 that pass quality filters (see Speidel et al., 2019).&amp;nbsp;&lt;/p&gt; &lt;p&gt;To get an age estimate for a neutral mutation, use&amp;nbsp;0.5*(lower_age + upper_age). To get years from generations, we multiply by 28.&lt;/p&gt; &lt;p&gt;The columns of these&amp;nbsp;data frames, named &amp;quot;allele_ages&amp;quot;,&amp;nbsp;are&lt;/p&gt; &lt;ul&gt; &lt;li&gt;CHR: chromosome index&lt;/li&gt; &lt;li&gt;BP: base-pair position (GRCh37)&lt;/li&gt; &lt;li&gt;ID: id of SNP&lt;/li&gt; &lt;li&gt;lower_age: Age in generations of coalescence event at the lower end of the branch onto which the mutation maps&lt;/li&gt; &lt;li&gt;upper_age: Age in generations of coalescence event at the upper end of the branch onto which the mutation maps&lt;/li&gt; &lt;li&gt;ancestral/derived: Ancestral/derived allele&lt;/li&gt; &lt;li&gt;upstream: Upstream (5&amp;#39;) allele&lt;/li&gt; &lt;li&gt;downstream: Downstream (3&amp;#39;) allele&lt;/li&gt; &lt;li&gt;DAF: Derived-allele frequency&lt;/li&gt; &lt;li&gt;pvalue: log10 p-value for selection evidence&lt;/li&gt; &lt;/ul&gt;</dct:description>
    <!-- Two supplementary descriptions. Both set xml:lang to the empty string, i.e. no language is declared for these literals. -->
    <dct:description xml:lang="">For R object files, use load() to load data frames into R.</dct:description>
    <!-- This literal is a JSON object with a "references" array; RDF consumers see it as plain text and must parse it themselves. -->
    <dct:description xml:lang="">{"references": ["Speidel et al., Nature Genetics 2019, A method for genome-wide genealogy estimation for thousands of samples. https://doi.org/10.1038/s41588-019-0484-x"]}</dct:description>
    <!-- Access rights are stated twice: first by URI from the EU Publications Office access-right authority (PUBLIC). -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <!-- Second statement: an info:eu-repo openAccess rights statement with a human-readable label. -->
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Single distribution: licensed under the Creative Commons BY 4.0 legal code URL and accessed through the dataset DOI. -->
    <!-- No dcat:downloadURL or per-file entries are listed in this distribution. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="http://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3234689"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
474 views
217 downloads
All versions This version
Views 474 474
Downloads 217 217
Data volume 296.5 GB 296.5 GB
Unique views 437 437
Unique downloads 102 102

Share

Cite as