There is a newer version of this record available.

Preprint Open Access

Sharing interoperable workflow provenance: A review of best practices and their practical application in CWLProv

Farah Zaib Khan; Stian Soiland-Reyes; Richard O. Sinnott; Andrew Lonie; Carole Goble; Michael R. Crusoe


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.1966881">
    <!-- Record typing and identity: a dcat:Dataset whose DCMI type is Text.
         The DOI IRI used as the subject is repeated as the dct:identifier
         literal (typed xsd:anyURI) and as the foaf:page target. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Text"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.1966881</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.1966881"/>
    <!-- Creator identified by ORCID IRI. Written as a typed node element
         (foaf:Agent), which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-6337-3037">
        <foaf:name>Farah Zaib Khan</foaf:name>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-6337-3037</dct:identifier>
        <!-- Affiliation is an anonymous organisation node carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>The University of Melbourne; Common Workflow Language project</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by ORCID IRI. Written as a typed node element
         (foaf:Agent), which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-9842-9718">
        <foaf:name>Stian Soiland-Reyes</foaf:name>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-9842-9718</dct:identifier>
        <!-- Affiliation is an anonymous organisation node carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>The University of Manchester; Common Workflow Language project</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by ORCID IRI. Written as a typed node element
         (foaf:Agent), which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0001-5998-222X">
        <foaf:name>Richard O. Sinnott</foaf:name>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-5998-222X</dct:identifier>
        <!-- Affiliation is an anonymous organisation node carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>The University of Melbourne</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by ORCID IRI. Written as a typed node element
         (foaf:Agent), which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-2006-3856">
        <foaf:name>Andrew Lonie</foaf:name>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-2006-3856</dct:identifier>
        <!-- Affiliation is an anonymous organisation node carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>The University of Melbourne</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by ORCID IRI. Written as a typed node element
         (foaf:Agent), which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0003-1219-2137">
        <foaf:name>Carole Goble</foaf:name>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-1219-2137</dct:identifier>
        <!-- Affiliation is an anonymous organisation node carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>The University of Manchester</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator identified by ORCID IRI. Written as a typed node element
         (foaf:Agent), which yields the same rdf:type triple as an
         rdf:Description with an explicit rdf:type child. -->
    <dct:creator>
      <foaf:Agent rdf:about="http://orcid.org/0000-0002-2961-9670">
        <foaf:name>Michael R. Crusoe</foaf:name>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-2961-9670</dct:identifier>
        <!-- Affiliation is an anonymous organisation node carrying only a name. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Common Workflow Language project</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Descriptive metadata: title, publisher (anonymous foaf:Agent named
         Zenodo), a year-only dct:issued value typed xsd:gYear, and one
         dcat:keyword literal per keyword. -->
    <dct:title>Sharing interoperable workflow provenance: A review of best practices and their practical application in CWLProv</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2018</dct:issued>
    <dcat:keyword>Provenance</dcat:keyword>
    <dcat:keyword>Common Workflow Language</dcat:keyword>
    <dcat:keyword>CWL</dcat:keyword>
    <dcat:keyword>Research Object</dcat:keyword>
    <dcat:keyword>RO</dcat:keyword>
    <dcat:keyword>BagIt</dcat:keyword>
    <dcat:keyword>Interoperability</dcat:keyword>
    <dcat:keyword>Scientific Workflows</dcat:keyword>
    <dcat:keyword>Containers</dcat:keyword>
    <!-- Funding: each frapo:isFundedBy target is a grant IRI that is described
         as a foaf:Project at the end of this document. Each link is followed by
         a schema:funder blank node; both funder nodes carry the same identifier
         and name. -->
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/EC/H2020/730976/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/EC/H2020/675728/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <!-- Full-date dct:issued value (xsd:date); a year-only dct:issued value
         also appears earlier in this description. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2018-12-04</dct:issued>
    <!-- The zenodo.org record URL is given twice: as an owl:sameAs target and
         as the skos:notation of an adms:Identifier. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/1966881"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/1966881</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Links to other resources: generic relations, one extra foaf:page,
         parts, the dct:isVersionOf target, and dct:isPartOf links to two
         zenodo.org/communities IRIs. -->
    <dct:relation rdf:resource="https://doi.org/10.1109/BigData.2016.7840618"/>
    <dct:relation rdf:resource="https://doi.org/10.5281/zenodo.592090"/>
    <dct:relation rdf:resource="https://doi.org/10.5281/zenodo.51314"/>
    <foaf:page rdf:resource="https://zenodo.org/record/1304969"/>
    <dct:relation rdf:resource="https://doi.org/10.17632/xnwncxpw42.1"/>
    <dct:relation rdf:resource="https://doi.org/10.17632/6wtpgr3kbj.1"/>
    <dct:relation rdf:resource="https://doi.org/10.17632/97hj93mkfd.3"/>
    <dct:hasPart rdf:resource="https://doi.org/10.5281/zenodo.1471376"/>
    <dct:hasPart rdf:resource="https://doi.org/10.5281/zenodo.1471585"/>
    <dct:hasPart rdf:resource="https://doi.org/10.5281/zenodo.1471589"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.1208477"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/linkeddata"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/ro"/>
    <dct:description>&lt;p&gt;&lt;strong&gt;Background:&lt;/strong&gt; The automation of data analysis in the form of scientific workflows has become a widely adopted practice in many fields of research. Computationally driven data-intensive experiments using workflows enable &lt;strong&gt;A&lt;/strong&gt;utomation, &lt;strong&gt;S&lt;/strong&gt;caling, &lt;strong&gt;A&lt;/strong&gt;daption and &lt;strong&gt;P&lt;/strong&gt;rovenance support (&lt;strong&gt;ASAP&lt;/strong&gt;). However, there are still several challenges associated with the effective sharing, publication and reproducibility of such workflows due to the incomplete capture of provenance and lack of interoperability between different technical (software) platforms.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;Results:&lt;/strong&gt; Based on best practice recommendations identified from literature on workflow design, sharing and publishing, we define a &lt;em&gt;hierarchical provenance framework&lt;/em&gt; to achieve uniformity in the provenance and support comprehensive and fully re-executable workflows equipped with domain-specific information. To realise this framework, we present &lt;a href="https://w3id.org/cwl/prov/"&gt;&lt;strong&gt;CWLProv&lt;/strong&gt;&lt;/a&gt;, a standard-based format to represent any workflow-based computational analysis to produce workflow output artefacts that satisfy the various levels of provenance. We utilise open source community-driven standards; interoperable workflow definitions in &lt;a href="https://www.commonwl.org/"&gt;Common Workflow Language&lt;/a&gt; (CWL), structured provenance representation using the &lt;a href="https://www.w3.org/TR/prov-overview/"&gt;W3C PROV&lt;/a&gt; model, and resource aggregation and sharing as workflow-centric &lt;a href="http://www.researchobject.org/"&gt;Research Objects&lt;/a&gt; (RO) generated along with the final outputs of a given workflow enactment. 
We demonstrate the utility of this approach through a practical implementation of &lt;em&gt;CWLProv&lt;/em&gt; and evaluation using real-life genomic workflows developed by independent groups.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;Conclusions: &lt;/strong&gt;The underlying principles of the standards utilised by &lt;em&gt;CWLProv&lt;/em&gt; enable semantically-rich and executable Research Objects that capture computational workflows with retrospective provenance such that any platform supporting CWL will be able to understand the analysis, re-use the methods for partial re-runs, or reproduce the analysis to validate the published findings.&lt;/p&gt;</dct:description>
    <!-- Second dct:description: a short plain-text note. -->
    <dct:description>Submitted to GigaScience (GIGA-D-18-00483)</dct:description>
    <!-- Access rights are stated twice: once as a publications.europa.eu
         authority IRI, once as an info:eu-repo rights statement labelled
         "Open Access". -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution carrying the licence IRI (creativecommons.org by/4.0
         legal code), with the record DOI as its access URL. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.1966881"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File-level distribution: the PDF download with its size and media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <!-- dcat:accessURL and dcat:downloadURL point at resources, so they are
             serialized as IRI references (rdf:resource) rather than as string
             literals; this matches the accessURL of the other distribution in
             this description. -->
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.1966881"/>
        <!-- NOTE(review): byteSize has no rdf:datatype; DCAT expects a numeric
             typed literal here. Confirm the target DCAT version before adding one. -->
        <dcat:byteSize>6214005</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/1966881/files/CWLProv.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <!-- Grant project 730976. Its rdf:about IRI is the target of a
       frapo:isFundedBy property on the record description in this document.
       The awarding body is an anonymous foaf:Organization node. -->
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/EC/H2020/730976/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">730976</dct:identifier>
    <dct:title>Industrial Biotechnology Innovation and Synthetic Biology Accelerator</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
  <!-- Grant project 675728. Its rdf:about IRI is the target of a
       frapo:isFundedBy property on the record description in this document.
       The awarding body is an anonymous foaf:Organization node. -->
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/EC/H2020/675728/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">675728</dct:identifier>
    <dct:title>Centre of Excellence for Biomolecular Research</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/501100000780</dct:identifier>
        <foaf:name>European Commission</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
2,843
1,829
views
downloads
All versions This version
Views 2,843 699
Downloads 1,829 438
Data volume 7.2 GB 2.7 GB
Unique views 2,286 600
Unique downloads 1,379 401

Share

Cite as