There is a newer version of this record available.

Preprint Open Access

CWLProv - Interoperable Retrospective Provenance capture and its challenges

Farah Zaib Khan; Stian Soiland-Reyes; Michael R. Crusoe; Andrew Lonie; Richard O. Sinnott


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nam##2200000uu#4500</leader>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Provenance</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Retrospective provenance</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Research Object</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Common Workflow Language</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">PROV Data Model</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">PROV-N</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">PROV-JSON</subfield>
  </datafield>
  <controlfield tag="005">20181205111951.0</controlfield>
  <datafield tag="500" ind1=" " ind2=" ">
    <subfield code="a">&lt;CWLProv-ProvenanceWeek_2018_paper_7.pdf&gt; is preprint, which was submitted for consideration at IPAW 2018.
&lt;RunTimeResearchObject-f0b553d37e4255a3291393948f3e308bd88ed301.zip&gt; is Research Object of described workflow run.
&lt;cwltool-61797e04db7db33486d6074ec4ecc7dadd20d5e6.zip&gt; is CWLProv-enabled cwltool reference implementation used.</subfield>
  </datafield>
  <controlfield tag="001">1215611</controlfield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">The University of Manchester; Common Workflow Language project</subfield>
    <subfield code="0">(orcid)0000-0001-9842-9718</subfield>
    <subfield code="a">Stian Soiland-Reyes</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Common Workflow Language project</subfield>
    <subfield code="0">(orcid)0000-0002-2961-9670</subfield>
    <subfield code="a">Michael R. Crusoe</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">The University of Melbourne</subfield>
    <subfield code="0">(orcid)0000-0002-2006-3856</subfield>
    <subfield code="a">Andrew Lonie</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">The University of Melbourne</subfield>
    <subfield code="0">(orcid)0000-0001-5998-222X</subfield>
    <subfield code="a">Richard O. Sinnott</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1033878</subfield>
    <subfield code="z">md5:4789648cb4fb5a6ea86b039f04b7dd49</subfield>
    <subfield code="u">https://zenodo.org/record/1215611/files/CWLProv-ProvenanceWeek_2018_paper_7.pdf</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">3552427</subfield>
    <subfield code="z">md5:41b46e8177ac7983c03d50204e204cdd</subfield>
    <subfield code="u">https://zenodo.org/record/1215611/files/cwltool-61797e04db7db33486d6074ec4ecc7dadd20d5e6.zip</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">141133</subfield>
    <subfield code="z">md5:cd66d4250e011fccc1e8efbc8952d667</subfield>
    <subfield code="u">https://zenodo.org/record/1215611/files/RunTimeResearchObject-f0b553d37e4255a3291393948f3e308bd88ed301.zip</subfield>
  </datafield>
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2018-03-27</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire</subfield>
    <subfield code="p">user-linkeddata</subfield>
    <subfield code="p">user-ro</subfield>
    <subfield code="o">oai:zenodo.org:1215611</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">The University of Melbourne; Common Workflow Language project</subfield>
    <subfield code="0">(orcid)0000-0002-6337-3037</subfield>
    <subfield code="a">Farah Zaib Khan</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">CWLProv - Interoperable Retrospective Provenance capture and its challenges</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-linkeddata</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-ro</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="c">675728</subfield>
    <subfield code="a">Centre of Excellence for Biomolecular Research</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">http://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;The automation of data analysis in the form of scientific workflows is a widely adopted practice in many fields of research nowadays. Computationally driven data-intensive experiments using workflows enable &lt;strong&gt;A&lt;/strong&gt;utomation, &lt;strong&gt;S&lt;/strong&gt;caling, &lt;strong&gt;A&lt;/strong&gt;daption and &lt;strong&gt;P&lt;/strong&gt;rovenance support (ASAP).&lt;/p&gt;

&lt;p&gt;However, there are still several challenges associated with the effective sharing, publication, understandability and reproducibility of such workflows due to the incomplete capture of provenance and the dependence on particular technical (software) platforms. This paper presents &lt;strong&gt;CWLProv&lt;/strong&gt;, an approach for retrospective provenance capture utilizing open source community-driven standards involving application and customization of workflow-centric &lt;a href="http://www.researchobject.org/"&gt;Research Objects&lt;/a&gt; (ROs).&lt;/p&gt;

&lt;p&gt;The ROs are produced as an output of a workflow enactment defined in the &lt;a href="http://www.commonwl.org/"&gt;Common Workflow Language&lt;/a&gt; (CWL) using the CWL reference implementation and its data structures. The approach aggregates and annotates all the resources involved in the scientific investigation including inputs, outputs, workflow specification, command line tool specifications and input parameter settings. The resources are linked within the RO to enable re-enactment of an analysis without depending on external resources.&lt;/p&gt;

&lt;p&gt;The workflow provenance profile is represented in W3C recommended standard &lt;a href="https://www.w3.org/TR/prov-n/"&gt;PROV-N&lt;/a&gt; and &lt;a href="https://www.w3.org/Submission/prov-json/"&gt;PROV-JSON&lt;/a&gt; format to capture retrospective provenance of the workflow enactment. The workflow-centric RO produced as an output of a CWL workflow enactment is expected to be interoperable, reusable, shareable and portable across different platforms.&lt;/p&gt;

&lt;p&gt;This paper describes the need and motivation for &lt;a href="https://github.com/common-workflow-language/cwltool/tree/provenance"&gt;CWLProv&lt;/a&gt; and the lessons learned in applying it for ROs using CWL in the bioinformatics domain.&lt;/p&gt;</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">url</subfield>
    <subfield code="i">hasPart</subfield>
    <subfield code="a">https://github.com/common-workflow-language/cwltool/tree/61797e04db7db33486d6074ec4ecc7dadd20d5e6</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">url</subfield>
    <subfield code="i">hasPart</subfield>
    <subfield code="a">https://github.com/FarahZKhan/scalability-reproducibility-chapter/tree/f0b553d37e4255a3291393948f3e308bd88ed301/CWL/ProvCaptureOutput/RunTimeResearchObject</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">cites</subfield>
    <subfield code="a">10.5281/zenodo.833999</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">cites</subfield>
    <subfield code="a">10.1109/BigData.2016.7840618</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">cites</subfield>
    <subfield code="a">10.5281/zenodo.592090</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">url</subfield>
    <subfield code="i">isSupplementedBy</subfield>
    <subfield code="a">https://github.com/common-workflow-language/cwltool/pull/676</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">cites</subfield>
    <subfield code="a">10.5281/zenodo.51314</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">url</subfield>
    <subfield code="i">isCitedBy</subfield>
    <subfield code="a">https://zenodo.org/record/1304969</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isVersionOf</subfield>
    <subfield code="a">10.5281/zenodo.1208477</subfield>
  </datafield>
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.1215611</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">publication</subfield>
    <subfield code="b">preprint</subfield>
  </datafield>
</record>
1,158 views
599 downloads
| | All versions | This version |
| Views | 1,158 | 258 |
| Downloads | 599 | 157 |
| Data volume | 2.4 GB | 163.1 MB |
| Unique views | 964 | 238 |
| Unique downloads | 455 | 130 |
Unique downloads 455130

Share

Cite as