Dataset Open Access

GAP: Forecasting Commit Activity in git Projects

Alexandre Decan; Eleni Constantinou; Tom Mens; Henrique Rocha


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3666048">
    <!-- Dataset identity: the resource is typed both as a dcat:Dataset and as the DCMI
         type Dataset. The DOI URL is given as an xsd:anyURI identifier literal and again
         as the foaf:page of the resource. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3666048</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3666048"/>
    <!-- Creator written as a typed node element: <foaf:Agent> yields the same rdf:type
         triple as an rdf:Description with an explicit rdf:type child. The affiliation
         is an unnamed foaf:Organization attached through org:memberOf. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Alexandre Decan</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Mons</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator written as a typed node element: <foaf:Agent> yields the same rdf:type
         triple as an rdf:Description with an explicit rdf:type child. The affiliation
         is an unnamed foaf:Organization attached through org:memberOf. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Eleni Constantinou</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Eindhoven University of Technology</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator written as a typed node element: <foaf:Agent> yields the same rdf:type
         triple as an rdf:Description with an explicit rdf:type child. The affiliation
         is an unnamed foaf:Organization attached through org:memberOf. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Tom Mens</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Mons</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Creator written as a typed node element: <foaf:Agent> yields the same rdf:type
         triple as an rdf:Description with an explicit rdf:type child. The affiliation
         is an unnamed foaf:Organization attached through org:memberOf. -->
    <dct:creator>
      <foaf:Agent>
        <foaf:name>Henrique Rocha</foaf:name>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Antwerp</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <!-- Title as a plain literal; no xml:lang tag is attached. -->
    <dct:title>GAP: Forecasting Commit Activity in git Projects</dct:title>
    <!-- Publisher is an unnamed foaf:Agent that carries only a name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- dct:issued is stated twice: once as an xsd:gYear and once as a full xsd:date. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-02-13</dct:issued>
    <!-- Language is a reference into the publications.europa.eu language authority list (ENG). -->
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- Declares the zenodo.org record URL to denote the same resource as this description's subject. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/3666048"/>
    <!-- Additional identifier: the zenodo.org record URL, carried as an xsd:anyURI-typed
         skos:notation on an unnamed adms:Identifier. -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3666048</skos:notation>
        <!-- NOTE(review): "url" reads like an identifier scheme name rather than an issuing
             agency; confirm against the mapping that generated this export. -->
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- Points at DOI suffix 3666047, one below this record's 3666048; presumably the
         all-versions DOI of the record (TODO confirm). -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3666047"/>
    <!-- Description literal: HTML markup stored as entity-escaped text (&lt;p&gt;, &lt;em&gt;, ...),
         so RDF consumers receive a string containing HTML tags, not XML child elements.
         The literal contains a line break inside the text; whitespace here is data and
         must not be re-indented or pretty-printed. -->
    <dct:description>&lt;p&gt;This entry contains the replication package for our paper &lt;em&gt;GAP: Forecasting Commit Activity in git Projects&lt;/em&gt; accepted for publication in Journal of Systems and Software.&lt;/p&gt; &lt;p&gt;Abandonment of active developers poses a significant risk for many open source software projects. This risk can be reduced by forecasting the future activity of contributors involved in such projects. Focusing on the commit activity of individuals involved in git repositories, this paper proposes a practicable probabilistic forecasting model based on the statistical technique of survival analysis. The model is empirically validated on a wide variety of projects accounting for 7,528 git repositories and 5,947 active contributors. We found that a model based on the last 20 observed days of commit activity per contributor provides the best concordance. We also found that the predictions provided by the model are generally close to actual observations, with slight underestimations for low probability predictions and slight overestimations for higher probability predictions. This model is implemented as part of an open source tool, called gap, that predicts future commit activity.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;Replication package&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;The model is explained and defined in &lt;em&gt;&amp;quot;notebooks/Survival analysis.ipynb&amp;quot;&lt;/em&gt;. This is a Jupyter notebook created with Jupyter Lab. The dependencies required to run this notebook are listed in &lt;em&gt;requirements.txt&lt;/em&gt; and can be automatically installed using &lt;code&gt;pip install -r requirements.txt&lt;/code&gt;. Consider making use of a virtual environment to ensure a proper replication of the analyses.&lt;/p&gt; &lt;p&gt;The data used to validate the model can be found in &lt;em&gt;data/cargo.csv.gz&lt;/em&gt;. 
They were produced with the script &lt;em&gt;data/convert.py&lt;/em&gt; that requires file &lt;em&gt;data-raw/cargo_all_proj_commits_id.csv.gz&lt;/em&gt;. This file was created by retrieving all the commits of all projects hosted on github that are related to a project distributed on Cargo. To identify such projects, we relied on libraries.io dataset. GitHub API was then queried to obtain the username of each author (if available) to allow some basic identity merging task. Data about repositories were extracted from libraries.io 1.4.0 dataset and can be found in &lt;em&gt;data/repositories.csv.gz&lt;/em&gt;.&lt;/p&gt; &lt;p&gt;&lt;strong&gt;The GAP tool&lt;/strong&gt;&lt;/p&gt; &lt;p&gt;GAP is made available on &lt;a href="https://github.com/AlexandreDecan/gap"&gt;https://github.com/AlexandreDecan/gap&lt;/a&gt;&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice, using two vocabularies: a reference into the
         publications.europa.eu access-right authority list (PUBLIC), and a
         dct:RightsStatement identified by info:eu-repo/semantics/openAccess with a
         human-readable label. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution carrying the licence: an unnamed dcat:Distribution that references
         the CC BY 4.0 legal code and uses the DOI URL as its access URL. It holds no
         file-level details (size, download URL, media type). -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3666048"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- File distribution for replication.zip.
         dcat:accessURL and dcat:downloadURL are resource-valued properties in DCAT, so
         they are written as rdf:resource references (the same form the licence
         distribution in this document uses for dcat:accessURL) rather than as plain
         string literals, which RDF consumers would not treat as links.
         dcat:byteSize is typed xsd:nonNegativeInteger as in DCAT 3; DCAT 2 specified
         xsd:decimal, and the value is lexically valid for both. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3666048"/>
        <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#nonNegativeInteger">49444568</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3666048/files/replication.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
51 views
19 downloads
Statistic | All versions | This version
Views | 51 | 51
Downloads | 19 | 19
Data volume | 939.4 MB | 939.4 MB
Unique views | 44 | 44
Unique downloads | 4 | 4

Share

Cite as