Preprint Open Access

Applying Data Synthesis for Longitudinal Business Data across Three Countries

Alam, M. Jahangir; Dostie, Benoit; Drechsler, Jörg; Vilhuber, Lars


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3832173">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3832173</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3832173"/>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Alam, M. Jahangir</foaf:name>
        <foaf:givenName>M. Jahangir</foaf:givenName>
        <foaf:familyName>Alam</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Truman State University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Dostie, Benoit</foaf:name>
        <foaf:givenName>Benoit</foaf:givenName>
        <foaf:familyName>Dostie</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>HEC Montréal</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Drechsler, Jörg</foaf:name>
        <foaf:givenName>Jörg</foaf:givenName>
        <foaf:familyName>Drechsler</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Institute for Employment Research</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0001-5733-8932">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-5733-8932</dct:identifier>
        <foaf:name>Vilhuber, Lars</foaf:name>
        <foaf:givenName>Lars</foaf:givenName>
        <foaf:familyName>Vilhuber</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Cornell University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:title>Applying Data Synthesis for Longitudinal Business Data across Three Countries</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2020</dct:issued>
    <dcat:keyword>business data</dcat:keyword>
    <dcat:keyword>confidentiality</dcat:keyword>
    <dcat:keyword>LBD</dcat:keyword>
    <dcat:keyword>LEAP</dcat:keyword>
    <dcat:keyword>BHP</dcat:keyword>
    <dcat:keyword>synthetic data</dcat:keyword>
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/NSF/Directorate+for+Social%2C+Behavioral+%26+Economic+Sciences/1042181/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100000001</dct:identifier>
        <foaf:name>National Science Foundation</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/NSF/Directorate+for+Social%2C+Behavioral+%26+Economic+Sciences/0427889/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100000001</dct:identifier>
        <foaf:name>National Science Foundation</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <frapo:isFundedBy rdf:resource="info:eu-repo/grantAgreement/NSF/Directorate+for+Social%2C+Behavioral+%26+Economic+Sciences/1131848/"/>
    <schema:funder>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100000001</dct:identifier>
        <foaf:name>National Science Foundation</foaf:name>
      </foaf:Organization>
    </schema:funder>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2020-05-05</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/3832173"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3832173</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isPartOf rdf:resource="https://github.com/labordynamicsinstitute/SyntheticLEAP/releases/tag/v20200518"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3785743"/>
    <dct:isPartOf rdf:resource="https://zenodo.org/communities/labordynamicsinstitute"/>
    <owl:versionInfo>v20200504</owl:versionInfo>
    <dct:description>&lt;p&gt;Data on businesses collected by statistical agencies are challenging to protect. Many businesses have unique characteristics, and distributions of employment, sales, and profits are highly skewed. Attackers wishing to conduct identification attacks often have access to much more information than for any individual. As a consequence, most disclosure avoidance mechanisms fail to strike an acceptable balance between usefulness and confidentiality protection. Detailed aggregate statistics by geography or detailed industry classes are rare, public-use microdata on businesses are virtually inexistent, and access to confidential microdata can be burdensome. Synthetic microdata have been proposed as a secure mechanism to publish microdata, as part of a broader discussion of how to provide broader access to such datasets to researchers. In this article, we document an experiment to create analytically valid synthetic data, using the exact same model and methods previously employed for the United States, for data from two different countries: Canada (Longitudinal Employment Analysis Program (LEAP)) and Germany (Establishment History Panel (BHP)). We assess utility and protection, and provide an assessment of the feasibility of extending such an approach in a cost-effective way to other data.&lt;/p&gt;</dct:description>
    <dct:description>The opinions expressed here are those of the authors, and do not reflect the opinions of any of the statistical agencies involved. All results were reviewed for disclosure risks by their respective custodians, and released to the authors. Alam thanks Claudiu Motoc and Danny Leung for help with the Canadian data. Vilhuber acknowledges funding through NSF Grants SES-1131848 and SES-1042181, and a grant from the Alfred P. Sloan Foundation (G-2015-13903). Alam and Dostie acknowledge funding through SSHRC Partnership Grant "Productivity, Firms and Incomes". The creation of the Synthetic LBD was funded by NSF Grant SES-0427889. Revisions: - Minor edits based on editor's comments (only on pg. 9)</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dct:license rdf:resource="https://creativecommons.org/licenses/by-nc/4.0/legalcode"/>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3832173"/>
        <dcat:byteSize>1763588</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3832173/files/AlamDostieDrechslerVilhuber_20200518.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3832173"/>
        <dcat:byteSize>471441</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3832173/files/AlamDostieDrechslerVilhuber-online-appendix.pdf"/>
        <dcat:mediaType>application/pdf</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3832173"/>
        <dcat:byteSize>8978190</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3832173/files/SyntheticLEAP-20200518.zip"/>
        <dcat:mediaType>application/zip</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/NSF/Directorate+for+Social%2C+Behavioral+%26+Economic+Sciences/1042181/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">1042181</dct:identifier>
    <dct:title>Synthetic Data User Testing and Dissemination</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100000001</dct:identifier>
        <foaf:name>National Science Foundation</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/NSF/Directorate+for+Social%2C+Behavioral+%26+Economic+Sciences/0427889/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0427889</dct:identifier>
    <dct:title>ITR-(ECS+ASE)-(dmc+int): Info Tech Challenges for Secure Access to Confidential Social Science Data</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100000001</dct:identifier>
        <foaf:name>National Science Foundation</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
  <foaf:Project rdf:about="info:eu-repo/grantAgreement/NSF/Directorate+for+Social%2C+Behavioral+%26+Economic+Sciences/1131848/">
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">1131848</dct:identifier>
    <dct:title>NCRN-MN: Cornell Census-NSF Research Node: Integrated Research Support, Training and Data Documentation</dct:title>
    <frapo:isAwardedBy>
      <foaf:Organization>
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">10.13039/100000001</dct:identifier>
        <foaf:name>National Science Foundation</foaf:name>
      </foaf:Organization>
    </frapo:isAwardedBy>
  </foaf:Project>
</rdf:RDF>
40 views
33 downloads
All versions This version
Views 40 25
Downloads 33 31
Data volume 85.7 MB 70.6 MB
Unique views 31 22
Unique downloads 28 26

Share

Cite as