{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$defs": {
    "DeterminationSource": {
      "description": "Enum defining sources for determining the version of a software.",
      "enum": [
        "url",
        "publication-date",
        "archive-metadata",
        "related-identifier-url"
      ],
      "title": "DeterminationSource",
      "type": "string"
    },
    "IdType": {
      "description": "Enum defining the type of identifier for a mentioning resource.",
      "enum": [
        "arxiv",
        "pmc",
        "doi"
      ],
      "title": "IdType",
      "type": "string"
    },
    "Language": {
      "additionalProperties": false,
      "description": "Represents a repository's programming language and the percentage of bytes of code for relevant files\nin the repository, as determined by the github-linguist Ruby package.\n\nSee for more information:\nhttps://github.com/github-linguist/linguist/blob/559a6426942abcae16b6d6b328147476432bf6cb/docs/how-linguist-works.md",
      "properties": {
        "language": {
          "title": "Language",
          "type": "string"
        },
        "fraction": {
          "title": "Fraction",
          "type": "number"
        }
      },
      "required": [
        "language",
        "fraction"
      ],
      "title": "Language",
      "type": "object"
    },
    "LatestVersion": {
      "additionalProperties": false,
      "description": "Represents information about the latest available version of the software in a source code repository\nat dataset construction.\n\nNote that this type does not distinguish between version and reference (e.g., commit sha and tag name for git).\nWhat is being recorded is the reference, i.e., either the tag name, or a revision identifier.",
      "properties": {
        "version": {
          "description": "The commit sha or the tag name of the git ref to the latest version of a software in a source code repository",
          "title": "Version",
          "type": "string"
        },
        "version_type": {
          "description": "The type of the latest version (only tag or revision)",
          "enum": [
            "tag",
            "revision"
          ],
          "title": "Version Type",
          "type": "string"
        },
        "revision_id": {
          "anyOf": [
            {
              "pattern": "^([0-9a-f]{5,40}|\\d+)$",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The commit sha (for git) or revision number (for svn) of the latest version tag or commit/revision.",
          "title": "Revision Id"
        },
        "tag_sha": {
          "anyOf": [
            {
              "pattern": "^([0-9a-f]{5,40}|\\d+)$",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The tag sha of the latest version tag if the latest version is of a git repository and the tag is annotated.",
          "title": "Tag Sha"
        },
        "languages": {
          "anyOf": [
            {
              "items": {
                "$ref": "#/$defs/Language"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "A list of programming languages and their fractions as retrievable from the latest version",
          "title": "Languages"
        },
        "date": {
          "anyOf": [
            {
              "format": "date-time",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The date of the version, i.e., committed date of the revision, or committed date of the tag.",
          "title": "Date"
        },
        "swh_id": {
          "anyOf": [
            {
              "pattern": "^swh:[1-9]:(rel|rev):[0-9a-f]+(;(origin|visit|anchor|path|lines)=\\S+)*$",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The Software Heritage identifier of an archive of, or containing, the latest version.",
          "title": "Swh Id"
        },
        "swh_dir_id": {
          "anyOf": [
            {
              "pattern": "^swh:[1-9]:dir:[0-9a-f]+(;(origin|visit|anchor|path|lines)=\\S+)*$",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The Software Heritage 'dir' identifier of an archive of, or containing, the latest version.",
          "title": "Swh Dir Id"
        },
        "licenses": {
          "anyOf": [
            {
              "items": {
                "$ref": "#/$defs/LicenseData"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "List of detected licenses with confidences.\nDefault None, and None when no licenses could be detected.",
          "title": "Licenses"
        },
        "metadata_files": {
          "anyOf": [
            {
              "items": {
                "$ref": "#/$defs/MetadataFile"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "List of detected metadata files.\nDefault None, and None when no files could be detected.",
          "title": "Metadata Files"
        },
        "id": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "A string uniquely identifying the latest version.\nIdentifies artifacts in the dataset.",
          "title": "Id"
        }
      },
      "required": [
        "version",
        "version_type"
      ],
      "title": "LatestVersion",
      "type": "object"
    },
    "LicenseData": {
      "additionalProperties": false,
      "description": "Represents information about detected licenses for a latest version.\nFor each object, two required fields are completed:\n\n- license: provides the SPDX-2.0 identifier for the license\n- confidence: provides the confidence (in % (integers)) that the license has been detected correctly.",
      "properties": {
        "license": {
          "title": "License",
          "type": "string"
        },
        "confidence": {
          "title": "Confidence",
          "type": "integer"
        }
      },
      "required": [
        "license",
        "confidence"
      ],
      "title": "LicenseData",
      "type": "object"
    },
    "Mention": {
      "additionalProperties": false,
      "description": "Represents a mention of a research software within a work",
      "properties": {
        "metadata_source": {
          "$ref": "#/$defs/MetadataSource",
          "description": "The source of the mention metadata"
        },
        "id": {
          "description": "The mention identifier from the metadata source",
          "title": "Id",
          "type": "string"
        },
        "id_type": {
          "$ref": "#/$defs/IdType",
          "description": "The type of the mention identifier, e.g., PMC identifier, ArXiv identifier, DOI, ..."
        },
        "pub_date": {
          "anyOf": [
            {
              "pattern": "^\\d{4}(?:-\\d{2}(?:-\\d{2})?)?$",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The maximally precise date of the earliest publication of the mentioning work in the format YYYY[-MM[-DD]]",
          "title": "Publication date"
        },
        "version": {
          "anyOf": [
            {
              "$ref": "#/$defs/MentionedVersion"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The version of the software mentioned in the mentioning work (may be approximate)"
        },
        "mentioning_urls": {
          "description": "The URLs used to mention the software in the mentioning work",
          "items": {
            "type": "string"
          },
          "title": "Mentioning Urls",
          "type": "array",
          "uniqueItems": true
        }
      },
      "required": [
        "metadata_source",
        "id",
        "id_type",
        "mentioning_urls"
      ],
      "title": "Mention",
      "type": "object"
    },
    "MentionedVersion": {
      "additionalProperties": false,
      "description": "Represents a specific software source code state (here: version) mentioned in a work",
      "properties": {
        "identification_url": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The URL that was used to identify the mentioned software version with maximal precision",
          "title": "Identification Url"
        },
        "type": {
          "anyOf": [
            {
              "$ref": "#/$defs/VersionType"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The type of the version, e.g., whether it refers to a tagged version, a specific revision, a branch head, ..."
        },
        "based_on": {
          "anyOf": [
            {
              "$ref": "#/$defs/DeterminationSource"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The source of the information from which the version of the mentioned software was determined"
        },
        "version": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The actual expression of the version reference, i.e., the tag or revision identifier.",
          "title": "Version"
        },
        "reference": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The reference to the version, i.e., the version name, tag name, revision name, branch name or path.",
          "title": "Reference"
        },
        "date": {
          "anyOf": [
            {
              "format": "date-time",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The date of the version, i.e., committed date of the revision, or committed date of the tag.",
          "title": "Date"
        },
        "archive_url": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "A URL to an archive deposit of the determined version",
          "title": "Archive Url"
        }
      },
      "title": "MentionedVersion",
      "type": "object"
    },
    "MetadataFile": {
      "description": "Enum defining software metadata files.",
      "enum": [
        "CITATION.cff",
        "codemeta.json",
        ".zenodo.json"
      ],
      "title": "MetadataFile",
      "type": "string"
    },
    "MetadataSource": {
      "description": "Enum defining the data source for the data.",
      "enum": [
        "extract-urls-pmc",
        "extract-urls-arxiv",
        "joss"
      ],
      "title": "MetadataSource",
      "type": "string"
    },
    "Platform": {
      "description": "Enum defining source code repository platforms.",
      "enum": [
        "github.com",
        "gitlab.com",
        "bitbucket.org",
        "sourceforge.net"
      ],
      "title": "Platform",
      "type": "string"
    },
    "ResearchSoftware": {
      "additionalProperties": false,
      "description": "Represents a research software project.",
      "properties": {
        "canonical_url": {
          "description": "A canonical URL for the software project on a version control platform",
          "format": "uri",
          "minLength": 1,
          "title": "Canonical Url",
          "type": "string"
        },
        "mentions": {
          "description": "A list of mentions from the literature to this software",
          "items": {
            "$ref": "#/$defs/Mention"
          },
          "title": "Mentions",
          "type": "array"
        },
        "repositories": {
          "anyOf": [
            {
              "items": {
                "$ref": "#/$defs/SourceCodeRepository"
              },
              "type": "array"
            },
            {
              "type": "null"
            }
          ],
          "default": [],
          "description": "A list of the source code repositories for the software project",
          "title": "Repositories"
        },
        "platform": {
          "$ref": "#/$defs/Platform",
          "description": "The source code repository platform where the software project is hosted"
        }
      },
      "required": [
        "canonical_url",
        "mentions",
        "platform"
      ],
      "title": "ResearchSoftware",
      "type": "object"
    },
    "SourceCodeRepository": {
      "additionalProperties": false,
      "description": "Represents a git or Subversion source code repository.",
      "properties": {
        "vcs": {
          "$ref": "#/$defs/VCS",
          "description": "The version control system used by the repository"
        },
        "clone_url": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The URL that can be used to retrieve a local copy of the repository via a VCS tool",
          "title": "Clone Url"
        },
        "accessible": {
          "anyOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": false,
          "description": "Whether the repository could be successfully accessed during dataset compilation, where 'accessible'\ndescribes a successful clone or checkout operation.",
          "title": "Accessible"
        },
        "latest": {
          "anyOf": [
            {
              "$ref": "#/$defs/LatestVersion"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Information about the latest version available from the repository at dataset construction"
        },
        "main_repo": {
          "anyOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": false,
          "description": "Whether this repository is regarded as containing 'the software'. This is relevant because Sourceforge\nprojects that may be mentioned as 'the software' may contain more than one source code repository.",
          "title": "Main Repo"
        },
        "trunk": {
          "anyOf": [
            {
              "type": "boolean"
            },
            {
              "type": "null"
            }
          ],
          "default": false,
          "description": "For SVN repositories, whether the repository has a trunk/ directory.",
          "title": "Trunk"
        },
        "structure": {
          "anyOf": [
            {
              "$ref": "#/$defs/SvnStructure"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Structural information about an SVN repository as a six-entry SvnStructure array (url, trunk, branches, tags, compliance, sub dirs). Defaults to null.",
          "title": "Structure"
        }
      },
      "required": [
        "vcs"
      ],
      "title": "SourceCodeRepository",
      "type": "object"
    },
    "SvnStructure": {
      "description": "A fixed-length array of exactly six positional entries describing an SVN repository, in the order:\nurl, trunk, branches, tags, compliance, sub dirs. The individual entries are not type-constrained by this schema.",
      "maxItems": 6,
      "minItems": 6,
      "prefixItems": [
        {
          "title": "Url"
        },
        {
          "title": "Trunk"
        },
        {
          "title": "Branches"
        },
        {
          "title": "Tags"
        },
        {
          "title": "Compliance"
        },
        {
          "title": "Sub Dirs"
        }
      ],
      "title": "SvnStructure",
      "type": "array"
    },
    "VCS": {
      "description": "Enum defining version control systems.",
      "enum": [
        "git",
        "svn",
        "cvs"
      ],
      "title": "VCS",
      "type": "string"
    },
    "VersionType": {
      "description": "Enum defining the kinds of version reference: a tag, a revision, a branch, a path, or a name.",
      "enum": [
        "tag",
        "revision",
        "branch",
        "path",
        "name"
      ],
      "title": "VersionType",
      "type": "string"
    }
  },
  "additionalProperties": false,
  "description": "Represents a corpus of research software repositories.",
  "properties": {
    "research_software": {
      "default": [],
      "description": "The software included in the corpus.",
      "items": {
        "$ref": "#/$defs/ResearchSoftware"
      },
      "title": "Research Software",
      "type": "array"
    },
    "version": {
      "anyOf": [
        {
          "pattern": "^\\d{4}-\\d{2}-\\d{2}$",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "The version of the corpus (format: YYYY-MM-DD).",
      "title": "Version"
    },
    "date": {
      "anyOf": [
        {
          "format": "date-time",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "The date of the version.",
      "title": "Date"
    }
  },
  "title": "Corpus",
  "type": "object"
}