{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2024-03-18T19:16:20.334303+00:00",
  "custom_fields": {},
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "GenePT_emebdding_v2.zip": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:3f6ce4317e3a0091978ae5cb8fbf05a3",
        "ext": "zip",
        "id": "ad3a5b2c-a430-4cf0-9413-d690f8e107c3",
        "key": "GenePT_emebdding_v2.zip",
        "links": {
          "content": "https://zenodo.org/api/records/10833191/files/GenePT_emebdding_v2.zip/content",
          "self": "https://zenodo.org/api/records/10833191/files/GenePT_emebdding_v2.zip"
        },
        "metadata": null,
        "mimetype": "application/zip",
        "size": 574395233,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 574395233
  },
  "id": "10833191",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/10833191/access",
    "access_grants": "https://zenodo.org/api/records/10833191/access/grants",
    "access_links": "https://zenodo.org/api/records/10833191/access/links",
    "access_request": "https://zenodo.org/api/records/10833191/access/request",
    "access_users": "https://zenodo.org/api/records/10833191/access/users",
    "archive": "https://zenodo.org/api/records/10833191/files-archive",
    "archive_media": "https://zenodo.org/api/records/10833191/media-files-archive",
    "communities": "https://zenodo.org/api/records/10833191/communities",
    "communities-suggestions": "https://zenodo.org/api/records/10833191/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.10833191",
    "draft": "https://zenodo.org/api/records/10833191/draft",
    "file_modification": "https://zenodo.org/api/records/10833191/file-modification",
    "files": "https://zenodo.org/api/records/10833191/files",
    "latest": "https://zenodo.org/api/records/10833191/versions/latest",
    "latest_html": "https://zenodo.org/records/10833191/latest",
    "media_files": "https://zenodo.org/api/records/10833191/media-files",
    "parent": "https://zenodo.org/api/records/10030425",
    "parent_doi": "https://doi.org/10.5281/zenodo.10030425",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.10030425",
    "parent_html": "https://zenodo.org/records/10030425",
    "preview_html": "https://zenodo.org/records/10833191?preview=1",
    "quota_increase": "https://zenodo.org/api/records/10833191/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/10833191/request-deletion",
    "requests": "https://zenodo.org/api/records/10833191/requests",
    "reserve_doi": "https://zenodo.org/api/records/10833191/draft/pids/doi",
    "self": "https://zenodo.org/api/records/10833191",
    "self_doi": "https://doi.org/10.5281/zenodo.10833191",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.10833191",
    "self_html": "https://zenodo.org/records/10833191",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:10833191/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:10833191/sequence/default",
    "versions": "https://zenodo.org/api/records/10833191/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "creators": [
      {
        "affiliations": [
          {
            "id": "00f54p054",
            "identifiers": [
              {
                "identifier": "00f54p054",
                "scheme": "ror"
              },
              {
                "identifier": "grid.168010.e",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0004 1936 8956",
                "scheme": "isni"
              }
            ],
            "name": "Stanford University"
          }
        ],
        "person_or_org": {
          "family_name": "Yiqun",
          "given_name": "Chen",
          "identifiers": [
            {
              "identifier": "0000-0002-4100-1507",
              "scheme": "orcid"
            }
          ],
          "name": "Yiqun, Chen",
          "type": "personal"
        },
        "role": {
          "id": "datacollector",
          "title": {
            "de": "DatensammlerIn",
            "en": "Data collector"
          }
        }
      }
    ],
    "dates": [
      {
        "date": "2023-10-21",
        "type": {
          "id": "submitted",
          "title": {
            "de": "Eingereicht",
            "en": "Submitted"
          }
        }
      },
      {
        "date": "2024-03-18",
        "type": {
          "id": "updated",
          "title": {
            "de": "Aktualisiert",
            "en": "Updated"
          }
        }
      }
    ],
    "description": "<p>These are the pulled NCBI (and UniProt, when applicable) summaries of genes, as well as the corresponding OpenAI text embeddings (text-embedding-ada-002 and text-embedding-3-large) computed on the summaries. See methods details in Chen and Zou (2024+).</p>\n<p>The unzipped folder contains four different files:&nbsp;</p>\n<ol>\n<li>NCBI_summary_of_genes.json (NCBI gene card summary of human genes)</li>\n<li>NCBI_UniProt_summary_of_genes.json (NCBI gene card and UniProt protein (when applicable) summary of human genes)</li>\n<li>GenePT_gene_embedding_ada_text.pickle (a dictionary of numpy array where gene names (upper case) are keys and text-embedding-ada-002 embeddings of the summary in 1. are the values)</li>\n<li>GenePT_gene_protein_embedding_model_3_text.pickle (a dictionary of numpy array where gene names (upper case) are keys and text-embedding-3-large embeddings of the summary in 1. are the values)</li>\n</ol>\n<p>Reference:</p>\n<p>Chen YT, Zou J. (2024+) GenePT: A Simple But Effective Foundation Model for Genes and Cells Built From ChatGPT. bioRxiv preprint: <a href=\"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1\">https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1</a>.</p>",
    "languages": [
      {
        "id": "eng",
        "title": {
          "en": "English"
        }
      }
    ],
    "publication_date": "2024-03-18",
    "publisher": "Zenodo",
    "related_identifiers": [
      {
        "identifier": "10.1101/2023.10.16.562533",
        "relation_type": {
          "id": "ispartof",
          "title": {
            "de": "Ist Teil von",
            "en": "Is part of"
          }
        },
        "resource_type": {
          "id": "publication-preprint",
          "title": {
            "de": "Preprint",
            "en": "Preprint"
          }
        },
        "scheme": "doi"
      }
    ],
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "Computational Biology"
      },
      {
        "subject": "Foundation Models"
      }
    ],
    "title": "Gene embeddings used in GenePT: A Simple But Hard-to-Beat Foundation Model for Genes and Cells Built From ChatGPT"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "465067"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {},
    "id": "10030425",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.10030425",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.10833191",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:10833191",
      "provider": "oai"
    }
  },
  "revision_id": 4,
  "stats": {
    "all_versions": {
      "data_volume": 1366307147120.0,
      "downloads": 2656,
      "unique_downloads": 1769,
      "unique_views": 5232,
      "views": 6041
    },
    "this_version": {
      "data_volume": 1265967093532.0,
      "downloads": 2204,
      "unique_downloads": 1415,
      "unique_views": 4290,
      "views": 4995
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2024-03-18T19:16:20.596498+00:00",
  "versions": {
    "index": 2,
    "is_latest": true
  }
}