{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2019-11-08T10:01:20.126305+00:00",
  "custom_fields": {},
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "semcor..3-512.tar.gz": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:6cb97dc74a359a3d0d136aa1081e6b88",
        "ext": "gz",
        "id": "4da6c10d-4380-4728-b1b5-0eee18f30799",
        "key": "semcor..3-512.tar.gz",
        "links": {
          "content": "https://zenodo.org/api/records/3532708/files/semcor..3-512.tar.gz/content",
          "self": "https://zenodo.org/api/records/3532708/files/semcor..3-512.tar.gz"
        },
        "metadata": null,
        "mimetype": "application/gzip",
        "size": 139536843,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 139536843
  },
  "id": "3532708",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/3532708/access",
    "access_grants": "https://zenodo.org/api/records/3532708/access/grants",
    "access_links": "https://zenodo.org/api/records/3532708/access/links",
    "access_request": "https://zenodo.org/api/records/3532708/access/request",
    "access_users": "https://zenodo.org/api/records/3532708/access/users",
    "archive": "https://zenodo.org/api/records/3532708/files-archive",
    "archive_media": "https://zenodo.org/api/records/3532708/media-files-archive",
    "communities": "https://zenodo.org/api/records/3532708/communities",
    "communities-suggestions": "https://zenodo.org/api/records/3532708/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.3532708",
    "draft": "https://zenodo.org/api/records/3532708/draft",
    "file_modification": "https://zenodo.org/api/records/3532708/file-modification",
    "files": "https://zenodo.org/api/records/3532708/files",
    "latest": "https://zenodo.org/api/records/3532708/versions/latest",
    "latest_html": "https://zenodo.org/records/3532708/latest",
    "media_files": "https://zenodo.org/api/records/3532708/media-files",
    "parent": "https://zenodo.org/api/records/3532707",
    "parent_doi": "https://doi.org/10.5281/zenodo.3532707",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.3532707",
    "parent_html": "https://zenodo.org/records/3532707",
    "preview_html": "https://zenodo.org/records/3532708?preview=1",
    "quota_increase": "https://zenodo.org/api/records/3532708/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/3532708/request-deletion",
    "requests": "https://zenodo.org/api/records/3532708/requests",
    "reserve_doi": "https://zenodo.org/api/records/3532708/draft/pids/doi",
    "self": "https://zenodo.org/api/records/3532708",
    "self_doi": "https://doi.org/10.5281/zenodo.3532708",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.3532708",
    "self_html": "https://zenodo.org/records/3532708",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:3532708/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:3532708/sequence/default",
    "versions": "https://zenodo.org/api/records/3532708/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "additional_descriptions": [
      {
        "description": "Funding: ELG (EU H2020 project, grant number: 825627) and Co-Inform (EU H2020 project, grant number: 770302)",
        "type": {
          "id": "notes",
          "title": {
            "de": "Anmerkungen",
            "en": "Notes"
          }
        }
      }
    ],
    "creators": [
      {
        "affiliations": [
          {
            "name": "Expert System"
          }
        ],
        "person_or_org": {
          "family_name": "Ronald Denaux",
          "identifiers": [
            {
              "identifier": "0000-0001-5672-9915",
              "scheme": "orcid"
            }
          ],
          "name": "Ronald Denaux",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "Expert System"
          }
        ],
        "person_or_org": {
          "family_name": "Raul Ortega",
          "identifiers": [
            {
              "identifier": "0000-0002-0290-7373",
              "scheme": "orcid"
            }
          ],
          "name": "Raul Ortega",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "Expert System"
          }
        ],
        "person_or_org": {
          "family_name": "Jose Manuel Gomez-Perez",
          "identifiers": [
            {
              "identifier": "0000-0002-5491-6431",
              "scheme": "orcid"
            }
          ],
          "name": "Jose Manuel Gomez-Perez",
          "type": "personal"
        }
      }
    ],
    "description": "<p>This dataset contains word vectors generated after training the LMMS&nbsp;<code>Language Modelling Makes Sense (ACL 2019)</code> model with the whole train set of SemCor, adapted by rdenaux.</p>\n\n<p>The main modifications include:</p>\n\n<ul>\n\t<li>support for&nbsp;<a href=\"https://github.com/huggingface/transformers\">transformers</a>&nbsp;backend ** this makes it possible to experiment with other transformer architectures besides BERT, e.g. XLNet, XLM, RoBERTa ** optimised training since we no longer have to pad sequences to 512 wordpiece tokens</li>\n\t<li>Introduced&nbsp;<code>SentenceEncoder</code>&nbsp;which is an experimental generalisation of bert-as-service like encoding services using the transformers backend ** allows extracting various types of embeddings from a single execution of a batch of sequences</li>\n\t<li>rolling cosine similarity metrics during training phase</li>\n</ul>\n\n<p>The original repository includes the code to replicate the experiments in the&nbsp;<a href=\"https://arxiv.org/abs/1906.10007\">&quot;Language Modelling Makes Sense (ACL 2019)&quot;</a>&nbsp;paper.</p>\n\n<p>This project is designed to be modular so that others can easily modify or reuse the portions that are relevant for them. It&#39;s composed of a series of scripts that, when run in sequence, produce most of the work described in the paper (for simplicity, we&#39;ve focused this release on BERT, let us know if you need ELMo).</p>\n\n<p>The&nbsp;code is available <a href=\"https://github.com/rdenaux/LMMS\">here</a>.</p>\n\n<p>&nbsp;</p>",
    "funding": [
      {
        "award": {
          "acronym": "Co-Inform",
          "id": "00k4n6c32::770302",
          "identifiers": [
            {
              "identifier": "https://cordis.europa.eu/projects/770302",
              "scheme": "url"
            }
          ],
          "number": "770302",
          "program": "H2020-EU.3.6.",
          "title": {
            "en": "Co-Creating Misinformation-Resilient Societies"
          }
        },
        "funder": {
          "id": "00k4n6c32",
          "name": "European Commission"
        }
      }
    ],
    "publication_date": "2019-11-08",
    "publisher": "Zenodo",
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "title": "LMMS Wordnet Embeddings for SemCor corpus"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "50638"
      }
    },
    "communities": {},
    "id": "3532707",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.3532707",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.3532708",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:3532708",
      "provider": "oai"
    }
  },
  "revision_id": 2,
  "stats": {
    "all_versions": {
      "data_volume": 11023410597.0,
      "downloads": 79,
      "unique_downloads": 74,
      "unique_views": 359,
      "views": 374
    },
    "this_version": {
      "data_volume": 11023410597.0,
      "downloads": 79,
      "unique_downloads": 74,
      "unique_views": 359,
      "views": 374
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2020-01-24T19:24:37.766785+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}