Conference paper Open Access
**Transformers without Tears: Improving the Normalization of Self-Attention**

Nguyen, Toan Q. (University of Notre Dame); Salazar, Julian (Amazon AWS AI)

Published November 2, 2019 | DOI: 10.5281/zenodo.3525484 | Community: IWSLT 2019 | Language: English | License: CC BY 4.0

We evaluate three simple, normalization-centric changes to improve Transformer training. First, we show that pre-norm residual connections (PreNorm) and smaller initializations enable warmup-free, validation-based training with large learning rates. Second, we propose l2 normalization with a single scale parameter (ScaleNorm) for faster training and better performance. Finally, we reaffirm the effectiveness of normalizing word embeddings to a fixed length (FixNorm). On five low-resource translation pairs from TED Talks-based corpora, these changes always converge, giving an average +1.1 BLEU over state-of-the-art bilingual baselines and a new 32.8 BLEU on IWSLT '15 English-Vietnamese. We observe sharper performance curves, more consistent gradient norms, and a linear relationship between activation scaling and decoder depth. Surprisingly, in the high-resource setting (WMT '14 English-German), ScaleNorm and FixNorm remain competitive but PreNorm degrades performance.

File: IWSLT2019_paper_26.pdf (345.9 kB), available at https://zenodo.org/record/3525484
| | All versions | This version |
| --- | --- | --- |
| Views | 1,037 | 1,037 |
| Downloads | 644 | 644 |
| Data volume | 222.8 MB | 222.8 MB |
| Unique views | 888 | 888 |
| Unique downloads | 576 | 576 |