{ "access": { "embargo": { "active": false, "reason": null }, "files": "public", "record": "public", "status": "open" }, "created": "2022-04-07T18:08:30.428181+00:00", "custom_fields": {}, "deletion_status": { "is_deleted": false, "status": "P" }, "files": { "count": 1, "enabled": true, "entries": { "huggingface/transformers-v4.18.0.zip": { "checksum": "md5:2cb9cbc7786d91f3af6d679bf1853933", "ext": "zip", "id": "d6a2fbd7-d027-4dd1-98a6-4990e8a20f2e", "key": "huggingface/transformers-v4.18.0.zip", "metadata": null, "mimetype": "application/zip", "size": 10849980 } }, "order": [], "total_bytes": 10849980 }, "id": "6422483", "is_draft": false, "is_published": true, "links": { "access": "https://zenodo.org/api/records/6422483/access", "access_links": "https://zenodo.org/api/records/6422483/access/links", "access_request": "https://zenodo.org/api/records/6422483/access/request", "access_users": "https://zenodo.org/api/records/6422483/access/users", "archive": "https://zenodo.org/api/records/6422483/files-archive", "archive_media": "https://zenodo.org/api/records/6422483/media-files-archive", "communities": "https://zenodo.org/api/records/6422483/communities", "communities-suggestions": "https://zenodo.org/api/records/6422483/communities-suggestions", "doi": "https://doi.org/10.5281/zenodo.6422483", "draft": "https://zenodo.org/api/records/6422483/draft", "files": "https://zenodo.org/api/records/6422483/files", "latest": "https://zenodo.org/api/records/6422483/versions/latest", "latest_html": "https://zenodo.org/records/6422483/latest", "media_files": "https://zenodo.org/api/records/6422483/media-files", "parent": "https://zenodo.org/api/records/3385997", "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.3385997", "parent_html": "https://zenodo.org/records/3385997", "requests": "https://zenodo.org/api/records/6422483/requests", "reserve_doi": "https://zenodo.org/api/records/6422483/draft/pids/doi", "self": "https://zenodo.org/api/records/6422483", "self_doi": "https://zenodo.org/doi/10.5281/zenodo.6422483", "self_html": "https://zenodo.org/records/6422483", "self_iiif_manifest": "https://zenodo.org/api/iiif/record:6422483/manifest", "self_iiif_sequence": "https://zenodo.org/api/iiif/record:6422483/sequence/default", "versions": "https://zenodo.org/api/records/6422483/versions" }, "media_files": { "count": 0, "enabled": false, "entries": {}, "order": [], "total_bytes": 0 }, "metadata": { "additional_descriptions": [ { "description": "If you use this software, please cite it using these metadata.", "type": { "id": "notes", "title": { "de": "Anmerkungen", "en": "Notes" } } } ], "creators": [ { "person_or_org": { "family_name": "Wolf", "given_name": "Thomas", "name": "Wolf, Thomas", "type": "personal" } }, { "person_or_org": { "family_name": "Debut", "given_name": "Lysandre", "name": "Debut, Lysandre", "type": "personal" } }, { "person_or_org": { "family_name": "Sanh", "given_name": "Victor", "name": "Sanh, Victor", "type": "personal" } }, { "person_or_org": { "family_name": "Chaumond", "given_name": "Julien", "name": "Chaumond, Julien", "type": "personal" } }, { "person_or_org": { "family_name": "Delangue", "given_name": "Clement", "name": "Delangue, Clement", "type": "personal" } }, { "person_or_org": { "family_name": "Moi", "given_name": "Anthony", "name": "Moi, Anthony", "type": "personal" } }, { "person_or_org": { "family_name": "Cistac", "given_name": "Perric", "name": "Cistac, Perric", "type": "personal" } }, { "person_or_org": { "family_name": "Ma", "given_name": "Clara", "name": "Ma, 
Clara", "type": "personal" } }, { "person_or_org": { "family_name": "Jernite", "given_name": "Yacine", "name": "Jernite, Yacine", "type": "personal" } }, { "person_or_org": { "family_name": "Plu", "given_name": "Julien", "name": "Plu, Julien", "type": "personal" } }, { "person_or_org": { "family_name": "Xu", "given_name": "Canwen", "name": "Xu, Canwen", "type": "personal" } }, { "person_or_org": { "family_name": "Le Scao", "given_name": "Teven", "name": "Le Scao, Teven", "type": "personal" } }, { "person_or_org": { "family_name": "Gugger", "given_name": "Sylvain", "name": "Gugger, Sylvain", "type": "personal" } }, { "person_or_org": { "family_name": "Drame", "given_name": "Mariama", "name": "Drame, Mariama", "type": "personal" } }, { "person_or_org": { "family_name": "Lhoest", "given_name": "Quentin", "name": "Lhoest, Quentin", "type": "personal" } }, { "person_or_org": { "family_name": "Rush", "given_name": "Alexander M.", "name": "Rush, Alexander M.", "type": "personal" } } ], "description": "New model additions\n
You'll notice that we are starting to add several older vision models. This is because those models are used as backbones in recent architectures. While we could rely on existing libraries for such pretrained models, we will ultimately need some support for those backbones in PyTorch, TensorFlow, and JAX, and there is currently no library that supports all three frameworks. This is why we are starting to add those models to Transformers directly (here ResNet and VAN).
\nGLPN\nThe GLPN model was proposed in Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim. GLPN combines SegFormer's hierarchical mix-Transformer with a lightweight decoder for monocular depth estimation. The proposed decoder shows better performance than the previously proposed decoders, with considerably less computational complexity.
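\nAs a rough sketch of how this can be used (the checkpoint name below is an example chosen for illustration, not pinned by these notes):
\nfrom transformers import GLPNFeatureExtractor, GLPNForDepthEstimation
\nfrom PIL import Image
\nimport requests
\nimport torch
\n
\ncheckpoint = \"vinvino02/glpn-kitti\"  # example checkpoint, assumed
\nfeature_extractor = GLPNFeatureExtractor.from_pretrained(checkpoint)
\nmodel = GLPNForDepthEstimation.from_pretrained(checkpoint)
\nimage = Image.open(requests.get(\"http://images.cocodataset.org/val2017/000000039769.jpg\", stream=True).raw)
\ninputs = feature_extractor(images=image, return_tensors=\"pt\")
\nwith torch.no_grad():
\n    outputs = model(**inputs)
\ndepth = outputs.predicted_depth  # one depth value per pixel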
\nThe ResNet model was proposed in Deep Residual Learning for Image Recognition by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. Our implementation follows the small changes made by Nvidia: we apply stride=2 for downsampling in the bottleneck's 3x3 convolution rather than in the first 1x1. This variant is generally known as \"ResNet v1.5\".
\nResNet introduced residual connections, which allow training networks with an unprecedented number of layers (up to 1,000). ResNet won the 2015 ILSVRC & COCO competitions, an important milestone in deep computer vision.
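\nA minimal sketch of image classification with the new ResNet classes (the checkpoint name is an example, and a blank image stands in for real input):
\nfrom transformers import AutoFeatureExtractor, ResNetForImageClassification
\nfrom PIL import Image
\nimport torch
\n
\ncheckpoint = \"microsoft/resnet-50\"  # example checkpoint, assumed
\nfeature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
\nmodel = ResNetForImageClassification.from_pretrained(checkpoint)
\ninputs = feature_extractor(images=Image.new(\"RGB\", (224, 224)), return_tensors=\"pt\")
\nwith torch.no_grad():
\n    logits = model(**inputs).logits
\nprint(model.config.id2label[logits.argmax(-1).item()])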
\nThe VAN model was proposed in Visual Attention Network by Meng-Hao Guo, Cheng-Ze Lu, Zheng-Ning Liu, Ming-Ming Cheng, Shi-Min Hu.
\nThis paper introduces a new attention layer based on convolution operations that captures both local and distant relationships. It does so by combining normal and large-kernel convolution layers; the latter use dilated convolutions to capture distant correlations.
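\nTo make that concrete, here is a minimal PyTorch sketch of a large-kernel attention block in that spirit (a simplification written for illustration, not the library's implementation; the 5x5 depthwise + dilated 7x7 depthwise + 1x1 pointwise decomposition follows the paper):
\nimport torch
\nimport torch.nn as nn
\n
\nclass LargeKernelAttention(nn.Module):
\n    def __init__(self, dim):
\n        super().__init__()
\n        self.dw = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)  # local context
\n        # dilation 3 gives an effective 19x19 receptive field for distant context
\n        self.dw_dilated = nn.Conv2d(dim, dim, 7, padding=9, dilation=3, groups=dim)
\n        self.pw = nn.Conv2d(dim, dim, 1)  # mix channels
\n
\n    def forward(self, x):
\n        attn = self.pw(self.dw_dilated(self.dw(x)))
\n        return attn * x  # the attention map gates the input
\n
\nx = torch.randn(1, 64, 32, 32)
\nassert LargeKernelAttention(64)(x).shape == x.shape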
\nThe VisionTextDualEncoderModel can be used to initialize a vision-text dual encoder model with any pretrained vision autoencoding model as the vision encoder (e.g. ViT, BEiT, DeiT) and any pretrained text autoencoding model as the text encoder (e.g. RoBERTa, BERT). Two projection layers are added on top of both the vision and text encoders to project the output embeddings to a shared latent space. Because the projection layers are randomly initialized, the model should be fine-tuned on a downstream task. This model can be used to align the vision-text embeddings with CLIP-style contrastive image-text training, after which it can be used for zero-shot vision tasks such as image classification or retrieval.
\nIn LiT: Zero-Shot Transfer with Locked-image Text Tuning, it is shown how leveraging pre-trained (locked/frozen) image and text models for contrastive learning yields significant improvement on new zero-shot vision tasks such as image classification or retrieval.
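\nA minimal sketch of wiring two pretrained encoders together (the checkpoint names are examples chosen for illustration):
\nfrom transformers import VisionTextDualEncoderModel
\n
\nmodel = VisionTextDualEncoderModel.from_vision_text_pretrained(
\n    \"google/vit-base-patch16-224\", \"roberta-base\"
\n)
\n# The two projection layers on top are freshly initialized, so the model
\n# should be fine-tuned (e.g. with contrastive image-text training) before
\n# being used for zero-shot classification or retrieval.
\nmodel.save_pretrained(\"vit-roberta-dual-encoder\")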
\nDiT was proposed in DiT: Self-supervised Pre-training for Document Image Transformer by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei. DiT applies the self-supervised objective of BEiT (BERT pre-training of Image Transformers) to 42 million document images, allowing for state-of-the-art results on tasks including:
\ntable detection: the ICDAR 2019 cTDaR dataset (a collection of 600 training images and 240 testing images).
\nAdd Document Image Transformer (DiT) by @NielsRogge in https://github.com/huggingface/transformers/pull/15984
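\nBecause DiT reuses the BEiT architecture, its checkpoints load through the auto classes; a rough sketch for document image classification (the checkpoint name is an example, assumed rather than taken from these notes):
\nfrom transformers import AutoFeatureExtractor, AutoModelForImageClassification
\n
\ncheckpoint = \"microsoft/dit-base-finetuned-rvlcdip\"  # example checkpoint, assumed
\nfeature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
\nmodel = AutoModelForImageClassification.from_pretrained(checkpoint)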
\nThe DPT model was proposed in Vision Transformers for Dense Prediction by Ren\u00e9 Ranftl, Alexey Bochkovskiy, Vladlen Koltun. DPT is a model that leverages the Vision Transformer (ViT) as a backbone for dense prediction tasks like semantic segmentation and depth estimation.
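\nA minimal sketch for depth estimation with DPT (the checkpoint name is an example; a blank image stands in for real input):
\nfrom transformers import DPTFeatureExtractor, DPTForDepthEstimation
\nfrom PIL import Image
\nimport torch
\n
\ncheckpoint = \"Intel/dpt-large\"  # example checkpoint, assumed
\nfeature_extractor = DPTFeatureExtractor.from_pretrained(checkpoint)
\nmodel = DPTForDepthEstimation.from_pretrained(checkpoint)
\ninputs = feature_extractor(images=Image.new(\"RGB\", (384, 384)), return_tensors=\"pt\")
\nwith torch.no_grad():
\n    predicted_depth = model(**inputs).predicted_depth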
\nLarge models are becoming more and more the norm, and keeping a checkpoint in a single file is challenging for several reasons.
\nThat's why the save_pretrained method will now automatically shard a checkpoint into several files when you go above a 10GB threshold for PyTorch models. from_pretrained will handle such sharded checkpoints as if there were only one file.
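\nA minimal sketch of the behavior (the 200MB value just forces sharding so the effect is visible; max_shard_size is assumed here as the knob controlling the threshold):
\nfrom transformers import AutoModel
\n
\nmodel = AutoModel.from_pretrained(\"bert-base-uncased\")
\n# Save in shards no larger than ~200MB each instead of the default 10GB.
\nmodel.save_pretrained(\"sharded-checkpoint\", max_shard_size=\"200MB\")
\n# from_pretrained reassembles the shards transparently.
\nreloaded = AutoModel.from_pretrained(\"sharded-checkpoint\")
\n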
GPT-J and ViTMAE are now available in TensorFlow.
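\nFor example, the new TensorFlow classes load like their PyTorch counterparts (the checkpoint name is an example; pass from_pt=True if a repository only publishes PyTorch weights):
\nfrom transformers import TFViTMAEForPreTraining
\n
\nmodel = TFViTMAEForPreTraining.from_pretrained(\"facebook/vit-mae-base\")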
\nThe documentation IA (information architecture) migration is wrapped up, with a new conceptual guide now available.
\n- torch.diag by @Narsil in https://github.com/huggingface/transformers/pull/15890
\n- MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING by @Narsil in https://github.com/huggingface/transformers/pull/15934
\n- max_length in BeamScorer.finalize() by @cwkeam in https://github.com/huggingface/transformers/pull/15555
\n- ForInstanceSegmentation models to image-segmentation pipelines by @Narsil in https://github.com/huggingface/transformers/pull/15937
\n- pos optional in PerceiverAudioPreprocessor to avoid crashing PerceiverModel operation by @basilevh in https://github.com/huggingface/transformers/pull/15972
\n- 'torch.dtype' has str-type value in config and all nested dicts for JSON serializability by @feifang24 in https://github.com/huggingface/transformers/pull/16065
\n- HF_ENDPOINT for custom endpoints by @sgugger in https://github.com/huggingface/transformers/pull/16139
\n- hidden_states by @ydshieh in https://github.com/huggingface/transformers/pull/16167
\n- jax.ops operations with jnp's at by @sanchit-gandhi in https://github.com/huggingface/transformers/pull/16078
\n- has_attentions as done in PyTorch side by @ydshieh in https://github.com/huggingface/transformers/pull/16259
\n- add-new-model-like work in an env without all frameworks by @sgugger in https://github.com/huggingface/transformers/pull/16239
\n- cached_download \u2218 hf_hub_url is hf_hub_download by @julien-c in https://github.com/huggingface/transformers/pull/16375
\n- torch.distributed process group if one is already initialized by @Yard1 in https://github.com/huggingface/transformers/pull/16487
\n- segmentation_maps by @FrancescoSaverioZuppichini in https://github.com/huggingface/transformers/pull/15964
\n- run_qa_no_trainer.py by @bhadreshpsavani in https://github.com/huggingface/transformers/pull/16508
\n- __init__.py: modeling_xglm -> modeling_flax_xglm by @stancld in https://github.com/huggingface/transformers/pull/16556
\n- convert_tokens_to_string's output by @SaulLu in https://github.com/huggingface/transformers/pull/16540
\n- unpack_inputs-related changes by @gante in https://github.com/huggingface/transformers/pull/16499
\n- _load_pretrained_model_low_mem static + bug fix by @FrancescoSaverioZuppichini in https://github.com/huggingface/transformers/pull/16548
\nFull Changelog: https://github.com/huggingface/transformers/compare/v4.17.0...v4.18.0
", "publication_date": "2020-10-01", "publisher": "Zenodo", "related_identifiers": [ { "identifier": "https://github.com/huggingface/transformers/tree/v4.18.0", "relation_type": { "id": "issupplementto", "title": { "de": "Erg\u00e4nzt", "en": "Is supplement to" } }, "scheme": "url" } ], "resource_type": { "id": "software", "title": { "de": "Software", "en": "Software" } }, "rights": [ { "description": { "en": "" }, "id": "other-open", "title": { "en": "Other (Open)" } } ], "title": "Transformers: State-of-the-Art Natural Language Processing", "version": "v4.18.0" }, "parent": { "access": { "owned_by": { "user": 75471 } }, "communities": {}, "id": "3385997", "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.3385997", "provider": "datacite" } } }, "pids": { "doi": { "client": "datacite", "identifier": "10.5281/zenodo.6422483", "provider": "datacite" }, "oai": { "identifier": "oai:zenodo.org:6422483", "provider": "oai" } }, "revision_id": 21, "stats": { "all_versions": { "data_volume": 20259032945.0, "downloads": 2181, "unique_downloads": 2000, "unique_views": 68405, "views": 75563 }, "this_version": { "data_volume": 292949460.0, "downloads": 27, "unique_downloads": 24, "unique_views": 1312, "views": 1475 } }, "status": "published", "updated": "2022-12-02T16:03:58.651036+00:00", "versions": { "index": 77, "is_latest": false } }