{"authors": [{"name": "Alix Chagu\u00e9", "affiliation": "ALMAnaCH, Inria, Paris ; Universit\u00e9 de Montr\u00e9al, Montr\u00e9al ; Ecole Pratique des Hautes Etudes, Paris"}], "summary": "McCATMuS - Transcription model for handwritten, printed and typewritten documents from the 16th century to the 21st century", "description": "The McCATMuS model was trained on the aggregation of 22 datasets covering documents written in diverse languages (mostly French, but also Latin, Spanish, English, German and Italian), handwritten, typewritten or printed, produced between the late 16th century and 2023.\n\nThe datasets were curated and converted to follow the CATMuS guidelines, thus guaranteeing homogeneous and correctly documented transcription rules.\n\nThe dataset is also available via Hugging Face: https://huggingface.co/datasets/CATMuS/modern\n", "accuracy": 92.80267357826233, "license": "CC-BY-4.0", "script": ["Latn"], "name": "McCATMuS_nfd_nofix_V1.mlmodel", "graphemes": [" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "=", "?", "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "]", "^", "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "~", "\u00a4", "\u00a8", "\u00df", "\u00e6", "\u0256", "\u0300", "\u0301", "\u0302", "\u0303", "\u0304", "\u0308", "\u0327", "\u0328", "\u033e", "\u0368", "\u036b", "\u2013", "\u2038", "\u204a", "\u20ac", "\u2191", "\u27e6", "\u27e7", "\ua751", "\ua76e", "\ua770", "\ue8bf"]}