From ec1faa5e64f0aa4fb3176e2dfb599440ffe17cc9 Mon Sep 17 00:00:00 2001 From: asajatovic Date: Sat, 7 Dec 2019 21:08:24 +0100 Subject: [PATCH] Update default language models (#7) * Update default language models Fixes #6 * Update version --- setup.py | 2 +- spacy_udpipe/languages.json | 54 +++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/setup.py b/setup.py index 3115bd5..0a4376a 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ setuptools.setup( name="spacy-udpipe", - version="0.0.4", + version="0.0.5", description="Use fast UDPipe models directly in spaCy", long_description=long_description, long_description_content_type="text/markdown", diff --git a/spacy_udpipe/languages.json b/spacy_udpipe/languages.json index 3c7b1f5..eb47d18 100644 --- a/spacy_udpipe/languages.json +++ b/spacy_udpipe/languages.json @@ -4,58 +4,71 @@ "eu": "basque-bdt-ud-2.4-190531.udpipe", "be": "belarusian-hse-ud-2.4-190531.udpipe", "grc": "ancient_greek-perseus-ud-2.4-190531.udpipe", + "grc-perseus": "ancient_greek-perseus-ud-2.4-190531.udpipe", "grc-proiel": "ancient_greek-proiel-ud-2.4-190531.udpipe", "ca": "catalan-ancora-ud-2.4-190531.udpipe", "zh": "chinese-gsd-ud-2.4-190531.udpipe", "lzh": "classical_chinese-kyoto-ud-2.4-190531.udpipe", "ar": "arabic-padt-ud-2.4-190531.udpipe", "bg": "bulgarian-btb-ud-2.4-190531.udpipe", - "cs-cac": "czech-cac-ud-2.4-190531.udpipe", - "cs": "czech-cltt-ud-2.4-190531.udpipe", + "cs": "czech-pdt-ud-2.4-190531.udpipe", "cs-pdt": "czech-pdt-ud-2.4-190531.udpipe", + "cs-cac": "czech-cac-ud-2.4-190531.udpipe", "cs-fictree": "czech-fictree-ud-2.4-190531.udpipe", + "cs-cltt": "czech-cltt-ud-2.4-190531.udpipe", "hr": "croatian-set-ud-2.4-190531.udpipe", "coptic": "coptic-scriptorium-ud-2.4-190531.udpipe", - "nl": "dutch-lassysmall-ud-2.4-190531.udpipe", + "nl": "dutch-alpino-ud-2.4-190531.udpipe", "nl-alpino": "dutch-alpino-ud-2.4-190531.udpipe", + "nl-lassysmall": "dutch-lassysmall-ud-2.4-190531.udpipe", "da": "danish-ddt-ud-2.4-190531.udpipe", "et": "estonian-edt-ud-2.4-190531.udpipe", + "et-edt": "estonian-edt-ud-2.4-190531.udpipe", "et-ewt": "estonian-ewt-ud-2.4-190531.udpipe", - "en": "english-partut-ud-2.4-190531.udpipe", - "en-gum": "english-gum-ud-2.4-190531.udpipe", + "en": "english-ewt-ud-2.4-190531.udpipe", "en-ewt": "english-ewt-ud-2.4-190531.udpipe", + "en-gum": "english-gum-ud-2.4-190531.udpipe", "en-lines": "english-lines-ud-2.4-190531.udpipe", - "fr": "french-partut-ud-2.4-190531.udpipe", + "en-partut": "english-partut-ud-2.4-190531.udpipe", + "fr": "french-gsd-ud-2.4-190531.udpipe", "fr-gsd": "french-gsd-ud-2.4-190531.udpipe", - "fr-spoken": "french-spoken-ud-2.4-190531.udpipe", + "fr-partut": "french-partut-ud-2.4-190531.udpipe", "fr-sequoia": "french-sequoia-ud-2.4-190531.udpipe", + "fr-spoken": "french-spoken-ud-2.4-190531.udpipe", "fro": "old_french-srcmf-ud-2.4-190531.udpipe", - "fi": "finnish-ftb-ud-2.4-190531.udpipe", + "fi": "finnish-tdt-ud-2.4-190531.udpipe", "fi-tdt": "finnish-tdt-ud-2.4-190531.udpipe", + "fi-ftb": "finnish-ftb-ud-2.4-190531.udpipe", "de": "german-gsd-ud-2.4-190531.udpipe", - "gl": "galician-treegal-ud-2.4-190531.udpipe", + "gl": "galician-ctg-ud-2.4-190531.udpipe", "gl-ctg": "galician-ctg-ud-2.4-190531.udpipe", + "gl-treegal": "galician-treegal-ud-2.4-190531.udpipe", "el": "greek-gdt-ud-2.4-190531.udpipe", "got": "gothic-proiel-ud-2.4-190531.udpipe", "hu": "hungarian-szeged-ud-2.4-190531.udpipe", "hi": "hindi-hdtb-ud-2.4-190531.udpipe", "he": "hebrew-htb-ud-2.4-190531.udpipe", - "it": "italian-partut-ud-2.4-190531.udpipe", - "it-postwita": "italian-postwita-ud-2.4-190531.udpipe", + "it": "italian-isdt-ud-2.4-190531.udpipe", "it-isdt": "italian-isdt-ud-2.4-190531.udpipe", + "it-partut": "italian-partut-ud-2.4-190531.udpipe", + "it-postwita": "italian-postwita-ud-2.4-190531.udpipe", "it-vit": "italian-vit-ud-2.4-190531.udpipe", "ga": "irish-idt-ud-2.4-190531.udpipe", "id": "indonesian-gsd-ud-2.4-190531.udpipe", - "la": "latin-perseus-ud-2.4-190531.udpipe", - "la-proiel": "latin-proiel-ud-2.4-190531.udpipe", + "la": "latin-ittb-ud-2.4-190531.udpipe", "la-ittb": "latin-ittb-ud-2.4-190531.udpipe", - "ko": "korean-gsd-ud-2.4-190531.udpipe", + "la-proiel": "latin-proiel-ud-2.4-190531.udpipe", + "la-perseus": "latin-perseus-ud-2.4-190531.udpipe", + "ko": "korean-kaist-ud-2.4-190531.udpipe", "ko-kaist": "korean-kaist-ud-2.4-190531.udpipe", + "ko-gsd": "korean-gsd-ud-2.4-190531.udpipe", "ja": "japanese-gsd-ud-2.4-190531.udpipe", - "lt": "lithuanian-hse-ud-2.4-190531.udpipe", + "lt": "lithuanian-alksnis-ud-2.4-190531.udpipe", "lt-alksnis": "lithuanian-alksnis-ud-2.4-190531.udpipe", + "lt-hse": "lithuanian-hse-ud-2.4-190531.udpipe", "lv": "latvian-lvtb-ud-2.4-190531.udpipe", "nn": "norwegian-nynorsk-ud-2.4-190531.udpipe", + "nn-nynorsk": "norwegian-nynorsk-ud-2.4-190531.udpipe", "nn-nynorsklia": "norwegian-nynorsklia-ud-2.4-190531.udpipe", "nb": "norwegian-bokmaal-ud-2.4-190531.udpipe", "se": "north_sami-giella-ud-2.4-190531.udpipe", @@ -64,19 +77,25 @@ "fa": "persian-seraji-ud-2.4-190531.udpipe", "cu": "old_church_slavonic-proiel-ud-2.4-190531.udpipe", "ro": "romanian-rrt-ud-2.4-190531.udpipe", + "ro-rrt": "romanian-rrt-ud-2.4-190531.udpipe", "ro-nonstandard": "romanian-nonstandard-ud-2.4-190531.udpipe", "pt": "portuguese-gsd-ud-2.4-190531.udpipe", + "pt-gsd": "portuguese-gsd-ud-2.4-190531.udpipe", "pt-bosque": "portuguese-bosque-ud-2.4-190531.udpipe", "pl": "polish-pdb-ud-2.4-190531.udpipe", + "pl-pdb": "polish-pdb-ud-2.4-190531.udpipe", "pl-lfg": "polish-lfg-ud-2.4-190531.udpipe", "sr": "serbian-set-ud-2.4-190531.udpipe", - "ru": "russian-gsd-ud-2.4-190531.udpipe", + "ru": "russian-syntagrus-ud-2.4-190531.udpipe", "ru-syntagrus": "russian-syntagrus-ud-2.4-190531.udpipe", + "ru-gsd": "russian-gsd-ud-2.4-190531.udpipe", "ru-taiga": "russian-taiga-ud-2.4-190531.udpipe", "orv": "old_russian-torot-ud-2.4-190531.udpipe", - "es": "spanish-gsd-ud-2.4-190531.udpipe", + "es": "spanish-ancora-ud-2.4-190531.udpipe", "es-ancora": "spanish-ancora-ud-2.4-190531.udpipe", + "es-gsd": "spanish-gsd-ud-2.4-190531.udpipe", "sl": "slovenian-ssj-ud-2.4-190531.udpipe", + "sl-ssj": "slovenian-ssj-ud-2.4-190531.udpipe", "sl-sst": "slovenian-sst-ud-2.4-190531.udpipe", "sk": "slovak-snk-ud-2.4-190531.udpipe", "uk": "ukrainian-iu-ud-2.4-190531.udpipe", @@ -84,6 +103,7 @@ "te": "telugu-mtg-ud-2.4-190531.udpipe", "ta": "tamil-ttb-ud-2.4-190531.udpipe", "sv": "swedish-talbanken-ud-2.4-190531.udpipe", + "sv-talbanken": "swedish-talbanken-ud-2.4-190531.udpipe", "sv-lines": "swedish-lines-ud-2.4-190531.udpipe", "wo": "wolof-wtb-ud-2.4-190531.udpipe", "vi": "vietnamese-vtb-ud-2.4-190531.udpipe",