From 01fde43e51001defc67bc6cc4e4cc9ecb78c59d3 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:16:26 +0300 Subject: [PATCH 01/17] DistilBERT links and description added --- docs/features/models/bert.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/features/models/bert.rst b/docs/features/models/bert.rst index 564cd31d73..5404a4d433 100644 --- a/docs/features/models/bert.rst +++ b/docs/features/models/bert.rst @@ -29,6 +29,8 @@ We have trained BERT-base model for other languages and domains: `[deeppavlov_pytorch] `__ - Conversational RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, `[deeppavlov_pytorch] `__ +- Conversational DistilRuBERT, Russian, cased, 6-layer, 768-hidden, 12-heads, 135.4M parameters: `[deeppavlov_pytorch] `__ +- Conversational DistilRuBERT-tiny, Russian, cased, 2-layer, 768-hidden, 12-heads, 107M parameters: `[deeppavlov_pytorch] `__ - Sentence Multilingual BERT, 101 languages, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, `[deeppavlov_pytorch] `__ - Sentence RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, @@ -50,6 +52,13 @@ English cased version of BERT-base as initialization for English Conversational Conversational RuBERT was trained on OpenSubtitles [5]_, Dirty, Pikabu, and Social Media segment of Taiga corpus [8]_. We assembled new vocabulary for Conversational RuBERT model on this data and initialized model with RuBERT. +Conversational DistilRuBERT (6 transformer layers) and DistilRuBERT-tiny (2 transformer layers) were trained on the same data as Conversational RuBERT and were highly inspired by DistilBERT [13]_. Namely, Distil* models (students) used pretrained Conversational RuBERT as a teacher and a linear combination of the following losses: + +1. Masked language modeling loss (between the student output logits for tokens and their true labels) +2. 
Kullback-Leibler divergence (between student and teacher output logits) +3. Cosine embedding loss (between averaged hidden states of the teacher and hidden states of the student) +4. Mean squared error loss (between averaged attention maps of the teacher and attention maps of the student) + Sentence Multilingual BERT is a representation-based sentence encoder for 101 languages of Multilingual BERT. It is initialized with Multilingual BERT and then fine-tuned on english MultiNLI [9]_ and on dev set of multilingual XNLI [10]_. Sentence representations are mean pooled token embeddings in the same manner as in Sentence-BERT [12]_. @@ -196,3 +205,4 @@ the :doc:`config ` file must be changed to match new BERT .. [10] Williams A., Bowman S. (2018) XNLI: Evaluating Cross-lingual Sentence Representations. arXiv preprint arXiv:1809.05053 .. [11] S. R. Bowman, G. Angeli, C. Potts, and C. D. Manning. (2015) A large annotated corpus for learning natural language inference. arXiv preprint arXiv:1508.05326 .. [12] N. Reimers, I. Gurevych (2019) Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint arXiv:1908.10084 +.. [13] Sanh, V., Debut, L., Chaumond, J., & Wolf, T. (2019). DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108. 
From b003a53632a6e7b8ed6fc19f6ad13e11e4f320e2 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:23:12 +0300 Subject: [PATCH 02/17] Distil configs for paraphraser --- ...phraser_convers_distilrubert_2L_torch.json | 85 +++++++++++++++++++ ...phraser_convers_distilrubert_6L_torch.json | 85 +++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json create mode 100644 deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json new file mode 100644 index 0000000000..4e51808e89 --- /dev/null +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json @@ -0,0 +1,85 @@ + { + "dataset_reader": { + "class_name": "paraphraser_reader", + "data_path": "{DOWNLOADS_PATH}/paraphraser_data", + "do_lower_case": false + }, + "dataset_iterator": { + "class_name": "siamese_iterator", + "seed": 243, + "len_valid": 500 + }, + "chainer": { + "in": ["text_a", "text_b"], + "in_y": ["y"], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 64, + "in": ["text_a", "text_b"], + "out": ["bert_features"] + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": 2, + "return_probas": false, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.11, + "hidden_keep_prob": 1.0, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 1.89e-5 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y" + ], + "out": [ + "predictions" + ] + } + ], + "out": ["predictions"] + }, + "train": { + "epochs": 100, + "batch_size": 64, 
+ "metrics": [ + "f1", + "accuracy" + ], + "validation_patience": 7, + "val_every_n_batches": 50, + "log_every_n_batches": 50, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_2L_torch" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_2L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json new file mode 100644 index 0000000000..ee21189915 --- /dev/null +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json @@ -0,0 +1,85 @@ +{ + "dataset_reader": { + "class_name": "paraphraser_reader", + "data_path": "{DOWNLOADS_PATH}/paraphraser_data", + "do_lower_case": false + }, + "dataset_iterator": { + "class_name": "siamese_iterator", + "seed": 243, + "len_valid": 500 + }, + "chainer": { + "in": ["text_a", "text_b"], + "in_y": ["y"], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 64, + "in": ["text_a", "text_b"], + "out": ["bert_features"] + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": 2, + "return_probas": false, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.0, + "hidden_keep_prob": 0.67, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 7.22e-5 + }, + "learning_rate_drop_patience": 3, + 
"learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y" + ], + "out": [ + "predictions" + ] + } + ], + "out": ["predictions"] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1", + "accuracy" + ], + "validation_patience": 7, + "val_every_n_batches": 50, + "log_every_n_batches": 50, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_6L_torch" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_6L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} From e840a0b2fa75f62e1633e08f27f5fee083c0d6f9 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:35:54 +0300 Subject: [PATCH 03/17] Distil scores and configs added --- docs/features/overview.rst | 264 ++++++++++++++++++++----------------- 1 file changed, 140 insertions(+), 124 deletions(-) diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 31e822ff89..72f58015c3 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -20,27 +20,31 @@ The second model reproduces architecture from the paper `Application of a Hybrid Bi-LSTM-CRF model to the task of Russian Named Entity Recognition `__ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01360.pdf. 
-+---------------------------------------------------------+-------+-----------------------------------------------------------------------------+-------------+ -| Dataset | Lang | Model | Test F1 | -+=========================================================+=======+=============================================================================+=============+ -| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | -+ + +-----------------------------------------------------------------------------+-------------+ -| (Collection 3) | | :config:`ner_rus.json ` | 95.1 | -+---------------------------------------------------------+-------+-----------------------------------------------------------------------------+-------------+ -| Ontonotes | Multi | :config:`ner_ontonotes_bert_mult.json ` | 88.8 | -+ +-------+-----------------------------------------------------------------------------+-------------+ -| | En | :config:`ner_ontonotes_bert.json ` | 88.6 | -+ + +-----------------------------------------------------------------------------+-------------+ -| | | :config:`ner_ontonotes.json ` | 87.1 | -+---------------------------------------------------------+ +-----------------------------------------------------------------------------+-------------+ -| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 91.7 | -+ + +-----------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | -+ + +-----------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003.json ` | 89.9 | -+---------------------------------------------------------+ +-----------------------------------------------------------------------------+-------------+ -| DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | 
-+---------------------------------------------------------+-------+-----------------------------------------------------------------------------+-------------+ ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------------------+-------------+ +| Dataset | Lang | Model | Test F1 | ++=========================================================+=======+========================================================================================================+=============+ +| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | ++ + +--------------------------------------------------------------------------------------------------------+-------------+ +| (Collection 3) | | :config:`ner_rus.json ` | 95.1 | ++ + +--------------------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_rus_convers_distilrubert_2L_torch.json ` | 88.4 ± 0.5 | ++ + +--------------------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_rus_convers_distilrubert_6L_torch.json ` | 93.3 ± 0.3 | ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------------------+-------------+ +| Ontonotes | Multi | :config:`ner_ontonotes_bert_mult.json ` | 88.8 | ++ +-------+--------------------------------------------------------------------------------------------------------+-------------+ +| | En | :config:`ner_ontonotes_bert.json ` | 88.6 | ++ + +--------------------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_ontonotes.json ` | 87.1 | ++---------------------------------------------------------+ 
+--------------------------------------------------------------------------------------------------------+-------------+ +| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 91.7 | ++ + +--------------------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | ++ + +--------------------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_conll2003.json ` | 89.9 | ++---------------------------------------------------------+ +--------------------------------------------------------------------------------------------------------+-------------+ +| DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------------------+-------------+ Slot filling models :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,61 +67,65 @@ BiLSTM with self-attention and other models are presented. The model also allows Several pre-trained models are available and presented in Table below. 
-+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | -+==================+====================+======+=================================================================================================+=============+========+========+===========+ -| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | -+------------------+--------------------+ 
+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Intent |`Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Sentiment |`SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | -+ + + 
+-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | -+ +--------------------+ +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| |`Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | -+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Sentiment |`Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | -+ +--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| |`RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | -+ + + 
+-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Intent |Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | -+------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | ++==================+=====================+======+==========================================================================================================+=============+==================+=================+===========+ +| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | 
++------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | 
:config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Intent | `Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Sentiment | `SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | ++ +---------------------+ +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | `Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | 
:config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Sentiment | `Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | ++ +---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| | `RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | ++ + + 
+----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.703 ± 0.0031 | 0.7348 ± 0.0028 | 690 Mb | ++ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational DistilRuBERT-base ` | | 0.7376 ± 0.0045 | 0.7645 ± 0.035 | 1.0 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Intent | Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ .. [1] Coucke A. et al. Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces //arXiv preprint arXiv:1805.10190. – 2018. .. [2] Smith L. N., Topin N. Super-convergence: Very fast training of residual networks using large learning rates. – 2018. 
@@ -231,11 +239,11 @@ Available pre-trained models for ranking: +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_mt_word2vec_smn ` | 68.56 | 67.91 | 81.49 | 95.63 | 1609 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ |:config:`ranking_ubuntu_v2_bert_uncased ` | 66.5 | 66.6 | -- | -- | 396 MB | + | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_uncased ` | 66.5 | 66.6 | -- | -- | 396 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ |:config:`ranking_ubuntu_v2_bert_uncased on PyTorch ` | 65.73 | 65.74 | -- | -- | 1.1 Gb | + | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_uncased on PyTorch ` | 65.73 | 65.74 | -- | -- | 1.1 Gb | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ |:config:`ranking_ubuntu_v2_bert_sep ` | 66.5 | 66.5 | -- | -- | 396 MB | + | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_sep ` | 66.5 | 66.5 | -- | -- | 396 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_mt_interact ` | 59.2 | 58.7 | -- | -- | 8906 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ @@ -247,15 
+255,19 @@ Available pre-trained models for paraphrase identification: .. table:: :widths: auto - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ - | Dataset |Model config | Val (accuracy)| Test (accuracy)| Val (F1)| Test (F1)| Val (log_loss)| Test (log_loss)|Downloads | - +========================+===============================================================================================+===============+================+=========+==========+===============+================+==========+ - |`paraphraser.ru`_ |:config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 |5938M | - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ - |`paraphraser.ru`_ |:config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- |1330M | - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ - |`paraphraser.ru`_ |:config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- |1325M | - +------------------------+-----------------------------------------------------------------------------------------------+---------------+----------------+---------+----------+---------------+----------------+----------+ + +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) 
| Val (log_loss) | Test (log_loss) | Downloads | + +========================+==================================================================================================================+================+=================+============+============+================+=================+===========+ + | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | + +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | + +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | + +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L_torch ` | 76.1 ± 0.2 | 64.5 ± 0.5 | 81.8 ± 0.2 | 73.9 ± 0.8 | -- | -- | 618M | + +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L_torch ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | + 
+------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ .. _`paraphraser.ru`: https://paraphraser.ru/ @@ -319,25 +331,29 @@ BERT-based model is described in `BERT: Pre-training of Deep Bidirectional Tran R-Net model is based on `R-NET: Machine Reading Comprehension with Self-matching Networks `__. -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| Dataset | Model config | lang | EM (dev) | F-1 (dev) | Downloads | -+===============+========================================================================+=======+================+=================+=================+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT ` | en | 80.88 | 88.49 | 806Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT on PyTorch ` | en | 80.79 | 88.30 | 1.1 Gb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov RuBERT ` | ru | 66.30+-0.24 | 84.60+-0.11 | 1325Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov multilingual BERT `| ru | 64.35+-0.39 | 83.39+-0.08 | 1323Mb | 
-+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `DRCD`_ | :config:`DeepPavlov Chinese BERT ` | ch | 84.19 | 89.23 | 362Mb | -+---------------+------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| Dataset | Model config | lang | EM (dev) | F-1 (dev) | Downloads | ++===============+===================================================================================================+=======+================+=================+=================+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT ` | en | 80.88 | 88.49 | 806Mb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT on PyTorch ` | en | 80.79 | 88.30 | 1.1 Gb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | 
++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| SDSJ Task B | :config:`DeepPavlov RuBERT ` | ru | 66.30 ± 0.24 | 84.60 ± 0.11 | 1325Mb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| SDSJ Task B | :config:`DeepPavlov multilingual BERT ` | ru | 64.35 ± 0.39 | 83.39 ± 0.08 | 1323Mb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| SDSJ Task B | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| SDSJ Task B | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 44.2 ± 0.46 | 65.1 ± 0.36 | 867Mb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| SDSJ Task B | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.23 ± 0.42 | 80.36 ± 0.28 | 1.18Gb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | ++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `DRCD`_ | :config:`DeepPavlov Chinese BERT ` | ch | 84.19 | 89.23 | 362Mb | 
++---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ In the case when answer is not necessary present in given context we have :config:`squad_noans ` model. This model outputs empty string in case if there is no answer in context. @@ -361,31 +377,31 @@ For more scores see :doc:`full table `. +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ | Dataset | Model | Word accuracy | Sent. accuracy | Download size (MB) | +======================+==============================================================================================================+===============+================+====================+ - |`UD2.3`_ (Russian) |`UD Pipe 2.3`_ (Straka et al., 2017) | 93.5 | | | + | `UD2.3`_ (Russian) | `UD Pipe 2.3`_ (Straka et al., 2017) | 93.5 | | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ | | `UD Pipe Future`_ (Straka et al., 2018) | 96.90 | | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`BERT-based model ` | 97.83 | 72.02 | 661 | + | | :config:`BERT-based model ` | 97.83 | 72.02 | 661 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |`Pymorphy`_ + `russian_tagsets`_ (first tag) | 60.93 | 0.00 | | + | | `Pymorphy`_ + `russian_tagsets`_ (first tag) | 60.93 | 0.00 | | + 
+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - |`UD2.0`_ (Russian) |`UD Pipe 1.2`_ (Straka et al., 2017) | 93.57 | 43.04 | | + | `UD2.0`_ (Russian) | `UD Pipe 1.2`_ (Straka et al., 2017) | 93.57 | 43.04 | | + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | 95.17 | 50.58 | 48.7 | + | | :config:`Basic model ` | 95.17 | 50.58 | 48.7 | + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Pymorphy-enhanced model ` | **96.23** | 58.00 | 48.7 | + | | :config:`Pymorphy-enhanced model ` | **96.23** | 58.00 | 48.7 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | `UD2.0`_ (Czech) |`UD Pipe 1.2`_ (Straka et al., 2017) | 91.86 | 42.28 | | + | `UD2.0`_ (Czech) | `UD Pipe 1.2`_ (Straka et al., 2017) | 91.86 | 42.28 | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | **94.35** | 51.56 | 41.8 | + | | :config:`Basic model ` | **94.35** | 51.56 | 41.8 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - |`UD2.0`_ (English) |`UD Pipe 1.2`_ (Straka et al., 2017) | 92.89 | 55.75 | | + | `UD2.0`_ (English) | `UD Pipe 1.2`_ (Straka et al., 2017) | 92.89 | 55.75 | | | 
+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | **93.00** | 55.18 | 16.9 | + | | :config:`Basic model ` | **93.00** | 55.18 | 16.9 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - |`UD2.0`_ (German) |`UD Pipe 1.2`_ (Straka et al., 2017) | 76.65 | 10.24 | | + | `UD2.0`_ (German) | `UD Pipe 1.2`_ (Straka et al., 2017) | 76.65 | 10.24 | | | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | |:config:`Basic model ` | **83.83** | 15.25 | 18.6 | + | | :config:`Basic model ` | **83.83** | 15.25 | 18.6 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ .. _`Pymorphy`: https://pymorphy2.readthedocs.io/en/latest/ @@ -415,7 +431,7 @@ on ``ru_syntagrus`` Russian corpus (version UD 2.3). | +-------------------------------------------------------------------------------------------+---------+----------+ | | `UDify (multilingual BERT)`_ (Kondratyuk, 2018) | 94.8 | 93.1 | | +-------------------------------------------------------------------------------------------+---------+----------+ - | |:config:`our BERT model ` | 95.2 | 93.7 | + | | :config:`our BERT model ` | 95.2 | 93.7 | +-------------------------+-------------------------------------------------------------------------------------------+---------+----------+ .. _`UD2.3`: http://hdl.handle.net/11234/1-2895 @@ -473,13 +489,13 @@ based on its Wikipedia knowledge. 
+----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ | Dataset | Model config | Wiki dump | F1 | Downloads | +================+====================================================================+=======================+========+===========+ -| `SQuAD-v1.1`_ |:config:`ODQA ` | enwiki (2018-02-11) | 35.89 | 9.7Gb | +| `SQuAD-v1.1`_ | :config:`ODQA ` | enwiki (2018-02-11) | 35.89 | 9.7Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SQuAD-v1.1`_ |:config:`ODQA ` | enwiki (2016-12-21) | 37.83 | 9.3Gb | +| `SQuAD-v1.1`_ | :config:`ODQA ` | enwiki (2016-12-21) | 37.83 | 9.3Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SDSJ Task B`_ |:config:`ODQA ` | ruwiki (2018-04-01) | 28.56 | 7.7Gb | +| `SDSJ Task B`_ | :config:`ODQA ` | ruwiki (2018-04-01) | 28.56 | 7.7Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SDSJ Task B`_ |:config:`ODQA with RuBERT ` | ruwiki (2018-04-01) | 37.83 | 4.3Gb | +| `SDSJ Task B`_ | :config:`ODQA with RuBERT ` | ruwiki (2018-04-01) | 37.83 | 4.3Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ From 544508d616daca02012e4c67559a4ddf36a4af16 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:36:51 +0300 Subject: [PATCH 04/17] Distil configs for paraphraser --- .../classifiers/paraphraser_convers_distilrubert_2L_torch.json | 2 +- .../classifiers/paraphraser_convers_distilrubert_6L_torch.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json 
b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json index 4e51808e89..aabfa9d7b5 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json @@ -32,7 +32,7 @@ "hidden_keep_prob": 1.0, "optimizer": "AdamW", "optimizer_parameters": { - "lr": 1.89e-5 + "lr": 1.89e-05 }, "learning_rate_drop_patience": 3, "learning_rate_drop_div": 1.5, diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json index ee21189915..aab78819d2 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json @@ -32,7 +32,7 @@ "hidden_keep_prob": 0.67, "optimizer": "AdamW", "optimizer_parameters": { - "lr": 7.22e-5 + "lr": 7.22e-05 }, "learning_rate_drop_patience": 3, "learning_rate_drop_div": 1.5, From 05ae90842a064a9a9f23cd8897c6dbb99b82b27c Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:38:02 +0300 Subject: [PATCH 05/17] Distil configs for rusentiment --- ...ntiment_convers_distilrubert_2L_torch.json | 153 ++++++++++++++++++ ...ntiment_convers_distilrubert_6L_torch.json | 153 ++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json create mode 100644 deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json new file mode 100644 index 0000000000..505fefb545 --- /dev/null +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json @@ -0,0 +1,153 @@ +{ + "dataset_reader": { + "class_name": 
"basic_classification_reader", + "x": "text", + "y": "label", + "data_path": "{DOWNLOADS_PATH}/rusentiment/", + "train": "rusentiment_random_posts.csv", + "test": "rusentiment_test.csv" + }, + "dataset_iterator": { + "class_name": "basic_classification_iterator", + "seed": 42, + "split_seed": 23, + "field_to_split": "train", + "split_fields": [ + "train", + "valid" + ], + "split_proportions": [ + 0.9, + 0.1 + ] + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": true, + "max_seq_length": 64, + "in": [ + "x" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": "y", + "out": "y_ids" + }, + { + "in": "y_ids", + "out": "y_onehot", + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.78, + "hidden_keep_prob": 0.89, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 7.22e-05 + }, + "learning_rate_drop_patience": 5, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": "y_pred_probas", + "out": "y_pred_ids", + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": "y_pred_ids", + "out": "y_pred_labels", + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1_weighted", + "f1_macro", + "accuracy", + { + "name": "roc_auc", + "inputs": [ + "y_onehot", + "y_pred_probas" + ] + } + ], + 
"validation_patience": 5, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_2L_torch" + }, + "download": [ + { + "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_random_posts.csv", + "subdir": "{DOWNLOADS_PATH}/rusentiment" + }, + { + "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_test.csv", + "subdir": "{DOWNLOADS_PATH}/rusentiment" + }, + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_2L_torch.tar.gz", + "subdir": "{MODELS_PATH}/classifiers/" + } + ] + } +} diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json new file mode 100644 index 0000000000..9d06ab2701 --- /dev/null +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json @@ -0,0 +1,153 @@ +{ + "dataset_reader": { + "class_name": "basic_classification_reader", + "x": "text", + "y": "label", + "data_path": "{DOWNLOADS_PATH}/rusentiment/", + "train": "rusentiment_random_posts.csv", + "test": "rusentiment_test.csv" + }, + "dataset_iterator": { + "class_name": "basic_classification_iterator", + "seed": 42, + "split_seed": 23, + "field_to_split": "train", + "split_fields": [ + "train", + "valid" + ], + "split_proportions": [ + 0.9, + 0.1 + ] + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": 
"torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": true, + "max_seq_length": 64, + "in": [ + "x" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": "y", + "out": "y_ids" + }, + { + "in": "y_ids", + "out": "y_onehot", + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.78, + "hidden_keep_prob": 0, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 4.56e-05 + }, + "learning_rate_drop_patience": 5, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": "y_pred_probas", + "out": "y_pred_ids", + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": "y_pred_ids", + "out": "y_pred_labels", + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "epochs": 100, + "batch_size": 64, + "metrics": [ + "f1_weighted", + "f1_macro", + "accuracy", + { + "name": "roc_auc", + "inputs": [ + "y_onehot", + "y_pred_probas" + ] + } + ], + "validation_patience": 5, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": 
"{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_6L_torch" + }, + "download": [ + { + "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_random_posts.csv", + "subdir": "{DOWNLOADS_PATH}/rusentiment" + }, + { + "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_test.csv", + "subdir": "{DOWNLOADS_PATH}/rusentiment" + }, + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_6L_torch.tar.gz", + "subdir": "{MODELS_PATH}/classifiers/" + } + ] + } +} From 74eeedd47e5df72c1ca4100226a3dd67c5eb4a0f Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:41:17 +0300 Subject: [PATCH 06/17] Distil configs for ner --- ...ner_rus_convers_distilrubert_2L_torch.json | 155 ++++++++++++++++++ ...ner_rus_convers_distilrubert_6L_torch.json | 155 ++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json create mode 100644 deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json new file mode 100644 index 0000000000..147aef5cee --- /dev/null +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json @@ -0,0 +1,155 @@ + { + "dataset_reader": { + "class_name": "conll2003_reader", + "data_path": "{DOWNLOADS_PATH}/total_rus/", + "dataset_name": "collection_rus", + "provide_pos": false + }, + "dataset_iterator": { + "class_name": "data_learning_iterator" + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 512, + "max_subword_length": 15, + "token_masking_prob": 0.0, + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + 
"x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] + }, + { + "id": "tag_vocab", + "class_name": "simple_vocab", + "unk_token": [ + "O" + ], + "pad_with_zeros": true, + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] + }, + { + "class_name": "torch_transformers_sequence_tagger", + "n_tags": "#tag_vocab.len", + "pretrained_bert": "{TRANSFORMER}", + "attention_probs_keep_prob": 0.11, + "hidden_keep_prob": 0.11, + "return_probas": false, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 5.45e-05, + "weight_decay": 1e-06, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-07, + "learning_rate_drop_patience": 30, + "learning_rate_drop_div": 1.5, + "load_before_drop": true, + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] + }, + { + "ref": "tag_vocab", + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] + } + ], + "out": [ + "x_tokens", + "y_pred" + ] + }, + "train": { + "epochs": 30, + "batch_size": 10, + "metrics": [ + { + "name": "ner_f1", + "inputs": [ + "y", + "y_pred" + ] + }, + { + "name": "ner_token_f1", + "inputs": [ + "y", + "y_pred" + ] + } + ], + "validation_patience": 100, + "val_every_n_batches": 20, + "log_every_n_batches": 20, + "show_examples": false, + "evaluation_targets": [ + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_2L_torch", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational" + }, + 
"download": [ + { + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_2L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json new file mode 100644 index 0000000000..0878c393e2 --- /dev/null +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json @@ -0,0 +1,155 @@ + { + "dataset_reader": { + "class_name": "conll2003_reader", + "data_path": "{DOWNLOADS_PATH}/total_rus/", + "dataset_name": "collection_rus", + "provide_pos": false + }, + "dataset_iterator": { + "class_name": "data_learning_iterator" + }, + "chainer": { + "in": [ + "x" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": false, + "max_seq_length": 512, + "max_subword_length": 15, + "token_masking_prob": 0.0, + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + "x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] + }, + { + "id": "tag_vocab", + "class_name": "simple_vocab", + "unk_token": [ + "O" + ], + "pad_with_zeros": true, + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] + }, + { + "class_name": "torch_transformers_sequence_tagger", + "n_tags": "#tag_vocab.len", + "pretrained_bert": "{TRANSFORMER}", + "attention_probs_keep_prob": 0.44, + "hidden_keep_prob": 0.89, + "return_probas": false, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2.78e-05, + "weight_decay": 1e-06, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-07, + "learning_rate_drop_patience": 30, + "learning_rate_drop_div": 1.5, + "load_before_drop": true, + "save_path": "{MODEL_PATH}/model", + "load_path": 
"{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] + }, + { + "ref": "tag_vocab", + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] + } + ], + "out": [ + "x_tokens", + "y_pred" + ] + }, + "train": { + "epochs": 30, + "batch_size": 10, + "metrics": [ + { + "name": "ner_f1", + "inputs": [ + "y", + "y_pred" + ] + }, + { + "name": "ner_token_f1", + "inputs": [ + "y", + "y_pred" + ] + } + ], + "validation_patience": 100, + "val_every_n_batches": 20, + "log_every_n_batches": 20, + "show_examples": false, + "evaluation_targets": [ + "valid", + "test" + ], + "tensorboard_log_dir": "{MODEL_PATH}/", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_6L_torch", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_6L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} From 5ae20e18936527c6b4e14c096ea84823981e1d75 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Tue, 13 Jul 2021 18:59:00 +0300 Subject: [PATCH 07/17] Distil squad configs added --- ...quad_ru_convers_distilrubert_2L_torch.json | 173 ++++++++++++++++++ ...u_convers_distilrubert_2L_torch_infer.json | 76 ++++++++ ...quad_ru_convers_distilrubert_6L_torch.json | 173 ++++++++++++++++++ ...u_convers_distilrubert_6L_torch_infer.json | 76 ++++++++ 4 files changed, 498 insertions(+) create mode 100644 deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json create mode 100644 deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json create mode 100644 deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json create mode 100644 
deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json new file mode 100644 index 0000000000..162a8f7013 --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json @@ -0,0 +1,173 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": [ + "context_raw", + "question_raw" + ], + "in_y": [ + "ans_raw", + "ans_raw_start" + ], + "pipe": [ + { + "class_name": "torch_squad_transformers_preprocessor", + "add_token_type_ids": true, + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 384, + "return_tokens": true, + "in": [ + "question_raw", + "context_raw" + ], + "out": [ + "bert_features", + "subtokens" + ] + }, + { + "class_name": "squad_bert_mapping", + "do_lower_case": "{lowercase}", + "in": [ + "context_raw", + "bert_features", + "subtokens" + ], + "out": [ + "subtok2chars", + "char2subtoks" + ] + }, + { + "class_name": "squad_bert_ans_preprocessor", + "do_lower_case": "{lowercase}", + "in": [ + "ans_raw", + "ans_raw_start", + "char2subtoks" + ], + "out": [ + "ans", + "ans_start", + "ans_end" + ] + }, + { + "class_name": "torch_transformers_squad", + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.11, + "hidden_keep_prob": 0.33, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 9e-05 + }, + "learning_rate_drop_patience": 2, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "ans_start", + "ans_end" + ], + "out": [ + 
"ans_start_predicted", + "ans_end_predicted", + "logits" + ] + }, + { + "class_name": "squad_bert_ans_postprocessor", + "in": [ + "ans_start_predicted", + "ans_end_predicted", + "context_raw", + "bert_features", + "subtok2chars", + "subtokens" + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "ans_end_predicted" + ] + } + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "logits" + ] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v2_em", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_em", + "inputs": [ + "ans", + "ans_predicted" + ] + } + ], + "tensorboard_log_dir": "{MODEL_PATH}/logs", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L_torch" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json new file mode 100644 index 0000000000..b67331a2fc --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json @@ -0,0 +1,76 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": 
"http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": ["context_raw", "question_raw"], + "in_y": ["ans_raw", "ans_raw_start"], + "pipe": [ + { + "class_name": "torch_transformers_squad_infer", + "lang": "ru", + "batch_size": 128, + "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_2L_torch.json", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 256, + "in": ["context_raw", "question_raw"], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + } + ], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v2_em", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_em", + "inputs": ["ans_raw", "ans_predicted"] + } + ] + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L_torch", + "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json new file mode 100644 index 
0000000000..574702120c --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json @@ -0,0 +1,173 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": [ + "context_raw", + "question_raw" + ], + "in_y": [ + "ans_raw", + "ans_raw_start" + ], + "pipe": [ + { + "class_name": "torch_squad_transformers_preprocessor", + "add_token_type_ids": true, + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 384, + "return_tokens": true, + "in": [ + "question_raw", + "context_raw" + ], + "out": [ + "bert_features", + "subtokens" + ] + }, + { + "class_name": "squad_bert_mapping", + "do_lower_case": "{lowercase}", + "in": [ + "context_raw", + "bert_features", + "subtokens" + ], + "out": [ + "subtok2chars", + "char2subtoks" + ] + }, + { + "class_name": "squad_bert_ans_preprocessor", + "do_lower_case": "{lowercase}", + "in": [ + "ans_raw", + "ans_raw_start", + "char2subtoks" + ], + "out": [ + "ans", + "ans_start", + "ans_end" + ] + }, + { + "class_name": "torch_transformers_squad", + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "attention_probs_keep_prob": 0.0, + "hidden_keep_prob": 0.33, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 3.67e-5 + }, + "learning_rate_drop_patience": 2, + "learning_rate_drop_div": 1.5, + "in": [ + "bert_features" + ], + "in_y": [ + "ans_start", + "ans_end" + ], + "out": [ + "ans_start_predicted", + "ans_end_predicted", + "logits" + ] + }, + { + "class_name": "squad_bert_ans_postprocessor", + "in": [ + "ans_start_predicted", + "ans_end_predicted", + "context_raw", + "bert_features", + "subtok2chars", + "subtokens" + ], + 
"out": [ + "ans_predicted", + "ans_start_predicted", + "ans_end_predicted" + ] + } + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "logits" + ] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v2_em", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_f1", + "inputs": [ + "ans", + "ans_predicted" + ] + }, + { + "name": "squad_v1_em", + "inputs": [ + "ans", + "ans_predicted" + ] + } + ], + "tensorboard_log_dir": "{MODEL_PATH}/logs", + "class_name": "torch_trainer" + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L_torch" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json new file mode 100644 index 0000000000..dbe16c055d --- /dev/null +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json @@ -0,0 +1,76 @@ +{ + "dataset_reader": { + "class_name": "squad_dataset_reader", + "dataset": "SberSQuADClean", + "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", + "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" + }, + "dataset_iterator": { + "class_name": "squad_iterator", + "seed": 1337, + "shuffle": true + }, + "chainer": { + "in": ["context_raw", "question_raw"], + "in_y": ["ans_raw", "ans_raw_start"], 
+ "pipe": [ + { + "class_name": "torch_transformers_squad_infer", + "lang": "ru", + "batch_size": 128, + "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_6L_torch.json", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{lowercase}", + "max_seq_length": 256, + "in": ["context_raw", "question_raw"], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + } + ], + "out": ["ans_predicted", "ans_start_predicted", "logits"] + }, + "train": { + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "log_every_n_batches": 250, + "val_every_n_batches": 500, + "batch_size": 10, + "validation_patience": 10, + "metrics": [ + { + "name": "squad_v2_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v2_em", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_f1", + "inputs": ["ans_raw", "ans_predicted"] + }, + { + "name": "squad_v1_em", + "inputs": ["ans_raw", "ans_predicted"] + } + ] + }, + "metadata": { + "variables": { + "lowercase": false, + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L_torch", + "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L_torch.tar.gz", + "subdir": "{MODELS_PATH}" + } + ] + } +} From b3dc15c133808f38eef67d53c764c9c309d6dbd4 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Fri, 30 Jul 2021 14:49:37 +0300 Subject: [PATCH 08/17] feat: paraphraser_convers_distilrubert_2L_torch removed torch --- ...2L_torch.json => paraphraser_convers_distilrubert_2L.json} | 4 ++-- docs/features/overview.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename deeppavlov/configs/classifiers/{paraphraser_convers_distilrubert_2L_torch.json => 
paraphraser_convers_distilrubert_2L.json} (96%) diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json similarity index 96% rename from deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json rename to deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json index aabfa9d7b5..5dcc8460b5 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L_torch.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json @@ -73,11 +73,11 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_2L_torch" + "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_2L" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_2L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 72f58015c3..3503cf36cd 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -264,9 +264,9 @@ Available pre-trained models for paraphrase identification: +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | 
+------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L_torch ` | 76.1 ± 0.2 | 64.5 ± 0.5 | 81.8 ± 0.2 | 73.9 ± 0.8 | -- | -- | 618M | + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 76.1 ± 0.2 | 64.5 ± 0.5 | 81.8 ± 0.2 | 73.9 ± 0.8 | -- | -- | 618M | +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L_torch ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L_torch ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ .. 
_`paraphraser.ru`: https://paraphraser.ru/ From 54830d134b0ed78b12b76ec398f2787a3552f03f Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Fri, 30 Jul 2021 15:11:49 +0300 Subject: [PATCH 09/17] feat: added test paraphraser_convers_distilrubert_2L --- ...6L_torch.json => paraphraser_convers_distilrubert_6L.json} | 4 ++-- tests/test_quick_start.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) rename deeppavlov/configs/classifiers/{paraphraser_convers_distilrubert_6L_torch.json => paraphraser_convers_distilrubert_6L.json} (96%) diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json similarity index 96% rename from deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json rename to deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json index aab78819d2..f8a9e96921 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L_torch.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json @@ -73,11 +73,11 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_6L_torch" + "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_6L" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_6L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 9c7b642364..7f16fefadc 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -144,6 +144,9 @@ ("classifiers/intents_sample_csv.json", "classifiers", ('TI',)): [ONE_ARGUMENT_INFER_CHECK], 
("classifiers/intents_sample_json.json", "classifiers", ('TI',)): [ONE_ARGUMENT_INFER_CHECK] }, + "distil": { + ("classifiers/paraphraser_convers_distilrubert_2L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK] + }, "entity_linking": { ("kbqa/entity_linking_rus.json", "entity_linking", ('IP',)): [ From d79aca0cdb888049c45d1918b36af1d976d9e447 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Fri, 30 Jul 2021 15:12:37 +0300 Subject: [PATCH 10/17] paraphraser_convers_distilrubert_6L renamed --- docs/features/overview.rst | 2 +- tests/test_quick_start.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 3503cf36cd..a8b50c10fe 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -266,7 +266,7 @@ Available pre-trained models for paraphrase identification: +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 76.1 ± 0.2 | 64.5 ± 0.5 | 81.8 ± 0.2 | 73.9 ± 0.8 | -- | -- | 618M | +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L_torch ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | 
+------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ .. _`paraphraser.ru`: https://paraphraser.ru/ diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 7f16fefadc..7b0dea2d9a 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -145,7 +145,8 @@ ("classifiers/intents_sample_json.json", "classifiers", ('TI',)): [ONE_ARGUMENT_INFER_CHECK] }, "distil": { - ("classifiers/paraphraser_convers_distilrubert_2L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK] + ("classifiers/paraphraser_convers_distilrubert_2L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], + ("classifiers/paraphraser_convers_distilrubert_6L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK] }, "entity_linking": { ("kbqa/entity_linking_rus.json", "entity_linking", ('IP',)): From 7c120a5145b0327c5c5e65d284bcb28c4b72adb7 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Fri, 30 Jul 2021 15:24:06 +0300 Subject: [PATCH 11/17] fix html build --- docs/features/overview.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/features/overview.rst b/docs/features/overview.rst index a8b50c10fe..ab2015ed37 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -255,19 +255,19 @@ Available pre-trained models for paraphrase identification: .. 
table:: :widths: auto - +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) | Val (log_loss) | Test (log_loss) | Downloads | - +========================+==================================================================================================================+================+=================+============+============+================+=================+===========+ - | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | - +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | - +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | - +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | 
Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) | Val (log_loss) | Test (log_loss) | Downloads | + +========================+======================================================================================================+================+=================+============+============+================+=================+===========+ + | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 76.1 ± 0.2 | 64.5 ± 0.5 | 81.8 ± 0.2 | 73.9 ± 0.8 | -- | -- | 618M | - +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + 
+------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | - +------------------------+------------------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ .. _`paraphraser.ru`: https://paraphraser.ru/ From d8f53e5b26c580446a49e3c482a1f93cc031b9a7 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Fri, 30 Jul 2021 18:30:17 +0300 Subject: [PATCH 12/17] removed torch from names --- ... rusentiment_convers_distilrubert_2L.json} | 4 +- ... rusentiment_convers_distilrubert_6L.json} | 4 +- ...n => ner_rus_convers_distilrubert_2L.json} | 4 +- ...n => ner_rus_convers_distilrubert_6L.json} | 4 +- ... => squad_ru_convers_distilrubert_2L.json} | 4 +- ...uad_ru_convers_distilrubert_2L_infer.json} | 6 +- ... 
=> squad_ru_convers_distilrubert_6L.json} | 4 +- ...uad_ru_convers_distilrubert_6L_infer.json} | 6 +- docs/features/overview.rst | 216 +++++++++--------- tests/test_quick_start.py | 10 +- 10 files changed, 135 insertions(+), 127 deletions(-) rename deeppavlov/configs/classifiers/{rusentiment_convers_distilrubert_2L_torch.json => rusentiment_convers_distilrubert_2L.json} (97%) rename deeppavlov/configs/classifiers/{rusentiment_convers_distilrubert_6L_torch.json => rusentiment_convers_distilrubert_6L.json} (97%) rename deeppavlov/configs/ner/{ner_rus_convers_distilrubert_2L_torch.json => ner_rus_convers_distilrubert_2L.json} (98%) rename deeppavlov/configs/ner/{ner_rus_convers_distilrubert_6L_torch.json => ner_rus_convers_distilrubert_6L.json} (98%) rename deeppavlov/configs/squad/{squad_ru_convers_distilrubert_2L_torch.json => squad_ru_convers_distilrubert_2L.json} (98%) rename deeppavlov/configs/squad/{squad_ru_convers_distilrubert_2L_torch_infer.json => squad_ru_convers_distilrubert_2L_infer.json} (95%) rename deeppavlov/configs/squad/{squad_ru_convers_distilrubert_6L_torch.json => squad_ru_convers_distilrubert_6L.json} (98%) rename deeppavlov/configs/squad/{squad_ru_convers_distilrubert_6L_torch_infer.json => squad_ru_convers_distilrubert_6L_infer.json} (95%) diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json similarity index 97% rename from deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json rename to deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json index 505fefb545..ff6c6cdabc 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L_torch.json +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json @@ -133,7 +133,7 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", "MODELS_PATH": 
"{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_2L_torch" + "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_2L" }, "download": [ { @@ -145,7 +145,7 @@ "subdir": "{DOWNLOADS_PATH}/rusentiment" }, { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_2L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}/classifiers/" } ] diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json similarity index 97% rename from deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json rename to deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json index 9d06ab2701..b211ebed0c 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L_torch.json +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json @@ -133,7 +133,7 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_6L_torch" + "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_6L" }, "download": [ { @@ -145,7 +145,7 @@ "subdir": "{DOWNLOADS_PATH}/rusentiment" }, { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_6L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}/classifiers/" } ] diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json similarity index 98% rename from 
deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json rename to deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json index 147aef5cee..15c931c1eb 100644 --- a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L_torch.json +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json @@ -142,12 +142,12 @@ "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_2L_torch", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_2L", "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational" }, "download": [ { - "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_2L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json similarity index 98% rename from deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json rename to deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json index 0878c393e2..b2534426a6 100644 --- a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L_torch.json +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json @@ -142,12 +142,12 @@ "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_6L_torch", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_conversational_distilrubert_6L", "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational" }, "download": [ { - "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_6L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/v1/ner/ner_rus_conversational_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git 
a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json similarity index 98% rename from deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json rename to deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json index 162a8f7013..f278ad9627 100644 --- a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json @@ -161,11 +161,11 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L_torch" + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_infer.json similarity index 95% rename from deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json rename to deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_infer.json index b67331a2fc..9202d83ba8 100644 --- a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_torch_infer.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L_infer.json @@ -18,7 +18,7 @@ "class_name": "torch_transformers_squad_infer", "lang": "ru", "batch_size": 128, - "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_2L_torch.json", + "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_2L.json", "vocab_file": "{TRANSFORMER}", "do_lower_case": "{lowercase}", "max_seq_length": 256, @@ -63,12 +63,12 @@ "DOWNLOADS_PATH": 
"{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L_torch", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_2L", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json similarity index 98% rename from deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json rename to deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json index 574702120c..8ca10a28f7 100644 --- a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json @@ -161,11 +161,11 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L_torch" + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_infer.json similarity index 95% rename from deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json rename to deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_infer.json index dbe16c055d..5c6171311c 100644 --- 
a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_torch_infer.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L_infer.json @@ -18,7 +18,7 @@ "class_name": "torch_transformers_squad_infer", "lang": "ru", "batch_size": 128, - "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_6L_torch.json", + "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_convers_distilrubert_6L.json", "vocab_file": "{TRANSFORMER}", "do_lower_case": "{lowercase}", "max_seq_length": 256, @@ -63,12 +63,12 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L_torch", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_convers_distilrubert_6L", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L_torch.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_convers_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}" } ] diff --git a/docs/features/overview.rst b/docs/features/overview.rst index ab2015ed37..7a515a40cb 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -20,31 +20,31 @@ The second model reproduces architecture from the paper `Application of a Hybrid Bi-LSTM-CRF model to the task of Russian Named Entity Recognition `__ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01360.pdf. 
-+---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------------------+-------------+ -| Dataset | Lang | Model | Test F1 | -+=========================================================+=======+========================================================================================================+=============+ -| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | -+ + +--------------------------------------------------------------------------------------------------------+-------------+ -| (Collection 3) | | :config:`ner_rus.json ` | 95.1 | -+ + +--------------------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_rus_convers_distilrubert_2L_torch.json ` | 88.4 ± 0.5 | -+ + +--------------------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_rus_convers_distilrubert_6L_torch.json ` | 93.3 ± 0.3 | -+---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------------------+-------------+ -| Ontonotes | Multi | :config:`ner_ontonotes_bert_mult.json ` | 88.8 | -+ +-------+--------------------------------------------------------------------------------------------------------+-------------+ -| | En | :config:`ner_ontonotes_bert.json ` | 88.6 | -+ + +--------------------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_ontonotes.json ` | 87.1 | -+---------------------------------------------------------+ +--------------------------------------------------------------------------------------------------------+-------------+ -| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 91.7 | -+ + 
+--------------------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | -+ + +--------------------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_conll2003.json ` | 89.9 | -+---------------------------------------------------------+ +--------------------------------------------------------------------------------------------------------+-------------+ -| DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | -+---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------------------+-------------+ ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ +| Dataset | Lang | Model | Test F1 | ++=========================================================+=======+============================================================================================+=============+ +| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| (Collection 3) | | :config:`ner_rus.json ` | 95.1 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_rus_convers_distilrubert_2L.json ` | 88.4 ± 0.5 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_rus_convers_distilrubert_6L.json ` | 93.3 ± 0.3 | ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ +| Ontonotes | Multi | 
:config:`ner_ontonotes_bert_mult.json ` | 88.8 | ++ +-------+--------------------------------------------------------------------------------------------+-------------+ +| | En | :config:`ner_ontonotes_bert.json ` | 88.6 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_ontonotes.json ` | 87.1 | ++---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+ +| ConLL-2003 | | :config:`ner_conll2003_bert.json ` | 91.7 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | ++ + +--------------------------------------------------------------------------------------------+-------------+ +| | | :config:`ner_conll2003.json ` | 89.9 | ++---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+ +| DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | ++---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ Slot filling models :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,65 +67,65 @@ BiLSTM with self-attention and other models are presented. The model also allows Several pre-trained models are available and presented in Table below. 
-+------------------+---------------------+------+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | -+==================+=====================+======+==========================================================================================================+=============+==================+=================+===========+ -| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ 
+------------------+-----------------+-----------+ -| | | | :config:`Wiki emb weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Intent | `Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | -+------------------+---------------------+ 
+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Sentiment | `SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | -+ +---------------------+ +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | `Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | -+------------------+---------------------+------+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Sentiment | `Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | -+ +---------------------+ 
+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| | `RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.703 ± 0.0031 | 0.7348 ± 0.0028 | 690 Mb | -+ + + +----------------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Conversational DistilRuBERT-base ` | | 0.7376 ± 0.0045 | 0.7645 ± 0.035 | 1.0 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Intent | Ru 
like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | -+------------------+---------------------+------+----------------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | ++==================+=====================+======+====================================================================================================+=============+==================+=================+===========+ +| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ 
+------------------+-----------------+-----------+ +| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Wiki emb weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Intent | `Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | 
++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Sentiment | `SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | ++ +---------------------+ +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | `Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Sentiment | `Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | ++ +---------------------+ 
+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| | `RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.703 ± 0.0031 | 0.7348 ± 0.0028 | 690 Mb | ++ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ +| | | | :config:`Conversational DistilRuBERT-base ` | | 0.7376 ± 0.0045 | 0.7645 ± 0.035 | 1.0 Gb | ++------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ +| Intent | Ru like`Yahoo-L31`_ | | :config:`Conversational vs 
Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | ++------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ .. [1] Coucke A. et al. Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces //arXiv preprint arXiv:1805.10190. – 2018. .. [2] Smith L. N., Topin N. Super-convergence: Very fast training of residual networks using large learning rates. – 2018. @@ -331,29 +331,29 @@ BERT-based model is described in `BERT: Pre-training of Deep Bidirectional Tran R-Net model is based on `R-NET: Machine Reading Comprehension with Self-matching Networks `__. -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| Dataset | Model config | lang | EM (dev) | F-1 (dev) | Downloads | -+===============+===================================================================================================+=======+================+=================+=================+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT ` | en | 80.88 | 88.49 | 806Mb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT on PyTorch ` | en | 80.79 | 88.30 | 1.1 Gb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task 
B | :config:`DeepPavlov RuBERT ` | ru | 66.30 ± 0.24 | 84.60 ± 0.11 | 1325Mb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov multilingual BERT ` | ru | 64.35 ± 0.39 | 83.39 ± 0.08 | 1323Mb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 44.2 ± 0.46 | 65.1 ± 0.36 | 867Mb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| SDSJ Task B | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.23 ± 0.42 | 80.36 ± 0.28 | 1.18Gb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `DRCD`_ | :config:`DeepPavlov Chinese BERT ` | ch | 84.19 | 89.23 | 362Mb | -+---------------+---------------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ 
++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| Dataset | Model config | lang | EM (dev) | F-1 (dev) | Downloads | ++================+=============================================================================================+=======+================+=================+=================+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT ` | en | 80.88 | 88.49 | 806Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov BERT on PyTorch ` | en | 80.79 | 88.30 | 1.1 Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov RuBERT ` | ru | 66.30 ± 0.24 | 84.60 ± 0.11 | 1325Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov multilingual BERT ` | ru | 64.35 ± 0.39 | 83.39 ± 0.08 | 1323Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| 
`SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 44.2 ± 0.46 | 65.1 ± 0.36 | 867Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.23 ± 0.42 | 80.36 ± 0.28 | 1.18Gb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ +| `DRCD`_ | :config:`DeepPavlov Chinese BERT ` | ch | 84.19 | 89.23 | 362Mb | ++----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ In the case when answer is not necessary present in given context we have :config:`squad_noans ` model. This model outputs empty string in case if there is no answer in context. @@ -570,5 +570,5 @@ goal-oriented bot and a slot-filling model with Telegram UI. .. _`SQuAD-v1.1`: https://arxiv.org/abs/1606.05250 -.. _`SDSJ Task B`: https://sdsj.sberbank.ai/2017/ru/contest.html +.. _`SDSJ Task B`: https://arxiv.org/abs/1912.09723 .. 
_`DRCD`: https://arxiv.org/abs/1806.00920 diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 7b0dea2d9a..74ae65dedc 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -146,7 +146,15 @@ }, "distil": { ("classifiers/paraphraser_convers_distilrubert_2L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], - ("classifiers/paraphraser_convers_distilrubert_6L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK] + ("classifiers/paraphraser_convers_distilrubert_6L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], + ("classifiers/rusentiment_convers_distilrubert_2L.json", "distil", ('IP')): [ONE_ARGUMENT_INFER_CHECK], + ("classifiers/rusentiment_convers_distilrubert_6L.json", "distil", ('IP')): [ONE_ARGUMENT_INFER_CHECK], + ("ner/ner_rus_convers_distilrubert_2L.json", "distil", ('IP')): [ONE_ARGUMENT_INFER_CHECK], + ("ner/ner_rus_convers_distilrubert_6L.json", "distil", ('IP')): [ONE_ARGUMENT_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_2L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_2L_infer.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_6L.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], + ("squad/squad_ru_convers_distilrubert_6L_infer.json", "distil", ('IP')): [TWO_ARGUMENTS_INFER_CHECK], }, "entity_linking": { ("kbqa/entity_linking_rus.json", "entity_linking", ('IP',)): From f390f1b8f3828bfde7465ab25fd9db2e81cbfae4 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Fri, 30 Jul 2021 20:10:10 +0300 Subject: [PATCH 13/17] refactor: datasets for distil --- .../paraphraser_convers_distilrubert_2L.json | 12 ++++++++++-- .../paraphraser_convers_distilrubert_6L.json | 12 ++++++++++-- .../rusentiment_convers_distilrubert_2L.json | 8 -------- .../rusentiment_convers_distilrubert_6L.json | 8 -------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git 
a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json index 5dcc8460b5..d0a2eee508 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json @@ -76,10 +76,18 @@ "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_2L" }, "download": [ - { + { "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}" - } + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + } ] } } diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json index f8a9e96921..c3f479ca07 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json @@ -76,10 +76,18 @@ "MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_6L" }, "download": [ - { + { "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}" - } + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + }, + { + "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", + "subdir": "{DOWNLOADS_PATH}/paraphraser_data" + } ] } } diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json index ff6c6cdabc..8042987116 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json +++ 
b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json @@ -136,14 +136,6 @@ "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_2L" }, "download": [ - { - "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_random_posts.csv", - "subdir": "{DOWNLOADS_PATH}/rusentiment" - }, - { - "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_test.csv", - "subdir": "{DOWNLOADS_PATH}/rusentiment" - }, { "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_2L.tar.gz", "subdir": "{MODELS_PATH}/classifiers/" diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json index b211ebed0c..c02f44938a 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json @@ -136,14 +136,6 @@ "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_6L" }, "download": [ - { - "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_random_posts.csv", - "subdir": "{DOWNLOADS_PATH}/rusentiment" - }, - { - "url": "https://raw.githubusercontent.com/strawberrypie/rusentiment/master/Dataset/rusentiment_test.csv", - "subdir": "{DOWNLOADS_PATH}/rusentiment" - }, { "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_6L.tar.gz", "subdir": "{MODELS_PATH}/classifiers/" From f99ce48e5c28041aa9c068210fbd1a6b5fd7aaf0 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Fri, 24 Sep 2021 18:51:43 +0300 Subject: [PATCH 14/17] Distilrubert-tiny configs update --- .../paraphraser_convers_distilrubert_2L.json | 6 ++-- .../rusentiment_convers_distilrubert_2L.json | 6 ++-- .../ner/ner_rus_convers_distilrubert_2L.json | 4 +-- .../squad_ru_convers_distilrubert_2L.json | 
4 +-- docs/features/overview.rst | 32 +++++++++---------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json index d0a2eee508..f35ebc134a 100644 --- a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json @@ -28,11 +28,11 @@ "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "attention_probs_keep_prob": 0.11, - "hidden_keep_prob": 1.0, + "attention_probs_keep_prob": 0.67, + "hidden_keep_prob": 0.0, "optimizer": "AdamW", "optimizer_parameters": { - "lr": 1.89e-05 + "lr": 9e-05 }, "learning_rate_drop_patience": 3, "learning_rate_drop_div": 1.5, diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json index 8042987116..42d0c72fc4 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json @@ -66,11 +66,11 @@ "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "attention_probs_keep_prob": 0.78, - "hidden_keep_prob": 0.89, + "attention_probs_keep_prob": 0.33, + "hidden_keep_prob": 0.67, "optimizer": "AdamW", "optimizer_parameters": { - "lr": 7.22e-05 + "lr": 3.67e-05 }, "learning_rate_drop_patience": 5, "learning_rate_drop_div": 1.5, diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json index 15c931c1eb..6123c18138 100644 --- a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json @@ -58,14 +58,14 @@ "n_tags": "#tag_vocab.len", "pretrained_bert": 
"{TRANSFORMER}", "attention_probs_keep_prob": 0.11, - "hidden_keep_prob": 0.11, + "hidden_keep_prob": 0.67, "return_probas": false, "encoder_layer_ids": [ -1 ], "optimizer": "AdamW", "optimizer_parameters": { - "lr": 5.45e-05, + "lr": 8.11e-05, "weight_decay": 1e-06, "betas": [ 0.9, diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json index f278ad9627..830ded55f6 100644 --- a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json @@ -68,8 +68,8 @@ "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "attention_probs_keep_prob": 0.11, - "hidden_keep_prob": 0.33, + "attention_probs_keep_prob": 0.0, + "hidden_keep_prob": 0.11, "optimizer": "AdamW", "optimizer_parameters": { "lr": 9e-05 diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 7a515a40cb..724e7c6fd7 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -27,7 +27,7 @@ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01 + + +--------------------------------------------------------------------------------------------+-------------+ | (Collection 3) | | :config:`ner_rus.json ` | 95.1 | + + +--------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_rus_convers_distilrubert_2L.json ` | 88.4 ± 0.5 | +| | | :config:`ner_rus_convers_distilrubert_2L.json ` | 94.2 ± 0.2 | + + +--------------------------------------------------------------------------------------------+-------------+ | | | :config:`ner_rus_convers_distilrubert_6L.json ` | 93.3 ± 0.3 | +---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ @@ -120,7 +120,7 @@ Several pre-trained 
models are available and presented in Table below. + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ | | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.703 ± 0.0031 | 0.7348 ± 0.0028 | 690 Mb | +| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.72 ± 0.0016 | 0.7458 ± 0.0098 | 690 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ | | | | :config:`Conversational DistilRuBERT-base ` | | 0.7376 ± 0.0045 | 0.7645 ± 0.035 | 1.0 Gb | +------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ @@ -255,19 +255,19 @@ Available pre-trained models for paraphrase identification: .. 
table:: :widths: auto - +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) | Val (log_loss) | Test (log_loss) | Downloads | - +========================+======================================================================================================+================+=================+============+============+================+=================+===========+ - | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | - +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | - +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | - +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 76.1 ± 0.2 | 64.5 ± 0.5 | 81.8 ± 0.2 | 73.9 ± 0.8 | -- | -- | 618M | - 
+------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | - +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) | Val (log_loss) | Test (log_loss) | Downloads | + +========================+======================================================================================================+================+=================+==============+==============+================+=================+===========+ + | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | 
:config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 79.4 ± 0.01 | 67.5 ± 0.006 | 84.4 ± 0.04 | 76.2 ± 0.006 | -- | -- | 618M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | + +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ .. 
_`paraphraser.ru`: https://paraphraser.ru/ @@ -346,7 +346,7 @@ R-Net model is based on `R-NET: Machine Reading Comprehension with Self-matching +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ | `SDSJ Task B`_ | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 44.2 ± 0.46 | 65.1 ± 0.36 | 867Mb | +| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 48.3 ± 0.41 | 68.9 ± 0.39 | 867Mb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ | `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.23 ± 0.42 | 80.36 ± 0.28 | 1.18Gb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ From 3da37c461a7f9dae8571e08e1d93214a1a63fd2d Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Wed, 3 Nov 2021 10:54:29 +0300 Subject: [PATCH 15/17] Distilrubert_base: configs updated --- .../classifiers/paraphraser_convers_distilrubert_6L.json | 6 +++--- .../classifiers/rusentiment_convers_distilrubert_6L.json | 4 ++-- deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json | 4 ++-- .../configs/squad/squad_ru_convers_distilrubert_6L.json | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json index c3f479ca07..02060d97ea 100644 --- 
a/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json +++ b/deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json @@ -28,11 +28,11 @@ "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "attention_probs_keep_prob": 0.0, - "hidden_keep_prob": 0.67, + "attention_probs_keep_prob": 0.89, + "hidden_keep_prob": 0.44, "optimizer": "AdamW", "optimizer_parameters": { - "lr": 7.22e-05 + "lr": 5.46e-05 }, "learning_rate_drop_patience": 3, "learning_rate_drop_div": 1.5, diff --git a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json index c02f44938a..f81488dbbb 100644 --- a/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json +++ b/deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json @@ -66,8 +66,8 @@ "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "attention_probs_keep_prob": 0.78, - "hidden_keep_prob": 0, + "attention_probs_keep_prob": 0.22, + "hidden_keep_prob": 0.22, "optimizer": "AdamW", "optimizer_parameters": { "lr": 4.56e-05 diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json index b2534426a6..b9cf79ab5e 100644 --- a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json @@ -57,8 +57,8 @@ "class_name": "torch_transformers_sequence_tagger", "n_tags": "#tag_vocab.len", "pretrained_bert": "{TRANSFORMER}", - "attention_probs_keep_prob": 0.44, - "hidden_keep_prob": 0.89, + "attention_probs_keep_prob": 0.0, + "hidden_keep_prob": 1.0, "return_probas": false, "encoder_layer_ids": [ -1 diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json index 
8ca10a28f7..2017ef8d8e 100644 --- a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json @@ -69,10 +69,10 @@ "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", "attention_probs_keep_prob": 0.0, - "hidden_keep_prob": 0.33, + "hidden_keep_prob": 1.0, "optimizer": "AdamW", "optimizer_parameters": { - "lr": 3.67e-5 + "lr": 2.78e-05 }, "learning_rate_drop_patience": 2, "learning_rate_drop_div": 1.5, From d7618ee9abb06294887ee883c484f3a9974885a8 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Wed, 3 Nov 2021 11:20:50 +0300 Subject: [PATCH 16/17] Scores updated --- docs/features/overview.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 724e7c6fd7..376b780cca 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -29,7 +29,7 @@ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01 + + +--------------------------------------------------------------------------------------------+-------------+ | | | :config:`ner_rus_convers_distilrubert_2L.json ` | 94.2 ± 0.2 | + + +--------------------------------------------------------------------------------------------+-------------+ -| | | :config:`ner_rus_convers_distilrubert_6L.json ` | 93.3 ± 0.3 | +| | | :config:`ner_rus_convers_distilrubert_6L.json ` | 96.4 ± 0.2 | +---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ | Ontonotes | Multi | :config:`ner_ontonotes_bert_mult.json ` | 88.8 | + +-------+--------------------------------------------------------------------------------------------+-------------+ @@ -120,9 +120,9 @@ Several pre-trained models are available and presented in Table below. 
+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ | | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.72 ± 0.0016 | 0.7458 ± 0.0098 | 690 Mb | +| | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.72 ± 0.0016 | 0.74 ± 0.01 | 690 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Conversational DistilRuBERT-base ` | | 0.7376 ± 0.0045 | 0.7645 ± 0.035 | 1.0 Gb | +| | | | :config:`Conversational DistilRuBERT-base ` | | 0.73 ± 0.003 | 0.75 ± 0.013 | 1.0 Gb | +------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ | Intent | Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | +------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ @@ -266,7 +266,7 @@ Available pre-trained models for paraphrase identification: +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_2L ` | 79.4 ± 0.01 | 67.5 ± 0.006 | 84.4 ± 0.04 | 76.2 ± 0.006 | -- | -- | 618M | 
+------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ - | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 86.5 ± 0.5 | 78.9 ± 0.4 | 89.6 ± 0.3 | 83.2 ± 0.5 | -- | -- | 930M | + | `paraphraser.ru`_ | :config:`paraphraser_convers_distilrubert_6L ` | 87.1 ± 0.01 | 78.0 ± 0.01 | 90.0 ± 0.08 | 82.9 ± 0.003 | -- | -- | 930M | +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+--------------+--------------+----------------+-----------------+-----------+ .. _`paraphraser.ru`: https://paraphraser.ru/ @@ -348,7 +348,7 @@ R-Net model is based on `R-NET: Machine Reading Comprehension with Self-matching +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ | `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-tiny ` | ru | 48.3 ± 0.41 | 68.9 ± 0.39 | 867Mb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.23 ± 0.42 | 80.36 ± 0.28 | 1.18Gb | +| `SDSJ Task B`_ | :config:`DeepPavlov DistilRuBERT-base ` | ru | 61.77 ± 0.25 | 80.39 ± 0.21 | 1.18Gb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ | `DRCD`_ | :config:`DeepPavlov multilingual BERT ` | ch | 84.86 | 89.03 | 630Mb | 
+----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ From f1118960c0c28052d5143c5a22d6ca4776e670d5 Mon Sep 17 00:00:00 2001 From: ayeffkay Date: Thu, 4 Nov 2021 12:21:42 +0300 Subject: [PATCH 17/17] Hyperparams changed for distil- ner and squad --- deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json | 4 ++-- .../configs/squad/squad_ru_convers_distilrubert_6L.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json index b9cf79ab5e..f719065d58 100644 --- a/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json +++ b/deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json @@ -1,4 +1,4 @@ - { +{ "dataset_reader": { "class_name": "conll2003_reader", "data_path": "{DOWNLOADS_PATH}/total_rus/", @@ -57,7 +57,7 @@ "class_name": "torch_transformers_sequence_tagger", "n_tags": "#tag_vocab.len", "pretrained_bert": "{TRANSFORMER}", - "attention_probs_keep_prob": 0.0, + "attention_probs_keep_prob": 0.56, "hidden_keep_prob": 1.0, "return_probas": false, "encoder_layer_ids": [ diff --git a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json index 2017ef8d8e..58e815cc77 100644 --- a/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json +++ b/deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json @@ -68,8 +68,8 @@ "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "attention_probs_keep_prob": 0.0, - "hidden_keep_prob": 1.0, + "attention_probs_keep_prob": 0.45, + "hidden_keep_prob": 0.56, "optimizer": "AdamW", "optimizer_parameters": { "lr": 2.78e-05