From 5219fe7ca39788cee3e4087176329bbada241da3 Mon Sep 17 00:00:00 2001 From: Thilina Rajapakse Date: Mon, 18 Dec 2023 16:25:57 +0100 Subject: [PATCH] Bump version --- examples/t5/mixed_tasks/data_prep.ipynb | 95 +++++++++++++------ examples/t5/mt5/data_prep.ipynb | 95 +++++++++++++------ setup.py | 3 +- .../classification/classification_model.py | 9 +- .../classification/classification_utils.py | 1 - .../multi_label_classification_model.py | 1 - .../multi_modal_classification_model.py | 1 - .../transformer_models/albert_model.py | 1 - .../transformer_models/bert_model.py | 1 - .../transformer_models/electra_model.py | 1 - .../transformer_models/layoutlm_model.py | 1 - .../transformer_models/mmbt_model.py | 1 - simpletransformers/conv_ai/conv_ai_model.py | 1 - simpletransformers/custom_models/models.py | 4 - .../transformer_models/albert_model.py | 1 - .../transformer_models/bert_model.py | 1 - .../language_generation_model.py | 2 - .../language_modeling_model.py | 1 - .../representation_model.py | 1 - simpletransformers/losses/dice_loss.py | 1 - simpletransformers/losses/tversky_loss.py | 1 - simpletransformers/ner/ner_model.py | 5 +- .../question_answering_model.py | 1 - .../question_answering_utils.py | 42 ++++---- simpletransformers/seq2seq/seq2seq_model.py | 19 ++-- simpletransformers/t5/t5_model.py | 10 +- simpletransformers/t5/t5_utils.py | 2 +- 27 files changed, 176 insertions(+), 126 deletions(-) diff --git a/examples/t5/mixed_tasks/data_prep.ipynb b/examples/t5/mixed_tasks/data_prep.ipynb index 31bf536e..8ece0cce 100644 --- a/examples/t5/mixed_tasks/data_prep.ipynb +++ b/examples/t5/mixed_tasks/data_prep.ipynb @@ -52,30 +52,34 @@ } ], "source": [ - "prefix = 'data/binary_classification/'\n", + "prefix = \"data/binary_classification/\"\n", "\n", - "binary_train_df = pd.read_csv(prefix + 'train.csv', header=None)\n", + "binary_train_df = pd.read_csv(prefix + \"train.csv\", header=None)\n", "binary_train_df.head()\n", "\n", - "binary_eval_df = pd.read_csv(prefix + 'test.csv', header=None)\n", + "binary_eval_df = pd.read_csv(prefix + \"test.csv\", header=None)\n", "binary_eval_df.head()\n", "\n", "binary_train_df[0] = (binary_train_df[0] == 2).astype(int)\n", "binary_eval_df[0] = (binary_eval_df[0] == 2).astype(int)\n", "\n", - "binary_train_df = pd.DataFrame({\n", - " 'prefix': [\"binary classification\" for i in range(len(binary_train_df))],\n", - " 'input_text': binary_train_df[1].str.replace('\\n', ' '),\n", - " 'target_text': binary_train_df[0].astype(str),\n", - "})\n", + "binary_train_df = pd.DataFrame(\n", + " {\n", + " \"prefix\": [\"binary classification\" for i in range(len(binary_train_df))],\n", + " \"input_text\": binary_train_df[1].str.replace(\"\\n\", \" \"),\n", + " \"target_text\": binary_train_df[0].astype(str),\n", + " }\n", + ")\n", "\n", "print(binary_train_df.head())\n", "\n", - "binary_eval_df = pd.DataFrame({\n", - " 'prefix': [\"binary classification\" for i in range(len(binary_eval_df))],\n", - " 'input_text': binary_eval_df[1].str.replace('\\n', ' '),\n", - " 'target_text': binary_eval_df[0].astype(str),\n", - "})\n", + "binary_eval_df = pd.DataFrame(\n", + " {\n", + " \"prefix\": [\"binary classification\" for i in range(len(binary_eval_df))],\n", + " \"input_text\": binary_eval_df[1].str.replace(\"\\n\", \" \"),\n", + " \"target_text\": binary_eval_df[0].astype(str),\n", + " }\n", + ")\n", "\n", "\n", "print(binary_eval_df.head())" @@ -171,16 +175,29 @@ "source": [ "prefix = \"data/multilabel_classification/\"\n", "\n", - "multi_train_df = 
pd.read_csv(prefix + 'train.csv')\n", - "multi_train_df[\"comment_text\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", + "multi_train_df = pd.read_csv(prefix + \"train.csv\")\n", + "multi_train_df[\"comment_text\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", "\n", "for col in multi_train_df.columns:\n", " if col not in [\"id\", \"comment_text\"]:\n", " multi_train_df[col] = multi_train_df[col].apply(lambda x: col if x else \"\")\n", "\n", - "multi_train_df[\"target_text\"] = multi_train_df['toxic'].str.cat(multi_train_df[[col for col in multi_train_df.columns if col not in [\"id\", \"comment_text\", \"toxic\"]]], sep=',')\n", - "multi_train_df[\"target_text\"] = multi_train_df[\"target_text\"].apply(lambda x: \",\".join(word for word in x.split(\",\") if word)).apply(lambda x: x if x else \"clean\")\n", - "multi_train_df[\"input_text\"] = multi_train_df[\"comment_text\"].str.replace('\\n', ' ')\n", + "multi_train_df[\"target_text\"] = multi_train_df[\"toxic\"].str.cat(\n", + " multi_train_df[\n", + " [\n", + " col\n", + " for col in multi_train_df.columns\n", + " if col not in [\"id\", \"comment_text\", \"toxic\"]\n", + " ]\n", + " ],\n", + " sep=\",\",\n", + ")\n", + "multi_train_df[\"target_text\"] = (\n", + " multi_train_df[\"target_text\"]\n", + " .apply(lambda x: \",\".join(word for word in x.split(\",\") if word))\n", + " .apply(lambda x: x if x else \"clean\")\n", + ")\n", + "multi_train_df[\"input_text\"] = multi_train_df[\"comment_text\"].str.replace(\"\\n\", \" \")\n", "multi_train_df[\"prefix\"] = \"multilabel classification\"\n", "multi_train_df = multi_train_df[[\"prefix\", \"input_text\", \"target_text\"]]\n", "\n", @@ -206,15 +223,25 @@ } ], "source": [ - "prefix = 'data/regression/'\n", + "prefix = \"data/regression/\"\n", "\n", - "sts_train_df = pd.read_csv(prefix + 'train.tsv', sep='\\t', error_bad_lines=False).dropna()\n", - "sts_eval_df = pd.read_csv(prefix + 'dev.tsv', sep='\\t', error_bad_lines=False).dropna()\n", + "sts_train_df = pd.read_csv(\n", + " prefix + \"train.tsv\", sep=\"\\t\", error_bad_lines=False\n", + ").dropna()\n", + "sts_eval_df = pd.read_csv(prefix + \"dev.tsv\", sep=\"\\t\", error_bad_lines=False).dropna()\n", "\n", - "sts_train_df[\"sentence1\"] = sts_train_df[\"sentence1\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", - "sts_train_df[\"sentence2\"] = sts_train_df[\"sentence2\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", - "sts_eval_df[\"sentence1\"] = sts_eval_df[\"sentence1\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", - "sts_eval_df[\"sentence2\"] = sts_eval_df[\"sentence2\"].str.replace('\\n', ' ').str.replace('\\t', ' ')" + "sts_train_df[\"sentence1\"] = (\n", + " sts_train_df[\"sentence1\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")\n", + "sts_train_df[\"sentence2\"] = (\n", + " sts_train_df[\"sentence2\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")\n", + "sts_eval_df[\"sentence1\"] = (\n", + " sts_eval_df[\"sentence1\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")\n", + "sts_eval_df[\"sentence2\"] = (\n", + " sts_eval_df[\"sentence2\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")" ] }, { @@ -223,7 +250,7 @@ "metadata": {}, "outputs": [], "source": [ - "sts_train_df.drop(2001, inplace=True) # This line badly formatted. Getting rid." + "sts_train_df.drop(2001, inplace=True) # This line badly formatted. Getting rid." 
] }, { @@ -232,11 +259,19 @@ "metadata": {}, "outputs": [], "source": [ - "sts_train_df[\"input_text\"] = sts_train_df.apply(lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1)\n", - "sts_eval_df[\"input_text\"] = sts_eval_df.apply(lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1)\n", + "sts_train_df[\"input_text\"] = sts_train_df.apply(\n", + " lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1\n", + ")\n", + "sts_eval_df[\"input_text\"] = sts_eval_df.apply(\n", + " lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1\n", + ")\n", "\n", - "sts_train_df[\"target_text\"] = sts_train_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", - "sts_eval_df[\"target_text\"] = sts_eval_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", + "sts_train_df[\"target_text\"] = (\n", + " sts_train_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", + ")\n", + "sts_eval_df[\"target_text\"] = (\n", + " sts_eval_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", + ")\n", "\n", "sts_train_df[\"prefix\"] = \"similarity\"\n", "sts_eval_df[\"prefix\"] = \"similarity\"\n", diff --git a/examples/t5/mt5/data_prep.ipynb b/examples/t5/mt5/data_prep.ipynb index 35b3d4fd..a069b747 100644 --- a/examples/t5/mt5/data_prep.ipynb +++ b/examples/t5/mt5/data_prep.ipynb @@ -37,30 +37,34 @@ } ], "source": [ - "prefix = 'data/binary_classification/'\n", + "prefix = \"data/binary_classification/\"\n", "\n", - "binary_train_df = pd.read_csv(prefix + 'train.csv', header=None)\n", + "binary_train_df = pd.read_csv(prefix + \"train.csv\", header=None)\n", "binary_train_df.head()\n", "\n", - "binary_eval_df = pd.read_csv(prefix + 'test.csv', header=None)\n", + "binary_eval_df = pd.read_csv(prefix + \"test.csv\", header=None)\n", "binary_eval_df.head()\n", "\n", "binary_train_df[0] = (binary_train_df[0] == 2).astype(int)\n", "binary_eval_df[0] = (binary_eval_df[0] == 2).astype(int)\n", "\n", - "binary_train_df = pd.DataFrame({\n", - " 'prefix': [\"binary classification\" for i in range(len(binary_train_df))],\n", - " 'input_text': binary_train_df[1].str.replace('\\n', ' '),\n", - " 'target_text': binary_train_df[0].astype(str),\n", - "})\n", + "binary_train_df = pd.DataFrame(\n", + " {\n", + " \"prefix\": [\"binary classification\" for i in range(len(binary_train_df))],\n", + " \"input_text\": binary_train_df[1].str.replace(\"\\n\", \" \"),\n", + " \"target_text\": binary_train_df[0].astype(str),\n", + " }\n", + ")\n", "\n", "print(binary_train_df.head())\n", "\n", - "binary_eval_df = pd.DataFrame({\n", - " 'prefix': [\"binary classification\" for i in range(len(binary_eval_df))],\n", - " 'input_text': binary_eval_df[1].str.replace('\\n', ' '),\n", - " 'target_text': binary_eval_df[0].astype(str),\n", - "})\n", + "binary_eval_df = pd.DataFrame(\n", + " {\n", + " \"prefix\": [\"binary classification\" for i in range(len(binary_eval_df))],\n", + " \"input_text\": binary_eval_df[1].str.replace(\"\\n\", \" \"),\n", + " \"target_text\": binary_eval_df[0].astype(str),\n", + " }\n", + ")\n", "\n", "\n", "print(binary_eval_df.head())" @@ -156,16 +160,29 @@ "source": [ "prefix = \"data/multilabel_classification/\"\n", "\n", - "multi_train_df = pd.read_csv(prefix + 'train.csv')\n", - "multi_train_df[\"comment_text\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", + "multi_train_df = pd.read_csv(prefix + \"train.csv\")\n", + 
"multi_train_df[\"comment_text\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", "\n", "for col in multi_train_df.columns:\n", " if col not in [\"id\", \"comment_text\"]:\n", " multi_train_df[col] = multi_train_df[col].apply(lambda x: col if x else \"\")\n", "\n", - "multi_train_df[\"target_text\"] = multi_train_df['toxic'].str.cat(multi_train_df[[col for col in multi_train_df.columns if col not in [\"id\", \"comment_text\", \"toxic\"]]], sep=',')\n", - "multi_train_df[\"target_text\"] = multi_train_df[\"target_text\"].apply(lambda x: \",\".join(word for word in x.split(\",\") if word)).apply(lambda x: x if x else \"clean\")\n", - "multi_train_df[\"input_text\"] = multi_train_df[\"comment_text\"].str.replace('\\n', ' ')\n", + "multi_train_df[\"target_text\"] = multi_train_df[\"toxic\"].str.cat(\n", + " multi_train_df[\n", + " [\n", + " col\n", + " for col in multi_train_df.columns\n", + " if col not in [\"id\", \"comment_text\", \"toxic\"]\n", + " ]\n", + " ],\n", + " sep=\",\",\n", + ")\n", + "multi_train_df[\"target_text\"] = (\n", + " multi_train_df[\"target_text\"]\n", + " .apply(lambda x: \",\".join(word for word in x.split(\",\") if word))\n", + " .apply(lambda x: x if x else \"clean\")\n", + ")\n", + "multi_train_df[\"input_text\"] = multi_train_df[\"comment_text\"].str.replace(\"\\n\", \" \")\n", "multi_train_df[\"prefix\"] = \"multilabel classification\"\n", "multi_train_df = multi_train_df[[\"prefix\", \"input_text\", \"target_text\"]]\n", "\n", @@ -191,15 +208,25 @@ } ], "source": [ - "prefix = 'data/regression/'\n", + "prefix = \"data/regression/\"\n", "\n", - "sts_train_df = pd.read_csv(prefix + 'train.tsv', sep='\\t', error_bad_lines=False).dropna()\n", - "sts_eval_df = pd.read_csv(prefix + 'dev.tsv', sep='\\t', error_bad_lines=False).dropna()\n", + "sts_train_df = pd.read_csv(\n", + " prefix + \"train.tsv\", sep=\"\\t\", error_bad_lines=False\n", + ").dropna()\n", + "sts_eval_df = pd.read_csv(prefix + \"dev.tsv\", sep=\"\\t\", error_bad_lines=False).dropna()\n", "\n", - "sts_train_df[\"sentence1\"] = sts_train_df[\"sentence1\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", - "sts_train_df[\"sentence2\"] = sts_train_df[\"sentence2\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", - "sts_eval_df[\"sentence1\"] = sts_eval_df[\"sentence1\"].str.replace('\\n', ' ').str.replace('\\t', ' ')\n", - "sts_eval_df[\"sentence2\"] = sts_eval_df[\"sentence2\"].str.replace('\\n', ' ').str.replace('\\t', ' ')" + "sts_train_df[\"sentence1\"] = (\n", + " sts_train_df[\"sentence1\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")\n", + "sts_train_df[\"sentence2\"] = (\n", + " sts_train_df[\"sentence2\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")\n", + "sts_eval_df[\"sentence1\"] = (\n", + " sts_eval_df[\"sentence1\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")\n", + "sts_eval_df[\"sentence2\"] = (\n", + " sts_eval_df[\"sentence2\"].str.replace(\"\\n\", \" \").str.replace(\"\\t\", \" \")\n", + ")" ] }, { @@ -208,7 +235,7 @@ "metadata": {}, "outputs": [], "source": [ - "sts_train_df.drop(2001, inplace=True) # This line badly formatted. Getting rid." + "sts_train_df.drop(2001, inplace=True) # This line badly formatted. Getting rid." 
] }, { @@ -217,11 +244,19 @@ "metadata": {}, "outputs": [], "source": [ - "sts_train_df[\"input_text\"] = sts_train_df.apply(lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1)\n", - "sts_eval_df[\"input_text\"] = sts_eval_df.apply(lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1)\n", + "sts_train_df[\"input_text\"] = sts_train_df.apply(\n", + " lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1\n", + ")\n", + "sts_eval_df[\"input_text\"] = sts_eval_df.apply(\n", + " lambda x: \"sentence1: \" + x[\"sentence1\"] + \" sentence2: \" + x[\"sentence2\"], axis=1\n", + ")\n", "\n", - "sts_train_df[\"target_text\"] = sts_train_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", - "sts_eval_df[\"target_text\"] = sts_eval_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", + "sts_train_df[\"target_text\"] = (\n", + " sts_train_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", + ")\n", + "sts_eval_df[\"target_text\"] = (\n", + " sts_eval_df[\"score\"].apply(lambda x: round(x * 5) / 5).astype(str)\n", + ")\n", "\n", "sts_train_df[\"prefix\"] = \"similarity\"\n", "sts_eval_df[\"prefix\"] = \"similarity\"\n", diff --git a/setup.py b/setup.py index f35ab13f..c996665a 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="simpletransformers", - version="0.64.3", + version="0.64.5", author="Thilina Rajapakse", author_email="chaturangarajapakshe@gmail.com", description="An easy-to-use wrapper library for the Transformers library.", @@ -32,6 +32,7 @@ "scikit-learn", "seqeval", "tensorboard", + "tensorboardx", "pandas", "tokenizers", "wandb>=0.10.32", diff --git a/simpletransformers/classification/classification_model.py b/simpletransformers/classification/classification_model.py index c3f3d18c..62ef9ea8 100755 --- a/simpletransformers/classification/classification_model.py +++ b/simpletransformers/classification/classification_model.py @@ -182,7 +182,6 @@ def __init__( onnx_execution_provider=None, **kwargs, ): - """ Initializes a ClassificationModel model. 
@@ -1641,10 +1640,14 @@ def evaluate( if not self.args.sliding_window: # ROC` - wandb.log({"roc": wandb.plot.roc_curve(truth, model_outputs, labels_list)}) + wandb.log( + {"roc": wandb.plot.roc_curve(truth, model_outputs, labels_list)} + ) # Precision Recall - wandb.log({"pr": wandb.plot.pr_curve(truth, model_outputs, labels_list)}) + wandb.log( + {"pr": wandb.plot.pr_curve(truth, model_outputs, labels_list)} + ) return results, model_outputs, wrong diff --git a/simpletransformers/classification/classification_utils.py b/simpletransformers/classification/classification_utils.py index f2fc22f8..74be2a6f 100755 --- a/simpletransformers/classification/classification_utils.py +++ b/simpletransformers/classification/classification_utils.py @@ -803,7 +803,6 @@ def __init__( data_type_extension=None, multi_label=False, ): - self.text_label = text_label if text_label else "text" self.labels_label = labels_label if labels_label else "labels" self.images_label = images_label if images_label else "images" diff --git a/simpletransformers/classification/multi_label_classification_model.py b/simpletransformers/classification/multi_label_classification_model.py index e067d507..f5a56135 100755 --- a/simpletransformers/classification/multi_label_classification_model.py +++ b/simpletransformers/classification/multi_label_classification_model.py @@ -89,7 +89,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a MultiLabelClassification model. diff --git a/simpletransformers/classification/multi_modal_classification_model.py b/simpletransformers/classification/multi_modal_classification_model.py index cbe6b252..c9b51fa9 100644 --- a/simpletransformers/classification/multi_modal_classification_model.py +++ b/simpletransformers/classification/multi_modal_classification_model.py @@ -86,7 +86,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a MultiModalClassificationModel model. 
diff --git a/simpletransformers/classification/transformer_models/albert_model.py b/simpletransformers/classification/transformer_models/albert_model.py index fe5c0aa3..d869ad39 100755 --- a/simpletransformers/classification/transformer_models/albert_model.py +++ b/simpletransformers/classification/transformer_models/albert_model.py @@ -56,7 +56,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.albert( input_ids=input_ids, attention_mask=attention_mask, diff --git a/simpletransformers/classification/transformer_models/bert_model.py b/simpletransformers/classification/transformer_models/bert_model.py index c2e3b97a..bfaa4f6a 100755 --- a/simpletransformers/classification/transformer_models/bert_model.py +++ b/simpletransformers/classification/transformer_models/bert_model.py @@ -53,7 +53,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.bert( input_ids, attention_mask=attention_mask, diff --git a/simpletransformers/classification/transformer_models/electra_model.py b/simpletransformers/classification/transformer_models/electra_model.py index b414b246..476640cb 100755 --- a/simpletransformers/classification/transformer_models/electra_model.py +++ b/simpletransformers/classification/transformer_models/electra_model.py @@ -55,7 +55,6 @@ def forward( inputs_embeds=None, labels=None, ): - discriminator_hidden_states = self.electra( input_ids, attention_mask, diff --git a/simpletransformers/classification/transformer_models/layoutlm_model.py b/simpletransformers/classification/transformer_models/layoutlm_model.py index c3c5ae50..b9f41286 100644 --- a/simpletransformers/classification/transformer_models/layoutlm_model.py +++ b/simpletransformers/classification/transformer_models/layoutlm_model.py @@ -27,7 +27,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.bert( input_ids=input_ids, bbox=bbox, diff --git a/simpletransformers/classification/transformer_models/mmbt_model.py b/simpletransformers/classification/transformer_models/mmbt_model.py index 83e7011d..4c88d04c 100644 --- a/simpletransformers/classification/transformer_models/mmbt_model.py +++ b/simpletransformers/classification/transformer_models/mmbt_model.py @@ -55,7 +55,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.mmbt( input_modal=input_modal, input_ids=input_ids, diff --git a/simpletransformers/conv_ai/conv_ai_model.py b/simpletransformers/conv_ai/conv_ai_model.py index 09e157a5..f79307c3 100644 --- a/simpletransformers/conv_ai/conv_ai_model.py +++ b/simpletransformers/conv_ai/conv_ai_model.py @@ -95,7 +95,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a ClassificationModel model. 
diff --git a/simpletransformers/custom_models/models.py b/simpletransformers/custom_models/models.py index 324ad84a..dcdc44da 100755 --- a/simpletransformers/custom_models/models.py +++ b/simpletransformers/custom_models/models.py @@ -516,7 +516,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.albert( input_ids=input_ids, attention_mask=attention_mask, @@ -791,7 +790,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.electra( input_ids, attention_mask, @@ -848,7 +846,6 @@ def forward( inputs_embeds=None, labels=None, ): - outputs = self.electra( input_ids, attention_mask, @@ -903,7 +900,6 @@ def forward( start_positions=None, end_positions=None, ): - outputs = self.electra( input_ids, attention_mask, diff --git a/simpletransformers/experimental/classification/transformer_models/albert_model.py b/simpletransformers/experimental/classification/transformer_models/albert_model.py index 2f074995..578b2902 100755 --- a/simpletransformers/experimental/classification/transformer_models/albert_model.py +++ b/simpletransformers/experimental/classification/transformer_models/albert_model.py @@ -58,7 +58,6 @@ def forward( inputs_embeds=None, labels=None, ): - all_outputs = [] if self.sliding_window: # input_ids is really the list of inputs for each "sequence window" diff --git a/simpletransformers/experimental/classification/transformer_models/bert_model.py b/simpletransformers/experimental/classification/transformer_models/bert_model.py index 60f162db..b950af44 100755 --- a/simpletransformers/experimental/classification/transformer_models/bert_model.py +++ b/simpletransformers/experimental/classification/transformer_models/bert_model.py @@ -54,7 +54,6 @@ def forward( inputs_embeds=None, labels=None, ): - all_outputs = [] if self.sliding_window: # input_ids is really the list of inputs for each "sequence window" diff --git a/simpletransformers/language_generation/language_generation_model.py b/simpletransformers/language_generation/language_generation_model.py index 94a73ea3..89a9b9b1 100644 --- a/simpletransformers/language_generation/language_generation_model.py +++ b/simpletransformers/language_generation/language_generation_model.py @@ -49,7 +49,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a LanguageGenerationModel model. @@ -147,7 +146,6 @@ def __init__( self.model.to(self.device) def generate(self, prompt=None, args=None, verbose=True): - """ Generate text using a LanguageGenerationModel diff --git a/simpletransformers/language_modeling/language_modeling_model.py b/simpletransformers/language_modeling/language_modeling_model.py index 3984f7e6..179464b6 100755 --- a/simpletransformers/language_modeling/language_modeling_model.py +++ b/simpletransformers/language_modeling/language_modeling_model.py @@ -154,7 +154,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a LanguageModelingModel. diff --git a/simpletransformers/language_representation/representation_model.py b/simpletransformers/language_representation/representation_model.py index 10ef94dd..7d9185b4 100644 --- a/simpletransformers/language_representation/representation_model.py +++ b/simpletransformers/language_representation/representation_model.py @@ -73,7 +73,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a RepresentationModel model. 
diff --git a/simpletransformers/losses/dice_loss.py b/simpletransformers/losses/dice_loss.py index 0a3f513c..d96227e2 100644 --- a/simpletransformers/losses/dice_loss.py +++ b/simpletransformers/losses/dice_loss.py @@ -64,7 +64,6 @@ def __init__( self.smooth: float = smooth def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - if len(input.shape) == 2: if input.shape[0] != target.shape[0]: raise ValueError( diff --git a/simpletransformers/losses/tversky_loss.py b/simpletransformers/losses/tversky_loss.py index 6658308d..8bb52e7d 100644 --- a/simpletransformers/losses/tversky_loss.py +++ b/simpletransformers/losses/tversky_loss.py @@ -70,7 +70,6 @@ def __init__( self.smooth: float = smooth def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - if len(input.shape) == 2: if input.shape[0] != target.shape[0]: raise ValueError( diff --git a/simpletransformers/ner/ner_model.py b/simpletransformers/ner/ner_model.py index e09f62cb..7ee94e2a 100755 --- a/simpletransformers/ner/ner_model.py +++ b/simpletransformers/ner/ner_model.py @@ -228,7 +228,7 @@ def __init__( LukeConfig, LukeForTokenClassification, MLukeTokenizer, - ), + ), "mobilebert": ( MobileBertConfig, MobileBertForTokenClassification, @@ -868,7 +868,6 @@ def train( args.evaluate_during_training_steps > 0 and global_step % args.evaluate_during_training_steps == 0 ): - output_dir_current = os.path.join( output_dir, "checkpoint-{}".format(global_step) ) @@ -1501,7 +1500,6 @@ def predict(self, to_predict, split_on_space=True): ] if self.args.onnx: - # Encode model_inputs = self.tokenizer.batch_encode_plus( to_predict, @@ -1751,7 +1749,6 @@ def predict(self, to_predict, split_on_space=True): def _convert_tokens_to_word_logits( self, input_ids, label_ids, attention_mask, logits ): - ignore_ids = [ self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token), self.tokenizer.convert_tokens_to_ids(self.tokenizer.sep_token), diff --git a/simpletransformers/question_answering/question_answering_model.py b/simpletransformers/question_answering/question_answering_model.py index 70260fcc..849b190e 100755 --- a/simpletransformers/question_answering/question_answering_model.py +++ b/simpletransformers/question_answering/question_answering_model.py @@ -118,7 +118,6 @@ class QuestionAnsweringModel: def __init__( self, model_type, model_name, args=None, use_cuda=True, cuda_device=-1, **kwargs ): - """ Initializes a QuestionAnsweringModel model. 
diff --git a/simpletransformers/question_answering/question_answering_utils.py b/simpletransformers/question_answering/question_answering_utils.py index 778a14a1..e069920f 100755 --- a/simpletransformers/question_answering/question_answering_utils.py +++ b/simpletransformers/question_answering/question_answering_utils.py @@ -203,7 +203,7 @@ def convert_example_to_feature(example_row): tok_to_orig_index = [] orig_to_tok_index = [] all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): + for i, token in enumerate(example.doc_tokens): orig_to_tok_index.append(len(all_doc_tokens)) sub_tokens = tokenizer.tokenize(token) for sub_token in sub_tokens: @@ -249,7 +249,7 @@ def convert_example_to_feature(example_row): break start_offset += min(length, doc_stride) - for (doc_span_index, doc_span) in enumerate(doc_spans): + for doc_span_index, doc_span in enumerate(doc_spans): tokens = [] token_to_orig_map = {} token_is_max_context = {} @@ -632,8 +632,7 @@ def convert_examples_to_features( ) else: features = [] - for (example_index, example) in enumerate(tqdm(examples, disable=silent)): - + for example_index, example in enumerate(tqdm(examples, disable=silent)): # if example_index % 100 == 0: # logger.info('Converting %s/%s pos %s neg %s', example_index, len(examples), cnt_pos, cnt_neg) @@ -645,7 +644,7 @@ def convert_examples_to_features( tok_to_orig_index = [] orig_to_tok_index = [] all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): + for i, token in enumerate(example.doc_tokens): orig_to_tok_index.append(len(all_doc_tokens)) sub_tokens = tokenizer.tokenize(token) for sub_token in sub_tokens: @@ -691,7 +690,7 @@ def convert_examples_to_features( break start_offset += min(length, doc_stride) - for (doc_span_index, doc_span) in enumerate(doc_spans): + for doc_span_index, doc_span in enumerate(doc_spans): tokens = [] token_to_orig_map = {} token_is_max_context = {} @@ -891,7 +890,7 @@ def _check_is_max_context(doc_spans, cur_span_index, position): # and 0 right context. 
best_score = None best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): + for span_index, doc_span in enumerate(doc_spans): end = doc_span.start + doc_span.length - 1 if position < doc_span.start: continue @@ -947,7 +946,7 @@ def write_predictions( all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for (example_index, example) in enumerate(all_examples): + for example_index, example in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] @@ -956,7 +955,7 @@ def write_predictions( min_null_feature_index = 0 # the paragraph slice with min null score null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): result = unique_id_to_result[feature.unique_id] start_indexes = _get_best_indexes(result.start_logits, n_best_size) end_indexes = _get_best_indexes(result.end_logits, n_best_size) @@ -1091,7 +1090,7 @@ def write_predictions( probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -1188,14 +1187,14 @@ def write_predictions_extended( all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for (example_index, example) in enumerate(all_examples): + for example_index, example in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] # keep track of the minimum score of null start+end of position 0 score_null = 1000000 # large and positive - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): result = unique_id_to_result[feature.unique_id] cur_null_score = result.cls_logits @@ -1306,7 +1305,7 @@ def write_predictions_extended( probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -1363,7 +1362,6 @@ def get_best_predictions( version_2_with_negative, null_score_diff_threshold, ): - example_index_to_features = collections.defaultdict(list) for feature in all_features: example_index_to_features[feature.example_index].append(feature) @@ -1381,7 +1379,7 @@ def get_best_predictions( all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for (example_index, example) in enumerate(all_examples): + for example_index, example in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] @@ -1390,7 +1388,7 @@ def get_best_predictions( min_null_feature_index = 0 # the paragraph slice with min null score null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): result = unique_id_to_result[feature.unique_id] start_indexes = _get_best_indexes(result.start_logits, n_best_size) end_indexes = _get_best_indexes(result.end_logits, n_best_size) @@ -1525,7 +1523,7 @@ def get_best_predictions( probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for 
i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -1599,14 +1597,14 @@ def get_best_predictions_extended( all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() - for (example_index, example) in enumerate(all_examples): + for example_index, example in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] # keep track of the minimum score of null start+end of position 0 score_null = 1000000 # large and positive - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): result = unique_id_to_result[feature.unique_id] cur_null_score = result.cls_logits @@ -1722,7 +1720,7 @@ def get_best_predictions_extended( probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -1914,7 +1912,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): def _strip_spaces(text): ns_chars = [] ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): + for i, c in enumerate(text): if c == " ": continue ns_to_s_map[len(ns_chars)] = i @@ -1952,7 +1950,7 @@ def _strip_spaces(text): # We then project the characters in `pred_text` back to `orig_text` using # the character-to-character alignment. tok_s_to_ns_map = {} - for (i, tok_index) in tok_ns_to_s_map.items(): + for i, tok_index in tok_ns_to_s_map.items(): tok_s_to_ns_map[tok_index] = i orig_start_position = None diff --git a/simpletransformers/seq2seq/seq2seq_model.py b/simpletransformers/seq2seq/seq2seq_model.py index a2a6209f..17a2fa3c 100644 --- a/simpletransformers/seq2seq/seq2seq_model.py +++ b/simpletransformers/seq2seq/seq2seq_model.py @@ -138,7 +138,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a Seq2SeqModel. 
@@ -312,7 +311,7 @@ def __init__( if encoder_decoder_type in ["bart", "mbart", "mbart50", "marian"]: self.model = model_class.from_pretrained(encoder_decoder_name) - if encoder_decoder_type in ["bart", "mbart","mbart50"]: + if encoder_decoder_type in ["bart", "mbart", "mbart50"]: self.encoder_tokenizer = tokenizer_class.from_pretrained( encoder_decoder_name ) @@ -1316,7 +1315,6 @@ def predict(self, to_predict): num_return_sequences=self.args.num_return_sequences, ) elif self.args.model_type in ["mbart"]: - # tgt_lang_token = self.decoder_tokenizer._convert_token_to_id_with_added_voc( # self.args.tgt_lang # ) @@ -1324,7 +1322,9 @@ def predict(self, to_predict): outputs = self.model.generate( input_ids=input_ids, # decoder_start_token_id=tgt_lang_token, - decoder_start_token_id=self.decoder_tokenizer.lang_code_to_id[self.args.tgt_lang], + decoder_start_token_id=self.decoder_tokenizer.lang_code_to_id[ + self.args.tgt_lang + ], num_beams=self.args.num_beams, # max_length=self.args.max_length, max_new_tokens=self.args.max_length, @@ -1337,7 +1337,6 @@ def predict(self, to_predict): num_return_sequences=self.args.num_return_sequences, ) elif self.args.model_type in ["mbart50"]: - # tgt_lang_token = self.decoder_tokenizer._convert_token_to_id_with_added_voc( # self.args.tgt_lang # ) @@ -1345,7 +1344,9 @@ def predict(self, to_predict): outputs = self.model.generate( input_ids=input_ids, # decoder_start_token_id=tgt_lang_token, - decoder_start_token_id=self.decoder_tokenizer.lang_code_to_id[self.args.tgt_lang], + decoder_start_token_id=self.decoder_tokenizer.lang_code_to_id[ + self.args.tgt_lang + ], num_beams=self.args.num_beams, # max_length=self.args.max_length, max_new_tokens=self.args.max_length, @@ -1356,7 +1357,9 @@ def predict(self, to_predict): top_k=self.args.top_k, top_p=self.args.top_p, num_return_sequences=self.args.num_return_sequences, - forced_bos_token_id=self.decoder_tokenizer.lang_code_to_id[self.args.tgt_lang], + forced_bos_token_id=self.decoder_tokenizer.lang_code_to_id[ + self.args.tgt_lang + ], ) elif self.args.model_type in ["rag-token", "rag-sequence"]: outputs = self.model.generate( @@ -1659,7 +1662,7 @@ def _get_inputs_dict(self, batch): "attention_mask": source_mask.to(device), "labels": labels.to(device), } - elif self.args.model_type in ["mbart","mbart50"]: + elif self.args.model_type in ["mbart", "mbart50"]: inputs = { "input_ids": batch["input_ids"].to(device), "attention_mask": batch["attention_mask"].to(device), diff --git a/simpletransformers/t5/t5_model.py b/simpletransformers/t5/t5_model.py index 2552f1dc..62d8af2d 100644 --- a/simpletransformers/t5/t5_model.py +++ b/simpletransformers/t5/t5_model.py @@ -70,7 +70,6 @@ def __init__( cuda_device=-1, **kwargs, ): - """ Initializes a T5Model model. @@ -922,6 +921,9 @@ def eval_model( self.results.update(result) if self.args.evaluate_generated_text: + raise ValueError( + "evaluate_generated_text not implemented without use_hf_datasets." 
+ ) if self.args.preprocess_inputs: to_predict = [ prefix + ": " + input_text @@ -943,9 +945,7 @@ def eval_model( else: target_text = eval_dataset["target_text"].tolist() - result = self.compute_metrics( - target_text, preds, **kwargs - ) + result = self.compute_metrics(target_text, preds, **kwargs) self.results.update(result) if verbose: @@ -1134,7 +1134,7 @@ def _move_model_to_device(self): def _get_inputs_dict(self, batch): if self.args.use_hf_datasets: - inputs = {**batch, "labels": batch["input_ids"]} + inputs = {**batch} return {key: value.to(self.device) for key, value in inputs.items()} else: diff --git a/simpletransformers/t5/t5_utils.py b/simpletransformers/t5/t5_utils.py index 0a3feb69..42fb9908 100644 --- a/simpletransformers/t5/t5_utils.py +++ b/simpletransformers/t5/t5_utils.py @@ -84,7 +84,7 @@ def load_hf_dataset(data, tokenizer, args): batched=True, ) - dataset.set_format(type="pt", columns=["input_ids", "attention_mask"]) + dataset.set_format(type="pt", columns=["input_ids", "attention_mask", "labels"]) if isinstance(data, str): # This is not necessarily a train dataset. The datasets library insists on calling it train.