From 2771a7c5a3d9f3ef0ab51c7e7ee522781f2d095b Mon Sep 17 00:00:00 2001 From: Mark Aron Szulyovszky Date: Sat, 30 Jul 2022 12:34:37 +0200 Subject: [PATCH 1/5] feature(Types): added Experiment type encompassing Pipelines, metrics, project_name, etc. --- library/examples/hate_speech.py | 42 +++++++++++++----- library/examples/hate_speech_multi_hf.py | 21 +-------- library/examples/hate_speech_sklearn.py | 54 ++++------------------- run.py | 55 +++++++++++------------- runner/runner.py | 26 +++++------ type.py | 14 +++--- 6 files changed, 85 insertions(+), 127 deletions(-) diff --git a/library/examples/hate_speech.py b/library/examples/hate_speech.py index 9a4e0bc..beb32de 100644 --- a/library/examples/hate_speech.py +++ b/library/examples/hate_speech.py @@ -23,6 +23,7 @@ transform_hatespeech_offensive_dataset, ) from datasets.load import load_dataset +from library.evaluation import calibration_metrics, classification_metrics from sklearn.ensemble import GradientBoostingClassifier, VotingClassifier from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.linear_model import LogisticRegression @@ -30,10 +31,10 @@ from sklearn.preprocessing import MinMaxScaler from transformers.training_args import TrainingArguments from type import ( + Experiment, HuggingfaceConfig, LoadOrigin, PreprocessConfig, - RunConfig, SKLearnConfig, ) from utils.flatten import remove_none @@ -228,31 +229,52 @@ def create_nlp_huggingface_pipeline(autocorrect: bool) -> Pipeline: ] ) +### Metrics + +metrics = classification_metrics + calibration_metrics + + ### Run Configs -tweeteval_hate_speech_run_configs = [ - RunConfig( - run_name="hate-speech-detection", +tweeteval_hate_speech_experiments = [ + Experiment( + project_name="hate-speech-detection", + run_name="tweeteval", dataset=data_tweet_eval_hate_speech[0], + pipeline=ensemble_pipeline, + preprocessing_config=preprocess_config, + metrics=metrics, train=True, ), - RunConfig( - run_name="hate-speech-detection", + Experiment( + project_name="hate-speech-detection", + run_name="tweeteval", dataset=data_tweet_eval_hate_speech[1], + pipeline=ensemble_pipeline, + preprocessing_config=preprocess_config, + metrics=metrics, train=False, ), ] -cross_dataset_run_configs = [ - RunConfig( - run_name="hate-speech-detection-cross-val", +cross_dataset_experiments = [ + Experiment( + project_name="hate-speech-detection-cross-val", + run_name="merged_dataset", dataset=data_merged_train, + pipeline=ensemble_pipeline, + preprocessing_config=preprocess_config, + metrics=metrics, train=True, ), - RunConfig( + Experiment( + project_name="hate-speech-detection-cross-val", run_name="hatecheck", dataset=data_hatecheck[1], + pipeline=ensemble_pipeline, + preprocessing_config=preprocess_config, + metrics=metrics, train=False, ), ] diff --git a/library/examples/hate_speech_multi_hf.py b/library/examples/hate_speech_multi_hf.py index fe871a8..ae1d3b7 100644 --- a/library/examples/hate_speech_multi_hf.py +++ b/library/examples/hate_speech_multi_hf.py @@ -1,28 +1,11 @@ from copy import deepcopy -from sklearn.ensemble import GradientBoostingClassifier, VotingClassifier -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import MultinomialNB -from sklearn.preprocessing import MinMaxScaler -from transformers import TrainingArguments - -from blocks.adaptors import ListOfListsToNumpy -from blocks.augmenters.spelling_autocorrect import SpellAutocorrectAugmenter from blocks.data import DataSource from 
blocks.ensemble import Ensemble from blocks.models.huggingface import HuggingfaceModel -from blocks.models.sklearn import SKLearnModel from blocks.pipeline import Pipeline -from blocks.transformations import ( - Lemmatizer, - SKLearnTransformation, - SpacyTokenizer, - TextStatisticTransformation, -) -from configs.constants import Const -from library.evaluation import classification, classification_metrics -from type import HuggingfaceConfig, LoadOrigin, PreprocessConfig, SKLearnConfig +from transformers.training_args import TrainingArguments +from type import HuggingfaceConfig, LoadOrigin, PreprocessConfig from utils.flatten import remove_none preprocess_config = PreprocessConfig( diff --git a/library/examples/hate_speech_sklearn.py b/library/examples/hate_speech_sklearn.py index 353b555..4c71200 100644 --- a/library/examples/hate_speech_sklearn.py +++ b/library/examples/hate_speech_sklearn.py @@ -1,42 +1,19 @@ -from datasets.load import load_dataset -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import MultinomialNB -from sklearn.preprocessing import MinMaxScaler +from typing import Tuple, Union from blocks.augmenters.spelling_autocorrect import SpellAutocorrectAugmenter from blocks.data import DataSource from blocks.ensemble import Ensemble -from blocks.models.huggingface import HuggingfaceModel from blocks.models.sklearn import SKLearnModel from blocks.pipeline import Pipeline -from blocks.transformations import ( - Lemmatizer, - SKLearnTransformation, - SpacyTokenizer, - TextStatisticTransformation, -) +from blocks.transformations import Lemmatizer, SKLearnTransformation, SpacyTokenizer from blocks.transformations.no_lemmatizer import NoLemmatizer -from configs.constants import Const -from data.transformation import transform_dataset -from library.evaluation import classification -from type import ( - HuggingfaceConfig, - LoadOrigin, - PreprocessConfig, - RunConfig, - SKLearnConfig, -) +from sklearn.ensemble import GradientBoostingClassifier, VotingClassifier +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression +from sklearn.naive_bayes import MultinomialNB +from type import LoadOrigin, SKLearnConfig from utils.flatten import remove_none -preprocess_config = PreprocessConfig( - train_size=100, - val_size=100, - test_size=100, - input_col="text", - label_col="label", -) - sklearn_config = SKLearnConfig( force_fit=False, save=True, @@ -77,9 +54,9 @@ def create_nlp_sklearn_pipeline( [ SpellAutocorrectAugmenter(fast=True) if autocorrect else None, SpacyTokenizer(), - Lemmatizer(remove_stopwords=False) + Lemmatizer(remove_stopwords=True) if lemmatization - else NoLemmatizer(remove_stopwords=False), + else NoLemmatizer(remove_stopwords=True), SKLearnTransformation( TfidfVectorizer( max_features=tfidf_max_features, @@ -142,16 +119,3 @@ def create_nlp_sklearn_pipeline( sklearn_lemma_1_2_large, ], ) - -hate_speech_data = transform_dataset( - load_dataset("tweet_eval", "hate"), preprocess_config -) - -run_configs = [ - RunConfig( - run_name="hate-speech-detection", dataset=hate_speech_data[0], train=True - ), - RunConfig( - run_name="hate-speech-detection", dataset=hate_speech_data[1], train=False - ), -] diff --git a/run.py b/run.py index ff45e2a..001f2dd 100644 --- a/run.py +++ b/run.py @@ -1,56 +1,52 @@ -from typing import List +from typing import List, Optional from blocks.pipeline import Pipeline from configs.constants import Const from 
library.evaluation import calibration_metrics, classification_metrics
-from library.examples.hate_speech import (
-    cross_dataset_run_configs,
-    ensemble_pipeline_hf,
-    huggingface_baseline,
-    preprocess_config,
-    tweeteval_hate_speech_run_configs,
-    vader,
-)
+from library.examples.hate_speech import (cross_dataset_run_configs,
+                                          ensemble_pipeline_hf,
+                                          huggingface_baseline,
+                                          preprocess_config,
+                                          tweeteval_hate_speech_run_configs,
+                                          vader)
 from plugins import WandbConfig, WandbPlugin
 from runner.runner import Runner
-from type import Evaluators, PreprocessConfig, RunConfig
+from type import Evaluators, Experiment, PreprocessConfig


 def run(
-    pipeline: Pipeline,
-    preprocess_config: PreprocessConfig,
-    project_id: str,
-    run_configs: List[RunConfig],
-    metrics: Evaluators,
+    experiments: List[Experiment],
+    save_remote: Optional[
+        bool
+    ] = None,  # If set to True, all models will try uploading (if configured); if set to False, it overrides uploading for all models (even if configured)
+    remote_logging: Optional[
+        bool
+    ] = None,  # Switches on and off all remote logging (e.g. wandb)
 ) -> None:
-    for config in run_configs:
+    for experiment in experiments:
         logger_plugins = (
             [
                 WandbPlugin(
                     WandbConfig(
-                        project_id=project_id,
-                        run_name=config.run_name + "-" + pipeline.id,
+                        project_id=experiment.project_name,
+                        run_name=experiment.run_name + "-" + experiment.pipeline.id,
                         train=True,
                     ),
                     dict(
-                        run_config=config.get_configs(),
+                        run_config=experiment.get_configs(),
                         preprocess_config=preprocess_config.get_configs(),
-                        pipeline_configs=pipeline.get_configs(),
+                        pipeline_configs=experiment.pipeline.get_configs(),
                     ),
                 )
             ]
-            if config.remote_logging
+            if remote_logging
             else []
         )

         runner = Runner(
-            config,
-            pipeline,
-            data={Const.input_col: config.dataset[Const.input_col]},
-            labels=config.dataset[Const.label_col]
-            if hasattr(config.dataset, Const.label_col)
-            else None,
-            evaluators=metrics,
+            experiment,
+            data={Const.input_col: experiment.dataset[Const.input_col]},
+            labels=experiment.dataset[Const.label_col],
             plugins=logger_plugins,
         )
         runner.run()
@@ -58,9 +54,8 @@ def run(


 if __name__ == "__main__":
-    metrics = classification_metrics + calibration_metrics
-
     run(
+
         vader,
         preprocess_config,
         project_id="hate-speech-detection",
diff --git a/runner/runner.py b/runner/runner.py
index e26848b..3e7b4da 100644
--- a/runner/runner.py
+++ b/runner/runner.py
@@ -1,16 +1,13 @@
 import datetime
-from copy import deepcopy
 from typing import Dict, List, Optional, Union

 import pandas as pd
-
-from blocks.base import Block, DataSource, Element
+from blocks.base import DataSource, Element
 from blocks.pipeline import Pipeline
 from configs import Const
-from configs.constants import LogConst
 from plugins import IntegrityChecker, PipelineAnalyser
 from plugins.base import Plugin
-from type import Evaluators, RunConfig
+from type import Experiment
 from utils.flatten import flatten

 from .evaluation import evaluate
@@ -19,7 +16,7 @@
 obligatory_plugins = [PipelineAnalyser(), IntegrityChecker()]


-def overwrite_model_configs(config: RunConfig, pipeline: Pipeline) -> Pipeline:
+def overwrite_model_configs(config: Experiment, pipeline: Pipeline) -> Pipeline:
     for key, value in vars(config).items():
         if value is not None:
             for model in flatten(pipeline.children()):
@@ -64,21 +61,18 @@ def append_id(block, pipeline_id: str):
 class Runner:
     def __init__(
         self,
-        run_config: RunConfig,
-        pipeline: Pipeline,
+        experiment: Experiment,
         data: Dict[str, Union[pd.Series, List]],
         labels: pd.Series,
-        evaluators: Evaluators,
         plugins: List[Optional[Plugin]],
     ) -> None:
-        self.config = run_config
+        self.experiment = experiment
         self.run_path = f"{Const.output_runs_path}/{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}/"
-        self.pipeline = pipeline
         self.store = Store(data, labels, self.run_path)
-        self.evaluators = evaluators
         self.plugins = obligatory_plugins + plugins
-        self.pipeline = overwrite_model_configs(self.config, self.pipeline)
+        self.pipeline = experiment.pipeline  # maybe we want to deepcopy it first?
+        self.pipeline = overwrite_model_configs(self.experiment, self.pipeline)
         self.pipeline = add_position_to_block_names(self.pipeline)
         self.pipeline = append_pipeline_id(self.pipeline)
@@ -90,7 +84,7 @@ def run(self):
         print("💈 Loading existing models")
         self.pipeline.load(self.plugins)

-        if self.config.train:
+        if self.experiment.train:
             print("🏋️ Training pipeline")
             self.pipeline.fit(self.store, self.plugins)

@@ -101,7 +95,9 @@
         preds_probs = self.pipeline.predict(self.store, self.plugins)

         print("🤔 Evaluating entire pipeline")
-        stats = evaluate(preds_probs, self.store, self.evaluators, self.run_path)
+        stats = evaluate(
+            preds_probs, self.store, self.experiment.metrics, self.run_path
+        )
         self.store.set_stats(Const.final_eval_name, stats)

         for plugin in self.plugins:
diff --git a/type.py b/type.py
index 81e7f5d..04828ab 100644
--- a/type.py
+++ b/type.py
@@ -86,21 +86,19 @@ def get_configs(self):
         return vars(self)


-""" Run Configs """
+""" Experiment """


 @dataclass
-class RunConfig:
+class Experiment:
+    project_name: str
     run_name: str  # Gets appended as a prefix before the pipeline name
     train: bool  # Whether the run should do training
     dataset: pd.DataFrame
+    pipeline: "Pipeline"
+    metrics: Evaluators
+    preprocessing_config: PreprocessConfig
     force_fit: Optional[bool] = None  # If set to True, will make all models train
-    save_remote: Optional[
-        bool
-    ] = None  # If set to True, all models will try uploading (if configured); if set to False, it overrides uploading for all models (even if configured)
-    remote_logging: Optional[
-        bool
-    ] = None  # Switches on and off all remote logging (e.g. wandb)

     def get_configs(self):
         return vars(self)

From f32cc948240efe0dc5f55659c98c0ecf8dffc4a2 Mon Sep 17 00:00:00 2001
From: Mark Aron Szulyovszky
Date: Sat, 30 Jul 2022 12:46:57 +0200
Subject: [PATCH 2/5] fix(Run): updated after Experiment type was added

---
 run.py | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/run.py b/run.py
index 001f2dd..f800d06 100644
--- a/run.py
+++ b/run.py
@@ -3,12 +3,10 @@
 from blocks.pipeline import Pipeline
 from configs.constants import Const
 from library.evaluation import calibration_metrics, classification_metrics
-from library.examples.hate_speech import (cross_dataset_run_configs,
-                                          ensemble_pipeline_hf,
-                                          huggingface_baseline,
-                                          preprocess_config,
-                                          tweeteval_hate_speech_run_configs,
-                                          vader)
+from library.examples.hate_speech import (
+    preprocess_config,
+    tweeteval_hate_speech_experiments,
+)
 from plugins import WandbConfig, WandbPlugin
 from runner.runner import Runner
 from type import Evaluators, Experiment, PreprocessConfig
@@ -35,7 +33,7 @@ def run(
                     ),
                     dict(
                         run_config=experiment.get_configs(),
-                        preprocess_config=preprocess_config.get_configs(),
+                        preprocess_config=experiment.preprocessing_config.get_configs(),
                         pipeline_configs=experiment.pipeline.get_configs(),
                     ),
                 )
@@ -55,10 +53,7 @@


 if __name__ == "__main__":

     run(
-
-        vader,
-        preprocess_config,
-        project_id="hate-speech-detection",
-        run_configs=cross_dataset_run_configs,
-        metrics=metrics,
+        tweeteval_hate_speech_experiments,
+        save_remote=False,
+        remote_logging=True,
     )

From 4bd9a4891fecfe5dfb8689f79dc8b64c783afb8a Mon Sep 17 00:00:00 2001
From: Mark Aron Szulyovszky
Date: Sat, 30 Jul 2022 12:59:07 +0200
Subject: [PATCH 3/5] feature(Experiments): created the relevant experiments

---
 library/examples/hate_speech.py | 65 +++++++++++++++++++++++----------
 run.py                          | 11 ++----
 2 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/library/examples/hate_speech.py b/library/examples/hate_speech.py
index beb32de..1dd7a53 100644
--- a/library/examples/hate_speech.py
+++ b/library/examples/hate_speech.py
@@ -37,7 +37,7 @@
     PreprocessConfig,
     SKLearnConfig,
 )
-from utils.flatten import remove_none
+from utils.flatten import flatten, remove_none

 preprocess_config = PreprocessConfig(
     train_size=-1,
@@ -164,28 +164,25 @@ def create_nlp_huggingface_pipeline(autocorrect: bool) -> Pipeline:
 )

 huggingface_baseline = create_nlp_huggingface_pipeline(autocorrect=False)
-nlp_sklearn = create_nlp_sklearn_pipeline(autocorrect=False)
-nlp_sklearn_autocorrect = create_nlp_sklearn_pipeline(autocorrect=True)
+sklearn = create_nlp_sklearn_pipeline(autocorrect=False)
+sklearn_autocorrect = create_nlp_sklearn_pipeline(autocorrect=True)

-nlp_sklearn_simple = create_nlp_sklearn_pipeline(autocorrect=False)
+sklearn_simple = create_nlp_sklearn_pipeline(autocorrect=False)

 random = Pipeline("random", input_data, [RandomModel("random")])
 vader = Pipeline("vader", input_data, [VaderModel("vader")])

-ensemble_pipeline = Ensemble(
-    "ensemble", [nlp_sklearn, nlp_sklearn_autocorrect, text_statistics_pipeline]
+ensemble_all = Ensemble(
+    "ensemble_all",
+    [sklearn, huggingface_baseline, text_statistics_pipeline, vader],
 )

-ensemble_pipeline_hf = Ensemble(
-    "ensemble_hf_sklearn", [nlp_sklearn, huggingface_baseline]
-)
+ensemble_sklearn_vader = Ensemble("ensemble_sklearn_vader", [sklearn, vader])

-ensemble_pipeline_hf_statistic = Ensemble(
-    "ensemble_hf_statistic", [text_statistics_pipeline, huggingface_baseline]
-)
+ensemble_sklearn_hf = Ensemble("ensemble_sklearn_hf", [sklearn, huggingface_baseline])

-ensemble_pipeline_hf_statistic_sklearn = Ensemble(
-    "ensemble_hf_statistic_sklearn",
-    [nlp_sklearn, text_statistics_pipeline, huggingface_baseline],
+ensemble_hf_vader = Ensemble(
+    "ensemble_hf_vader",
+    [huggingface_baseline, vader],
 )


@@ -241,7 +238,7 @@
         project_name="hate-speech-detection",
         run_name="tweeteval",
         dataset=data_tweet_eval_hate_speech[0],
-        pipeline=ensemble_pipeline,
+        pipeline=sklearn,
         preprocessing_config=preprocess_config,
         metrics=metrics,
         train=True,
@@ -250,7 +247,7 @@
         project_name="hate-speech-detection",
         run_name="tweeteval",
         dataset=data_tweet_eval_hate_speech[1],
-        pipeline=ensemble_pipeline,
+        pipeline=sklearn,
         preprocessing_config=preprocess_config,
         metrics=metrics,
         train=False,
@@ -263,7 +260,7 @@
         project_name="hate-speech-detection-cross-val",
         run_name="merged_dataset",
         dataset=data_merged_train,
-        pipeline=ensemble_pipeline,
+        pipeline=sklearn,
         preprocessing_config=preprocess_config,
         metrics=metrics,
         train=True,
@@ -272,9 +269,39 @@
         project_name="hate-speech-detection-cross-val",
         run_name="hatecheck",
         dataset=data_hatecheck[1],
-        pipeline=ensemble_pipeline,
+        pipeline=sklearn,
         preprocessing_config=preprocess_config,
         metrics=metrics,
         train=False,
     ),
 ]
+
+pipelines_to_evaluate = [
+    sklearn,
+    sklearn_autocorrect,
+    random,
+    vader,
+    huggingface_baseline,
+    ensemble_all,
+    ensemble_hf_vader,
+    ensemble_sklearn_hf,
+    ensemble_sklearn_vader,
+]
+
+
+def set_pipeline(experiment: Experiment, pipeline: Pipeline) -> Experiment:
+    # Copy the shared Experiment before assigning the pipeline; mutating it
+    # in place would leave every experiment pointing at the last pipeline.
+    return Experiment(**{**vars(experiment), "pipeline": pipeline})
+
+
+all_cross_dataset_experiments = flatten(
+    [
+        [
+            set_pipeline(experiment, pipeline)
+            for experiment in cross_dataset_experiments
+        ]
+        for pipeline in pipelines_to_evaluate
+    ]
+)
diff --git a/run.py b/run.py
index f800d06..f1a0be2 100644
--- a/run.py
+++ b/run.py
@@ -1,15 +1,10 @@
 from typing import List, Optional

-from blocks.pipeline import Pipeline
 from configs.constants import Const
-from library.evaluation import calibration_metrics, classification_metrics
-from library.examples.hate_speech import (
-    preprocess_config,
-    tweeteval_hate_speech_experiments,
-)
+from library.examples.hate_speech import all_cross_dataset_experiments
 from plugins import WandbConfig, WandbPlugin
 from runner.runner import Runner
-from type import Evaluators, Experiment, PreprocessConfig
+from type import Experiment


 def run(
@@ -53,7 +48,7 @@

 if __name__ == "__main__":
     run(
-        tweeteval_hate_speech_experiments,
+        all_cross_dataset_experiments,
         save_remote=False,
         remote_logging=True,
     )

From 85d48ff575a49910e9626da4bcedb6a8fd257ed3 Mon Sep 17 00:00:00 2001
From: Mark Aron Szulyovszky
Date: Sat, 30 Jul 2022 13:52:25 +0200
Subject: [PATCH 4/5] fix(Dependencies): pyarrow needs to be 3.0 or above

---
 environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment.yml b/environment.yml
index 41c945e..be9b8f6 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,6 +4,7 @@ channels:
   - huggingface
 dependencies:
   - python=3.9
+  - pyarrow>=3.0
   - black
   - ipython
   - notebook

From 49277e014eda410fab51df4d3af2bc3bda142cde Mon Sep 17 00:00:00 2001
From: Mark Aron Szulyovszky
Date: Sat, 30 Jul 2022 13:59:43 +0200
Subject: [PATCH 5/5] fix(Models): preferred_load_origin should be local

---
 library/examples/hate_speech.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/examples/hate_speech.py b/library/examples/hate_speech.py
index 1dd7a53..79b2a4d 100644
--- a/library/examples/hate_speech.py
+++ b/library/examples/hate_speech.py
@@ -50,7 +50,7 @@
 ### Models

 huggingface_config = HuggingfaceConfig(
-    preferred_load_origin=LoadOrigin.remote,
+    preferred_load_origin=LoadOrigin.local,
     pretrained_model="distilbert-base-uncased",
     user_name="semy",
     save_remote=True,