From 66b1c104842460e524df964efe9c3b515a3a8784 Mon Sep 17 00:00:00 2001 From: DonHaul Date: Wed, 9 Oct 2024 11:05:31 +0200 Subject: [PATCH] its a test --- inspire_classifier/cli.py | 34 +++++++++++------------ inspire_classifier/domain/models.py | 5 +++- inspire_classifier/domain/preprocessor.py | 8 ++++-- scripts/train_classifier.py | 4 ++- tests/integration/conftest.py | 9 +++--- tests/integration/test_classifier_api.py | 20 ++++++++----- 6 files changed, 46 insertions(+), 34 deletions(-) diff --git a/inspire_classifier/cli.py b/inspire_classifier/cli.py index 3eda875..0d930fe 100644 --- a/inspire_classifier/cli.py +++ b/inspire_classifier/cli.py @@ -43,11 +43,10 @@ def inspire_classifier(): "-b", "--base-path", type=click.Path(exists=True), required=False, nargs=1 ) def predict(title, abstract, base_path): - with click_spinner.spinner(): - with current_app.app_context(): - if base_path: - current_app.config["CLASSIFIER_BASE_PATH"] = base_path - click.echo(predict_coreness(title, abstract)) + with click_spinner.spinner(),current_app.app_context(): + if base_path: + current_app.config["CLASSIFIER_BASE_PATH"] = base_path + click.echo(predict_coreness(title, abstract)) @inspire_classifier.command("train") @@ -58,19 +57,18 @@ def predict(title, abstract, base_path): "-b", "--base-path", type=click.Path(exists=True), required=False, nargs=1 ) def train_classifier(language_model_epochs, classifier_epochs, base_path): - with click_spinner.spinner(): - with current_app.app_context(): - if language_model_epochs: - current_app.config["CLASSIFIER_LANGUAGE_MODEL_CYCLE_LENGTH"] = ( - language_model_epochs - ) - if classifier_epochs: - current_app.config["CLASSIFIER_CLASSIFIER_CYCLE_LENGTH"] = ( - classifier_epochs - ) - if base_path: - current_app.config["CLASSIFIER_BASE_PATH"] = base_path - train() + with click_spinner.spinner(),current_app.app_context(): + if language_model_epochs: + current_app.config["CLASSIFIER_LANGUAGE_MODEL_CYCLE_LENGTH"] = ( + language_model_epochs + ) + if classifier_epochs: + current_app.config["CLASSIFIER_CLASSIFIER_CYCLE_LENGTH"] = ( + classifier_epochs + ) + if base_path: + current_app.config["CLASSIFIER_BASE_PATH"] = base_path + train() @inspire_classifier.command("validate") diff --git a/inspire_classifier/domain/models.py b/inspire_classifier/domain/models.py index cb57b9f..302b639 100644 --- a/inspire_classifier/domain/models.py +++ b/inspire_classifier/domain/models.py @@ -124,8 +124,11 @@ def initialize_learner( self, dropout_multiplier=0.5, weight_decay=1e-6, - learning_rates=np.array([1e-4, 1e-4, 1e-4, 1e-3, 1e-2]), + learning_rates=None, ): + if learning_rates is None: + learning_rates = np.array([1e-4, 1e-4, 1e-4, 1e-3, 1e-2]) + self.learner = text_classifier_learner( self.dataloader, AWD_LSTM, diff --git a/inspire_classifier/domain/preprocessor.py b/inspire_classifier/domain/preprocessor.py index d867949..d87a2bb 100644 --- a/inspire_classifier/domain/preprocessor.py +++ b/inspire_classifier/domain/preprocessor.py @@ -31,9 +31,11 @@ def split_and_save_data_for_training(dataframe_path, dest_dir, val_fraction=0.1): """ Args: - dataframe_path: The path to the pandas dataframe containing the records. The dataframe should have one - column containing the title and abstract text appended (title + abstract). The second - column should contain the label as an integer (0: Rejected, 1: Non-Core, 2: Core). + dataframe_path: The path to the pandas dataframe containing the records. + The dataframe should have one column containing the title and + abstract text appended (title + abstract). The second column + should contain the label as an integer + (0: Rejected, 1: Non-Core, 2: Core). dest_dir: Directory to save the training/validation csv. val_fraction: the fraction of data to use as the validation set. """ diff --git a/scripts/train_classifier.py b/scripts/train_classifier.py index b5e69cb..509e138 100644 --- a/scripts/train_classifier.py +++ b/scripts/train_classifier.py @@ -46,7 +46,9 @@ def train_classifier( print("-----------------") os.system( - f"inspire-classifier train -b classifier --classifier-epochs {number_of_classifier_epochs} --language-model-epochs {number_of_lanuage_model_epochs}" + f"inspire-classifier train -b classifier " + f"--classifier-epochs {number_of_classifier_epochs} " + f"--language-model-epochs {number_of_lanuage_model_epochs}" ) print("training finished successfully!") os.system( diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5e43453..060feb4 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -55,9 +55,10 @@ class Mock_Learner(Learner): """ Mocks the fit method of the Learner. - This is done to reduce the model training time during testing by making the fit run once (as opposed to 2 times and - 3 times for the LanguageModel and Classifier respectively). It stores the result of the first run and then returns - the same result for the other times fit is run. + This is done to reduce the model training time during testing by making the fit + run once (as opposed to 2 times and 3 times for the LanguageModel and Classifier + respectively). It stores the result of the first run and then returns the same + result for the other times fit is run. """ def fit(self, *args, **kwargs): @@ -70,7 +71,7 @@ def fit(self, *args, **kwargs): @pytest.fixture(scope="session") @patch("fastai.text.learner.text_classifier_learner", Mock_Learner) -def trained_pipeline(app, tmp_path_factory): +def _trained_pipeline(app, tmp_path_factory): app.config["CLASSIFIER_BASE_PATH"] = tmp_path_factory.getbasetemp() create_directories() shutil.copy( diff --git a/tests/integration/test_classifier_api.py b/tests/integration/test_classifier_api.py index e5b4490..f22f4a0 100644 --- a/tests/integration/test_classifier_api.py +++ b/tests/integration/test_classifier_api.py @@ -24,6 +24,7 @@ from math import isclose import pandas as pd +import pytest from inspire_classifier.api import predict_coreness from inspire_classifier.utils import path_for @@ -42,11 +43,13 @@ " numerical range.") -def test_create_directories(trained_pipeline): +@pytest.mark.usefixtures("_trained_pipeline") +def test_create_directories(): assert path_for("classifier_model").exists() -def test_preprocess_and_save_data(app, trained_pipeline): +@pytest.mark.usefixtures("_trained_pipeline") +def test_preprocess_and_save_data(app): dataframe = pd.read_pickle(path_for("dataframe")) training_valid__csv = pd.read_csv(path_for("train_valid_data")) @@ -64,8 +67,8 @@ def test_preprocess_and_save_data(app, trained_pipeline): abs_tol=1, ) - -def test_vocab(app, trained_pipeline): +@pytest.mark.usefixtures("_trained_pipeline") +def test_vocab(app): with open(path_for("data_itos"), "rb") as file: data_itos = pickle.load(file) # For performance when using mixed precision, the vocabulary is always made of @@ -78,15 +81,18 @@ def test_vocab(app, trained_pipeline): assert len(data_itos) == adjusted_max_vocab -def test_save_language_model(trained_pipeline): +@pytest.mark.usefixtures("_trained_pipeline") +def test_save_language_model(): assert path_for("finetuned_language_model_encoder").exists() -def test_train_and_save_classifier(trained_pipeline): +@pytest.mark.usefixtures("_trained_pipeline") +def test_train_and_save_classifier(): assert path_for("trained_classifier").exists() -def test_predict_coreness(trained_pipeline): +@pytest.mark.usefixtures("_trained_pipeline") +def test_predict_coreness(): assert path_for("data_itos").exists() assert path_for("trained_classifier").exists() output_dict = predict_coreness(title=TEST_TITLE, abstract=TEST_ABSTRACT)