It's a test

inspirehep · Oct 9, 2024 · 66b1c10 · 66b1c10
1 parent b24bb59
commit 66b1c10
Show file tree

Hide file tree

Showing 6 changed files with 46 additions and 34 deletions.
diff --git a/inspire_classifier/cli.py b/inspire_classifier/cli.py
@@ -43,11 +43,10 @@ def inspire_classifier():
     "-b", "--base-path", type=click.Path(exists=True), required=False, nargs=1
 )
 def predict(title, abstract, base_path):
-    with click_spinner.spinner():
-        with current_app.app_context():
-            if base_path:
-                current_app.config["CLASSIFIER_BASE_PATH"] = base_path
-            click.echo(predict_coreness(title, abstract))
+    with click_spinner.spinner(),current_app.app_context():
+        if base_path:
+            current_app.config["CLASSIFIER_BASE_PATH"] = base_path
+        click.echo(predict_coreness(title, abstract))
 
 
 @inspire_classifier.command("train")
@@ -58,19 +57,18 @@ def predict(title, abstract, base_path):
     "-b", "--base-path", type=click.Path(exists=True), required=False, nargs=1
 )
 def train_classifier(language_model_epochs, classifier_epochs, base_path):
-    with click_spinner.spinner():
-        with current_app.app_context():
-            if language_model_epochs:
-                current_app.config["CLASSIFIER_LANGUAGE_MODEL_CYCLE_LENGTH"] = (
-                    language_model_epochs
-                )
-            if classifier_epochs:
-                current_app.config["CLASSIFIER_CLASSIFIER_CYCLE_LENGTH"] = (
-                    classifier_epochs
-                )
-            if base_path:
-                current_app.config["CLASSIFIER_BASE_PATH"] = base_path
-            train()
+    with click_spinner.spinner(),current_app.app_context():
+        if language_model_epochs:
+            current_app.config["CLASSIFIER_LANGUAGE_MODEL_CYCLE_LENGTH"] = (
+                language_model_epochs
+            )
+        if classifier_epochs:
+            current_app.config["CLASSIFIER_CLASSIFIER_CYCLE_LENGTH"] = (
+                classifier_epochs
+            )
+        if base_path:
+            current_app.config["CLASSIFIER_BASE_PATH"] = base_path
+        train()
 
 
 @inspire_classifier.command("validate")

diff --git a/inspire_classifier/domain/models.py b/inspire_classifier/domain/models.py
@@ -124,8 +124,11 @@ def initialize_learner(
         self,
         dropout_multiplier=0.5,
         weight_decay=1e-6,
-        learning_rates=np.array([1e-4, 1e-4, 1e-4, 1e-3, 1e-2]),
+        learning_rates=None,
     ):
+        if learning_rates is None:
+            learning_rates = np.array([1e-4, 1e-4, 1e-4, 1e-3, 1e-2])
+
         self.learner = text_classifier_learner(
             self.dataloader,
             AWD_LSTM,

diff --git a/inspire_classifier/domain/preprocessor.py b/inspire_classifier/domain/preprocessor.py
@@ -31,9 +31,11 @@
 def split_and_save_data_for_training(dataframe_path, dest_dir, val_fraction=0.1):
     """
     Args:
-        dataframe_path: The path to the pandas dataframe containing the records. The dataframe should have one
-                        column containing the title and abstract text appended (title + abstract). The second
-                        column should contain the label as an integer (0: Rejected, 1: Non-Core, 2: Core).
+        dataframe_path: The path to the pandas dataframe containing the records.
+                        The dataframe should have one column containing the title and
+                        abstract text appended (title + abstract). The second column
+                        should contain the label as an integer
+                        (0: Rejected, 1: Non-Core, 2: Core).
         dest_dir: Directory to save the training/validation csv.
         val_fraction: the fraction of data to use as the validation set.
     """

diff --git a/scripts/train_classifier.py b/scripts/train_classifier.py
@@ -46,7 +46,9 @@ def train_classifier(
     print("-----------------")
 
     os.system(
-        f"inspire-classifier train -b classifier --classifier-epochs {number_of_classifier_epochs} --language-model-epochs {number_of_lanuage_model_epochs}"
+        f"inspire-classifier train -b classifier "
+        f"--classifier-epochs {number_of_classifier_epochs} "
+        f"--language-model-epochs {number_of_lanuage_model_epochs}"
     )
     print("training finished successfully!")
     os.system(

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
@@ -55,9 +55,10 @@ class Mock_Learner(Learner):
     """
     Mocks the fit method of the Learner.
 
-    This is done to reduce the model training time during testing by making the fit run once (as opposed to 2 times and
-    3 times for the LanguageModel and Classifier respectively). It stores the result of the first run and then returns
-    the same result for the other times fit is run.
+    This is done to reduce the model training time during testing by making the fit
+    run once (as opposed to 2 times and 3 times for the LanguageModel and Classifier
+    respectively). It stores the result of the first run and then returns the same
+    result for the other times fit is run.
     """
 
     def fit(self, *args, **kwargs):
@@ -70,7 +71,7 @@ def fit(self, *args, **kwargs):
 
 @pytest.fixture(scope="session")
 @patch("fastai.text.learner.text_classifier_learner", Mock_Learner)
-def trained_pipeline(app, tmp_path_factory):
+def _trained_pipeline(app, tmp_path_factory):
     app.config["CLASSIFIER_BASE_PATH"] = tmp_path_factory.getbasetemp()
     create_directories()
     shutil.copy(

diff --git a/tests/integration/test_classifier_api.py b/tests/integration/test_classifier_api.py
@@ -24,6 +24,7 @@
 from math import isclose
 
 import pandas as pd
+import pytest
 
 from inspire_classifier.api import predict_coreness
 from inspire_classifier.utils import path_for
@@ -42,11 +43,13 @@
      " numerical range.")
 
 
-def test_create_directories(trained_pipeline):
+@pytest.mark.usefixtures("_trained_pipeline")
+def test_create_directories():
     assert path_for("classifier_model").exists()
 
 
-def test_preprocess_and_save_data(app, trained_pipeline):
+@pytest.mark.usefixtures("_trained_pipeline")
+def test_preprocess_and_save_data(app):
     dataframe = pd.read_pickle(path_for("dataframe"))
 
     training_valid__csv = pd.read_csv(path_for("train_valid_data"))
@@ -64,8 +67,8 @@ def test_preprocess_and_save_data(app, trained_pipeline):
         abs_tol=1,
     )
 
-
-def test_vocab(app, trained_pipeline):
+@pytest.mark.usefixtures("_trained_pipeline")
+def test_vocab(app):
     with open(path_for("data_itos"), "rb") as file:
         data_itos = pickle.load(file)
     # For performance when using mixed precision, the vocabulary is always made of
@@ -78,15 +81,18 @@ def test_vocab(app, trained_pipeline):
     assert len(data_itos) == adjusted_max_vocab
 
 
-def test_save_language_model(trained_pipeline):
+@pytest.mark.usefixtures("_trained_pipeline")
+def test_save_language_model():
     assert path_for("finetuned_language_model_encoder").exists()
 
 
-def test_train_and_save_classifier(trained_pipeline):
+@pytest.mark.usefixtures("_trained_pipeline")
+def test_train_and_save_classifier():
     assert path_for("trained_classifier").exists()
 
 
-def test_predict_coreness(trained_pipeline):
+@pytest.mark.usefixtures("_trained_pipeline")
+def test_predict_coreness():
     assert path_for("data_itos").exists()
     assert path_for("trained_classifier").exists()
     output_dict = predict_coreness(title=TEST_TITLE, abstract=TEST_ABSTRACT)