From 15a059d76ba1d1007523e1720b88de1eb24ee69a Mon Sep 17 00:00:00 2001 From: Daniel Young Date: Mon, 29 Apr 2024 17:14:46 -0700 Subject: [PATCH] Updated predictors to load nicer. Still have to test --- .../experiments/predictor_experiments.ipynb | 13 ++-- .../neural_network/neural_net_predictor.py | 71 +++++++------------ use_cases/eluc/predictors/predictor.py | 4 +- .../predictors/sklearn/sklearn_predictor.py | 37 ++++++---- .../prescriptors/nsga2/train_prescriptors.py | 4 +- use_cases/eluc/tests/test_nsga2.py | 2 +- use_cases/eluc/tests/test_predictors.py | 11 ++- 7 files changed, 66 insertions(+), 76 deletions(-) diff --git a/use_cases/eluc/experiments/predictor_experiments.ipynb b/use_cases/eluc/experiments/predictor_experiments.ipynb index e78a781..e7b7e15 100644 --- a/use_cases/eluc/experiments/predictor_experiments.ipynb +++ b/use_cases/eluc/experiments/predictor_experiments.ipynb @@ -82,7 +82,7 @@ " \"train_pct\": 1,\n", " \"step_lr_params\": {\"step_size\": 1, \"gamma\": 0.1},\n", "}\n", - "nnp = NeuralNetPredictor(**nn_config)" + "nnp = NeuralNetPredictor(nn_config)" ] }, { @@ -109,7 +109,7 @@ } ], "source": [ - "nnp.load(\"predictors/neural_network/trained_models/experiment_nn\")\n", + "nnp = NeuralNetPredictor.load(\"predictors/neural_network/trained_models/experiment_nn\")\n", "print(f\"MAE Neural Net: {mean_absolute_error(dataset.test_df[nn_config['label']], nnp.predict(dataset.test_df[nn_config['features']]))}\")" ] }, @@ -130,7 +130,7 @@ " \"features\": constants.DIFF_LAND_USE_COLS,\n", " \"n_jobs\": -1,\n", "}\n", - "linreg = LinearRegressionPredictor(**linreg_config)" + "linreg = LinearRegressionPredictor(linreg_config)" ] }, { @@ -157,7 +157,7 @@ } ], "source": [ - "linreg.load(\"predictors/sklearn/trained_models/experiment_linreg\")\n", + "linreg = LinearRegressionPredictor.load(\"predictors/sklearn/trained_models/experiment_linreg\")\n", "print(f\"MAE Linear Regression: {mean_absolute_error(dataset.test_df['ELUC'], linreg.predict(dataset.test_df[constants.DIFF_LAND_USE_COLS]))}\")" ] }, @@ -175,11 +175,12 @@ "outputs": [], "source": [ "forest_config = {\n", + " \"features\": constants.NN_FEATS,\n", " \"n_jobs\": -1,\n", " \"max_features\": \"sqrt\",\n", " \"random_state\": 42\n", "}\n", - "forest = RandomForestPredictor(features=constants.NN_FEATS, **forest_config)" + "forest = RandomForestPredictor(forest_config)" ] }, { @@ -208,7 +209,7 @@ } ], "source": [ - "forest.load(\"predictors/sklearn/trained_models/experiment_rf\")\n", + "forest = RandomForestPredictor.load(\"predictors/sklearn/trained_models/experiment_rf\")\n", "print(f\"MAE Random Forest: {mean_absolute_error(dataset.test_df['ELUC'], forest.predict(dataset.test_df[constants.NN_FEATS]))}\")" ] }, diff --git a/use_cases/eluc/predictors/neural_network/neural_net_predictor.py b/use_cases/eluc/predictors/neural_network/neural_net_predictor.py index e9f005f..260d1f3 100644 --- a/use_cases/eluc/predictors/neural_network/neural_net_predictor.py +++ b/use_cases/eluc/predictors/neural_network/neural_net_predictor.py @@ -2,7 +2,6 @@ Implementation of predictor.py using a simple feed-forward NeuralNetwork implemented in PyTorch. """ - import copy import json import time @@ -79,64 +78,48 @@ class NeuralNetPredictor(Predictor): in order to take advantage of the linear relationship in the data. Data is automatically standardized and the scaler is saved with the model. """ - def __init__(self, features=None, label=None, hidden_sizes=None, linear_skip=True, - dropout=0, device="mps", epochs=3, batch_size=2048, optim_params=None, - train_pct=1, step_lr_params=None): - # Fix dangerous default param values - if not step_lr_params: - step_lr_params = {"step_size": 1, "gamma": 0.1} - if not hidden_sizes: - hidden_sizes = [4096] - if not optim_params: - optim_params = {} - - self.features=None - self.label=None - - self.set_params(features, label, hidden_sizes, linear_skip, - dropout, device, epochs, batch_size, optim_params, - train_pct, step_lr_params) + def __init__(self, model_config: dict): + + self.features = model_config.get("features", None) + self.label = model_config.get("label", None) + self.hidden_sizes = model_config.get("hidden_sizes", [4096]) + self.linear_skip = model_config.get("linear_skip", True) + self.dropout = model_config.get("dropout", 0) + self.device = model_config.get("device", "cpu") + self.epochs = model_config.get("epochs", 3) + self.batch_size = model_config.get("batch_size", 2048) + self.optim_params = model_config.get("optim_params", {}) + self.train_pct = model_config.get("train_pct", 1) + self.step_lr_params = model_config.get("step_lr_params", {"step_size": 1, "gamma": 0.1}) self.model = None self.scaler = StandardScaler() - def set_params(self, features, label, hidden_sizes, linear_skip, - dropout, device, epochs, batch_size, optim_params, - train_pct, step_lr_params): - """ - Set all the parameters for the neural network. - """ - self.features = features - self.label = label - self.hidden_sizes = hidden_sizes - self.linear_skip = linear_skip - self.dropout = dropout - self.device = device - self.epochs = epochs - self.batch_size = batch_size - self.optim_params = optim_params - self.train_pct = train_pct - self.step_lr_params = step_lr_params - - def load(self, path: str): + @classmethod + def load(cls, path: str): """ Loads a model from a given folder containing a config.json, model.pt, and scaler.joblib. :param path: path to folder containing model files. """ - load_path = Path(path) + if isinstance(path, str): + load_path = Path(path) + else: + load_path = path if not load_path.exists(): raise FileNotFoundError(f"Path {path} does not exist.") # Initialize model with config with open(load_path / "config.json", "r", encoding="utf-8") as file: config = json.load(file) - self.set_params(**config) - self.model = ELUCNeuralNet(len(self.features), self.hidden_sizes, self.linear_skip, self.dropout) - self.model.load_state_dict(torch.load(load_path / "model.pt")) - self.model.to(self.device) - self.model.eval() - self.scaler = joblib.load(load_path / "scaler.joblib") + nnp = cls(config) + + nnp.model = ELUCNeuralNet(len(config["features"]), config["hidden_sizes"], config["linear_skip"], config["dropout"]) + nnp.model.load_state_dict(torch.load(load_path / "model.pt")) + nnp.model.to(config["device"]) + nnp.model.eval() + nnp.scaler = joblib.load(load_path / "scaler.joblib") + return nnp def save(self, path: str): diff --git a/use_cases/eluc/predictors/predictor.py b/use_cases/eluc/predictors/predictor.py index 054899e..e845542 100644 --- a/use_cases/eluc/predictors/predictor.py +++ b/use_cases/eluc/predictors/predictor.py @@ -45,9 +45,9 @@ def save(self, path: str): :param path: path to save the model """ - + @classmethod @abstractmethod - def load(self, path: str): + def load(cls, path: str): """ Loads a model from a path. :param path: path to the model diff --git a/use_cases/eluc/predictors/sklearn/sklearn_predictor.py b/use_cases/eluc/predictors/sklearn/sklearn_predictor.py index 3aef26b..ef19ad6 100644 --- a/use_cases/eluc/predictors/sklearn/sklearn_predictor.py +++ b/use_cases/eluc/predictors/sklearn/sklearn_predictor.py @@ -19,9 +19,10 @@ class SKLearnPredictor(Predictor, ABC): Simple abstract class for sklearn predictors. Keeps track of features fit on and label to predict. """ - def __init__(self, features=None, label=None): - self.features = features - self.label = label + def __init__(self, model_config: dict): + self.features = model_config.get("features", None) + self.label = model_config.get("label", None) + self.model = None def save(self, path: str): @@ -30,7 +31,10 @@ def save(self, path: str): Generates path to folder if it does not exist. :param path: path to folder to save model files. """ - save_path = Path(path) + if isinstance(path, str): + save_path = Path(path) + else: + save_path = path save_path.mkdir(parents=True, exist_ok=True) config = { "features": self.features, @@ -40,7 +44,8 @@ def save(self, path: str): json.dump(config, file) joblib.dump(self.model, save_path / "model.joblib") - def load(self, path): + @classmethod + def load(cls, path): """ Loads saved model and features from a folder. :param path: path to folder to load model files from. @@ -48,9 +53,9 @@ def load(self, path): load_path = Path(path) with open(load_path / "config.json", "r", encoding="utf-8") as file: config = json.load(file) - self.features = config["features"] - self.label = config["label"] - self.model = joblib.load(load_path / "model.joblib") + sklearn_predictor = cls(config) + sklearn_predictor.model = joblib.load(load_path / "model.joblib") + return sklearn_predictor def fit(self, X_train: pd.DataFrame, y_train: pd.Series): """ @@ -83,9 +88,11 @@ class LinearRegressionPredictor(SKLearnPredictor): Simple linear regression predictor. See SKLearnPredictor for more details. """ - def __init__(self, features=None, **kwargs): - super().__init__(features) - self.model = LinearRegression(**kwargs) + def __init__(self, model_config: dict): + super().__init__(model_config) + model_config.pop("features", None) + model_config.pop("label", None) + self.model = LinearRegression(**model_config) class RandomForestPredictor(SKLearnPredictor): """ @@ -93,9 +100,11 @@ class RandomForestPredictor(SKLearnPredictor): See SKLearnPredictor for more details. Overrides save method in order to compress it. """ - def __init__(self, features=None, **kwargs): - super().__init__(features) - self.model = RandomForestRegressor(**kwargs) + def __init__(self, model_config: dict): + super().__init__(model_config) + model_config.pop("features", None) + model_config.pop("label", None) + self.model = RandomForestRegressor(**model_config) def save(self, path: str, compression=0): """ diff --git a/use_cases/eluc/prescriptors/nsga2/train_prescriptors.py b/use_cases/eluc/prescriptors/nsga2/train_prescriptors.py index b7f7cd0..abdfd6f 100644 --- a/use_cases/eluc/prescriptors/nsga2/train_prescriptors.py +++ b/use_cases/eluc/prescriptors/nsga2/train_prescriptors.py @@ -25,9 +25,7 @@ print("Loading predictor...") # TODO: We need to make it so you can load any predictor here - nnp = NeuralNetPredictor() - nnp_path = Path(config["predictor_path"]) - nnp.load(nnp_path) + nnp = NeuralNetPredictor.load(Path(config["predictor_path"])) print("Initializing prescription...") if "seed_dir" in config["evolution_params"].keys(): diff --git a/use_cases/eluc/tests/test_nsga2.py b/use_cases/eluc/tests/test_nsga2.py index 6dcfb20..5114341 100644 --- a/use_cases/eluc/tests/test_nsga2.py +++ b/use_cases/eluc/tests/test_nsga2.py @@ -69,7 +69,7 @@ def setUpClass(cls): fields = get_fields(cls.dummy_data) encoder = ELUCEncoder(fields) - predictor = LinearRegressionPredictor(features=constants.DIFF_LAND_USE_COLS, n_jobs=-1) + predictor = LinearRegressionPredictor(dict(features=constants.DIFF_LAND_USE_COLS, n_jobs=-1)) predictor.fit(cls.dummy_data[constants.DIFF_LAND_USE_COLS], cls.dummy_data["ELUC"]) cls.prescriptor = TorchPrescriptor( 100, diff --git a/use_cases/eluc/tests/test_predictors.py b/use_cases/eluc/tests/test_predictors.py index ddaa9db..c39496e 100644 --- a/use_cases/eluc/tests/test_predictors.py +++ b/use_cases/eluc/tests/test_predictors.py @@ -45,7 +45,7 @@ def test_save_file_names(self): ] for model, config, test_names in zip(self.models, self.configs, save_file_names): with self.subTest(model=model): - predictor = model(**config) + predictor = model(config) predictor.fit(self.dummy_data, self.dummy_target) predictor.save(self.temp_path) files = [f.name for f in self.temp_path.glob("**/*") if f.is_file()] @@ -61,13 +61,12 @@ def test_loaded_same(self): for model, config in zip(self.models, self.configs): with self.subTest(model=model): - predictor = model(**config) + predictor = model(config) predictor.fit(self.dummy_data.iloc[:2], self.dummy_target.iloc[:2]) output = predictor.predict(self.dummy_data.iloc[2:]) predictor.save(self.temp_path) - loaded = model(**config) - loaded.load(self.temp_path) + loaded = model.load(self.temp_path) loaded_output = loaded.predict(self.dummy_data.iloc[2:]) self.assertTrue((output == loaded_output).all().all()) # Pandas is so annoying why is this necessary? @@ -91,7 +90,7 @@ def test_single_input(self): """ Tests the neural net with a single input. """ - predictor = NeuralNetPredictor(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu") + predictor = NeuralNetPredictor(dict(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu")) train_data = pd.DataFrame({"a": [1], "b": [2], "c": [3], "label": [4]}) test_data = pd.DataFrame({"a": [4], "b": [5], "c": [6]}) @@ -104,7 +103,7 @@ def test_multi_input(self): """ Tests the neural net with multiple inputs. """ - predictor = NeuralNetPredictor(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu") + predictor = NeuralNetPredictor(dict(hidden_sizes=[4], epochs=1, batch_size=1, device="cpu")) train_data = pd.DataFrame({"a": [1, 2], "b": [2, 3], "c": [3, 4], "label": [4, 5]}) test_data = pd.DataFrame({"a": [4, 5], "b": [5, 6], "c": [6, 7]})