From 100ada611e39525863d6f116fcc5f7f2db799d91 Mon Sep 17 00:00:00 2001 From: Jan Pecinovsky Date: Fri, 23 Feb 2024 11:22:35 +0000 Subject: [PATCH] disallow negative coefficients --- openenergyid/__init__.py | 2 +- openenergyid/mvlr/main.py | 1 + openenergyid/mvlr/models.py | 16 +++++++++++++++ openenergyid/mvlr/mvlr.py | 41 ++++++++++++++++++++++++------------- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/openenergyid/__init__.py b/openenergyid/__init__.py index 40660c6..93d1dea 100644 --- a/openenergyid/__init__.py +++ b/openenergyid/__init__.py @@ -1,6 +1,6 @@ """Open Energy ID Python SDK.""" -__version__ = "0.1.8" +__version__ = "0.1.9" from .enums import Granularity from .models import TimeSeries diff --git a/openenergyid/mvlr/main.py b/openenergyid/mvlr/main.py index aa855e2..1f7cb13 100644 --- a/openenergyid/mvlr/main.py +++ b/openenergyid/mvlr/main.py @@ -18,6 +18,7 @@ def find_best_mvlr( granularity=granularity, allow_negative_predictions=data.allow_negative_predictions, single_use_exog_prefixes=data.single_use_exog_prefixes, + exogs__disallow_negative_coefficient=data.get_disallowed_negative_coefficients(), ) mvlr.do_analysis() if mvlr.validate( diff --git a/openenergyid/mvlr/models.py b/openenergyid/mvlr/models.py index e4e80d1..34ac7a5 100644 --- a/openenergyid/mvlr/models.py +++ b/openenergyid/mvlr/models.py @@ -51,6 +51,11 @@ class IndependentVariableInput(BaseModel): "Eg. `HDD_16.5` will be Heating Degree Days with a base temperature of 16.5°C, " "`CDD_0` will be Cooling Degree Days with a base temperature of 0°C.", ) + allow_negative_coefficient: bool = Field( + default=True, + alias="allowNegativeCoefficient", + description="Whether the coefficient can be negative.", + ) class MultiVariableRegressionInput(BaseModel): @@ -123,6 +128,17 @@ def data_frame(self) -> pd.DataFrame: return frame + def get_disallowed_negative_coefficients(self) -> List[str]: + """Get independent variables that are not allowed to have a negative coefficient.""" + result = [] + for iv in self.independent_variables: # pylint: disable=not-an-iterable + if iv.name == COLUMN_TEMPERATUREEQUIVALENT and iv.variants is not None: + if not iv.allow_negative_coefficient: + result.extend(iv.variants) + elif not iv.allow_negative_coefficient: + result.append(iv.name) + return result + ###################### # MVLR Result Models # diff --git a/openenergyid/mvlr/mvlr.py b/openenergyid/mvlr/mvlr.py index 8ec9ea6..ed6918a 100644 --- a/openenergyid/mvlr/mvlr.py +++ b/openenergyid/mvlr/mvlr.py @@ -41,6 +41,7 @@ def __init__( allow_negative_predictions: bool = False, granularity: Granularity = None, single_use_exog_prefixes: list[str] = None, + exogs__disallow_negative_coefficient: list[str] = None, ): """Parameters ---------- @@ -72,6 +73,8 @@ def __init__( will be used as an independent variable. Once the best fit using a variable with a given prefix is found, the other variables with the same prefix will not be used as independent variables. + exogs__disallow_negative_coefficient : list of str, default=None + List of variable names for which the coefficient is not allowed to be negative. """ self.data = data.copy() if y not in self.data.columns: @@ -87,6 +90,7 @@ def __init__( self.allow_negative_predictions = allow_negative_predictions self.granularity = granularity self.single_use_exog_prefixes = single_use_exog_prefixes + self.exogs__disallow_negative_coefficient = exogs__disallow_negative_coefficient self._fit = None self._list_of_fits = [] self.list_of_cverrors = [] @@ -161,6 +165,15 @@ def _do_analysis_no_cross_validation(self): ref_fit.model.formula.rhs_termlist + [term], ) fit = fm.ols(model_desc, data=self.data).fit() + + # Check if the coefficient of the variable is allowed to be negative + if ( + self.exogs__disallow_negative_coefficient is not None + and x in self.exogs__disallow_negative_coefficient + and fit.params[x] < 0 + ): + continue + if fit.bic < best_bic: best_bic = fit.bic best_fit = fit @@ -174,20 +187,20 @@ def _do_analysis_no_cross_validation(self): ref_fit.model.formula.rhs_termlist, ): break - else: - self._list_of_fits.append(best_fit) - all_model_terms_dict.pop(best_x) - - # Check if `best_x` starts with a prefix that should only be used once - # If so, remove all other variables with the same prefix from the list of candidates - if self.single_use_exog_prefixes: - for prefix in self.single_use_exog_prefixes: - if best_x.startswith(prefix): - all_model_terms_dict = { - k: v - for k, v in all_model_terms_dict.items() - if not k.startswith(prefix) - } + + self._list_of_fits.append(best_fit) + all_model_terms_dict.pop(best_x) + + # Check if `best_x` starts with a prefix that should only be used once + # If so, remove all other variables with the same prefix from the list of candidates + if self.single_use_exog_prefixes: + for prefix in self.single_use_exog_prefixes: + if best_x.startswith(prefix): + all_model_terms_dict = { + k: v + for k, v in all_model_terms_dict.items() + if not k.startswith(prefix) + } self._fit = self._list_of_fits[-1]