disallow negative coefficients

EnergieID · Feb 23, 2024 · 100ada6 · 100ada6
1 parent 49e1aa6
commit 100ada6
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 15 deletions.
diff --git a/openenergyid/__init__.py b/openenergyid/__init__.py
@@ -1,6 +1,6 @@
 """Open Energy ID Python SDK."""
 
-__version__ = "0.1.8"
+__version__ = "0.1.9"
 
 from .enums import Granularity
 from .models import TimeSeries

diff --git a/openenergyid/mvlr/main.py b/openenergyid/mvlr/main.py
@@ -18,6 +18,7 @@ def find_best_mvlr(
             granularity=granularity,
             allow_negative_predictions=data.allow_negative_predictions,
             single_use_exog_prefixes=data.single_use_exog_prefixes,
+            exogs__disallow_negative_coefficient=data.get_disallowed_negative_coefficients(),
         )
         mvlr.do_analysis()
         if mvlr.validate(

diff --git a/openenergyid/mvlr/models.py b/openenergyid/mvlr/models.py
@@ -51,6 +51,11 @@ class IndependentVariableInput(BaseModel):
         "Eg. `HDD_16.5` will be Heating Degree Days with a base temperature of 16.5°C, "
         "`CDD_0` will be Cooling Degree Days with a base temperature of 0°C.",
     )
+    allow_negative_coefficient: bool = Field(
+        default=True,
+        alias="allowNegativeCoefficient",
+        description="Whether the coefficient can be negative.",
+    )
 
 
 class MultiVariableRegressionInput(BaseModel):
@@ -123,6 +128,17 @@ def data_frame(self) -> pd.DataFrame:
 
         return frame
 
+    def get_disallowed_negative_coefficients(self) -> List[str]:
+        """Get independent variables that are not allowed to have a negative coefficient.
+
+        Iterates over ``self.independent_variables`` and collects every entry
+        whose ``allow_negative_coefficient`` flag is false. For the variable
+        named ``COLUMN_TEMPERATUREEQUIVALENT`` that has ``variants`` set, the
+        variant names are collected instead of the variable's own name.
+
+        Returns
+        -------
+        list of str
+            The collected names, in the order the variables are listed.
+            Empty when every variable allows a negative coefficient.
+        """
+        result = []
+        for iv in self.independent_variables:  # pylint: disable=not-an-iterable
+            # Temperature-equivalent variable with variants: the restriction is
+            # reported per variant name rather than under the variable's own name.
+            if iv.name == COLUMN_TEMPERATUREEQUIVALENT and iv.variants is not None:
+                if not iv.allow_negative_coefficient:
+                    result.extend(iv.variants)
+            # Any other variable (or temperature-equivalent without variants)
+            # is reported under its own name.
+            elif not iv.allow_negative_coefficient:
+                result.append(iv.name)
+        return result
+
 
 ######################
 # MVLR Result Models #

diff --git a/openenergyid/mvlr/mvlr.py b/openenergyid/mvlr/mvlr.py
@@ -41,6 +41,7 @@ def __init__(
         allow_negative_predictions: bool = False,
         granularity: Granularity = None,
         single_use_exog_prefixes: list[str] = None,
+        exogs__disallow_negative_coefficient: list[str] = None,
     ):
         """Parameters
         ----------
@@ -72,6 +73,8 @@ def __init__(
             will be used as an independent variable.
             Once the best fit using a variable with a given prefix is found, the other variables with the same
             prefix will not be used as independent variables.
+        exogs__disallow_negative_coefficient : list of str, default=None
+            List of variable names for which the coefficient is not allowed to be negative.
         """
         self.data = data.copy()
         if y not in self.data.columns:
@@ -87,6 +90,7 @@ def __init__(
         self.allow_negative_predictions = allow_negative_predictions
         self.granularity = granularity
         self.single_use_exog_prefixes = single_use_exog_prefixes
+        self.exogs__disallow_negative_coefficient = exogs__disallow_negative_coefficient
         self._fit = None
         self._list_of_fits = []
         self.list_of_cverrors = []
@@ -161,6 +165,15 @@ def _do_analysis_no_cross_validation(self):
                     ref_fit.model.formula.rhs_termlist + [term],
                 )
                 fit = fm.ols(model_desc, data=self.data).fit()
+
+                # Skip this candidate if its fitted coefficient is negative while the variable is not allowed a negative coefficient
+                if (
+                    self.exogs__disallow_negative_coefficient is not None
+                    and x in self.exogs__disallow_negative_coefficient
+                    and fit.params[x] < 0
+                ):
+                    continue
+
                 if fit.bic < best_bic:
                     best_bic = fit.bic
                     best_fit = fit
@@ -174,20 +187,20 @@ def _do_analysis_no_cross_validation(self):
                 ref_fit.model.formula.rhs_termlist,
             ):
                 break
-            else:
-                self._list_of_fits.append(best_fit)
-                all_model_terms_dict.pop(best_x)
-
-                # Check if `best_x` starts with a prefix that should only be used once
-                # If so, remove all other variables with the same prefix from the list of candidates
-                if self.single_use_exog_prefixes:
-                    for prefix in self.single_use_exog_prefixes:
-                        if best_x.startswith(prefix):
-                            all_model_terms_dict = {
-                                k: v
-                                for k, v in all_model_terms_dict.items()
-                                if not k.startswith(prefix)
-                            }
+
+            self._list_of_fits.append(best_fit)
+            all_model_terms_dict.pop(best_x)
+
+            # Check if `best_x` starts with a prefix that should only be used once
+            # If so, remove all other variables with the same prefix from the list of candidates
+            if self.single_use_exog_prefixes:
+                for prefix in self.single_use_exog_prefixes:
+                    if best_x.startswith(prefix):
+                        all_model_terms_dict = {
+                            k: v
+                            for k, v in all_model_terms_dict.items()
+                            if not k.startswith(prefix)
+                        }
 
         self._fit = self._list_of_fits[-1]