Skip to content

Commit

Permalink
disallow negative coefficients
Browse files Browse the repository at this point in the history
  • Loading branch information
JrtPec committed Feb 23, 2024
1 parent 49e1aa6 commit 100ada6
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 15 deletions.
2 changes: 1 addition & 1 deletion openenergyid/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Open Energy ID Python SDK."""

__version__ = "0.1.8"
__version__ = "0.1.9"

from .enums import Granularity
from .models import TimeSeries
Expand Down
1 change: 1 addition & 0 deletions openenergyid/mvlr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def find_best_mvlr(
granularity=granularity,
allow_negative_predictions=data.allow_negative_predictions,
single_use_exog_prefixes=data.single_use_exog_prefixes,
exogs__disallow_negative_coefficient=data.get_disallowed_negative_coefficients(),
)
mvlr.do_analysis()
if mvlr.validate(
Expand Down
16 changes: 16 additions & 0 deletions openenergyid/mvlr/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class IndependentVariableInput(BaseModel):
"Eg. `HDD_16.5` will be Heating Degree Days with a base temperature of 16.5°C, "
"`CDD_0` will be Cooling Degree Days with a base temperature of 0°C.",
)
allow_negative_coefficient: bool = Field(
default=True,
alias="allowNegativeCoefficient",
description="Whether the coefficient can be negative.",
)


class MultiVariableRegressionInput(BaseModel):
Expand Down Expand Up @@ -123,6 +128,17 @@ def data_frame(self) -> pd.DataFrame:

return frame

def get_disallowed_negative_coefficients(self) -> List[str]:
    """Collect the variable names whose coefficient may not be negative.

    Independent variables that allow a negative coefficient are ignored.
    For the remaining ones, the temperature-equivalent variable contributes
    each of its variants (when variants are set) instead of its own name;
    every other variable contributes its name.

    Returns
    -------
    list of str
        Names, in the order of `independent_variables`, for which a negative
        coefficient is disallowed.
    """
    disallowed: List[str] = []
    for variable in self.independent_variables:  # pylint: disable=not-an-iterable
        # Guard clause: nothing to record when negative coefficients are fine.
        if variable.allow_negative_coefficient:
            continue

        expands_to_variants = (
            variable.name == COLUMN_TEMPERATUREEQUIVALENT
            and variable.variants is not None
        )
        if expands_to_variants:
            # The restriction is recorded on the variants, not on the base name.
            disallowed.extend(variable.variants)
        else:
            disallowed.append(variable.name)
    return disallowed


######################
# MVLR Result Models #
Expand Down
41 changes: 27 additions & 14 deletions openenergyid/mvlr/mvlr.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
allow_negative_predictions: bool = False,
granularity: Granularity = None,
single_use_exog_prefixes: list[str] = None,
exogs__disallow_negative_coefficient: list[str] = None,
):
"""Parameters
----------
Expand Down Expand Up @@ -72,6 +73,8 @@ def __init__(
will be used as an independent variable.
Once the best fit using a variable with a given prefix is found, the other variables with the same
prefix will not be used as independent variables.
exogs__disallow_negative_coefficient : list of str, default=None
List of variable names for which the coefficient is not allowed to be negative.
"""
self.data = data.copy()
if y not in self.data.columns:
Expand All @@ -87,6 +90,7 @@ def __init__(
self.allow_negative_predictions = allow_negative_predictions
self.granularity = granularity
self.single_use_exog_prefixes = single_use_exog_prefixes
self.exogs__disallow_negative_coefficient = exogs__disallow_negative_coefficient
self._fit = None
self._list_of_fits = []
self.list_of_cverrors = []
Expand Down Expand Up @@ -161,6 +165,15 @@ def _do_analysis_no_cross_validation(self):
ref_fit.model.formula.rhs_termlist + [term],
)
fit = fm.ols(model_desc, data=self.data).fit()

# Check if the coefficient of the variable is allowed to be negative
if (
self.exogs__disallow_negative_coefficient is not None
and x in self.exogs__disallow_negative_coefficient
and fit.params[x] < 0
):
continue

if fit.bic < best_bic:
best_bic = fit.bic
best_fit = fit
Expand All @@ -174,20 +187,20 @@ def _do_analysis_no_cross_validation(self):
ref_fit.model.formula.rhs_termlist,
):
break
else:
self._list_of_fits.append(best_fit)
all_model_terms_dict.pop(best_x)

# Check if `best_x` starts with a prefix that should only be used once
# If so, remove all other variables with the same prefix from the list of candidates
if self.single_use_exog_prefixes:
for prefix in self.single_use_exog_prefixes:
if best_x.startswith(prefix):
all_model_terms_dict = {
k: v
for k, v in all_model_terms_dict.items()
if not k.startswith(prefix)
}

self._list_of_fits.append(best_fit)
all_model_terms_dict.pop(best_x)

# Check if `best_x` starts with a prefix that should only be used once
# If so, remove all other variables with the same prefix from the list of candidates
if self.single_use_exog_prefixes:
for prefix in self.single_use_exog_prefixes:
if best_x.startswith(prefix):
all_model_terms_dict = {
k: v
for k, v in all_model_terms_dict.items()
if not k.startswith(prefix)
}

self._fit = self._list_of_fits[-1]

Expand Down

0 comments on commit 100ada6

Please sign in to comment.