Skip to content

Commit

Permalink
Merge branch 'AUTOML-28-rd' into 'master'
Browse files Browse the repository at this point in the history
Added report deco fixes

See merge request ai-lab-pmo/mltools/automl/LightAutoML!29
  • Loading branch information
dev-rinchin committed Dec 13, 2024
2 parents 67e2e76 + ae8175c commit 0371973
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 63 deletions.
10 changes: 5 additions & 5 deletions examples/tutorials/Tutorial_8_CV_preset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2289,7 +2289,7 @@
}
],
"source": [
"accuracy = (OOFs == preds['label'].map(automl.reader.class_mapping)).mean()\n",
"accuracy = (OOFs == preds['label'].map(automl.reader.targets_mapping)).mean()\n",
"print(f'Out-of-fold accuracy: {accuracy}')"
]
},
Expand Down Expand Up @@ -2317,7 +2317,7 @@
}
],
"source": [
"cf_matrix = confusion_matrix(preds['label'].map(automl.reader.class_mapping), \n",
"cf_matrix = confusion_matrix(preds['label'].map(automl.reader.targets_mapping), \n",
" OOFs)\n",
"\n",
"plt.figure(figsize = (10, 10))\n",
Expand All @@ -2328,8 +2328,8 @@
"ax.set_xlabel('\\nPredicted Values')\n",
"ax.set_ylabel('Actual Values ');\n",
"\n",
"inverse_class_mapping = {y: x for x,y in automl.reader.class_mapping.items()}\n",
"labels = [inverse_class_mapping[i] for i in range(len(inverse_class_mapping))]\n",
"inverse_target_mapping = {y: x for x,y in automl.reader.targets_mapping.items()}\n",
"labels = [inverse_target_mapping[i] for i in range(len(inverse_target_mapping))]\n",
"ax.xaxis.set_ticklabels(labels, rotation = 90)\n",
"ax.yaxis.set_ticklabels(labels, rotation = 0)\n",
"\n",
Expand Down Expand Up @@ -2924,7 +2924,7 @@
}
],
"source": [
"TEs = pd.Series(np.argmax(sub[['pred_' + str(i) for i in range(10)]].values, axis = 1)).map(inverse_class_mapping)\n",
"TEs = pd.Series(np.argmax(sub[['pred_' + str(i) for i in range(10)]].values, axis = 1)).map(inverse_target_mapping)\n",
"TEs"
]
},
Expand Down
2 changes: 1 addition & 1 deletion lightautoml/addons/interpretation/lime.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def kernel(d, kernel_width):
self.tokenizer = _tokenizer_by_lang[lang](is_stemmer=False)
self.distance_metric = distance_metric

class_names = automl.reader.class_mapping
class_names = automl.reader.targets_mapping
if class_names is None:
if self.task_name == "reg":
class_names = [0]
Expand Down
20 changes: 18 additions & 2 deletions lightautoml/addons/utilization/utilization.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def fit_predict(

amls = [[] for _ in range(len(self.configs_list))]
aml_preds = [[] for _ in range(len(self.configs_list))]
targets_mapping = None
n_ms = 0
n_cfg = 0
upd_state_val = 0
Expand Down Expand Up @@ -324,6 +325,15 @@ def fit_predict(
log_file=log_file,
)

current_targets_mapping = automl.reader.targets_mapping

if targets_mapping is None:
targets_mapping = current_targets_mapping
else:
assert (
targets_mapping == current_targets_mapping
), "targets_mappings are different for different AutoML for some reason."

logger.info("=" * 50)

amls[n_cfg].append(MLPipeForAutoMLWrapper.from_automl(automl))
Expand Down Expand Up @@ -352,19 +362,25 @@ def fit_predict(

for preds, pipes in zip(aml_preds, amls):
inner_blend = deepcopy(self.inner_blend)
val_pred, inner_pipe = inner_blend.fit_predict(preds, pipes)
val_pred, inner_pipe = inner_blend.fit_predict(preds, pipes, targets_mapping=targets_mapping)
inner_pipe = [x.ml_algos[0].models[0] for x in inner_pipe]

inner_preds.append(val_pred)
inner_pipes.append(MLPipeForAutoMLWrapper.from_blended(inner_pipe, inner_blend))

# outer blend - blend of blends
if not self.return_all_predictions:
val_pred, self.outer_pipes = self.outer_blend.fit_predict(inner_preds, inner_pipes)
val_pred, self.outer_pipes = self.outer_blend.fit_predict(
inner_preds, inner_pipes, targets_mapping=targets_mapping
)
else:
val_pred = concatenate(inner_preds)
self.outer_pipes = inner_pipes

self.targets_order = (
sorted(targets_mapping, key=targets_mapping.get, reverse=False) if targets_mapping else None
)

# saving automl model with joblib
if path_to_save is not None:
# There is 1 parameter for model save:
Expand Down
16 changes: 8 additions & 8 deletions lightautoml/automl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,17 +190,17 @@ def fit_predict(

# Saving class mapping
if self.reader.task.name == "binary":
self.classes_ = [1]
self.targets_order = [1]
elif self.reader.task.name == "multi:reg":
self.classes_ = roles["target"]
self.targets_order = roles["target"]
elif self.reader.task.name == "reg":
self.classes_ = [roles["target"]]
self.targets_order = [roles["target"]]
elif self.reader.task.name == "multilabel":
self.classes_ = roles["target"]
self.targets_order = roles["target"]
else: # multiclass
self.classes_ = (
sorted(self.reader.class_mapping, key=self.reader.class_mapping.get, reverse=False)
if self.reader.class_mapping
self.targets_order = (
sorted(self.reader.targets_mapping, key=self.reader.targets_mapping.get, reverse=False)
if self.reader.targets_mapping
else None
)

Expand Down Expand Up @@ -278,7 +278,7 @@ def fit_predict(
else:
break

blended_prediction, last_pipes = self.blender.fit_predict(level_predictions, pipes, self.classes_)
blended_prediction, last_pipes = self.blender.fit_predict(level_predictions, pipes, self.targets_order)
self.levels.append(last_pipes)

self.reader.upd_used_features(remove=list(set(self.reader.used_features) - set(self.collect_used_feats())))
Expand Down
19 changes: 12 additions & 7 deletions lightautoml/automl/blend.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Sequence
from typing import Tuple
from typing import cast
from typing import Dict

import numpy as np

Expand Down Expand Up @@ -42,7 +43,7 @@ def outp_dim(self) -> int: # noqa: D102
return self._outp_dim

def fit_predict(
self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], targets_mapping: dict
self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], targets_mapping: Dict
) -> Tuple[LAMLDataset, Sequence[MLPipeline]]:
"""Wraps custom ``._fit_predict`` methods of blenders.
Expand All @@ -54,6 +55,7 @@ def fit_predict(
Args:
predictions: Sequence of datasets with predictions.
pipes: Sequence of pipelines.
targets_mapping: Mapping for target classes.
Returns:
Single prediction dataset and sequence of pruned pipelines.
Expand All @@ -66,13 +68,14 @@ def fit_predict(
return self._fit_predict(predictions, pipes, targets_mapping)

def _fit_predict(
self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline]
self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], targets_mapping: Dict
) -> Tuple[LAMLDataset, Sequence[MLPipeline]]:
"""Defines how to fit, predict and prune - Abstract.
Args:
predictions: Sequence of datasets with predictions.
pipes: Sequence of pipelines.
targets_mapping: Mapping for target classes.
Returns: # noqa: DAR202
Single prediction dataset and sequence of pruned ``MLPipelines``.
Expand Down Expand Up @@ -134,7 +137,7 @@ def split_models(self, predictions: Sequence[LAMLDataset]) -> Tuple[Sequence[LAM

return splitted_preds, model_idx, pipe_idx

def _set_metadata(self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], targets_mapping: dict):
def _set_metadata(self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], targets_mapping: Dict):

pred0 = predictions[0]
pipe0 = pipes[0]
Expand Down Expand Up @@ -170,7 +173,7 @@ class BestModelSelector(Blender):
"""

def _fit_predict(
self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline]
self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], targets_mapping: Dict
) -> Tuple[LAMLDataset, Sequence[MLPipeline]]:
"""Simple fit - just take one best.
Expand Down Expand Up @@ -241,19 +244,20 @@ def _get_mean_pred(self, splitted_preds: Sequence[NumpyDataset]) -> NumpyDataset
return outp

def _fit_predict(
self, predictions: Sequence[NumpyDataset], pipes: Sequence[MLPipeline]
self, predictions: Sequence[NumpyDataset], pipes: Sequence[MLPipeline], targets_mapping: Dict
) -> Tuple[NumpyDataset, Sequence[MLPipeline]]:
"""Simple fit_predict - just average and no prune.
Args:
predictions: Sequence of predictions.
pipes: Sequence of pipelines.
targets_mapping: Mapping for target classes.
Returns:
Single prediction dataset and Sequence of pruned pipelines.
"""
self._set_metadata(predictions, pipes)
self._set_metadata(predictions, pipes, targets_mapping)
splitted_preds, _, __ = cast(List[NumpyDataset], self.split_models(predictions))

outp = self._get_mean_pred(splitted_preds)
Expand Down Expand Up @@ -438,13 +442,14 @@ def _prune_pipe(
return new_pipes, wts

def _fit_predict(
self, predictions: Sequence[NumpyDataset], pipes: Sequence[MLPipeline], targets_mapping: dict
self, predictions: Sequence[NumpyDataset], pipes: Sequence[MLPipeline], targets_mapping: Dict
) -> Tuple[NumpyDataset, Sequence[MLPipeline]]:
"""Perform coordinate descent.
Args:
predictions: Sequence of prediction datasets.
pipes: Sequence of pipelines.
targets_mapping: Mapping for target classes.
Returns:
Single prediction dataset and Sequence of pruned pipelines.
Expand Down
2 changes: 1 addition & 1 deletion lightautoml/automl/presets/tabular_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ def get_feature_scores(
used_feats.update(pipe.ml_algos[0].models[0][0].collect_used_feats())

fi = calc_feats_permutation_imps(
self,
automl,
list(used_feats),
data,
automl.reader.target,
Expand Down
5 changes: 3 additions & 2 deletions lightautoml/automl/presets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def calc_feats_permutation_imps(model, used_feats, data, target, metric, silent=

# convert holdout data to LAMLDataset
data = model.reader.read(data, add_array_attrs=False)
used_feats_leveled[0] = [feature for feature in data.features if feature not in target]

# iterate through all the levels
for level in sorted(used_feats_leveled.keys()):
Expand Down Expand Up @@ -226,8 +227,8 @@ def plot_pdp_with_distribution(
else:
g0 = sns.boxplot(data=data, x="x", y="y", ax=axs[0], showfliers=False, color="b")
else:
if reader.class_mapping:
classes = sorted(reader.class_mapping, key=reader.class_mapping.get)[:top_n_classes]
if reader.targets_mapping:
classes = sorted(reader.targets_mapping, key=reader.targets_mapping.get)[:top_n_classes]
else:
classes = np.arange(min(n_classes, top_n_classes))
data = pd.concat(
Expand Down
32 changes: 16 additions & 16 deletions lightautoml/reader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def __init__(
}

self.params = kwargs
self.class_mapping: Optional[Union[Mapping, Dict[str, Mapping]]] = None
self.targets_mapping: Optional[Union[Mapping, Dict[str, Mapping]]] = None
self._n_classes: Optional[int] = None

def fit_read(
Expand Down Expand Up @@ -415,16 +415,16 @@ def _create_target(self, target: Union[Series, DataFrame]):
Transformed target.
"""
self.class_mapping = None
self.targets_mapping = None

if (self.task.name == "binary") or (self.task.name == "multiclass"):
target, self.class_mapping = self.check_class_target(target)
target, self.targets_mapping = self.check_class_target(target)
elif self.task.name == "multilabel":
self.class_mapping = {}
self.targets_mapping = {}

for col in target.columns:
target_col, class_mapping = self.check_class_target(target[col])
self.class_mapping[col] = class_mapping
target_col, targets_mapping = self.check_class_target(target[col])
self.targets_mapping[col] = targets_mapping
target.loc[:, col] = target_col.values

self._n_classes = len(target.columns) * 2
Expand Down Expand Up @@ -470,8 +470,8 @@ def check_class_target(self, target) -> Tuple[pd.Series, Optional[Union[Mapping,
return target, None

# case - create mapping
class_mapping = {n: x for (x, n) in enumerate(unqiues)}
return target.map(class_mapping).astype(np.int32), class_mapping
targets_mapping = {n: x for (x, n) in enumerate(unqiues)}
return target.map(targets_mapping).astype(np.int32), targets_mapping

def _get_default_role_from_str(self, name) -> RoleType:
"""Get default role for string name according to automl's defaults and user settings.
Expand Down Expand Up @@ -576,17 +576,17 @@ def read(self, data: DataFrame, features_names: Any = None, add_array_attrs: boo
except KeyError:
continue

if array_attr == "target" and self.class_mapping is not None:
if array_attr == "target" and self.targets_mapping is not None:
if len(val.shape) == 1:
val = Series(
val.map(self.class_mapping).values,
val.map(self.targets_mapping).values,
index=data.index,
name=col_name,
)
else:
for col in val.columns:
if self.class_mapping[col] is not None:
val.loc[:, col] = val.loc[:, col].map(self.class_mapping[col])
if self.targets_mapping[col] is not None:
val.loc[:, col] = val.loc[:, col].map(self.targets_mapping[col])

kwargs[array_attr] = val

Expand Down Expand Up @@ -1105,13 +1105,13 @@ def read(self, data, features_names: Any = None, add_array_attrs: bool = False)
except KeyError:
continue

if array_attr == "target" and self.class_mapping is not None:
if array_attr == "target" and self.targets_mapping is not None:
if len(val.shape) == 1:
val = Series(val.map(self.class_mapping).values, index=plain_data.index, name=col_name)
val = Series(val.map(self.targets_mapping).values, index=plain_data.index, name=col_name)
else:
for col in val.columns:
if self.class_mapping[col] is not None:
val.loc[:, col] = val.loc[:, col].map(self.class_mapping[col])
if self.targets_mapping[col] is not None:
val.loc[:, col] = val.loc[:, col].map(self.targets_mapping[col])
kwargs[array_attr] = val

dataset = PandasDataset(
Expand Down
Loading

0 comments on commit 0371973

Please sign in to comment.