Skip to content

Commit

Permalink
Update ReadMe and docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasMeissnerDS committed Oct 15, 2023
1 parent c3cbad3 commit 60ba558
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 12 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
[![python](https://img.shields.io/badge/Python-3.10-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)

A lightweight and fast auto-ml library. This is the successor of the
e2eml automl library. While e2eml tried to cover many model
architectures and a lot of different preprocessing options,
A lightweight and fast auto-ml library.
BlueCast focuses on a few model architectures (on default Xgboost
only) and a few preprocessing options (only what is
needed for Xgboost). This allows for a much faster development
Expand Down Expand Up @@ -486,6 +484,8 @@ class CustomModel(BaseClassMlModel):
) -> None:
self.model = LogisticRegression()
self.model.fit(x_train, y_train)
# if you wish to track experiments using your own ExperimentTracker, add it here
# or in the fit method itself

def predict(self, df: pd.DataFrame) -> Tuple[PredictedProbas, PredictedClasses]:
predicted_probas = self.model.predict_proba(df)
Expand Down
14 changes: 7 additions & 7 deletions bluecast/blueprints/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,19 @@ class BlueCast:
:param :class_problem: Takes a string containing the class problem type. Either "binary" or "multiclass".
:param :target_column: Takes a string containing the name of the target column.
:param :cat_columns: Takes a list of strings containing the names of the categorical columns. If not provided,
BlueCast will infer these automatically.
BlueCast will infer these automatically.
:param :date_columns: Takes a list of strings containing the names of the date columns. If not provided,
BlueCast will infer these automatically.
BlueCast will infer these automatically.
:param :time_split_column: Takes a string containing the name of the time split column. If not provided,
BlueCast will not split the data by time or order, but do a random split instead.
BlueCast will not split the data by time or order, but do a random split instead.
:param :ml_model: Takes an instance of a XgboostModel class. If not provided, BlueCast will instantiate one.
This is an API to pass any model class. Inherit the baseclass from ml_modelling.base_model.BaseModel.
This is an API to pass any model class. Inherit the baseclass from ml_modelling.base_model.BaseModel.
:param custom_preprocessor: Takes an instance of a CustomPreprocessing class. Allows users to inject custom
preprocessing steps which take place right after the train test spit.
preprocessing steps which take place right after the train test split.
:param custom_last_mile_computation: Takes an instance of a CustomPreprocessing class. Allows users to inject custom
preprocessing steps which take place right before the model training.
preprocessing steps which take place right before the model training.
:param experiment_tracker: Takes an instance of an ExperimentTracker class. If not provided this will be initialized
automatically.
automatically.
"""

def __init__(
Expand Down
6 changes: 4 additions & 2 deletions bluecast/blueprints/cast_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@


class BlueCastCV:
"""Wrapper to train and predict multiple blueCast intsances.
"""Wrapper to train and predict multiple BlueCast instances.
A custom splitter can be provided."""
Check the BlueCast class documentation for additional parameter details.
A custom splitter can be provided.
"""

def __init__(
self,
Expand Down
1 change: 1 addition & 0 deletions bluecast/config/training_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class TrainingConfig(BaseModel):
categorical encoding is done via a ML algorithm. If False, the categorical encoding is done via a target
encoding in the preprocessing steps. See the ReadMe for more details.
:param show_detailed_tuning_logs: Whether to show detailed tuning logs. Not used when custom ML model is passed.
:param experiment_name: Name of the experiment. Will be logged inside the ExperimentTracker.
"""

global_random_state: int = 10
Expand Down
27 changes: 27 additions & 0 deletions bluecast/experimentation/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@


class ExperimentTracker(BaseClassExperimentTracker):
"""
Default implementation of ExperimentTracker used in BlueCast
and BlueCastCV pipelines. This triggers automatically as long
as the default Xgboost model is used. For custom ml models
users need to create their own Tracker. The base class from
bluecast.config.base_classes can be used as an inspiration.
"""

def __init__(self):
self.experiment_id: List[Union[int, str, float]] = []
self.experiment_name: List[Union[int, str, float]] = []
Expand All @@ -33,6 +41,19 @@ def add_results(
metric_used: str,
metric_higher_is_better: bool,
) -> None:
"""
Add an individual experiment result into the tracker.
:param experiment_id: Sequential id. Make sure to add an increment.
:param score_category: Choose one of ["simple_train_test_score", "cv_score", "oof_score"].
"simple_train_test_score" is the default where a simple train-test split is done. "cv_score" is called
when cross validation has been enabled in the TrainingConfig.
:param training_config: TrainingConfig instance from bluecast.config.training_config.
:param model_parameters: Dictionary with parameters of ml model (i.e. learning rate)
:param eval_scores: The actual score of the experiment.
:param metric_used: The name of the eval metric.
:param metric_higher_is_better: True or False.
"""
self.experiment_id.append(experiment_id)
self.score_category.append(score_category)
try:
Expand All @@ -46,6 +67,12 @@ def add_results(
self.created_at.append(datetime.utcnow())

def retrieve_results_as_df(self) -> pd.DataFrame:
"""
Convert ExperimentTracker information into a Pandas DataFrame.
In the default implementation this contains TrainingConfig, XgboostConfig, hyperparameters, eval metric
and score.
"""
model_parameters_df = pd.DataFrame(self.model_parameters)
training_df = pd.DataFrame(self.training_configs)

Expand Down
Binary file modified dist/bluecast-0.40-py3-none-any.whl
Binary file not shown.
Binary file modified dist/bluecast-0.40.tar.gz
Binary file not shown.

0 comments on commit 60ba558

Please sign in to comment.