Skip to content

Commit

Permalink
multiple fixes for jupyter notebook and stat significance
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmitry-brazhenko committed Aug 12, 2024
1 parent 3aec604 commit 0012bba
Show file tree
Hide file tree
Showing 4 changed files with 456 additions and 458 deletions.
18 changes: 5 additions & 13 deletions ab_test_advanced_toolkit/stat_significance.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import List, Union, Optional

import numpy as np
from catboost import CatBoostRegressor
from scipy import stats
from sklearn.compose import ColumnTransformer
from sklearn.dummy import DummyRegressor
Expand Down Expand Up @@ -154,18 +155,9 @@ def prepare_dataset(pretest_data: pd.DataFrame, user_properties: Optional[pd.Dat

X_control, categorical_features = prepare_dataset(control_pretest_data, user_properties)

preprocessor = ColumnTransformer(
transformers=[
('cat', TargetEncoder(smoothing=0.8), categorical_features)
],
remainder='passthrough'
)

# Embed the preprocessing step into a pipeline with XGBRegressor
model: Union[XGBRegressor, ZeroPredictor] = Pipeline(steps=[
('preprocessor', preprocessor),
('regressor', XGBRegressor(verbosity=0))
])
# model = CatBoostRegressor(loss_function='RMSE', cat_features=categorical_features, verbose=False)
model = CatBoostRegressor(iterations=500, learning_rate=0.1, depth=4, loss_function='RMSE', cat_features=categorical_features)

logger.debug(f"use_enhansement: {use_enhansement}")
if use_enhansement:
try:
Expand All @@ -179,7 +171,7 @@ def prepare_dataset(pretest_data: pd.DataFrame, user_properties: Optional[pd.Dat

x_train, _ = prepare_dataset(merged_pretest, user_properties)
y_train = merged_intest[value_column]
model.fit(x_train.reset_index(drop=True), y_train.reset_index(drop=True))
model.fit(x_train.reset_index(drop=True), y_train.reset_index(drop=True), verbose=False)

logger.debug(f"Model was fit")
except Exception as e:
Expand Down
5 changes: 4 additions & 1 deletion data_generation/data_generator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,8 +101,11 @@ def generate_synthetic_data(num_users=1000, alpha=0.5, countries=['US', 'UK', 'D
intermediate_value = generate_intermediate_in_test_value(age, engagement_score, country, platform, user_segment, noise_level)

in_test_value_alpha = alpha * pre_test_value + (1 - alpha) * intermediate_value
# in_test_value_increased = in_test_value_alpha * (1 + base_increase_percentage) + np.random.normal(0, noise_level)
in_test_value_increased = in_test_value_alpha
if ab_group.startswith('b'):
in_test_value_increased = in_test_value_increased * (1 + base_increase_percentage)

in_test_value_increased = in_test_value_increased + np.random.normal(0, noise_level)
data['userid'].append(user_id)
data['country'].append(country)
data['platform'].append(platform)
Expand Down
447 changes: 429 additions & 18 deletions examples/data-generation-manual-test.ipynb

Large diffs are not rendered by default.

444 changes: 18 additions & 426 deletions examples/research.ipynb

Large diffs are not rendered by default.

0 comments on commit 0012bba

Please sign in to comment.