Skip to content

Commit

Permalink
random seed upper limit
Browse files: browse the repository at this point in the history
  • Loading branch information
dmitry-brazhenko committed Jun 16, 2024
1 parent cace5ed commit 3cf01ab
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
2 changes: 1 addition & 1 deletion data_generation/data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def describe_dataset(df):

def generate_synthetic_data(num_users, countries, platforms, user_segments, ab_groups, base_increase_percentage, noise_level=1.0, correlation_level=0.5, seed=40):
# Set seed for reproducibility
np.random.seed(seed)
np.random.seed(seed % (2**32 - 1))

# Generate synthetic data
data = {
Expand Down
41 changes: 34 additions & 7 deletions examples/research.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -22,9 +22,36 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:experiment_analysis.experiment_analysis:Analyzing feature 'num_users' with fixed params: {'countries': ['US', 'UK', 'DE', 'FR', 'CA', 'AU', 'JP', 'IN'], 'platforms': ['iOS', 'Android', 'Web', 'Desktop'], 'user_segments': ['Segment_1', 'Segment_2', 'Segment_3', 'Segment_4'], 'ab_groups': ['a1', 'a2', 'b'], 'noise_level': 0.5, 'correlation_level': 0.5, 'base_increase_percentage': 0.0}\n",
" 40%|█████████████████████████████████████████████████████▏ | 4/10 [01:23<02:05, 20.84s/it]\n"
]
},
{
"ename": "ValueError",
"evalue": "Seed must be between 0 and 2**32 - 1",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 26\u001b[0m\n\u001b[1;32m 21\u001b[0m x_params \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 22\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnum_users\u001b[39m\u001b[38;5;124m'\u001b[39m: np\u001b[38;5;241m.\u001b[39marange(\u001b[38;5;241m1000\u001b[39m, \u001b[38;5;241m20000\u001b[39m, \u001b[38;5;241m2000\u001b[39m) \u001b[38;5;66;03m# Range of user numbers to be used on the X-axis\u001b[39;00m\n\u001b[1;32m 23\u001b[0m }\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# Call the function to analyze and plot for all selected features\u001b[39;00m\n\u001b[0;32m---> 26\u001b[0m \u001b[43manalyze_and_plot_features\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43mfixed_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mvarying_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 29\u001b[0m \u001b[43m \u001b[49m\u001b[43mx_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# num_iterations=5, # Number of iterations to perform for each analysis\u001b[39;49;00m\n\u001b[1;32m 31\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Original higher number of iterations, can be used for more thorough analysis\u001b[39;49;00m\n\u001b[1;32m 32\u001b[0m \u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/GithubProjects/ab-test-advanced-toolkit/examples/../experiment_analysis/experiment_analysis.py:164\u001b[0m, in \u001b[0;36manalyze_and_plot_features\u001b[0;34m(fixed_params, varying_params, x_params, num_iterations)\u001b[0m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAnalyzing feature \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mx_feature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m with fixed params: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparams\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 163\u001b[0m temp_values_ranges \u001b[38;5;241m=\u001b[39m {x_feature: x_values}\n\u001b[0;32m--> 164\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43manalyze_feature\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtemp_values_ranges\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx_feature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m plot_feature_results(results, x_feature, params)\n",
"File \u001b[0;32m~/GithubProjects/ab-test-advanced-toolkit/examples/../experiment_analysis/experiment_analysis.py:50\u001b[0m, in \u001b[0;36manalyze_feature\u001b[0;34m(values_ranges, fixed_params, feature, num_iterations)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;66;03m# Generate seed using the hash of parameters and iteration number\u001b[39;00m\n\u001b[1;32m 49\u001b[0m seed \u001b[38;5;241m=\u001b[39m get_seed(params, i)\n\u001b[0;32m---> 50\u001b[0m generated_data \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate_synthetic_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 51\u001b[0m analysis_results \u001b[38;5;241m=\u001b[39m run_analysis(generated_data)\n\u001b[1;32m 53\u001b[0m no_enhancement_values\u001b[38;5;241m.\u001b[39mappend(analysis_results[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mno_enhancement\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mresult\u001b[38;5;241m.\u001b[39mstat_significance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
"File \u001b[0;32m~/GithubProjects/ab-test-advanced-toolkit/examples/../data_generation/data_generator.py:34\u001b[0m, in \u001b[0;36mgenerate_synthetic_data\u001b[0;34m(num_users, countries, platforms, user_segments, ab_groups, base_increase_percentage, noise_level, correlation_level, seed)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_synthetic_data\u001b[39m(num_users, countries, platforms, user_segments, ab_groups, base_increase_percentage, noise_level\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.0\u001b[39m, correlation_level\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m, seed\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m40\u001b[39m):\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# Set seed for reproducibility\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(seed)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# Generate synthetic data\u001b[39;00m\n\u001b[1;32m 37\u001b[0m data \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124muserid\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m, num_users \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m),\n\u001b[1;32m 39\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcountry\u001b[39m\u001b[38;5;124m'\u001b[39m: np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mchoice(countries, num_users),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mengagement_score\u001b[39m\u001b[38;5;124m'\u001b[39m: np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrand(num_users) \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m10\u001b[39m, \u001b[38;5;66;03m# Random score between 0 and 10\u001b[39;00m\n\u001b[1;32m 45\u001b[0m }\n",
"File \u001b[0;32mnumpy/random/mtrand.pyx:4805\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.seed\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mnumpy/random/mtrand.pyx:250\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.seed\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m_mt19937.pyx:168\u001b[0m, in \u001b[0;36mnumpy.random._mt19937.MT19937._legacy_seeding\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m_mt19937.pyx:182\u001b[0m, in \u001b[0;36mnumpy.random._mt19937.MT19937._legacy_seeding\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Seed must be between 0 and 2**32 - 1"
]
}
],
"source": [
"import numpy as np\n",
"\n",
Expand Down Expand Up @@ -143,7 +170,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -157,9 +184,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

0 comments on commit 3cf01ab

Please sign in to comment.