Constrain random seed to NumPy's valid range (0 to 2**32 - 1)

dmitry-brazhenko · Jun 16, 2024 · 3cf01ab · 3cf01ab
1 parent cace5ed
commit 3cf01ab
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 8 deletions.
diff --git a/data_generation/data_generator.py b/data_generation/data_generator.py
@@ -31,7 +31,7 @@ def describe_dataset(df):
 
 def generate_synthetic_data(num_users, countries, platforms, user_segments, ab_groups, base_increase_percentage, noise_level=1.0, correlation_level=0.5, seed=40):
     # Set seed for reproducibility
-    np.random.seed(seed)
+    np.random.seed(seed  % (2**32 - 1))
 
     # Generate synthetic data
     data = {

diff --git a/examples/research.ipynb b/examples/research.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -22,9 +22,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:experiment_analysis.experiment_analysis:Analyzing feature 'num_users' with fixed params: {'countries': ['US', 'UK', 'DE', 'FR', 'CA', 'AU', 'JP', 'IN'], 'platforms': ['iOS', 'Android', 'Web', 'Desktop'], 'user_segments': ['Segment_1', 'Segment_2', 'Segment_3', 'Segment_4'], 'ab_groups': ['a1', 'a2', 'b'], 'noise_level': 0.5, 'correlation_level': 0.5, 'base_increase_percentage': 0.0}\n",
+      " 40%|█████████████████████████████████████████████████████▏                                                                               | 4/10 [01:23<02:05, 20.84s/it]\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "Seed must be between 0 and 2**32 - 1",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 26\u001b[0m\n\u001b[1;32m     21\u001b[0m x_params \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m     22\u001b[0m     \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnum_users\u001b[39m\u001b[38;5;124m'\u001b[39m: np\u001b[38;5;241m.\u001b[39marange(\u001b[38;5;241m1000\u001b[39m, \u001b[38;5;241m20000\u001b[39m, \u001b[38;5;241m2000\u001b[39m)  \u001b[38;5;66;03m# Range of user numbers to be used on the X-axis\u001b[39;00m\n\u001b[1;32m     23\u001b[0m }\n\u001b[1;32m     25\u001b[0m \u001b[38;5;66;03m# Call the function to analyze and plot for all selected features\u001b[39;00m\n\u001b[0;32m---> 26\u001b[0m \u001b[43manalyze_and_plot_features\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     27\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfixed_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     28\u001b[0m \u001b[43m    \u001b[49m\u001b[43mvarying_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     29\u001b[0m \u001b[43m    \u001b[49m\u001b[43mx_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     30\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;66;43;03m# num_iterations=5,  # Number of iterations to perform for each analysis\u001b[39;49;00m\n\u001b[1;32m     31\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Original higher number of iterations, can be used for more thorough analysis\u001b[39;49;00m\n\u001b[1;32m     32\u001b[0m \u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/GithubProjects/ab-test-advanced-toolkit/examples/../experiment_analysis/experiment_analysis.py:164\u001b[0m, in \u001b[0;36manalyze_and_plot_features\u001b[0;34m(fixed_params, varying_params, x_params, num_iterations)\u001b[0m\n\u001b[1;32m    162\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAnalyzing feature \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mx_feature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m with fixed params: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparams\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    163\u001b[0m temp_values_ranges \u001b[38;5;241m=\u001b[39m {x_feature: x_values}\n\u001b[0;32m--> 164\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43manalyze_feature\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtemp_values_ranges\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx_feature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    165\u001b[0m plot_feature_results(results, x_feature, params)\n",
+      "File \u001b[0;32m~/GithubProjects/ab-test-advanced-toolkit/examples/../experiment_analysis/experiment_analysis.py:50\u001b[0m, in \u001b[0;36manalyze_feature\u001b[0;34m(values_ranges, fixed_params, feature, num_iterations)\u001b[0m\n\u001b[1;32m     48\u001b[0m \u001b[38;5;66;03m# Generate seed using the hash of parameters and iteration number\u001b[39;00m\n\u001b[1;32m     49\u001b[0m seed \u001b[38;5;241m=\u001b[39m get_seed(params, i)\n\u001b[0;32m---> 50\u001b[0m generated_data \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate_synthetic_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     51\u001b[0m analysis_results \u001b[38;5;241m=\u001b[39m run_analysis(generated_data)\n\u001b[1;32m     53\u001b[0m no_enhancement_values\u001b[38;5;241m.\u001b[39mappend(analysis_results[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mno_enhancement\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mresult\u001b[38;5;241m.\u001b[39mstat_significance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
+      "File \u001b[0;32m~/GithubProjects/ab-test-advanced-toolkit/examples/../data_generation/data_generator.py:34\u001b[0m, in \u001b[0;36mgenerate_synthetic_data\u001b[0;34m(num_users, countries, platforms, user_segments, ab_groups, base_increase_percentage, noise_level, correlation_level, seed)\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_synthetic_data\u001b[39m(num_users, countries, platforms, user_segments, ab_groups, base_increase_percentage, noise_level\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.0\u001b[39m, correlation_level\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m, seed\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m40\u001b[39m):\n\u001b[1;32m     33\u001b[0m     \u001b[38;5;66;03m# Set seed for reproducibility\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m     np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(seed)\n\u001b[1;32m     36\u001b[0m     \u001b[38;5;66;03m# Generate synthetic data\u001b[39;00m\n\u001b[1;32m     37\u001b[0m     data \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m     38\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124muserid\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m, num_users \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m),\n\u001b[1;32m     39\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcountry\u001b[39m\u001b[38;5;124m'\u001b[39m: np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mchoice(countries, num_users),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     44\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mengagement_score\u001b[39m\u001b[38;5;124m'\u001b[39m: np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrand(num_users) \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m10\u001b[39m,  \u001b[38;5;66;03m# Random score between 0 and 10\u001b[39;00m\n\u001b[1;32m     45\u001b[0m     }\n",
+      "File \u001b[0;32mnumpy/random/mtrand.pyx:4805\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.seed\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mnumpy/random/mtrand.pyx:250\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.seed\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m_mt19937.pyx:168\u001b[0m, in \u001b[0;36mnumpy.random._mt19937.MT19937._legacy_seeding\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m_mt19937.pyx:182\u001b[0m, in \u001b[0;36mnumpy.random._mt19937.MT19937._legacy_seeding\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: Seed must be between 0 and 2**32 - 1"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "\n",
@@ -143,7 +170,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -157,9 +184,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }