Skip to content

Commit

Permalink
fix test_load_errors and renaming
Browse files (browse the repository at this point in the history)
  • Loading branch information
cyberosa committed Dec 20, 2023
1 parent 5a25124 commit 9dc9fa9
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 31 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,33 +75,35 @@
"outputs": [],
"source": [
"# Dropped to 30 reps to save time. v0.2.5.\n",
"reps = 30\n",
"ci = 95\n",
"reps=30\n",
"ci=95\n",
"POPULATION_N = 10000\n",
"SAMPLE_N = 10\n",
"\n",
"# Create data for hedges g and cohens d.\n",
"CONTROL_MEAN = np.random.randint(1, 1000)\n",
"POP_SD = np.random.randint(1, 15)\n",
"POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n",
"POP_SD = np.random.randint(1, 15)\n",
"POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n",
"\n",
"TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)\n",
"norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)\n",
"c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n",
"t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n",
"t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n",
"\n",
"std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n",
"\n",
"std_diff_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n",
"\n",
"\n",
"# Create mean_diff data\n",
"CONTROL_MEAN = np.random.randint(1, 1000)\n",
"POP_SD = np.random.randint(1, 15)\n",
"TRUE_DIFFERENCE = np.random.randint(-POP_SD * 5, POP_SD * 5)\n",
"POP_SD = np.random.randint(1, 15)\n",
"TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)\n",
"\n",
"c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n",
"t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_DIFFERENCE, **norm_sample_kwargs)\n",
"t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)\n",
"\n",
"mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n",
"\n",
"mean_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n",
"\n",
"\n",
"# Create median_diff data\n",
Expand All @@ -110,13 +112,14 @@
"\n",
"skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)\n",
"skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)\n",
"skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100 + MEDIAN_DIFFERENCE)\n",
"skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)\n",
"\n",
"sample_kwargs = dict(replace=False, size=SAMPLE_N)\n",
"skewsample1 = np.random.choice(skewpop1, **sample_kwargs)\n",
"skewsample2 = np.random.choice(skewpop2, **sample_kwargs)\n",
"\n",
"median_df = pd.DataFrame({\"Control\": skewsample1, \"Test\": skewsample2})\n",
"median_df = pd.DataFrame({'Control' : skewsample1, 'Test': skewsample2})\n",
"\n",
"\n",
"\n",
"# Create two populations with a 50% overlap.\n",
Expand All @@ -125,24 +128,25 @@
"\n",
"pop_kwargs = dict(scale=SD, size=POPULATION_N)\n",
"pop1 = norm.rvs(loc=100, **pop_kwargs)\n",
"pop2 = norm.rvs(loc=100 + CD_DIFFERENCE, **pop_kwargs)\n",
"pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)\n",
"\n",
"sample_kwargs = dict(replace=False, size=SAMPLE_N)\n",
"sample1 = np.random.choice(pop1, **sample_kwargs)\n",
"sample2 = np.random.choice(pop2, **sample_kwargs)\n",
"\n",
"cd_df = pd.DataFrame({\"Control\": sample1, \"Test\": sample2})\n",
"cd_df = pd.DataFrame({'Control' : sample1, 'Test': sample2})\n",
"\n",
"\n",
"\n",
"# Create several CIs and see if the true population difference lies within.\n",
"error_count_cohens_d = 0\n",
"error_count_hedges_g = 0\n",
"error_count_mean_diff = 0\n",
"error_count_median_diff = 0\n",
"error_count_cohens_d = 0\n",
"error_count_hedges_g = 0\n",
"error_count_mean_diff = 0\n",
"error_count_median_diff = 0\n",
"error_count_cliffs_delta = 0\n",
"\n",
"for i in range(0, reps):\n",
" # print(i) # for debug.\n",
" print(i) # for debug.\n",
" # pick a random seed\n",
" rnd_sd = np.random.randint(0, 999999)\n",
" load_kwargs = dict(ci=ci, random_seed=rnd_sd)\n",
Expand All @@ -151,44 +155,48 @@
" cd = std_diff_data.cohens_d.results\n",
" # print(\"cohen's d\") # for debug.\n",
" cd_low, cd_high = float(cd.bca_low), float(cd.bca_high)\n",
" if not cd_low < POP_D < cd_high:\n",
" if cd_low < POP_D < cd_high is False:\n",
" error_count_cohens_d += 1\n",
"\n",
" hg = std_diff_data.hedges_g.results\n",
" # print(\"hedges' g\") # for debug.\n",
" hg_low, hg_high = float(hg.bca_low), float(hg.bca_high)\n",
" if not hg_low < POP_D < hg_high:\n",
" if hg_low < POP_D < hg_high is False:\n",
" error_count_hedges_g += 1\n",
"\n",
"\n",
" mean_diff_data = load(data=mean_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
" mean_d = mean_diff_data.mean_diff.results\n",
" # print(\"mean diff\") # for debug.\n",
" mean_d_low, mean_d_high = float(mean_d.bca_low), float(mean_d.bca_high)\n",
" if not mean_d_low < TRUE_DIFFERENCE < mean_d_high:\n",
" if mean_d_low < TRUE_DIFFERENCE < mean_d_high is False:\n",
" error_count_mean_diff += 1\n",
"\n",
" median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
"\n",
" median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"),\n",
" **load_kwargs)\n",
" median_d = median_diff_data.median_diff.results\n",
" # print(\"median diff\") # for debug.\n",
" median_d_low, median_d_high = float(median_d.bca_low), float(median_d.bca_high)\n",
" if not median_d_low < MEDIAN_DIFFERENCE < median_d_high:\n",
" if median_d_low < MEDIAN_DIFFERENCE < median_d_high is False:\n",
" error_count_median_diff += 1\n",
"\n",
"\n",
" cd_data = load(data=cd_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
" cliffs = cd_data.cliffs_delta.results\n",
" # print(\"cliff's delta\") # for debug.\n",
" low, high = float(cliffs.bca_low), float(cliffs.bca_high)\n",
" if not low < 0.5 < high:\n",
" if low < 0.5 < high is False:\n",
" error_count_cliffs_delta += 1\n",
"\n",
"\n",
"max_errors = int(np.ceil(reps * (100 - ci) / 100))\n",
"\n",
"assert error_count_cohens_d <= max_errors\n",
"assert error_count_hedges_g <= max_errors\n",
"assert error_count_mean_diff <= max_errors\n",
"assert error_count_median_diff <= max_errors\n",
"assert error_count_cliffs_delta <= max_errors"
"assert error_count_cohens_d <= max_errors\n",
"assert error_count_hedges_g <= max_errors\n",
"assert error_count_mean_diff <= max_errors\n",
"assert error_count_median_diff <= max_errors\n",
"assert error_count_cliffs_delta <= max_errors\n"
]
},
{
Expand All @@ -205,6 +213,10 @@
"display_name": "python3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion nbs/tests/test_load_errors.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_wrong_params_combinations():

assert error_msg in str(excinfo.value)

error_msg = "`proportional` and `delta` cannot be True at the same time."
error_msg = "`proportional` and `delta2` cannot be True at the same time."
with pytest.raises(ValueError) as excinfo:
my_data = load(
dummy_df,
Expand Down

0 comments on commit 9dc9fa9

Please sign in to comment.