From 9dc9fa9735e9d6d2a63e020b6a14547f2d0d4dee Mon Sep 17 00:00:00 2001
From: cyberosa
Date: Wed, 20 Dec 2023 23:28:02 +0100
Subject: [PATCH] fix test_load_errors and renaming

---
 ...ynb => test_99_confidence_intervals.ipynb} | 72 +++++++++++--------
 nbs/tests/test_load_errors.py                 |  2 +-
 2 files changed, 43 insertions(+), 31 deletions(-)
 rename nbs/tests/{test_99_confint.ipynb => test_99_confidence_intervals.ipynb} (76%)

diff --git a/nbs/tests/test_99_confint.ipynb b/nbs/tests/test_99_confidence_intervals.ipynb
similarity index 76%
rename from nbs/tests/test_99_confint.ipynb
rename to nbs/tests/test_99_confidence_intervals.ipynb
index 1f3aef6a..2920463d 100644
--- a/nbs/tests/test_99_confint.ipynb
+++ b/nbs/tests/test_99_confidence_intervals.ipynb
@@ -75,33 +75,35 @@
    "outputs": [],
    "source": [
     "# Dropped to 30 reps to save time. v0.2.5.\n",
-    "reps = 30\n",
-    "ci = 95\n",
+    "reps=30\n",
+    "ci=95\n",
     "POPULATION_N = 10000\n",
     "SAMPLE_N = 10\n",
     "\n",
     "# Create data for hedges g and cohens d.\n",
     "CONTROL_MEAN = np.random.randint(1, 1000)\n",
-    "POP_SD = np.random.randint(1, 15)\n",
-    "POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n",
+    "POP_SD = np.random.randint(1, 15)\n",
+    "POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n",
     "\n",
     "TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)\n",
     "norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)\n",
     "c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n",
-    "t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n",
+    "t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n",
+    "\n",
+    "std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n",
     "\n",
-    "std_diff_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n",
     "\n",
     "\n",
     "# Create mean_diff data\n",
     "CONTROL_MEAN = np.random.randint(1, 1000)\n",
-    "POP_SD = np.random.randint(1, 15)\n",
-    "TRUE_DIFFERENCE = np.random.randint(-POP_SD * 5, POP_SD * 5)\n",
+    "POP_SD = np.random.randint(1, 15)\n",
+    "TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)\n",
     "\n",
     "c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n",
-    "t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_DIFFERENCE, **norm_sample_kwargs)\n",
+    "t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)\n",
+    "\n",
+    "mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n",
     "\n",
-    "mean_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n",
     "\n",
     "\n",
     "# Create median_diff data\n",
@@ -110,13 +112,14 @@
     "\n",
     "skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)\n",
     "skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)\n",
-    "skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100 + MEDIAN_DIFFERENCE)\n",
+    "skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)\n",
     "\n",
     "sample_kwargs = dict(replace=False, size=SAMPLE_N)\n",
     "skewsample1 = np.random.choice(skewpop1, **sample_kwargs)\n",
     "skewsample2 = np.random.choice(skewpop2, **sample_kwargs)\n",
     "\n",
-    "median_df = pd.DataFrame({\"Control\": skewsample1, \"Test\": skewsample2})\n",
+    "median_df = pd.DataFrame({'Control' : skewsample1, 'Test': skewsample2})\n",
+    "\n",
     "\n",
     "\n",
     "# Create two populations with a 50% overlap.\n",
@@ -125,24 +128,25 @@
     "\n",
     "pop_kwargs = dict(scale=SD, size=POPULATION_N)\n",
     "pop1 = norm.rvs(loc=100, **pop_kwargs)\n",
-    "pop2 = norm.rvs(loc=100 + CD_DIFFERENCE, **pop_kwargs)\n",
+    "pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)\n",
     "\n",
     "sample_kwargs = dict(replace=False, size=SAMPLE_N)\n",
     "sample1 = np.random.choice(pop1, **sample_kwargs)\n",
     "sample2 = np.random.choice(pop2, **sample_kwargs)\n",
     "\n",
-    "cd_df = pd.DataFrame({\"Control\": sample1, \"Test\": sample2})\n",
+    "cd_df = pd.DataFrame({'Control' : sample1, 'Test': sample2})\n",
+    "\n",
     "\n",
     "\n",
     "# Create several CIs and see if the true population difference lies within.\n",
-    "error_count_cohens_d = 0\n",
-    "error_count_hedges_g = 0\n",
-    "error_count_mean_diff = 0\n",
-    "error_count_median_diff = 0\n",
+    "error_count_cohens_d = 0\n",
+    "error_count_hedges_g = 0\n",
+    "error_count_mean_diff = 0\n",
+    "error_count_median_diff = 0\n",
     "error_count_cliffs_delta = 0\n",
     "\n",
     "for i in range(0, reps):\n",
-    "    # print(i) # for debug.\n",
+    "    print(i) # for debug.\n",
     "    # pick a random seed\n",
     "    rnd_sd = np.random.randint(0, 999999)\n",
     "    load_kwargs = dict(ci=ci, random_seed=rnd_sd)\n",
@@ -151,44 +155,48 @@
     "    cd = std_diff_data.cohens_d.results\n",
     "    # print(\"cohen's d\") # for debug.\n",
     "    cd_low, cd_high = float(cd.bca_low), float(cd.bca_high)\n",
-    "    if not cd_low < POP_D < cd_high:\n",
+    "    if cd_low < POP_D < cd_high is False:\n",
     "        error_count_cohens_d += 1\n",
     "\n",
     "    hg = std_diff_data.hedges_g.results\n",
     "    # print(\"hedges' g\") # for debug.\n",
     "    hg_low, hg_high = float(hg.bca_low), float(hg.bca_high)\n",
-    "    if not hg_low < POP_D < hg_high:\n",
+    "    if hg_low < POP_D < hg_high is False:\n",
     "        error_count_hedges_g += 1\n",
     "\n",
+    "\n",
     "    mean_diff_data = load(data=mean_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
     "    mean_d = mean_diff_data.mean_diff.results\n",
     "    # print(\"mean diff\") # for debug.\n",
     "    mean_d_low, mean_d_high = float(mean_d.bca_low), float(mean_d.bca_high)\n",
-    "    if not mean_d_low < TRUE_DIFFERENCE < mean_d_high:\n",
+    "    if mean_d_low < TRUE_DIFFERENCE < mean_d_high is False:\n",
     "        error_count_mean_diff += 1\n",
     "\n",
-    "    median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
+    "\n",
+    "    median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"),\n",
+    "                            **load_kwargs)\n",
     "    median_d = median_diff_data.median_diff.results\n",
     "    # print(\"median diff\") # for debug.\n",
     "    median_d_low, median_d_high = float(median_d.bca_low), float(median_d.bca_high)\n",
-    "    if not median_d_low < MEDIAN_DIFFERENCE < median_d_high:\n",
+    "    if median_d_low < MEDIAN_DIFFERENCE < median_d_high is False:\n",
     "        error_count_median_diff += 1\n",
     "\n",
+    "\n",
     "    cd_data = load(data=cd_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n",
     "    cliffs = cd_data.cliffs_delta.results\n",
     "    # print(\"cliff's delta\") # for debug.\n",
     "    low, high = float(cliffs.bca_low), float(cliffs.bca_high)\n",
-    "    if not low < 0.5 < high:\n",
+    "    if low < 0.5 < high is False:\n",
     "        error_count_cliffs_delta += 1\n",
     "\n",
     "\n",
     "max_errors = int(np.ceil(reps * (100 - ci) / 100))\n",
     "\n",
-    "assert error_count_cohens_d <= max_errors\n",
-    "assert error_count_hedges_g <= max_errors\n",
-    "assert error_count_mean_diff <= max_errors\n",
-    "assert error_count_median_diff <= max_errors\n",
-    "assert error_count_cliffs_delta <= max_errors"
+    "assert error_count_cohens_d <= max_errors\n",
+    "assert error_count_hedges_g <= max_errors\n",
+    "assert error_count_mean_diff <= max_errors\n",
+    "assert error_count_median_diff <= max_errors\n",
+    "assert error_count_cliffs_delta <= max_errors\n"
    ]
   },
   {
@@ -205,6 +213,10 @@
    "display_name": "python3",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/nbs/tests/test_load_errors.py b/nbs/tests/test_load_errors.py
index 7e349231..d1c387c5 100644
--- a/nbs/tests/test_load_errors.py
+++ b/nbs/tests/test_load_errors.py
@@ -35,7 +35,7 @@ def test_wrong_params_combinations():
     assert error_msg in str(excinfo.value)
 
-    error_msg = "`proportional` and `delta` cannot be True at the same time."
+    error_msg = "`proportional` and `delta2` cannot be True at the same time."
     with pytest.raises(ValueError) as excinfo:
         my_data = load(
             dummy_df,