diff --git a/README.md b/README.md index e0ba359..901ed2f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## A Hierarchical Resampling Package for Python -Version 1.0.1 +Version 1.1 hierarch is a package for hierarchical resampling (bootstrapping, permutation) of datasets in Python. Because for loops are ultimately intrinsic to cluster-aware resampling, hierarch uses Numba to accelerate many of its key functions. diff --git a/docs/user/confidence.rst b/docs/user/confidence.rst index 2f1b5ed..22af507 100644 --- a/docs/user/confidence.rst +++ b/docs/user/confidence.rst @@ -81,7 +81,7 @@ This interval does not cross 0, so it is consistent with significance at the alp level. Because ha.stats.confidence_interval is based on a hypothesis test, it requires -the same input parameters as two_sample_test or linear_regression_test. However, +the same input parameters as hypothesis_test. However, the new **interval** parameter determines the width of the interval. :: ha.stats.confidence_interval( diff --git a/docs/user/hypothesis.rst b/docs/user/hypothesis.rst index 0357108..b800050 100644 --- a/docs/user/hypothesis.rst +++ b/docs/user/hypothesis.rst @@ -68,13 +68,13 @@ correct order. :: data[columns] -Next, you can call two_sample_test from hierarch's stats module, which will +Next, you can call hypothesis_test from hierarch's stats module, which will calculate the p-value. You have to specify what column is the treatment column - in this case, "Condition." Indexing starts at 0, so you input treatment_col=0. In this case, there are only 6c3 = 20 ways to permute the treatment labels, so you should specify "all" permutations be used. :: - p_val = ha.stats.two_sample_test(data, treatment_col=0, + p_val = ha.stats.hypothesis_test(data, treatment_col=0, compare='means', bootstraps=500, permutations='all', random_state=1) @@ -82,9 +82,9 @@ treatment labels, so you should specify "all" permutations be used. :: #out: p-value = 0.0406 -There are a number of parameters that can be used to modify two_sample_test. :: +There are a number of parameters that can be used to modify hypothesis_test. :: - ha.stats.two_sample_test(data_array, + ha.stats.hypothesis_test(data_array, treatment_col, compare="means", skip=None, @@ -94,11 +94,19 @@ There are a number of parameters that can be used to modify two_sample_test. :: return_null=False, random_state=None) -**compare**: The default "means" assumes that you are testing for a difference in means, so it uses the Welch t-statistic. For flexibility, two_sample_test can take a test statistic function as an argument. +**compare**: The default "means" assumes that you are testing for a difference in means, so it uses the Welch t-statistic. +"corr" uses a studentized covariance based test statistic which gives the same result as the Welch t-statistic for two-sample +datasets, but can be used on datasets with any number of related treatment groups. For flexibility, hypothesis_test can +also take a test statistic function as an argument. **skip**: indicates the indices of columns that should be skipped in the bootstrapping procedure. -A simple rule of thumb is that columns should be resampled with replacement only if they were originally sampled with replacement (or effectively sampled with replacement). For example, consider an imaging experiment in which you image several fields of view in a well, which each contain several cells. 
While you can consider the cells sampled with replacement from the well (there are so many more cells in the population that this assumption is fair), the cells are not sampled with replacement from the field of view (as you are measuring ALL cells in each field of view). The 10% condition is a reasonable rule of thumb here - if your sample represents less than 10% of the population, you can treat it as sampled with replacement. +A simple rule of thumb is that columns should be resampled with replacement only if they were originally sampled with replacement +(or effectively sampled with replacement). For example, consider an imaging experiment in which you image several fields of view in a well, +which each contain several cells. While you can consider the cells sampled with replacement from the well (there are so many more cells in the +population that this assumption is fair), the cells are not sampled with replacement from the field of view (as you are measuring ALL cells +in each field of view). The 10% condition is a reasonable rule of thumb here - if your sample represents less than 10% of the population, +you can treat it as sampled with replacement. **bootstraps**: indicates the number of bootstrapped samples to be drawn from data_array. @@ -264,14 +272,14 @@ Many-Sample Hypothesis Tests - Single Hypothesis One-way ANOVA and similar tests (like multi_sample_test) are inappropriate when you have several samples meant to test a single hypothesis. For example, perhaps you have several samples with different concentrations of the same drug treatment. -In this case, hierarch provides linear_regression_test, which is equivalent to +In this case, you can set compare to "corr", which is equivalent to performing a hypothesis test on a linear model against the null hypothesis that the slope coefficient is equal to 0. This hypothesis test uses a studentized covariance test statistic - essentially, the sample covariance divided by the standard error of the sample covariance. This test statistic is approximately normally distributed and in the two-sample case, -this test gives the same result as two_sample_test. +this test gives the same result as setting compare="means". First, consider a dataset with two treatment groups, four samples each, and three measurements on each sample. :: @@ -339,20 +347,20 @@ measurements on each sample. :: | 2 | 4 | 3 | 2.902522 | +---+---+---+----------+ -Performing linear_regression_test and two_sample_test on this dataset should +Using studentized covariance or the Welch t statistic on this dataset should give very similar p-values. :: - linear_regression_test(data, treatment_col=0, + hypothesis_test(data, treatment_col=0, compare="corr", bootstraps=1000, permutations='all', random_state=1) 0.013714285714285714 - two_sample_test(data, treatment_col=0, + hypothesis_test(data, treatment_col=0, compare="means", bootstraps=1000, permutations='all', random_state=1) 0.013714285714285714 -However, unlike two_sample_test, this test can handle any number of conditions. Consider instead +However, unlike the Welch t-statistic, studentized covariance can handle any number of conditions. Consider instead a dataset with four treatment conditions that have a linear relationship. :: paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]] @@ -417,7 +425,7 @@ a dataset with four treatment conditions that have a linear relationship. :: For this dataset, there are 8! / (2!^4) = 2,520 total permutations. We will choose a random subset of them to compute the p-value. 
:: - linear_regression_test(data, treatment_col=0, + hypothesis_test(data, treatment_col=0, bootstraps=100, permutations=1000, random_state=1) 0.00767 diff --git a/docs/user/overview.rst b/docs/user/overview.rst index 1a3216c..1b55068 100644 --- a/docs/user/overview.rst +++ b/docs/user/overview.rst @@ -51,5 +51,5 @@ Here is the sort of data that hierarch is designed to perform hypothesis tests o The code to perform a hierarchical permutation t-test on this dataset looks like:: - hierarch.stats.two_sample_test(data, treatment_col=0, + hierarch.stats.hypothesis_test(data, treatment_col=0, bootstraps=1000, permutations='all') \ No newline at end of file diff --git a/docs/user/power.rst b/docs/user/power.rst index fc0ad68..97d873c 100644 --- a/docs/user/power.rst +++ b/docs/user/power.rst @@ -95,7 +95,7 @@ a significant result, assuming a p-value cutoff of 0.05. :: loops = 100 for i in range(loops): data = sim.generate() - pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100)) + pvalues.append(ha.stats.hypothesis_test(data, 0, bootstraps=500, permutations=100)) print(np.less(pvalues, 0.05).sum() / loops) @@ -111,7 +111,7 @@ you determine the column 1 sample size that achieves at least 80% power. :: loops = 100 for i in range(loops): data = sim.generate() - pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100)) + pvalues.append(ha.stats.hypothesis_test(data, 0, bootstraps=500, permutations=100)) print(np.less(pvalues, 0.05).sum() / loops) @@ -134,7 +134,7 @@ achieved with an experimental design that makes more column 2 measurements. :: loops = 100 for i in range(loops): data = sim.generate() - pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100)) + pvalues.append(ha.stats.hypothesis_test(data, 0, bootstraps=500, permutations=100)) print(np.less(pvalues, 0.05).sum() / loops) @@ -154,7 +154,7 @@ only 30 column 2 samples. :: loops = 100 for i in range(loops): data = sim.generate() - pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100)) + pvalues.append(ha.stats.hypothesis_test(data, 0, bootstraps=500, permutations=100)) print(np.less(pvalues, 0.05).sum() / loops) @@ -180,7 +180,7 @@ the error for an event that happens 5% probability is +/- 2%, but at loops = 1000 for i in range(loops): data = sim.generate() - pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100)) + pvalues.append(ha.stats.hypothesis_test(data, 0, bootstraps=500, permutations=100)) print(np.less(pvalues, 0.05).sum() / loops) diff --git a/hierarch/stats.py b/hierarch/stats.py index 10618b1..e99b944 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -230,8 +230,9 @@ def _welch_stat(X, y): @jit(nopython=True) def _grabber(X, y, treatment_labels): - sample_a = y[X == treatment_labels[0]] - sample_b = y[X == treatment_labels[1]] + slicer = X == treatment_labels[0] + sample_a = y[slicer] + sample_b = y[~slicer] return sample_a, sample_b @@ -264,6 +265,8 @@ def hypothesis_test( compare : str, optional The test statistic to use to perform the hypothesis test, by default "corr" which automatically calls the studentized covariance test statistic. + alternative : {"two-sided", "less", "greater"} + The alternative hypothesis for the test, "two-sided" by default. skip : list of ints, optional Columns to skip in the bootstrap. 
Skip columns that were sampled without replacement from the prior column, by default None @@ -713,9 +716,12 @@ def multi_sample_test( out[:, :-1] = output out[:, -1] = q_vals output = out - output = pd.DataFrame(output, columns=['Condition 1', 'Condition 2', 'p-value', 'Corrected p-value']) + output = pd.DataFrame( + output, + columns=["Condition 1", "Condition 2", "p-value", "Corrected p-value"], + ) else: - output = pd.DataFrame(output, columns=['Condition 1', 'Condition 2', 'p-value']) + output = pd.DataFrame(output, columns=["Condition 1", "Condition 2", "p-value"]) return output @@ -905,7 +911,7 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=95, ... bootstraps=1000, permutations='all', random_state=1) - (1.3158282800277328, 6.1236374727640746) + (1.314807450602109, 6.124658302189696) The true difference is 2, which falls within the interval. We can examine the p-value for the corresponding dataset: @@ -921,7 +927,7 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=99.5, ... bootstraps=1000, permutations='all', random_state=1) - (-0.1816044138439996, 7.621070166635812) + (-0.12320618535452432, 7.56267193814634) A permutation t-test can be used to generate the null distribution by specifying compare = "means". This should return the same or a very @@ -930,7 +936,7 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=95, ... compare='means', bootstraps=1000, ... permutations='all', random_state=1) - (1.3158282800277328, 6.1236374727640746) + (1.314807450602109, 6.124658302189696) Setting compare = "corr" will generate a confidence interval for the slope in a regression equation. @@ -944,12 +950,14 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=95, ... compare='corr', bootstraps=100, ... permutations=1000, random_state=1) - (0.7743600526042317, 1.5558502398469196) + (0.7712039924329259, 1.5597743222883649) The dataset was specified to have a true slope of 1, which is within the interval. 
""" + rng = np.random.default_rng(random_state) + alpha = (100 - interval) / 200 # turns the input array or dataframe into a float64 array @@ -982,18 +990,21 @@ def confidence_interval( permutations=permutations, kind=kind, return_null=True, - random_state=random_state, + random_state=rng, ) - target_agg = grouper.transform(data, iterations=levels_to_agg) # make a guess as to the lower and upper bounds of the confidence interval - + null_agg = grouper.transform(null_imposed_data, iterations=levels_to_agg) - - current_lower = _compute_interval(np.array(null), null_agg, target_agg, treatment_col, alpha) - current_upper = _compute_interval(np.array(null), null_agg, target_agg, treatment_col, 1-alpha) + + current_lower = _compute_interval( + np.array(null), null_agg, target_agg, treatment_col, alpha + ) + current_upper = _compute_interval( + np.array(null), null_agg, target_agg, treatment_col, 1 - alpha + ) # refine the bounds via iterative hypothesis testing # each bound needs to be found separately @@ -1007,10 +1018,10 @@ def confidence_interval( for i in range(iterations): - - bound_imposed_data = data.copy() - bound_imposed_data[:, -1] -= current_lower * bound_imposed_data[:, treatment_col] + bound_imposed_data[:, -1] -= ( + current_lower * bound_imposed_data[:, treatment_col] + ) current_p, null = hypothesis_test( bound_imposed_data, treatment_col, @@ -1021,15 +1032,17 @@ def confidence_interval( permutations=permutations, kind=kind, return_null=True, - random_state=random_state, + random_state=rng, ) - if np.abs(100*(alpha - current_p)) < tolerance: + if np.abs(100 * (alpha - current_p)) < tolerance: break bound_agg = grouper.transform(bound_imposed_data, iterations=levels_to_agg) - current_lower = _compute_interval(np.array(null), bound_agg, target_agg, treatment_col, alpha) + current_lower = _compute_interval( + np.array(null), bound_agg, target_agg, treatment_col, alpha + ) else: warn( @@ -1038,12 +1051,12 @@ def confidence_interval( stacklevel=2, ) - - for i in range(iterations): bound_imposed_data = data.copy() - bound_imposed_data[:, -1] -= current_upper * bound_imposed_data[:, treatment_col] + bound_imposed_data[:, -1] -= ( + current_upper * bound_imposed_data[:, treatment_col] + ) current_p, null = hypothesis_test( bound_imposed_data, treatment_col, @@ -1054,15 +1067,16 @@ def confidence_interval( permutations=permutations, kind=kind, return_null=True, - random_state=random_state, + random_state=rng, ) - if np.abs(100*(alpha - current_p)) < tolerance: + if np.abs(100 * (alpha - current_p)) < tolerance: break bound_agg = grouper.transform(bound_imposed_data, iterations=levels_to_agg) - current_upper = _compute_interval(np.array(null), bound_agg, target_agg, treatment_col, 1-alpha) - + current_upper = _compute_interval( + np.array(null), bound_agg, target_agg, treatment_col, 1 - alpha + ) else: warn( @@ -1081,6 +1095,7 @@ def __init__(self, message): def __str__(self): return repr(self.message) + @jit(nopython=True, cache=True) def _compute_interval(null, null_data, target_data, treatment_col, alpha): @@ -1097,6 +1112,7 @@ def _compute_interval(null, null_data, target_data, treatment_col, alpha): bound = bound + (bivar_central_moment(x, y) / bivar_central_moment(x, x)) return bound + @jit(nopython=True, cache=True) def _cov_std_error(x, y): n = len(x) @@ -1114,4 +1130,4 @@ def _cov_std_error(x, y): # third term is the square of the covariance of x and y. 
an approximate bias # correction of n - root(3) is applied denom_3 = ((n - 2) * (bivar_central_moment(x, y, pow=1, ddof=1.75) ** 2)) / (n - 1) - return ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3)) ** 0.5 \ No newline at end of file + return ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3)) ** 0.5 diff --git a/setup.py b/setup.py index b1946e9..89f559d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="hierarch", - version="1.0.1", + version="1.1.0", author="Rishi Kulkarni", author_email="rkulk@stanford.edu", description="Hierarchical hypothesis testing library",
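A quick end-to-end sketch of the consolidated API introduced by this diff: the former two_sample_test and linear_regression_test calls both become hypothesis_test, selected via the compare argument, and confidence_interval inverts the same test. This is illustrative only. It assumes hierarch >= 1.1 is importable as ha; the toy dataset, its effect sizes, and any resulting p-values are invented here rather than taken from the docs above, and only the column layout (treatment column first, nested sampling columns next, measurements last) and the hypothesis_test / confidence_interval signatures come from the changed files. ::

    import numpy as np
    import hierarch as ha

    # Invented toy dataset: 2 conditions x 4 samples x 3 measurements (24 rows).
    # Columns: Condition, sample-within-condition, measurement index, measured value.
    rng = np.random.default_rng(1)
    rows = []
    for condition in (1, 2):
        for sample in (1, 2, 3, 4):
            sample_effect = rng.normal()
            for measurement in (1, 2, 3):
                rows.append([condition, sample, measurement,
                             condition + sample_effect + rng.normal()])
    data = np.array(rows, dtype=np.float64)

    # Welch t-statistic (replaces two_sample_test).
    p_means = ha.stats.hypothesis_test(data, treatment_col=0, compare="means",
                                       bootstraps=500, permutations="all",
                                       random_state=1)

    # Studentized covariance (replaces linear_regression_test); in the
    # two-sample case this should closely agree with compare="means".
    p_corr = ha.stats.hypothesis_test(data, treatment_col=0, compare="corr",
                                      bootstraps=500, permutations="all",
                                      random_state=1)

    # Confidence interval obtained by inverting the same hypothesis test.
    lower, upper = ha.stats.confidence_interval(data, treatment_col=0, interval=95,
                                                bootstraps=500, permutations="all",
                                                random_state=1)

    print(p_means, p_corr, (lower, upper))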