From 902e4d53f678c7033e39b83b3c146b80fe628db9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 May 2021 03:38:33 +0000 Subject: [PATCH 01/12] Bump numpy from 1.20.2 to 1.20.3 Bumps [numpy](https://github.com/numpy/numpy) from 1.20.2 to 1.20.3. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/HOWTO_RELEASE.rst.txt) - [Commits](https://github.com/numpy/numpy/compare/v1.20.2...v1.20.3) Signed-off-by: dependabot[bot] --- docs/requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 2000bdd..6ac4509 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,6 +2,6 @@ sphinx==4.0.1 sphinx_rtd_theme==0.5.2 numpydoc==1.1.0 numba==0.53.1 -numpy==1.20.2 +numpy==1.20.3 pandas==1.2.4 scipy==1.6.3 diff --git a/requirements.txt b/requirements.txt index 2974d67..7a4f919 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ mkl-random==1.2.1 mkl-service==2.3.0 mpmath==1.2.1 numba==0.53.1 -numpy==1.20.2 +numpy==1.20.3 pandas==1.2.4 python-dateutil==2.8.1 pytz==2021.1 From ed73d29a9546114fbfacb12ac563cdbe80a234ad Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Tue, 1 Jun 2021 01:13:22 -0700 Subject: [PATCH 02/12] performance improvements to Permuter class to reduce function call overhead --- hierarch/internal_functions.py | 108 ++++++++++++++++++++++++--------- hierarch/resampling.py | 102 +++++++++++++++++++------------ hierarch/stats.py | 6 +- 3 files changed, 144 insertions(+), 72 deletions(-) diff --git a/hierarch/internal_functions.py b/hierarch/internal_functions.py index 250af01..16c391e 100644 --- a/hierarch/internal_functions.py +++ b/hierarch/internal_functions.py @@ -1,5 +1,4 @@ import numpy as np -from numpy.random import shuffle from hierarch import numba_overloads import numba as nb import pandas as pd @@ -204,7 +203,7 @@ def studentized_covariance(x, y): # first term is the second symmetric bivariate central moment. an approximate # bias correction of n - root(2) is applied - denom_1 = bivar_central_moment(x, y, pow=2, ddof=2**0.5) + denom_1 = bivar_central_moment(x, y, pow=2, ddof=2 ** 0.5) # second term is the product of the standard deviations of x and y over n - 1. # this term rapidly goes to 0 as n goes to infinity @@ -215,11 +214,9 @@ def studentized_covariance(x, y): # third term is the square of the covariance of x and y. an approximate bias # correction of n - root(3) is applied - denom_3 = ((n - 2) * (bivar_central_moment(x, y, pow=1, ddof=1.75) ** 2)) / ( - n - 1 - ) + denom_3 = ((n - 2) * (bivar_central_moment(x, y, pow=1, ddof=1.75) ** 2)) / (n - 1) - t = (numerator) / ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3))**0.5 + t = (numerator) / ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3)) ** 0.5 return t @@ -298,7 +295,13 @@ def welch_statistic(data, col: int, treatment_labels): @nb.jit(nopython=True, cache=True) -def iter_return(data, col_to_permute: int, iterator, counts): +def iter_return(data, col_to_permute, iterator): + data[:, col_to_permute] = iterator + return data + + +@nb.jit(nopython=True, cache=True) +def rep_iter_return(data, col_to_permute: int, iterator, counts): """In-place shuffles a column based on an input. Cannot be cluster-aware. 
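
    For example (illustrative values), ``iterator = (1, 2)`` with
    ``counts = (2, 3)`` writes ``[1, 1, 2, 2, 2]`` into the target column,
    since the labels are expanded with ``np.repeat`` before assignment.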
Parameters @@ -316,12 +319,45 @@ def iter_return(data, col_to_permute: int, iterator, counts): # the shuffled column is defined by an input variable shuffled_col_values = np.repeat(np.array(iterator), counts) - data[:, col_to_permute] = shuffled_col_values + return data + + +@nb.jit(nopython=True, inline="always") +def bounded_uint(ub): + x = np.random.randint(low=2 ** 32) + m = ub * x + l = np.bitwise_and(m, 2 ** 32 - 1) + if l < ub: + t = (2 ** 32 - ub) % ub + while l < t: + x = np.random.randint(low=2 ** 32) + m = ub * x + l = np.bitwise_and(m, 2 ** 32 - 1) + return m >> 32 + + +@nb.jit(nopython=True, cache=True) +def nb_fast_shuffle(arr): + i = arr.shape[0] - 1 + while i > 0: + j = bounded_uint(i + 1) + arr[i], arr[j] = arr[j], arr[i] + i -= 1 + + +@nb.jit(nopython=True, cache=True) +def nb_strat_shuffle(arr, stratification): + for v, w in zip(stratification[:-1], stratification[1:]): + i = w - v - 1 + while i > 0: + j = bounded_uint(i + 1) + arr[i + v], arr[j + v] = arr[j + v], arr[i + v] + i -= 1 @nb.jit(nopython=True, cache=True) -def randomized_return(data, col_to_permute: int, shuffled_col_values, keys, counts): +def randomized_return(data, col_to_permute: int, keys): """Randomly shuffles a column in-place, in a cluster-aware fashion if necessary. Parameters @@ -330,43 +366,55 @@ def randomized_return(data, col_to_permute: int, shuffled_col_values, keys, coun Target data matrix. col_to_permute : int Index of the column whose values will be permuted. - shuffled_col_values : 1D array-like - Labels in the column to be permuted. keys : 1D array-like Clusters to shuffle within (if col_to_permute > 0). - counts : 1D array-like - Number of times each value in shuffled_col_values should appear in output. """ # if there are no clusters, just shuffle the columns - if shuffled_col_values.size == data.shape[0]: + if col_to_permute == 0: + nb_fast_shuffle(data[:, col_to_permute]) - if col_to_permute == 0: - shuffle(data[:, col_to_permute]) + else: + nb_strat_shuffle(data[:, col_to_permute], keys) - else: - for idx in range(len(keys) - 1): + return data - shuffle(data[:, col_to_permute][keys[idx] : keys[idx + 1]]) - else: - if col_to_permute == 0: - shuffle(shuffled_col_values) +@nb.jit(nopython=True, cache=True) +def rep_randomized_return(data, col_to_permute: int, shuffled_col_values, keys, counts): + """Randomly shuffles a column in-place, in a cluster-aware fashion if necessary. - # if not, shuffle between the indices in keys - else: + Parameters + ---------- + data : 2D numeric array + Target data matrix. + col_to_permute : int + Index of the column whose values will be permuted. + shuffled_col_values : 1D array-like + Labels in the column to be permuted. + keys : 1D array-like + Clusters to shuffle within (if col_to_permute > 0). + counts : 1D array-like + Number of times each value in shuffled_col_values should appear in output. 
+ """ - for idx in range(len(keys) - 1): + # if there are no clusters, just shuffle the columns + + if col_to_permute == 0: + nb_fast_shuffle(shuffled_col_values) - shuffle(shuffled_col_values[keys[idx] : keys[idx + 1]]) + # if not, shuffle between the indices in keys + else: + nb_strat_shuffle(shuffled_col_values, keys) - # check if the shuffled column needs to be repeated to fit back - # into the original matrix + # check if the shuffled column needs to be repeated to fit back + # into the original matrix - shuffled_col_values = np.repeat(shuffled_col_values, counts) + shuffled_col_values = np.repeat(shuffled_col_values, counts) - data[:, col_to_permute] = shuffled_col_values + data[:, col_to_permute] = shuffled_col_values + return data @nb.jit(nopython=True, cache=True) diff --git a/hierarch/resampling.py b/hierarch/resampling.py index 8af2afd..181578b 100644 --- a/hierarch/resampling.py +++ b/hierarch/resampling.py @@ -1,13 +1,17 @@ import numpy as np from itertools import cycle +from functools import partial +from numba import jit from hierarch.internal_functions import ( nb_reweighter, nb_reweighter_real, nb_unique, id_cluster_counts, msp, + rep_randomized_return, set_numba_random_state, iter_return, + rep_iter_return, randomized_return, ) @@ -366,12 +370,12 @@ class Permuter: >>> permute = Permuter(random_state=1) >>> permute.fit(test, col_to_permute=0, exact=False) >>> permute.transform(test) - array([[1., 1., 1.], + array([[2., 1., 1.], [2., 2., 1.], [1., 3., 1.], - [1., 1., 1.], - [2., 2., 1.], - [2., 3., 1.]]) + [2., 1., 1.], + [1., 2., 1.], + [1., 3., 1.]]) If exact=True, Permuter will not repeat a permutation until all possible permutations have been exhausted. @@ -398,12 +402,12 @@ class Permuter: >>> permute = Permuter(random_state=2) >>> permute.fit(test, col_to_permute=1, exact=False) >>> permute.transform(test) - array([[1., 2., 1.], - [1., 1., 1.], + array([[1., 1., 1.], + [1., 2., 1.], [1., 3., 1.], - [2., 3., 1.], [2., 2., 1.], - [2., 1., 1.]]) + [2., 1., 1.], + [2., 3., 1.]]) Exact within-cluster permutations are not implemented, but there are typically too many to be worth attempting. @@ -435,7 +439,7 @@ def fit(self, data, col_to_permute: int, exact=False): iterate through them one by one, by default False. Only works if target column has index 0. """ - self.values, self.indexes, self.counts = np.unique( + values, indexes, counts = np.unique( data[:, : col_to_permute + 2], return_index=True, return_counts=True, axis=0 ) @@ -443,23 +447,56 @@ def fit(self, data, col_to_permute: int, exact=False): raise NotImplementedError( "Exact permutation only available for col_to_permute = 0." 
) - self.col_to_permute = col_to_permute - self.exact = exact - try: - self.values[:, -3] - self.keys = nb_unique(self.values[:, :-2])[1] - self.keys = np.append(self.keys, data.shape[0]) - except IndexError: - self.keys = np.empty(0) - - if self.exact is True: - col_values = self.values[:, -2].copy() - self._len = len(col_values) + # transform() is going to be called a lot, so generate a specialized version on the fly + + if exact is True: + col_values = values[:, -2].copy() self.iterator = cycle(msp(col_values)) + if len(col_values) == len(data): + self.transform = partial( + self._exact_return, + col_to_permute=col_to_permute, + generator=self.iterator, + ) + else: + self.transform = partial( + self._exact_repeat_return, + col_to_permute=col_to_permute, + generator=self.iterator, + counts=counts, + ) else: - self.shuffled_col_values = data[:, self.col_to_permute][self.indexes] + try: + values[:, -3] + keys = nb_unique(values[:, :-2])[1] + keys = np.append(keys, data.shape[0]) + except IndexError: + keys = np.empty(0, dtype=np.int64) + + if indexes.size == len(data): + self.transform = partial( + randomized_return, col_to_permute=col_to_permute, keys=keys, + ) + + else: + shuffled_col_values = data[:, col_to_permute][indexes] + self.transform = partial( + rep_randomized_return, + col_to_permute=col_to_permute, + shuffled_col_values=shuffled_col_values, + keys=keys, + counts=counts, + ) + + @staticmethod + def _exact_return(data, col_to_permute, generator): + return iter_return(data, col_to_permute, tuple(next(generator))) + + @staticmethod + def _exact_repeat_return(data, col_to_permute, generator, counts): + return rep_iter_return(data, col_to_permute, tuple(next(generator)), counts) def transform(self, data): """Permute target column in-place. @@ -472,21 +509,8 @@ def transform(self, data): Returns ------- data : 2D numeric ndarray - Original data with target column shuffled, in a cluster-aware fashion if necessary. + Original data with target column shuffled, in a stratified fashion if necessary. """ - if self.exact is False: - randomized_return( - data, - self.col_to_permute, - self.shuffled_col_values, - self.keys, - self.counts, - ) - else: - if self._len == len(data): - data[:, self.col_to_permute] = next(self.iterator) - else: - iter_return( - data, self.col_to_permute, tuple(next(self.iterator)), self.counts - ) - return data + + # this method is defined on the fly in fit() + raise Exception("Use fit() before calling transform.") diff --git a/hierarch/stats.py b/hierarch/stats.py index 0b3252b..bcbf9f4 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -122,7 +122,7 @@ def linear_regression_test( >>> linear_regression_test(data, treatment_col=0, ... bootstraps=100, permutations=1000, ... random_state=1) - 0.00767 + 0.00675 """ @@ -329,7 +329,7 @@ def two_sample_test( >>> two_sample_test(data, treatment_col=0, ... bootstraps=1000, permutations=70, ... random_state=1) - 0.03362857142857143 + 0.03357142857142857 The treatment column does not have to be the outermost column. @@ -356,7 +356,7 @@ def two_sample_test( >>> two_sample_test(data, treatment_col=1, ... bootstraps=100, permutations=1000, ... 
random_state=1) - 0.00285 + 0.00276 """ # turns the input array or dataframe into a float64 array From 46efdcb2f53d35ef852e8f353779056fe4b4738f Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Tue, 1 Jun 2021 11:49:17 -0700 Subject: [PATCH 03/12] improved readability of bounded_uint function --- hierarch/internal_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hierarch/internal_functions.py b/hierarch/internal_functions.py index 16c391e..a14ebd8 100644 --- a/hierarch/internal_functions.py +++ b/hierarch/internal_functions.py @@ -325,15 +325,16 @@ def rep_iter_return(data, col_to_permute: int, iterator, counts): @nb.jit(nopython=True, inline="always") def bounded_uint(ub): - x = np.random.randint(low=2 ** 32) + rand = np.random.randint + x = rand(low=2 ** 32) m = ub * x - l = np.bitwise_and(m, 2 ** 32 - 1) + l = m & (2 ** 32 - 1) if l < ub: t = (2 ** 32 - ub) % ub while l < t: - x = np.random.randint(low=2 ** 32) + x = rand(low=2 ** 32) m = ub * x - l = np.bitwise_and(m, 2 ** 32 - 1) + l = m & (2 ** 32 - 1) return m >> 32 From 01d5b310148332f533685a1d1a7f1cdf9f9e81ea Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Tue, 1 Jun 2021 13:26:06 -0700 Subject: [PATCH 04/12] moved Permuter's internal functions into Permuter as staticmethods --- hierarch/internal_functions.py | 89 +--------------------------------- hierarch/resampling.py | 79 +++++++++++++++++++----------- 2 files changed, 53 insertions(+), 115 deletions(-) diff --git a/hierarch/internal_functions.py b/hierarch/internal_functions.py index a14ebd8..9490dd5 100644 --- a/hierarch/internal_functions.py +++ b/hierarch/internal_functions.py @@ -295,32 +295,8 @@ def welch_statistic(data, col: int, treatment_labels): @nb.jit(nopython=True, cache=True) -def iter_return(data, col_to_permute, iterator): - data[:, col_to_permute] = iterator - return data - - -@nb.jit(nopython=True, cache=True) -def rep_iter_return(data, col_to_permute: int, iterator, counts): - """In-place shuffles a column based on an input. Cannot be cluster-aware. - - Parameters - ---------- - data : 2D numeric array - Target data matrix. - col_to_permute : int - Index of the column whose values will be permuted. - iterator : 1D array-like - Values to replace target column with. - counts : 1D array-like - Number of times each value in iterator should appear in output. - """ - - # the shuffled column is defined by an input variable - - shuffled_col_values = np.repeat(np.array(iterator), counts) - data[:, col_to_permute] = shuffled_col_values - return data +def _repeat(target, counts): + return np.repeat(np.array(target), counts) @nb.jit(nopython=True, inline="always") @@ -357,67 +333,6 @@ def nb_strat_shuffle(arr, stratification): i -= 1 -@nb.jit(nopython=True, cache=True) -def randomized_return(data, col_to_permute: int, keys): - """Randomly shuffles a column in-place, in a cluster-aware fashion if necessary. - - Parameters - ---------- - data : 2D numeric array - Target data matrix. - col_to_permute : int - Index of the column whose values will be permuted. - keys : 1D array-like - Clusters to shuffle within (if col_to_permute > 0). 
- """ - - # if there are no clusters, just shuffle the columns - - if col_to_permute == 0: - nb_fast_shuffle(data[:, col_to_permute]) - - else: - nb_strat_shuffle(data[:, col_to_permute], keys) - - return data - - -@nb.jit(nopython=True, cache=True) -def rep_randomized_return(data, col_to_permute: int, shuffled_col_values, keys, counts): - """Randomly shuffles a column in-place, in a cluster-aware fashion if necessary. - - Parameters - ---------- - data : 2D numeric array - Target data matrix. - col_to_permute : int - Index of the column whose values will be permuted. - shuffled_col_values : 1D array-like - Labels in the column to be permuted. - keys : 1D array-like - Clusters to shuffle within (if col_to_permute > 0). - counts : 1D array-like - Number of times each value in shuffled_col_values should appear in output. - """ - - # if there are no clusters, just shuffle the columns - - if col_to_permute == 0: - nb_fast_shuffle(shuffled_col_values) - - # if not, shuffle between the indices in keys - else: - nb_strat_shuffle(shuffled_col_values, keys) - - # check if the shuffled column needs to be repeated to fit back - # into the original matrix - - shuffled_col_values = np.repeat(shuffled_col_values, counts) - - data[:, col_to_permute] = shuffled_col_values - return data - - @nb.jit(nopython=True, cache=True) def id_cluster_counts(design): """Identifies the hierarchy in a design matrix. diff --git a/hierarch/resampling.py b/hierarch/resampling.py index 181578b..7938e7f 100644 --- a/hierarch/resampling.py +++ b/hierarch/resampling.py @@ -8,11 +8,10 @@ nb_unique, id_cluster_counts, msp, - rep_randomized_return, set_numba_random_state, - iter_return, - rep_iter_return, - randomized_return, + _repeat, + nb_fast_shuffle, + nb_strat_shuffle, ) @@ -449,22 +448,16 @@ def fit(self, data, col_to_permute: int, exact=False): ) # transform() is going to be called a lot, so generate a specialized version on the fly + # this keeps us from having to do unnecessary flow control if exact is True: col_values = values[:, -2].copy() self.iterator = cycle(msp(col_values)) if len(col_values) == len(data): - self.transform = partial( - self._exact_return, - col_to_permute=col_to_permute, - generator=self.iterator, - ) + self.transform = self._exact_return(col_to_permute, self.iterator) else: - self.transform = partial( - self._exact_repeat_return, - col_to_permute=col_to_permute, - generator=self.iterator, - counts=counts, + self.transform = self._exact_repeat_return( + col_to_permute, self.iterator, counts ) else: @@ -476,27 +469,57 @@ def fit(self, data, col_to_permute: int, exact=False): keys = np.empty(0, dtype=np.int64) if indexes.size == len(data): - self.transform = partial( - randomized_return, col_to_permute=col_to_permute, keys=keys, - ) + self.transform = self._random_return(col_to_permute, keys) else: - shuffled_col_values = data[:, col_to_permute][indexes] - self.transform = partial( - rep_randomized_return, - col_to_permute=col_to_permute, - shuffled_col_values=shuffled_col_values, - keys=keys, - counts=counts, + col_values = data[:, col_to_permute][indexes] + self.transform = self._random_repeat_return( + col_to_permute, col_values, keys, counts ) @staticmethod - def _exact_return(data, col_to_permute, generator): - return iter_return(data, col_to_permute, tuple(next(generator))) + def _exact_return(col_to_permute, generator): + def _exact_return_impl(data): + data[:, col_to_permute] = next(generator) + return data + + return _exact_return_impl @staticmethod - def _exact_repeat_return(data, 
col_to_permute, generator, counts): - return rep_iter_return(data, col_to_permute, tuple(next(generator)), counts) + def _exact_repeat_return(col_to_permute, generator, counts): + def _rep_iter_return_impl(data): + data[:, col_to_permute] = _repeat(tuple(next(generator)), counts) + return data + + return _rep_iter_return_impl + + @staticmethod + def _random_return(col_to_permute, keys): + @jit(nopython=True, cache=True) + def _random_return_impl(data): + if col_to_permute == 0: + nb_fast_shuffle(data[:, col_to_permute]) + else: + nb_strat_shuffle(data[:, col_to_permute], keys) + return data + + return _random_return_impl + + @staticmethod + def _random_repeat_return(col_to_permute, col_values, keys, counts): + @jit(nopython=True, cache=True) + def _random__repeat_return_impl(data): + shuffled_col_values = col_values.copy() + if col_to_permute == 0: + nb_fast_shuffle(shuffled_col_values) + else: + nb_strat_shuffle(shuffled_col_values, keys) + + shuffled_col_values = np.repeat(shuffled_col_values, counts) + data[:, col_to_permute] = shuffled_col_values + return data + + return _random__repeat_return_impl def transform(self, data): """Permute target column in-place. From af7fe336db5a2388790306b9e3f63f0e1aad4bea Mon Sep 17 00:00:00 2001 From: rishi-kulkarni Date: Wed, 2 Jun 2021 16:09:06 -0700 Subject: [PATCH 05/12] updated resampling --- hierarch/resampling.py | 50 ++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/hierarch/resampling.py b/hierarch/resampling.py index 7938e7f..13e25d6 100644 --- a/hierarch/resampling.py +++ b/hierarch/resampling.py @@ -14,10 +14,6 @@ nb_strat_shuffle, ) - -BOOTSTRAP_ALGORITHMS = ["weights", "indexes", "bayesian"] - - class Bootstrapper: """Bootstrapper(random_state=None, kind="weights") @@ -216,6 +212,7 @@ class Bootstrapper: [2. , 3. , 3. , 1.15970489]]) """ + BOOTSTRAP_ALGORITHMS = tuple(["weights", "indexes", "bayesian"]) def __init__(self, random_state=None, kind="weights"): @@ -224,7 +221,7 @@ def __init__(self, random_state=None, kind="weights"): # this makes it both reproducible and thread-safe enough nb_seed = self.random_generator.integers(low=2 ** 32 - 1) set_numba_random_state(nb_seed) - if kind in BOOTSTRAP_ALGORITHMS: + if kind in self.BOOTSTRAP_ALGORITHMS: self.kind = kind else: raise KeyError("Invalid 'kind' argument.") @@ -477,8 +474,28 @@ def fit(self, data, col_to_permute: int, exact=False): col_to_permute, col_values, keys, counts ) + def transform(self, data): + """Permute target column in-place. + + Parameters + ---------- + data : 2D numeric ndarray + Target data. + + Returns + ------- + data : 2D numeric ndarray + Original data with target column shuffled, in a stratified fashion if necessary. + """ + + # this method is defined on the fly in fit() based one of the + # four static methods defined below + raise Exception("Use fit() before calling transform.") + @staticmethod def _exact_return(col_to_permute, generator): + """Transformer when exact is True and permutations are unrestricted. + """ def _exact_return_impl(data): data[:, col_to_permute] = next(generator) return data @@ -487,6 +504,9 @@ def _exact_return_impl(data): @staticmethod def _exact_repeat_return(col_to_permute, generator, counts): + """Transformer when exact is True and permutations are restricted by + repetition of treated entities. 
+ """ def _rep_iter_return_impl(data): data[:, col_to_permute] = _repeat(tuple(next(generator)), counts) return data @@ -495,6 +515,8 @@ def _rep_iter_return_impl(data): @staticmethod def _random_return(col_to_permute, keys): + """Transformer when exact is False and repetition is not required. + """ @jit(nopython=True, cache=True) def _random_return_impl(data): if col_to_permute == 0: @@ -507,6 +529,8 @@ def _random_return_impl(data): @staticmethod def _random_repeat_return(col_to_permute, col_values, keys, counts): + """Transformer when exact is False and repetition is required. + """ @jit(nopython=True, cache=True) def _random__repeat_return_impl(data): shuffled_col_values = col_values.copy() @@ -521,19 +545,3 @@ def _random__repeat_return_impl(data): return _random__repeat_return_impl - def transform(self, data): - """Permute target column in-place. - - Parameters - ---------- - data : 2D numeric ndarray - Target data. - - Returns - ------- - data : 2D numeric ndarray - Original data with target column shuffled, in a stratified fashion if necessary. - """ - - # this method is defined on the fly in fit() - raise Exception("Use fit() before calling transform.") From c9d7ca562a42cca5f3e17a374fe57230c91de8fe Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Wed, 2 Jun 2021 22:32:48 -0700 Subject: [PATCH 06/12] added confidence_interval function moved test statistics to hierarch.stats for better documentation --- hierarch/internal_functions.py | 185 ++++-------------- hierarch/stats.py | 334 ++++++++++++++++++++++++++++++++- 2 files changed, 366 insertions(+), 153 deletions(-) diff --git a/hierarch/internal_functions.py b/hierarch/internal_functions.py index 9490dd5..2bdbb86 100644 --- a/hierarch/internal_functions.py +++ b/hierarch/internal_functions.py @@ -150,150 +150,6 @@ def bivar_central_moment(x, y, pow=1, ddof=1): return moment -@nb.jit(nopython=True, cache=True) -def studentized_covariance(x, y): - """Studentized sample covariance between two variables. - - Sample covariance between two variables divided by standard error of - sample covariance. Uses a bias-corrected approximation of standard error. - This computes an approximately pivotal test statistic. - - Parameters - ---------- - x, y: numeric array-likes - - Returns - ------- - float64 - Studentized covariance. - - Examples - -------- - >>> x = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1], - ... [1, 2, 3, 4, 5, 2, 3, 4, 5, 6]]) - >>> x.T - array([[0, 1], - [0, 2], - [0, 3], - [0, 4], - [0, 5], - [1, 2], - [1, 3], - [1, 4], - [1, 5], - [1, 6]]) - >>> studentized_covariance(x.T[:,0], x.T[:,1]) - 1.0039690353154482 - - This is approximately equal to the t-statistic. - >>> import scipy.stats as stats - >>> a = np.array([2, 3, 4, 5, 6]) - >>> b = np.array([1, 2, 3, 4, 5]) - >>> stats.ttest_ind(a, b, equal_var=False)[0] - 1.0 - - """ - n = len(x) - - # numerator is the sample covariance, or the first symmetric bivariate central moment - numerator = bivar_central_moment(x, y, pow=1, ddof=1) - - # the denominator is the sample standard deviation of the sample covariance, aka - # the standard error of sample covariance. the denominator has three terms. - - # first term is the second symmetric bivariate central moment. an approximate - # bias correction of n - root(2) is applied - denom_1 = bivar_central_moment(x, y, pow=2, ddof=2 ** 0.5) - - # second term is the product of the standard deviations of x and y over n - 1. 
- # this term rapidly goes to 0 as n goes to infinity - denom_2 = ( - bivar_central_moment(x, x, pow=1, ddof=1) - * bivar_central_moment(y, y, pow=1, ddof=1) - ) / (n - 1) - - # third term is the square of the covariance of x and y. an approximate bias - # correction of n - root(3) is applied - denom_3 = ((n - 2) * (bivar_central_moment(x, y, pow=1, ddof=1.75) ** 2)) / (n - 1) - - t = (numerator) / ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3)) ** 0.5 - return t - - -@nb.jit(nopython=True, cache=True) -def welch_statistic(data, col: int, treatment_labels): - """Calculates Welch's t statistic. - - Takes a 2D data matrix, a column to classify data by, and the labels - corresponding to the data of interest. Assumes that the largest (-1) - column in the data matrix is the dependent variable. - - Parameters - ---------- - data : 2D array - Data matrix. Assumes last column contains dependent variable values. - col : int - Target column to be used to divide the dependent variable into two groups. - treatment_labels : 1D array-like - Labels in target column to be used. - - Returns - ------- - float64 - Welch's t statistic. - - Examples - -------- - - >>> x = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1], - ... [1, 2, 3, 4, 5, 10, 11, 12, 13, 14]]) - >>> x.T - array([[ 0, 1], - [ 0, 2], - [ 0, 3], - [ 0, 4], - [ 0, 5], - [ 1, 10], - [ 1, 11], - [ 1, 12], - [ 1, 13], - [ 1, 14]]) - >>> welch_statistic(x.T, 0, (0, 1)) - -9.0 - - This uses the same calculation as scipy's ttest function. - >>> import scipy.stats as stats - >>> a = [1, 2, 3, 4, 5] - >>> b = [10, 11, 12, 13, 14] - >>> stats.ttest_ind(a, b, equal_var=False)[0] - -9.0 - - - Notes - ---------- - Details on the validity of this test statistic can be found in - "Studentized permutation tests for non-i.i.d. hypotheses and the - generalized Behrens-Fisher problem" by Arnold Janssen. - https://doi.org/10.1016/S0167-7152(97)00043-6. - - """ - # get our two samples from the data matrix - sample_a, sample_b = nb_data_grabber(data, col, treatment_labels) - len_a, len_b = len(sample_a), len(sample_b) - - # mean difference - meandiff = np.mean(sample_a) - np.mean(sample_b) - - # weighted sample variances - var_weight_one = bivar_central_moment(sample_a, sample_a, ddof=1) / len_a - var_weight_two = bivar_central_moment(sample_b, sample_b, ddof=1) / len_b - - # compute t statistic - t = meandiff / np.sqrt(var_weight_one + var_weight_two) - - return t - - @nb.jit(nopython=True, cache=True) def _repeat(target, counts): return np.repeat(np.array(target), counts) @@ -747,3 +603,44 @@ def class_make_ufunc_list(target, reference, counts): i += counts[(reference == target[i]).flatten()].item() return ufunc_list + + +@nb.jit(nopython=True, cache=True) +def _compute_interval(null, null_data, target_data, treatment_col, interval): + lower_bound = (100 - interval) / 200 + upper_bound = 1 - lower_bound + + x = null_data[:, treatment_col] + y = null_data[:, -1] + + denom = _cov_std_error(x, y) + lower = np.quantile(null, lower_bound) * denom / bivar_central_moment(x, x) + upper = np.quantile(null, upper_bound) * denom / bivar_central_moment(x, x) + + x = target_data[:, treatment_col] + y = target_data[:, -1] + denom = _cov_std_error(x, y) + + lower = lower + (bivar_central_moment(x, y) / bivar_central_moment(x, x)) + upper = upper + (bivar_central_moment(x, y) / bivar_central_moment(x, x)) + return (lower, upper) + + +@nb.jit(nopython=True, cache=True) +def _cov_std_error(x, y): + n = len(x) + # first term is the second symmetric bivariate central moment. 
an approximate + # bias correction of n - root(2) is applied + denom_1 = bivar_central_moment(x, y, pow=2, ddof=2 ** 0.5) + + # second term is the product of the standard deviations of x and y over n - 1. + # this term rapidly goes to 0 as n goes to infinity + denom_2 = ( + bivar_central_moment(x, x, pow=1, ddof=1) + * bivar_central_moment(y, y, pow=1, ddof=1) + ) / (n - 1) + + # third term is the square of the covariance of x and y. an approximate bias + # correction of n - root(3) is applied + denom_3 = ((n - 2) * (bivar_central_moment(x, y, pow=1, ddof=1.75) ** 2)) / (n - 1) + return ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3)) ** 0.5 diff --git a/hierarch/stats.py b/hierarch/stats.py index bcbf9f4..7417e82 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -1,16 +1,165 @@ import numpy as np +from numba import jit import math from itertools import combinations import pandas as pd from hierarch.internal_functions import ( - studentized_covariance, - welch_statistic, + nb_data_grabber, preprocess_data, GroupbyMean, + _compute_interval, + bivar_central_moment, ) from hierarch.resampling import Bootstrapper, Permuter from warnings import warn, simplefilter + +@jit(nopython=True, cache=True) +def studentized_covariance(x, y): + """Studentized sample covariance between two variables. + + Sample covariance between two variables divided by standard error of + sample covariance. Uses a bias-corrected approximation of standard error. + This computes an approximately pivotal test statistic. + + Parameters + ---------- + x, y: numeric array-likes + + Returns + ------- + float64 + Studentized covariance. + + Examples + -------- + >>> x = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1], + ... [1, 2, 3, 4, 5, 2, 3, 4, 5, 6]]) + >>> x.T + array([[0, 1], + [0, 2], + [0, 3], + [0, 4], + [0, 5], + [1, 2], + [1, 3], + [1, 4], + [1, 5], + [1, 6]]) + >>> studentized_covariance(x.T[:,0], x.T[:,1]) + 1.0039690353154482 + + This is approximately equal to the t-statistic. + + >>> import scipy.stats as stats + >>> a = np.array([2, 3, 4, 5, 6]) + >>> b = np.array([1, 2, 3, 4, 5]) + >>> stats.ttest_ind(a, b, equal_var=False)[0] + 1.0 + + """ + n = len(x) + + # numerator is the sample covariance, or the first symmetric bivariate central moment + numerator = bivar_central_moment(x, y, pow=1, ddof=1) + + # the denominator is the sample standard deviation of the sample covariance, aka + # the standard error of sample covariance. the denominator has three terms. + + # first term is the second symmetric bivariate central moment. an approximate + # bias correction of n - root(2) is applied + denom_1 = bivar_central_moment(x, y, pow=2, ddof=2 ** 0.5) + + # second term is the product of the standard deviations of x and y over n - 1. + # this term rapidly goes to 0 as n goes to infinity + denom_2 = ( + bivar_central_moment(x, x, pow=1, ddof=1) + * bivar_central_moment(y, y, pow=1, ddof=1) + ) / (n - 1) + + # third term is the square of the covariance of x and y. an approximate bias + # correction of n - root(3) is applied + denom_3 = ((n - 2) * (bivar_central_moment(x, y, pow=1, ddof=1.75) ** 2)) / (n - 1) + + t = (numerator) / ((1 / (n - 1.5)) * (denom_1 + denom_2 - denom_3)) ** 0.5 + return t + + +@jit(nopython=True, cache=True) +def welch_statistic(data, col: int, treatment_labels): + """Calculates Welch's t statistic. + + Takes a 2D data matrix, a column to classify data by, and the labels + corresponding to the data of interest. Assumes that the largest (-1) + column in the data matrix is the dependent variable. 
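+
+    In symbols, the statistic computed below is
+
+    .. math:: t = \frac{\bar{x}_a - \bar{x}_b}{\sqrt{s_a^2 / n_a + s_b^2 / n_b}}
+
+    i.e., the mean difference studentized by the unpooled standard error,
+    so no equal-variance assumption is made.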
+
+    Parameters
+    ----------
+    data : 2D array
+        Data matrix. Assumes last column contains dependent variable values.
+    col : int
+        Target column to be used to divide the dependent variable into two groups.
+    treatment_labels : 1D array-like
+        Labels in target column to be used.
+
+    Returns
+    -------
+    float64
+        Welch's t statistic.
+
+    Examples
+    --------
+
+    >>> x = np.array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
+    ...               [1, 2, 3, 4, 5, 10, 11, 12, 13, 14]])
+    >>> x.T
+    array([[ 0,  1],
+           [ 0,  2],
+           [ 0,  3],
+           [ 0,  4],
+           [ 0,  5],
+           [ 1, 10],
+           [ 1, 11],
+           [ 1, 12],
+           [ 1, 13],
+           [ 1, 14]])
+    >>> welch_statistic(x.T, 0, (0, 1))
+    -9.0
+
+    This uses the same calculation as scipy's ttest function.
+
+    >>> import scipy.stats as stats
+    >>> a = [1, 2, 3, 4, 5]
+    >>> b = [10, 11, 12, 13, 14]
+    >>> stats.ttest_ind(a, b, equal_var=False)[0]
+    -9.0
+
+
+    Notes
+    ----------
+    Details on the validity of this test statistic can be found in
+    "Studentized permutation tests for non-i.i.d. hypotheses and the
+    generalized Behrens-Fisher problem" by Arnold Janssen.
+    https://doi.org/10.1016/S0167-7152(97)00043-6.
+
+    """
+    # get our two samples from the data matrix
+    sample_a, sample_b = nb_data_grabber(data, col, treatment_labels)
+    len_a, len_b = len(sample_a), len(sample_b)
+
+    # mean difference
+    meandiff = np.mean(sample_a) - np.mean(sample_b)
+
+    # weighted sample variances
+    var_weight_one = bivar_central_moment(sample_a, sample_a, ddof=1) / len_a
+    var_weight_two = bivar_central_moment(sample_b, sample_b, ddof=1) / len_b
+
+    # compute t statistic
+    t = meandiff / np.sqrt(var_weight_one + var_weight_two)
+
+    return t
+
+
 TEST_STATISTICS = {
     "means": welch_statistic,
     "corr": studentized_covariance,
 }
@@ -58,7 +207,7 @@ def linear_regression_test(
         Bootstrap algorithm - see Bootstrapper class, by default "weights"
     return_null : bool, optional
         Return the null distribution as well as the p value, by default False
-    seed : int or numpy random Generator, optional
+    random_state : int or numpy random Generator, optional
         Seedable for reproducibility., by default None

     Returns
@@ -80,15 +229,14 @@ def linear_regression_test(
     Examples
     --------
+    Specify the parameters of a dataset with a difference of means of 2.
+
     >>> from hierarch.power import DataSimulator
     >>> import scipy.stats as stats
     >>> paramlist = [[0, 2], [stats.norm], [stats.norm]]
     >>> hierarchy = [2, 4, 3]
     >>> datagen = DataSimulator(paramlist, random_state=2)
     >>> datagen.fit(hierarchy)
-
-    Specify the parameters of a dataset with a difference of means of 2.
-
     >>> data = datagen.generate()
     >>> print(data.shape)
     (24, 4)
@@ -305,15 +453,14 @@ def two_sample_test(
     Examples
     --------
+    Specify the parameters of a dataset with a difference of means of 2.
+
     >>> from hierarch.power import DataSimulator
     >>> import scipy.stats as stats
     >>> paramlist = [[0, 2], [stats.norm], [stats.norm]]
     >>> hierarchy = [2, 4, 3]
     >>> datagen = DataSimulator(paramlist, random_state=123)
     >>> datagen.fit(hierarchy)
-
-    Specify the parameters of a dataset with a difference of means of 2.
-
     >>> data = datagen.generate()
     >>> print(data.shape)
     (24, 4)
@@ -815,3 +962,172 @@ def _false_discovery_adjust(pvals, return_index=False):
         return q_vals, sort_key
     else:
         return q_vals
+
+
+def confidence_interval(
+    data,
+    treatment_col,
+    interval=95.0,
+    compare="corr",
+    skip=None,
+    bootstraps=100,
+    permutations=1000,
+    kind="bayesian",
+    random_state=None,
+):
+    """Compute a confidence interval via test inversion.
+ + Confidence interval can be calculated by inverting the acceptance region of a hypothesis test. + Using a test statistic that is approximately normally distributed under the null makes this + much easier. + + Parameters + ---------- + data : 2D numpy array or pandas DataFrame + Array-like containing both the independent and dependent variables to + be analyzed. It's assumed that the final (rightmost) column + contains the dependent variable values. + treatment_col : int + The index number of the column containing "two samples" to be compared. + Indexing starts at 0. + interval : float, optional + Percentage value indicating the confidence interval's coverage, by default 95 + compare : str, optional + The test statistic to use to perform the hypothesis test, by default "corr" + which automatically calls the studentized covariance test statistic. + skip : list of ints, optional + Columns to skip in the bootstrap. Skip columns that were sampled + without replacement from the prior column, by default None + bootstraps : int, optional + Number of bootstraps to perform, by default 100. Can be set to 1 for a + permutation test without any bootstrapping. + permutations : int or "all", optional + Number of permutations to perform PER bootstrap sample. "all" + for exact test (only works if there are only two treatments), by default 1000 + kind : str, optional + Bootstrap algorithm - see Bootstrapper class, by default "bayesian" + random_state : int or numpy random Generator, optional + Seedable for reproducibility., by default None + + Returns + ------- + tuple of floats + Confidence interval with the specified coverage. + + Notes + ----- + While the Efron bootstrap is the default in most of hierarch's statistical functions, + using the Bayesian bootstrap here helps get tighter confidence intervals with the + correct coverage without having to massively increase the number of resamples. + + The inversion procedure performed by this function is specified in + "Randomization, Bootstrap and Monte Carlo Methods in Biology" by Bryan FJ Manly. + https://doi.org/10.1201/9781315273075. + + Examples + -------- + Specify the parameters of a dataset with a difference of means of 2. + + >>> from hierarch.power import DataSimulator + >>> import scipy.stats as stats + >>> paramlist = [[0, 2], [stats.norm], [stats.norm]] + >>> hierarchy = [2, 4, 3] + >>> datagen = DataSimulator(paramlist, random_state=2) + >>> datagen.fit(hierarchy) + >>> data = datagen.generate() + >>> print(data.shape) + (24, 4) + + >>> confidence_interval(data, treatment_col=0, interval=95, + ... bootstraps=1000, permutations='all', random_state=1) + (1.314807450602109, 6.124658302189696) + + The true difference is 2, which falls within the interval. We can examine + the p-value for the corresponding dataset: + + >>> from hierarch.stats import linear_regression_test + >>> linear_regression_test(data, treatment_col=0, + ... bootstraps=1000, permutations='all', + ... random_state=1) + 0.013714285714285714 + + This suggests that while the 95% confidence interval does not contain 0, the 99.9% + confidence interval should. + + >>> confidence_interval(data, treatment_col=0, interval=99.9, + ... bootstraps=1000, permutations='all', random_state=1) + (-0.7799704380838381, 8.219436190875793) + + hierarch.stats.two_sample_test can be used to generate the null distribution by + specifying compare = "means". This should return a very similar interval. + + >>> confidence_interval(data, treatment_col=0, interval=95, + ... 
compare='means', bootstraps=1000, + ... permutations='all', random_state=1) + (1.3290984115978817, 6.110367341193923) + + Setting compare = "corr" will generate a confidence interval for the slope + in a regression equation. + + >>> paramlist = [[0, 1, 2, 3], [stats.norm], [stats.norm]] + >>> hierarchy = [4, 4, 3] + >>> datagen = DataSimulator(paramlist, random_state=2) + >>> datagen.fit(hierarchy) + >>> data = datagen.generate() + + >>> confidence_interval(data, treatment_col=0, interval=95, + ... compare='corr', bootstraps=100, + ... permutations=1000, random_state=1) + (0.8317584051133191, 1.5597743222883649) + + The dataset was specified to have a true slope of 1, which is within the interval. + + """ + + # first compute the null distribution against the null that the effect size is equal to the MLE + null_imposed_data = data.copy() + levels_to_agg = data.shape[1] - treatment_col - 3 + + grouper = GroupbyMean() + test = grouper.fit_transform(data, iterations=levels_to_agg) + start_slope = bivar_central_moment( + test[:, treatment_col], test[:, -1] + ) / bivar_central_moment(test[:, treatment_col], test[:, treatment_col]) + + # subtract the observed covariance out + correction = start_slope * null_imposed_data[:, treatment_col] + null_imposed_data[:, -1] -= correction + + # compute the null distribution for the null hypothesis that the true effect size is equal to the MLE + if compare == "corr": + _, null = linear_regression_test( + null_imposed_data, + treatment_col, + skip=skip, + bootstraps=bootstraps, + permutations=permutations, + kind=kind, + return_null=True, + random_state=random_state, + ) + elif compare == "means": + _, null = two_sample_test( + null_imposed_data, + treatment_col, + skip=skip, + bootstraps=bootstraps, + permutations=permutations, + kind=kind, + return_null=True, + random_state=random_state, + ) + else: + raise ValueError("No such comparison.") + + null_agg = grouper.fit_transform(null_imposed_data, iterations=levels_to_agg) + target_agg = grouper.fit_transform(data, iterations=levels_to_agg) + + return _compute_interval( + np.array(null), null_agg, target_agg, treatment_col, interval + ) + From e4ba5b41be4716a7aee2bf194d46a9ba345053c3 Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Wed, 2 Jun 2021 23:17:15 -0700 Subject: [PATCH 07/12] updated readme and requirements in anticipation of 0.4 release --- README.md | 5 ++-- docs/user/setup.rst | 1 - environment.yml | 51 +++++++++++++++---------------------- hierarch/numba_overloads.py | 1 + hierarch/resampling.py | 15 +++++++---- hierarch/stats.py | 2 +- requirements.txt | 13 +--------- setup.py | 9 +++++-- 8 files changed, 42 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 206c71e..45f44de 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,11 @@ ## A Hierarchical Resampling Package for Python -Version 0.3 +Version 0.4 hierarch is a package for hierarchical resampling (bootstrapping, permutation) of datasets in Python. Because for loops are ultimately intrinsic to cluster-aware resampling, hierarch uses Numba to accelerate many of its key functions. -hierarch has several functions to assist in performing resampling-based (and therefore distribution-free) hypothesis tests on hierarchical data. Additionally, hierarch can be used to construct power analyses for hierarchical experimental designs. 
+hierarch has several functions to assist in performing resampling-based (and therefore distribution-free) hypothesis tests, confidence interval calculations, and power analyses on hierarchical data. ## Table of Contents @@ -36,7 +36,6 @@ Design-based randomization tests represents the platinum standard for significan * pandas (for importing data) * numba * scipy (for power analysis) -* sympy (for jackknifing) ### Installation diff --git a/docs/user/setup.rst b/docs/user/setup.rst index 1d37c28..cb336b4 100644 --- a/docs/user/setup.rst +++ b/docs/user/setup.rst @@ -8,7 +8,6 @@ Dependencies * pandas (for importing data) * numba * scipy (for power analysis) -* sympy (for jackknifing) Installation ------------ diff --git a/environment.yml b/environment.yml index 964a8cd..21595a5 100644 --- a/environment.yml +++ b/environment.yml @@ -2,36 +2,25 @@ name: hierarch channels: - rkulk111 - defaults + - conda-forge dependencies: - - blas=1.0=mkl - - ca-certificates=2021.4.13=haa95532_1 - - certifi=2020.12.5=py38haa95532_0 - - icc_rt=2019.0.0=h0cc432a_1 - - intel-openmp=2021.2.0=haa95532_616 - - llvmlite=0.34.0=py38h1a82afc_4 - - mkl=2021.2.0=haa95532_296 - - mkl-service=2.3.0=py38h2bbff1b_1 - - mkl_fft=1.3.0=py38h277e83a_2 - - mkl_random=1.2.1=py38hf11a4ad_2 - - mpmath=1.2.1=py38haa95532_0 - - numba=0.51.2=py38hf9181ef_1 - - numpy=1.20.1=py38h34a8a5c_0 - - numpy-base=1.20.1=py38haf7ebc8_0 - - openssl=1.1.1k=h2bbff1b_0 - - pandas=1.2.4=py38hd77b12b_0 - - pip=21.0.1=py38haa95532_0: - - -r file:requirements.txt - - python=3.8.8=hdbf39b2_5 - - python-dateutil=2.8.1=pyhd3eb1b0_0 - - pytz=2021.1=pyhd3eb1b0_0 - - scipy=1.6.2=py38h66253e8_1 - - setuptools=52.0.0=py38haa95532_0 - - six=1.15.0=py38haa95532_0 - - sqlite=3.35.4=h2bbff1b_0 - - sympy=1.8=py38haa95532_0 - - vc=14.2=h21ff451_1 - - vs2015_runtime=14.27.29016=h5e58377_2 - - wheel=0.36.2=pyhd3eb1b0_0 - - wincertstore=0.2=py38_0 - - zlib=1.2.11=h62dcd97_4 + - ca-certificates=2021.5.25 + - certifi=2021.5.30 + - openssl=1.1.1k + - pip=21.1.1 + - python=3.8.10 + - setuptools=52.0.0 + - sqlite=3.35.4 + - vc=14.2 + - vs2015_runtime=14.27.29016 + - wheel=0.36.2 + - wincertstore=0.2 + - llvmlite==0.36.0 + - numba==0.53.1 + - numpy==1.21.0rc1 + - pandas==1.2.4 + - python-dateutil==2.8.1 + - pytz==2021.1 + - scipy==1.6.3 + - six==1.16.0 diff --git a/hierarch/numba_overloads.py b/hierarch/numba_overloads.py index dda4082..a691c27 100644 --- a/hierarch/numba_overloads.py +++ b/hierarch/numba_overloads.py @@ -121,6 +121,7 @@ def dirichlet_impl(alpha, size=None): return out elif isinstance(size, (types.UniTuple)) and isinstance(size.dtype, types.Integer): + def dirichlet_impl(alpha, size=None): """ dirichlet(..., size=tuple) diff --git a/hierarch/resampling.py b/hierarch/resampling.py index 13e25d6..2641d1a 100644 --- a/hierarch/resampling.py +++ b/hierarch/resampling.py @@ -1,6 +1,5 @@ import numpy as np from itertools import cycle -from functools import partial from numba import jit from hierarch.internal_functions import ( nb_reweighter, @@ -14,6 +13,7 @@ nb_strat_shuffle, ) + class Bootstrapper: """Bootstrapper(random_state=None, kind="weights") @@ -212,6 +212,7 @@ class Bootstrapper: [2. , 3. , 3. , 1.15970489]]) """ + BOOTSTRAP_ALGORITHMS = tuple(["weights", "indexes", "bayesian"]) def __init__(self, random_state=None, kind="weights"): @@ -495,7 +496,8 @@ def transform(self, data): @staticmethod def _exact_return(col_to_permute, generator): """Transformer when exact is True and permutations are unrestricted. 
- """ + """ + def _exact_return_impl(data): data[:, col_to_permute] = next(generator) return data @@ -506,7 +508,8 @@ def _exact_return_impl(data): def _exact_repeat_return(col_to_permute, generator, counts): """Transformer when exact is True and permutations are restricted by repetition of treated entities. - """ + """ + def _rep_iter_return_impl(data): data[:, col_to_permute] = _repeat(tuple(next(generator)), counts) return data @@ -516,7 +519,8 @@ def _rep_iter_return_impl(data): @staticmethod def _random_return(col_to_permute, keys): """Transformer when exact is False and repetition is not required. - """ + """ + @jit(nopython=True, cache=True) def _random_return_impl(data): if col_to_permute == 0: @@ -530,7 +534,8 @@ def _random_return_impl(data): @staticmethod def _random_repeat_return(col_to_permute, col_values, keys, counts): """Transformer when exact is False and repetition is required. - """ + """ + @jit(nopython=True, cache=True) def _random__repeat_return_impl(data): shuffled_col_values = col_values.copy() diff --git a/hierarch/stats.py b/hierarch/stats.py index 7417e82..e85a490 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -1020,7 +1020,7 @@ def confidence_interval( using the Bayesian bootstrap here helps get tighter confidence intervals with the correct coverage without having to massively increase the number of resamples. - The inversion procedure performed by this function is specified in + The inversion procedure performed by this function is described in detail in "Randomization, Bootstrap and Monte Carlo Methods in Biology" by Bryan FJ Manly. https://doi.org/10.1201/9781315273075. diff --git a/requirements.txt b/requirements.txt index 7a4f919..8be2921 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,4 @@ -certifi==2020.12.5 -llvmlite==0.36.0 -mkl-fft==1.3.0 -mkl-random==1.2.1 -mkl-service==2.3.0 -mpmath==1.2.1 -numba==0.53.1 +numba==0.53.1 numpy==1.20.3 pandas==1.2.4 -python-dateutil==2.8.1 -pytz==2021.1 scipy==1.6.3 -six==1.16.0 -sympy==1.8 -wincertstore==0.2 diff --git a/setup.py b/setup.py index c5ee510..1b0bc43 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="hierarch", - version="0.3.0", + version="0.4.0", author="Rishi Kulkarni", author_email="rkulk@stanford.edu", description="Hierarchical hypothesis testing library", @@ -14,7 +14,12 @@ license_files=("LICENSE.txt",), url="https://github.com/rishi-kulkarni/hierarch", packages=setuptools.find_packages(), - install_requires=["numpy", "sympy", "scipy", "numba", "pandas"], + install_requires=[ + "numpy>=1.20.3", + "scipy>=1.6.3", + "numba>=0.53.1", + "pandas>=1.2.4", + ], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", From 19b076302467a4d4338d911b699d9416d934f65d Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Fri, 4 Jun 2021 00:09:54 -0700 Subject: [PATCH 08/12] added preprocessing for confidence intervals --- hierarch/stats.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hierarch/stats.py b/hierarch/stats.py index e85a490..3f40b7f 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -965,7 +965,7 @@ def _false_discovery_adjust(pvals, return_index=False): def confidence_interval( - data, + data_array, treatment_col, interval=95.0, compare="corr", @@ -983,7 +983,7 @@ def confidence_interval( Parameters ---------- - data : 2D numpy array or pandas DataFrame + data_array : 2D numpy array or pandas DataFrame Array-like containing both the independent and dependent variables to be 
analyzed. It's assumed that the final (rightmost) column contains the dependent variable values. @@ -1084,6 +1084,12 @@ def confidence_interval( """ + # turns the input array or dataframe into a float64 array + if isinstance(data_array, (np.ndarray, pd.DataFrame)): + data = preprocess_data(data_array) + else: + raise TypeError("Input data must be ndarray or DataFrame.") + # first compute the null distribution against the null that the effect size is equal to the MLE null_imposed_data = data.copy() levels_to_agg = data.shape[1] - treatment_col - 3 From a7ea27b6757948c21dd4d696aa5d699db57b54fc Mon Sep 17 00:00:00 2001 From: rishi-kulkarni Date: Fri, 4 Jun 2021 14:17:42 -0700 Subject: [PATCH 09/12] non-exact permutation tests no longer can give 0 as a p-value --- hierarch/stats.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hierarch/stats.py b/hierarch/stats.py index e85a490..a017d85 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -592,6 +592,8 @@ def two_sample_test( # initialize empty null distribution list null_distribution = [] + total = bootstraps * permutations + # first set of permutations is on the original data # this helps to prevent getting a p-value of 0 for k in range(permutations): @@ -626,6 +628,9 @@ def two_sample_test( 0 ].size / len(null_distribution) + if pval == 0: + pval += 1 / (total) + if return_null is True: return pval, null_distribution From 7f3a36444130caf363da4bc8f8f9ab302228dca0 Mon Sep 17 00:00:00 2001 From: rishi-kulkarni Date: Fri, 4 Jun 2021 17:41:54 -0700 Subject: [PATCH 10/12] error checking for tests --- hierarch/stats.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/hierarch/stats.py b/hierarch/stats.py index 8141640..dc83516 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -292,6 +292,15 @@ def linear_regression_test( else: skip = [] + # enforce bounds on bootstraps and permutations + if not isinstance(bootstraps, int) or bootstraps < 1: + raise TypeError("bootstraps must be an integer greater than 0") + if isinstance(permutations, str): + if permutations != "all": + raise TypeError("permutations must be 'all' or an integer greater than 0") + elif not isinstance(permutations, int) or permutations < 1: + raise TypeError("permutations must be 'all' or an integer greater than 0") + # initialize and fit the bootstrapper to the data bootstrapper = Bootstrapper(random_state=rng, kind=kind) bootstrapper.fit(data, skip=skip) @@ -349,6 +358,7 @@ def linear_regression_test( # initialize empty null distribution list null_distribution = [] + total = bootstraps * permutations # first set of permutations is on the original data # this helps to prevent getting a p-value of 0 @@ -384,6 +394,9 @@ def linear_regression_test( 0 ].size / len(null_distribution) + if pval == 0: + pval += 1 / (total) + if return_null is True: return pval, null_distribution @@ -525,6 +538,16 @@ def two_sample_test( else: skip = [] + # enforce bounds on bootstraps and permutations + if not isinstance(bootstraps, int) or bootstraps < 1: + raise TypeError("bootstraps must be an integer greater than 0") + if isinstance(permutations, str): + if permutations != "all": + raise TypeError("permutations must be 'all' or an integer greater than 0") + elif not isinstance(permutations, int) or permutations < 1: + raise TypeError("permutations must be 'all' or an integer greater than 0") + + # initialize and fit the bootstrapper to the data bootstrapper = Bootstrapper(random_state=rng, kind=kind) bootstrapper.fit(data, 
skip=skip)
@@ -591,7 +614,6 @@ def two_sample_test(
     # initialize empty null distribution list
     null_distribution = []
-
     total = bootstraps * permutations

     # first set of permutations is on the original data
@@ -1045,7 +1067,7 @@ def confidence_interval(

     >>> confidence_interval(data, treatment_col=0, interval=95,
     ...                     bootstraps=1000, permutations='all', random_state=1)
-    (1.314807450602109, 6.124658302189696)
+    (1.3158282800277328, 6.1236374727640746)

     The true difference is 2, which falls within the interval. We can examine
     the p-value for the corresponding dataset:
@@ -1061,7 +1083,7 @@ def confidence_interval(

     >>> confidence_interval(data, treatment_col=0, interval=99.9,
     ...                     bootstraps=1000, permutations='all', random_state=1)
-    (-0.7799704380838381, 8.219436190875793)
+    (-0.9084660904753425, 8.347931843267204)

     hierarch.stats.two_sample_test can be used to generate the null distribution by
     specifying compare = "means". This should return a very similar interval.
@@ -1069,7 +1091,7 @@ def confidence_interval(
     >>> confidence_interval(data, treatment_col=0, interval=95,
     ...                     compare='means', bootstraps=1000,
     ...                     permutations='all', random_state=1)
-    (1.3290984115978817, 6.110367341193923)
+    (1.3301109121128554, 6.109354840678951)

     Setting compare = "corr" will generate a confidence interval for the slope
     in a regression equation.
@@ -1083,7 +1105,7 @@ def confidence_interval(
     >>> confidence_interval(data, treatment_col=0, interval=95,
     ...                     compare='corr', bootstraps=100,
     ...                     permutations=1000, random_state=1)
-    (0.8317584051133191, 1.5597743222883649)
+    (0.8346525205544293, 1.5558502398469196)

     The dataset was specified to have a true slope of 1, which is within the interval.

From 179cc8de11568a8e50f6ad673d3f355a27086e95 Mon Sep 17 00:00:00 2001
From: Rishi Kulkarni
Date: Sat, 5 Jun 2021 02:17:15 -0700
Subject: [PATCH 11/12] improved behavior of confidence_interval by
 implementing an iterative search for the bounds of the confidence interval

---
 hierarch/stats.py | 92 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 27 deletions(-)

diff --git a/hierarch/stats.py b/hierarch/stats.py
index dc83516..d2068de 100644
--- a/hierarch/stats.py
+++ b/hierarch/stats.py
@@ -547,7 +547,6 @@ def two_sample_test(
     elif not isinstance(permutations, int) or permutations < 1:
         raise TypeError("permutations must be 'all' or an integer greater than 0")

-
     # initialize and fit the bootstrapper to the data
     bootstrapper = Bootstrapper(random_state=rng, kind=kind)
     bootstrapper.fit(data, skip=skip)
@@ -995,13 +994,16 @@ def confidence_interval(
     data_array,
     treatment_col,
     interval=95.0,
+    iterations=7,
+    tolerance=1,
     compare="corr",
     skip=None,
-    bootstraps=100,
-    permutations=1000,
+    bootstraps=50,
+    permutations=100,
     kind="bayesian",
     random_state=None,
 ):
+
     """Compute a confidence interval via test inversion.

     Confidence interval can be calculated by inverting the acceptance region of a hypothesis test.
     Using a test statistic that is approximately normally distributed under the null makes this
     much easier.
@@ -1019,6 +1021,12 @@ def confidence_interval(
         Indexing starts at 0.
     interval : float, optional
         Percentage value indicating the confidence interval's coverage, by default 95
+    iterations : int, optional
+        Maximum number of times the interval will be refined, by default 7
+    tolerance : float, optional
+        If the delta between the current bounds and the target interval is less than
+        this value, refinement will stop. Setting this number too close to the Monte Carlo
+        error of the underlying hypothesis test will have a negative effect on coverage.
compare : str, optional The test statistic to use to perform the hypothesis test, by default "corr" which automatically calls the studentized covariance test statistic. @@ -1039,7 +1047,7 @@ def confidence_interval( Returns ------- tuple of floats - Confidence interval with the specified coverage. + Confidence interval spanning the specified interval. Notes ----- @@ -1067,7 +1075,7 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=95, ... bootstraps=1000, permutations='all', random_state=1) - (1.3158282800277328, 6.1236374727640746) + (1.3142402418925743, 6.125225510899227) The true difference is 2, which falls within the interval. We can examine the p-value for the corresponding dataset: @@ -1083,7 +1091,7 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=99.9, ... bootstraps=1000, permutations='all', random_state=1) - (-0.9084660904753425, 8.347931843267204) + (-2.452406804001683, 9.89187255679354) hierarch.stats.two_sample_test can be used to generate the null distribution by specifying compare = "means". This should return a very similar interval. @@ -1091,7 +1099,7 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=95, ... compare='means', bootstraps=1000, ... permutations='all', random_state=1) - (1.3301109121128554, 6.109354840678951) + (1.330299483004123, 6.109166269787682) Setting compare = "corr" will generate a confidence interval for the slope in a regression equation. @@ -1105,24 +1113,32 @@ def confidence_interval( >>> confidence_interval(data, treatment_col=0, interval=95, ... compare='corr', bootstraps=100, ... permutations=1000, random_state=1) - (0.8346525205544293, 1.5558502398469196) + (0.7521535040639042, 1.6431732504510854) The dataset was specified to have a true slope of 1, which is within the interval. 
""" + tests = {"means": two_sample_test, "corr": linear_regression_test} + # turns the input array or dataframe into a float64 array if isinstance(data_array, (np.ndarray, pd.DataFrame)): data = preprocess_data(data_array) else: raise TypeError("Input data must be ndarray or DataFrame.") + try: + hypothesis_test = tests[compare] + except KeyError: + raise KeyError("No such comparison.") + # first compute the null distribution against the null that the effect size is equal to the MLE null_imposed_data = data.copy() levels_to_agg = data.shape[1] - treatment_col - 3 grouper = GroupbyMean() - test = grouper.fit_transform(data, iterations=levels_to_agg) + grouper.fit(data) + test = grouper.transform(data, iterations=levels_to_agg) start_slope = bivar_central_moment( test[:, treatment_col], test[:, -1] ) / bivar_central_moment(test[:, treatment_col], test[:, treatment_col]) @@ -1132,33 +1148,55 @@ def confidence_interval( null_imposed_data[:, -1] -= correction # compute the null distribution for the null hypothesis that the true effect size is equal to the MLE - if compare == "corr": - _, null = linear_regression_test( - null_imposed_data, - treatment_col, - skip=skip, - bootstraps=bootstraps, - permutations=permutations, - kind=kind, - return_null=True, - random_state=random_state, + _, null = hypothesis_test( + null_imposed_data, + treatment_col, + skip=skip, + bootstraps=bootstraps, + permutations=permutations, + kind=kind, + return_null=True, + random_state=random_state, + ) + + target_agg = grouper.transform(data, iterations=levels_to_agg) + + # the null distribution has a tendency to underestimate the "true" null's variance + # by recomputing it a few times, we get the correct coverage + for i in range(iterations - 1): + + null_agg = grouper.transform(null_imposed_data, iterations=levels_to_agg) + + lower, upper = _compute_interval( + np.array(null), null_agg, target_agg, treatment_col, interval ) - elif compare == "means": - _, null = two_sample_test( + + if np.abs(interval - (1 - _) * 100) < tolerance: + break + + null_imposed_data = data.copy() + null_imposed_data[:, -1] -= lower * null_imposed_data[:, treatment_col] + _, null = hypothesis_test( null_imposed_data, treatment_col, skip=skip, - bootstraps=bootstraps, - permutations=permutations, + bootstraps=50, + permutations=100, kind=kind, return_null=True, random_state=random_state, ) - else: - raise ValueError("No such comparison.") - null_agg = grouper.fit_transform(null_imposed_data, iterations=levels_to_agg) - target_agg = grouper.fit_transform(data, iterations=levels_to_agg) + _, null = hypothesis_test( + null_imposed_data, + treatment_col, + skip=skip, + bootstraps=bootstraps, + permutations=permutations, + kind=kind, + return_null=True, + random_state=random_state, + ) return _compute_interval( np.array(null), null_agg, target_agg, treatment_col, interval From 7380088604e3d4b89d1f2c303b4abd3bf5b7db90 Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Sat, 5 Jun 2021 16:34:58 -0700 Subject: [PATCH 12/12] updated documentation for 1.0.0 release --- README.md | 2 +- docs/_templates/custom-class-template.rst | 33 + docs/_templates/custom-module-template.rst | 66 ++ docs/conf.py | 10 +- .../hierarch.power.DataSimulator.rst | 25 + .../reference/_autosummary/hierarch.power.rst | 32 + .../hierarch.resampling.Bootstrapper.rst | 25 + .../hierarch.resampling.Permuter.rst | 25 + .../_autosummary/hierarch.resampling.rst | 33 + .../hierarch.stats.confidence_interval.rst | 6 + .../hierarch.stats.linear_regression_test.rst | 6 + 
.../hierarch.stats.multi_sample_test.rst | 6 + .../hierarch.stats.preprocess_data.rst | 6 + .../reference/_autosummary/hierarch.stats.rst | 37 + .../hierarch.stats.studentized_covariance.rst | 6 + .../hierarch.stats.two_sample_test.rst | 6 + .../hierarch.stats.welch_statistic.rst | 6 + docs/reference/index.rst | 13 +- docs/reference/power.rst | 5 - docs/reference/resampling.rst | 5 - docs/reference/stats.rst | 5 - docs/user/confidence.rst | 291 ++++++++ docs/user/hypothesis.rst | 428 ++++++++++++ docs/user/importing.rst | 9 + docs/user/power.rst | 196 ++++++ docs/user/usage.rst | 632 +----------------- hierarch/internal_functions.py | 44 -- hierarch/power.py | 7 - hierarch/resampling.py | 22 +- hierarch/stats.py | 45 +- setup.py | 2 +- 31 files changed, 1315 insertions(+), 719 deletions(-) create mode 100644 docs/_templates/custom-class-template.rst create mode 100644 docs/_templates/custom-module-template.rst create mode 100644 docs/reference/_autosummary/hierarch.power.DataSimulator.rst create mode 100644 docs/reference/_autosummary/hierarch.power.rst create mode 100644 docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst create mode 100644 docs/reference/_autosummary/hierarch.resampling.Permuter.rst create mode 100644 docs/reference/_autosummary/hierarch.resampling.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.confidence_interval.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.preprocess_data.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.two_sample_test.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.welch_statistic.rst delete mode 100644 docs/reference/power.rst delete mode 100644 docs/reference/resampling.rst delete mode 100644 docs/reference/stats.rst create mode 100644 docs/user/confidence.rst create mode 100644 docs/user/hypothesis.rst create mode 100644 docs/user/importing.rst create mode 100644 docs/user/power.rst diff --git a/README.md b/README.md index 45f44de..8765fe5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## A Hierarchical Resampling Package for Python -Version 0.4 +Version 1.0 hierarch is a package for hierarchical resampling (bootstrapping, permutation) of datasets in Python. Because for loops are ultimately intrinsic to cluster-aware resampling, hierarch uses Numba to accelerate many of its key functions. diff --git a/docs/_templates/custom-class-template.rst b/docs/_templates/custom-class-template.rst new file mode 100644 index 0000000..d248858 --- /dev/null +++ b/docs/_templates/custom-class-template.rst @@ -0,0 +1,33 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + {% block methods %} + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + {% for item in methods %} + {%- if not item.startswith('_') %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. 
autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} \ No newline at end of file diff --git a/docs/_templates/custom-module-template.rst b/docs/_templates/custom-module-template.rst new file mode 100644 index 0000000..dd90e32 --- /dev/null +++ b/docs/_templates/custom-module-template.rst @@ -0,0 +1,66 @@ +{{ fullname | escape | underline}} + +.. automodule:: {{ fullname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: Module attributes + + .. autosummary:: + :toctree: + {% for item in attributes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block functions %} + {% if functions %} + .. rubric:: {{ _('Functions') }} + + .. autosummary:: + :toctree: + :nosignatures: + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block classes %} + {% if classes %} + .. rubric:: {{ _('Classes') }} + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + :nosignatures: + {% for item in classes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block exceptions %} + {% if exceptions %} + .. rubric:: {{ _('Exceptions') }} + + .. autosummary:: + :toctree: + {% for item in exceptions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + +{% block modules %} +{% if modules %} +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: +{% for item in modules %} + {{ item }} +{%- endfor %} +{% endif %} +{% endblock %} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index a74bc6c..1f5c2b2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,15 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx_rtd_theme", "numpydoc", "sphinx.ext.autodoc"] +extensions = [ + "sphinx_rtd_theme", + "numpydoc", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", +] + +autosummary_generate = True # Turn on sphinx.ext.autosummary +numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/reference/_autosummary/hierarch.power.DataSimulator.rst b/docs/reference/_autosummary/hierarch.power.DataSimulator.rst new file mode 100644 index 0000000..973d923 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.power.DataSimulator.rst @@ -0,0 +1,25 @@ +hierarch.power.DataSimulator +============================ + +.. currentmodule:: hierarch.power + +.. autoclass:: DataSimulator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~DataSimulator.fit + ~DataSimulator.generate + + + + + + \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.power.rst b/docs/reference/_autosummary/hierarch.power.rst new file mode 100644 index 0000000..90a4b72 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.power.rst @@ -0,0 +1,32 @@ +hierarch.power +============== + +.. automodule:: hierarch.power + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + :nosignatures: + + DataSimulator + + + + + + + + + diff --git a/docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst b/docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst new file mode 100644 index 0000000..dbab531 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst @@ -0,0 +1,25 @@ +hierarch.resampling.Bootstrapper +================================ + +.. currentmodule:: hierarch.resampling + +.. autoclass:: Bootstrapper + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Bootstrapper.fit + ~Bootstrapper.transform + + + + + + \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.resampling.Permuter.rst b/docs/reference/_autosummary/hierarch.resampling.Permuter.rst new file mode 100644 index 0000000..045a098 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.resampling.Permuter.rst @@ -0,0 +1,25 @@ +hierarch.resampling.Permuter +============================ + +.. currentmodule:: hierarch.resampling + +.. autoclass:: Permuter + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Permuter.fit + ~Permuter.transform + + + + + + \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.resampling.rst b/docs/reference/_autosummary/hierarch.resampling.rst new file mode 100644 index 0000000..17cb063 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.resampling.rst @@ -0,0 +1,33 @@ +hierarch.resampling +=================== + +.. automodule:: hierarch.resampling + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + :nosignatures: + + Bootstrapper + Permuter + + + + + + + + + diff --git a/docs/reference/_autosummary/hierarch.stats.confidence_interval.rst b/docs/reference/_autosummary/hierarch.stats.confidence_interval.rst new file mode 100644 index 0000000..5be2f74 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.confidence_interval.rst @@ -0,0 +1,6 @@ +hierarch.stats.confidence\_interval +=================================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: confidence_interval \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst b/docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst new file mode 100644 index 0000000..f31efb5 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst @@ -0,0 +1,6 @@ +hierarch.stats.linear\_regression\_test +======================================= + +.. currentmodule:: hierarch.stats + +.. autofunction:: linear_regression_test \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst b/docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst new file mode 100644 index 0000000..0704b62 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst @@ -0,0 +1,6 @@ +hierarch.stats.multi\_sample\_test +================================== + +.. currentmodule:: hierarch.stats + +.. 
autofunction:: multi_sample_test \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.preprocess_data.rst b/docs/reference/_autosummary/hierarch.stats.preprocess_data.rst new file mode 100644 index 0000000..3f53556 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.preprocess_data.rst @@ -0,0 +1,6 @@ +hierarch.stats.preprocess\_data +=============================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: preprocess_data \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.rst b/docs/reference/_autosummary/hierarch.stats.rst new file mode 100644 index 0000000..f787b18 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.rst @@ -0,0 +1,37 @@ +hierarch.stats +============== + +.. automodule:: hierarch.stats + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + :nosignatures: + + confidence_interval + linear_regression_test + multi_sample_test + preprocess_data + studentized_covariance + two_sample_test + welch_statistic + + + + + + + + + + + + + diff --git a/docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst b/docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst new file mode 100644 index 0000000..6f3b081 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst @@ -0,0 +1,6 @@ +hierarch.stats.studentized\_covariance +====================================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: studentized_covariance \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.two_sample_test.rst b/docs/reference/_autosummary/hierarch.stats.two_sample_test.rst new file mode 100644 index 0000000..e97b3c9 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.two_sample_test.rst @@ -0,0 +1,6 @@ +hierarch.stats.two\_sample\_test +================================ + +.. currentmodule:: hierarch.stats + +.. autofunction:: two_sample_test \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.welch_statistic.rst b/docs/reference/_autosummary/hierarch.stats.welch_statistic.rst new file mode 100644 index 0000000..1c605d6 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.welch_statistic.rst @@ -0,0 +1,6 @@ +hierarch.stats.welch\_statistic +=============================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: welch_statistic \ No newline at end of file diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 2da9fa4..0e33e3d 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -1,8 +1,11 @@ Reference ========= -.. toctree:: - - stats.rst - power.rst - resampling.rst \ No newline at end of file +.. autosummary:: + :toctree: _autosummary + :template: custom-module-template.rst + :recursive: + + hierarch.stats + hierarch.power + hierarch.resampling \ No newline at end of file diff --git a/docs/reference/power.rst b/docs/reference/power.rst deleted file mode 100644 index 7b0d085..0000000 --- a/docs/reference/power.rst +++ /dev/null @@ -1,5 +0,0 @@ -hierarch.power -============== - -.. automodule:: hierarch.power - :members: \ No newline at end of file diff --git a/docs/reference/resampling.rst b/docs/reference/resampling.rst deleted file mode 100644 index c94f9db..0000000 --- a/docs/reference/resampling.rst +++ /dev/null @@ -1,5 +0,0 @@ -hierarch.resampling -=================== - -.. 
automodule:: hierarch.resampling - :members: \ No newline at end of file diff --git a/docs/reference/stats.rst b/docs/reference/stats.rst deleted file mode 100644 index 3afd251..0000000 --- a/docs/reference/stats.rst +++ /dev/null @@ -1,5 +0,0 @@ -hierarch.stats -============== - -.. automodule:: hierarch.stats - :members: \ No newline at end of file diff --git a/docs/user/confidence.rst b/docs/user/confidence.rst new file mode 100644 index 0000000..2f1b5ed --- /dev/null +++ b/docs/user/confidence.rst @@ -0,0 +1,291 @@ +Confidence Intervals +==================== + +Two-Sample Effect Sizes +----------------------- +Researchers can use hierarch to compute confidence intervals for effect sizes. +These intervals are computed via test inversion and, as a result, have the advantage +of essentially always achieving the nominal coverage. + +To put it another way, hierarch computes a 95% confidence interval by performing a +permutation test against the null hypothesis that true effect size is exactly equal +to the observed effect size. Then, the bounds of the acceptance region at alpha = 0.05 +are the bounds of the confidence interval. Let's consider the dataset from earlier. :: + + import pandas as pd + import numpy as np + import hierarch as ha + + data = pd.read_clipboard() + + print(data) + ++------------+------+-------------+----------+ +| Condition | Well | Measurement | Values | ++============+======+=============+==========+ +| None | 1 | 1 | 5.202258 | ++------------+------+-------------+----------+ +| None | 1 | 2 | 5.136128 | ++------------+------+-------------+----------+ +| None | 1 | 3 | 5.231401 | ++------------+------+-------------+----------+ +| None | 2 | 1 | 5.336643 | ++------------+------+-------------+----------+ +| None | 2 | 2 | 5.287973 | ++------------+------+-------------+----------+ +| None | 2 | 3 | 5.375359 | ++------------+------+-------------+----------+ +| None | 3 | 1 | 5.350692 | ++------------+------+-------------+----------+ +| None | 3 | 2 | 5.465206 | ++------------+------+-------------+----------+ +| None | 3 | 3 | 5.422602 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 1 | 5.695427 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 2 | 5.668457 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 3 | 5.752592 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 1 | 5.583562 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 2 | 5.647895 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 3 | 5.618315 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 1 | 5.642983 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 2 | 5.47072 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 3 | 5.686654 | ++------------+------+-------------+----------+ + +You can use the confidence_interval function in hierarch.stats to compute the +confidence interval. :: + + from hierarch.stats import confidence_interval + + ha.stats.confidence_interval( + data, + treatment_col=0, + compare='means', + interval=95, + bootstraps=500, + permutations="all", + random_state=1, + ) + + (-0.5373088054909549, -0.12010079984237881) + +This interval does not cross 0, so it is consistent with significance at the alpha = 0.05 +level. + +Because ha.stats.confidence_interval is based on a hypothesis test, it requires +the same input parameters as two_sample_test or linear_regression_test. 
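+In fact, you can check the correspondence between the two directly: a 95%
+interval should exclude zero exactly when the matching hypothesis test rejects
+at alpha = 0.05, up to Monte Carlo error. A quick sanity-check sketch, reusing
+the dataset and random seed from above::
+
+    lower, upper = ha.stats.confidence_interval(
+        data, treatment_col=0, compare='means', interval=95,
+        bootstraps=500, permutations='all', random_state=1)
+
+    p_val = ha.stats.two_sample_test(data, treatment_col=0,
+                                     bootstraps=500, permutations='all',
+                                     random_state=1)
+
+    # the interval above was (-0.537, -0.120) and the p-value for this
+    # dataset is 0.0406, so both agree that 0 lies outside the 95% region
+    print((lower > 0 or upper < 0) == (p_val < 0.05))
+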
However,
+the new **interval** parameter sets the coverage (and therefore the width) of the interval. ::
+
+    ha.stats.confidence_interval(
+        data,
+        treatment_col=0,
+        compare='means',
+        interval=99,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (-0.9086402840632387, 0.25123067872990457)
+
+    ha.stats.confidence_interval(
+        data,
+        treatment_col=0,
+        compare='means',
+        interval=68,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (-0.40676489798778065, -0.25064470734555316)
+
+The 99% confidence interval does indeed cross 0, so we could not reject the null hypothesis
+at the alpha = 0.01 level.
+
+To build your confidence, you can perform a simulation analysis to ensure
+the confidence interval achieves the nominal coverage. You can set up a
+DataSimulator using the functions in hierarch.power as follows. ::
+
+    from hierarch.power import DataSimulator
+    import scipy.stats as stats
+
+    parameters = [[0, 1.525], #difference in means due to treatment
+                  [stats.norm, 0, 1], #column 1 distribution - stats.norm(loc=0, scale=1)
+                  [stats.lognorm, 0.75]] #column 2 distribution - stats.lognorm(s = 0.75)
+
+    sim = DataSimulator(parameters, random_state=1)
+
+    hierarchy = [2, #treatments
+                 3, #samples
+                 3] #within-sample measurements
+
+    sim.fit(hierarchy)
+
+The "true" difference between the two samples is 1.525 according to the simulation
+parameters, so 95% of 95% confidence intervals that hierarch calculates should contain
+this value. You can test this with the following code. ::
+
+    true_difference = 1.525
+    coverage = 0
+    loops = 1000
+
+    for i in range(loops):
+        data = sim.generate()
+        lower, upper = ha.stats.confidence_interval(data, 0, interval=95, bootstraps=100, permutations='all')
+        if lower <= true_difference <= upper:
+            coverage += 1
+
+    print("Coverage:", coverage/loops)
+
+    Coverage: 0.946
+
+This is within Monte Carlo error (+/- 0.7%) of the nominal 95% coverage, so we can feel
+confident in this method of interval computation.
+
+Regression Coefficient Confidence Intervals
+-------------------------------------------
+The confidence_interval function can also be used on many-sample datasets that represent
+a hypothesized linear relationship. Let's generate a dataset with a "true" slope of
+2/3.
::
+
+    paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]]
+    hierarchy = [4, 2, 3]
+    datagen = DataSimulator(paramlist, random_state=2)
+    datagen.fit(hierarchy)
+    data = datagen.generate()
+    data
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3 |
+===+===+===+==========+
+| 1 | 1 | 1 | 0.470264 |
++---+---+---+----------+
+| 1 | 1 | 2 | -0.36477 |
++---+---+---+----------+
+| 1 | 1 | 3 | 1.166621 |
++---+---+---+----------+
+| 1 | 2 | 1 | -0.8333 |
++---+---+---+----------+
+| 1 | 2 | 2 | -0.85157 |
++---+---+---+----------+
+| 1 | 2 | 3 | -1.3149 |
++---+---+---+----------+
+| 2 | 1 | 1 | 0.708561 |
++---+---+---+----------+
+| 2 | 1 | 2 | 0.154405 |
++---+---+---+----------+
+| 2 | 1 | 3 | 0.798892 |
++---+---+---+----------+
+| 2 | 2 | 1 | -2.38199 |
++---+---+---+----------+
+| 2 | 2 | 2 | -1.64797 |
++---+---+---+----------+
+| 2 | 2 | 3 | -2.66707 |
++---+---+---+----------+
+| 3 | 1 | 1 | 3.974506 |
++---+---+---+----------+
+| 3 | 1 | 2 | 3.321076 |
++---+---+---+----------+
+| 3 | 1 | 3 | 3.463612 |
++---+---+---+----------+
+| 3 | 2 | 1 | 2.888003 |
++---+---+---+----------+
+| 3 | 2 | 2 | 1.466742 |
++---+---+---+----------+
+| 3 | 2 | 3 | 3.26068 |
++---+---+---+----------+
+| 4 | 1 | 1 | 3.73128 |
++---+---+---+----------+
+| 4 | 1 | 2 | 0.036135 |
++---+---+---+----------+
+| 4 | 1 | 3 | -0.05483 |
++---+---+---+----------+
+| 4 | 2 | 1 | 1.268975 |
++---+---+---+----------+
+| 4 | 2 | 2 | 3.615265 |
++---+---+---+----------+
+| 4 | 2 | 3 | 2.902522 |
++---+---+---+----------+
+
+You can compute a confidence interval in the same manner as above. This time, you should set the
+**compare** keyword argument to "corr" for clarity, but "corr" is also the default setting
+for **compare** when computing a confidence interval. ::
+
+    ha.stats.confidence_interval(
+        data,
+        treatment_col=0,
+        compare='corr',
+        interval=95,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (0.3410887712843298, 1.7540918236455125)
+
+This confidence interval corresponds to the slope in a linear model. You can check this by
+computing the slope coefficient via Ordinary Least Squares. ::
+
+    import scipy.stats as stats
+    from hierarch.internal_functions import GroupbyMean
+
+    grouper = GroupbyMean()
+    test = grouper.fit_transform(data)
+    stats.linregress(test[:,0], test[:,-1])
+
+    LinregressResult(slope=1.0515132531203024, intercept=-1.6658194480556106,
+                     rvalue=0.6444075548383587, pvalue=0.08456152533094284,
+                     stderr=0.5094006523081002, intercept_stderr=1.3950511403849626)
+
+The slope, 1.0515, is indeed in the center of our computed interval (within Monte Carlo error).
+
+Again, it is worthwhile to check that confidence_interval is performing adequately. You can
+set up a simulation as above to check the coverage of the 95% confidence interval. ::
+
+    true_difference = 2/3
+    coverage = 0
+    loops = 1000
+
+    for i in range(loops):
+        data = datagen.generate()
+        lower, upper = ha.stats.confidence_interval(data, 0, interval=95, bootstraps=100, permutations='all')
+        if lower <= true_difference <= upper:
+            coverage += 1
+
+    print(coverage/loops)
+
+    0.956
+
+This is within Monte Carlo error (+/- 0.7%) of the nominal 95% and therefore
+acceptable. You can check the coverage of other intervals by changing the **interval** keyword
+argument, though be aware that Monte Carlo error depends on the probability of the event of
+interest.
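+
+Those error bars are just binomial standard errors, which you can compute
+directly. The helper below is an illustration, not part of hierarch. ::
+
+    import numpy as np
+
+    def coverage_standard_error(p, loops):
+        # standard error of a coverage probability estimated from
+        # `loops` independent simulated datasets
+        return np.sqrt(p * (1 - p) / loops)
+
+    coverage_standard_error(0.95, 1000)  # ~0.0069, i.e. +/- 0.7%
+
+For example, the coverage of a 99% interval can be checked the same way.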
:: + + true_difference = 2/3 + coverage = 0 + loops = 1000 + + for i in range(loops): + data = datagen.generate() + lower, upper = ha.stats.confidence_interval(data, 0, interval=99, bootstraps=100, permutations='all') + if lower <= true_difference <= upper: + coverage += 1 + + print(coverage/loops) + + 0.99 + +Using the confidence_interval function, researchers can rapidly calculate confidence intervals for +effect sizes that maintain nominal coverage without worrying about distributional assumptions. \ No newline at end of file diff --git a/docs/user/hypothesis.rst b/docs/user/hypothesis.rst new file mode 100644 index 0000000..0357108 --- /dev/null +++ b/docs/user/hypothesis.rst @@ -0,0 +1,428 @@ +Hypothesis Testing +================== + +Two-Sample Hypothesis Tests +--------------------------- +Performing a hierarchical permutation test for difference of means is simple. +Consider an imaging experiment with two treatment groups, three coverslips in +each group, and three images (fields of view) within each coverslip. If you have +the data stored in an Excel file, you can use pandas to either directly read the +file or copy it in from the clipboard, as below. :: + + import pandas as pd + import numpy as np + import hierarch as ha + + data = pd.read_clipboard() + + print(data) + ++------------+------+-------------+----------+ +| Condition | Well | Measurement | Values | ++============+======+=============+==========+ +| None | 1 | 1 | 5.202258 | ++------------+------+-------------+----------+ +| None | 1 | 2 | 5.136128 | ++------------+------+-------------+----------+ +| None | 1 | 3 | 5.231401 | ++------------+------+-------------+----------+ +| None | 2 | 1 | 5.336643 | ++------------+------+-------------+----------+ +| None | 2 | 2 | 5.287973 | ++------------+------+-------------+----------+ +| None | 2 | 3 | 5.375359 | ++------------+------+-------------+----------+ +| None | 3 | 1 | 5.350692 | ++------------+------+-------------+----------+ +| None | 3 | 2 | 5.465206 | ++------------+------+-------------+----------+ +| None | 3 | 3 | 5.422602 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 1 | 5.695427 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 2 | 5.668457 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 3 | 5.752592 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 1 | 5.583562 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 2 | 5.647895 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 3 | 5.618315 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 1 | 5.642983 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 2 | 5.47072 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 3 | 5.686654 | ++------------+------+-------------+----------+ + +It is important to note that the ordering of the columns from left to right +reflects the experimental design scheme. This is necessary for hierarch +to infer the clustering within your dataset. In case your data is not +ordered properly, pandas makes it easy enough to index your data in the +correct order. :: + + + columns = ['Condition', 'Coverslip', 'Field of View', 'Mean Fluorescence'] + + data[columns] + +Next, you can call two_sample_test from hierarch's stats module, which will +calculate the p-value. You have to specify what column is the treatment +column - in this case, "Condition." Indexing starts at 0, so you input +treatment_col=0. 
In this case, there are only 6C3 = 20 ways to permute the
+treatment labels, so you should specify that "all" permutations be used. ::
+
+    p_val = ha.stats.two_sample_test(data, treatment_col=0,
+                                     bootstraps=500, permutations='all',
+                                     random_state=1)
+
+    print('p-value =', p_val)
+
+    #out: p-value = 0.0406
+
+There are a number of parameters that can be used to modify two_sample_test. ::
+
+    ha.stats.two_sample_test(data_array,
+                             treatment_col,
+                             compare="means",
+                             skip=None,
+                             bootstraps=100,
+                             permutations=1000,
+                             kind='weights',
+                             return_null=False,
+                             random_state=None)
+
+**compare**: The default "means" assumes that you are testing for a difference in means, so it uses the Welch t-statistic. For flexibility, two_sample_test can take a test statistic function as an argument.
+
+**skip**: indicates the indices of columns that should be skipped in the bootstrapping procedure.
+
+A simple rule of thumb is that columns should be resampled with replacement only if they were originally sampled with replacement (or effectively sampled with replacement). For example, consider an imaging experiment in which you image several fields of view in a well, each of which contains several cells. While you can consider the cells sampled with replacement from the well (there are so many more cells in the population that this assumption is fair), the cells are not sampled with replacement from the field of view (as you are measuring ALL cells in each field of view). The 10% condition is a reasonable rule of thumb here - if your sample represents less than 10% of the population, you can treat it as sampled with replacement.
+
+**bootstraps**: indicates the number of bootstrapped samples to be drawn from data_array.
+
+Generally, as the number of possible permutations of your data increases, the number of bootstraps should decrease. If the goal of bootstrapping is to include the standard error in the biological samples in the null distribution, 50-100 bootstraps are sufficient given a large enough set of possible permutations.
+
+**permutations**: indicates the number of permutations of the treatment label PER bootstrapped sample.
+
+Inputting "all" will enumerate all of the possible permutations and iterate through them one by one. This is done using a generator, so the permutations are not stored in memory, but iterating through them is still excessively time consuming for large datasets.
+
+**kind**: "weights" or "indexes" or "bayesian" specifies the bootstrapping algorithm. "weights" returns an array the same size as the input array, but with the data reweighted according to the Efron bootstrap procedure. "indexes" uses the same algorithm, but returns a reindexed array. "bayesian" also returns a reweighted array, but the weights are allowed to be any real number rather than just integers.
+
+**return_null**: setting this to True will also return the empirical null distribution as a list.
+
+**random_state**: allows you to specify a random seed for reproducibility.
+
+Many-Sample Hypothesis Tests - Several Hypotheses
+-------------------------------------------------
+Researchers may want to perform a series of hypothesis tests to determine
+whether there are significant differences in some parameter between three
+or more unrelated groups. This is similar to the goal of one-way ANOVA. To
+this end, hierarch includes the multi_sample_test function, which performs
+multiple two-sample tests in the vein of post-hoc tests after ANOVA.
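+
+Conceptually, each of those comparisons amounts to running two_sample_test on
+the rows belonging to the pair of conditions involved. The sketch below is
+purely illustrative (multi_sample_test does the subsetting for you), and the
+helper name is arbitrary. ::
+
+    import numpy as np
+    import hierarch as ha
+    from itertools import combinations
+
+    def pairwise_tests(data, treatment_col=0, **kwargs):
+        # run a two-sample test for every pair of treatment labels
+        labels = np.unique(data[:, treatment_col])
+        results = []
+        for a, b in combinations(labels, 2):
+            subset = data[np.isin(data[:, treatment_col], (a, b))]
+            results.append((a, b, ha.stats.two_sample_test(subset, treatment_col, **kwargs)))
+        return results
+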
+The researcher can also choose to make a multiple-comparison correction in the
+form of the Benjamini-Hochberg procedure, which controls the False Discovery
+Rate.
+
+Consider an experiment with four treatment groups. We can simulate a dataset
+as follows. ::
+
+    from hierarch.power import DataSimulator
+    import scipy.stats as stats
+
+    paramlist = [[0, 1, 4, 0], [stats.norm], [stats.norm]]
+    hierarchy = [4, 3, 3]
+
+    datagen = DataSimulator(paramlist, random_state=1)
+    datagen.fit(hierarchy)
+    data = datagen.generate()
+    data
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3 |
+===+===+===+==========+
+| 1 | 1 | 1 | -0.39087 |
++---+---+---+----------+
+| 1 | 1 | 2 | 0.182674 |
++---+---+---+----------+
+| 1 | 1 | 3 | -0.13654 |
++---+---+---+----------+
+| 1 | 2 | 1 | 1.420464 |
++---+---+---+----------+
+| 1 | 2 | 2 | 0.86134 |
++---+---+---+----------+
+| 1 | 2 | 3 | 0.529161 |
++---+---+---+----------+
+| 1 | 3 | 1 | -0.45147 |
++---+---+---+----------+
+| 1 | 3 | 2 | 0.073245 |
++---+---+---+----------+
+| 1 | 3 | 3 | 0.338579 |
++---+---+---+----------+
+| 2 | 1 | 1 | -0.57876 |
++---+---+---+----------+
+| 2 | 1 | 2 | 0.990907 |
++---+---+---+----------+
+| 2 | 1 | 3 | 0.703567 |
++---+---+---+----------+
+| 2 | 2 | 1 | -0.80581 |
++---+---+---+----------+
+| 2 | 2 | 2 | 0.016343 |
++---+---+---+----------+
+| 2 | 2 | 3 | 1.730584 |
++---+---+---+----------+
+| 2 | 3 | 1 | 1.024184 |
++---+---+---+----------+
+| 2 | 3 | 2 | 1.660018 |
++---+---+---+----------+
+| 2 | 3 | 3 | 1.663697 |
++---+---+---+----------+
+| 3 | 1 | 1 | 5.580886 |
++---+---+---+----------+
+| 3 | 1 | 2 | 2.351026 |
++---+---+---+----------+
+| 3 | 1 | 3 | 3.085442 |
++---+---+---+----------+
+| 3 | 2 | 1 | 6.62389 |
++---+---+---+----------+
+| 3 | 2 | 2 | 5.227821 |
++---+---+---+----------+
+| 3 | 2 | 3 | 5.244181 |
++---+---+---+----------+
+| 3 | 3 | 1 | 3.850566 |
++---+---+---+----------+
+| 3 | 3 | 2 | 2.716497 |
++---+---+---+----------+
+| 3 | 3 | 3 | 4.532037 |
++---+---+---+----------+
+| 4 | 1 | 1 | 0.403147 |
++---+---+---+----------+
+| 4 | 1 | 2 | -0.93322 |
++---+---+---+----------+
+| 4 | 1 | 3 | -0.38909 |
++---+---+---+----------+
+| 4 | 2 | 1 | -0.04362 |
++---+---+---+----------+
+| 4 | 2 | 2 | -0.91633 |
++---+---+---+----------+
+| 4 | 2 | 3 | -0.06985 |
++---+---+---+----------+
+| 4 | 3 | 1 | 0.642196 |
++---+---+---+----------+
+| 4 | 3 | 2 | 0.582299 |
++---+---+---+----------+
+| 4 | 3 | 3 | 0.040421 |
++---+---+---+----------+
+
+This dataset has been generated such that treatments 1 and 4 have the same mean, while
+treatment 2 represents a slight difference and treatment 3 represents a large difference.
+There are six total comparisons that can be made, which can be performed automatically
+using multi_sample_test as follows. ::
+
+    multi_sample_test(data, treatment_col=0, hypotheses="all",
+                      correction=None, bootstraps=1000,
+                      permutations="all", random_state=111)
+
+    array([[2.0, 3.0, 0.0355],
+           [1.0, 3.0, 0.0394],
+           [3.0, 4.0, 0.0407],
+           [2.0, 4.0, 0.1477],
+           [1.0, 2.0, 0.4022],
+           [1.0, 4.0, 0.4559]], dtype=object)
+
+The first two columns indicate the conditions being compared, while the last column indicates
+the uncorrected p-value. Because several hypotheses are being tested, it is advisable
+to make a multiple comparisons correction. Currently, hierarch can automatically perform the
+Benjamini-Hochberg procedure, which controls the False Discovery Rate.
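+
+The adjustment itself is simple enough to sketch. The function below is an
+illustration of the Benjamini-Hochberg q-value computation, not hierarch's
+internal implementation. ::
+
+    import numpy as np
+
+    def bh_qvalues(pvalues):
+        # q = p * m / rank for ascending p-values, then enforce
+        # monotonicity from the largest p-value downward
+        pvalues = np.asarray(pvalues, dtype=float)
+        m = pvalues.size
+        order = np.argsort(pvalues)
+        q = pvalues[order] * m / np.arange(1, m + 1)
+        q = np.minimum.accumulate(q[::-1])[::-1]
+        out = np.empty(m)
+        out[order] = q
+        return out
+
+Applied to the six p-values above, this reproduces the q-values shown in the
+next example.
+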
By indicating the "fdr" +correction, the output array has an additional column showing the q-values, or adjusted p-values. :: + + multi_sample_test(data, treatment_col=0, hypotheses="all", + correction='fdr', bootstraps=1000, + permutations="all", random_state=111) + array([[2.0, 3.0, 0.0355, 0.0814], + [1.0, 3.0, 0.0394, 0.0814], + [3.0, 4.0, 0.0407, 0.0814], + [2.0, 4.0, 0.1477, 0.22155], + [1.0, 2.0, 0.4022, 0.4559], + [1.0, 4.0, 0.4559, 0.4559]], dtype=object) + +Testing more hypotheses necessarily lowers the p-value required to call a result significant. However, +we are not always interested in performing every comparison - perhaps condition 2 is a control that all +other conditions are meant to be compared to. The comparisons of interest can be specified using a list. :: + + tests = [[2.0, 1.0], [2.0, 3.0], [2.0, 4.0]] + multi_sample_test(data, treatment_col=0, hypotheses=tests, + correction='fdr', bootstraps=1000, + permutations="all", random_state=222) + array([[2.0, 3.0, 0.036, 0.108], + [2.0, 4.0, 0.1506, 0.2259], + [2.0, 1.0, 0.4036, 0.4036]], dtype=object) + +Many-Sample Hypothesis Tests - Single Hypothesis +------------------------------------------------ +One-way ANOVA and similar tests (like multi_sample_test) are inappropriate when +you have several samples meant to test a single hypothesis. For example, perhaps +you have several samples with different concentrations of the same drug treatment. +In this case, hierarch provides linear_regression_test, which is equivalent to +performing a hypothesis test on a linear model against the null hypothesis that +the slope coefficient is equal to 0. + +This hypothesis test uses a studentized covariance test statistic - essentially, +the sample covariance divided by the standard error of the sample covariance. This +test statistic is approximately normally distributed and in the two-sample case, +this test gives the same result as two_sample_test. + +First, consider a dataset with two treatment groups, four samples each, and three +measurements on each sample. 
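+
+Before generating it, it may help to see roughly what studentized covariance
+computes. The sketch below uses a normal-theory estimate of the covariance's
+standard error; hierarch's studentized_covariance additionally applies
+higher-moment terms and small-sample bias corrections, so treat this as an
+approximation. ::
+
+    import numpy as np
+
+    def studentized_covariance_sketch(x, y):
+        # sample covariance divided by an estimate of its standard error
+        n = len(x)
+        cov = np.cov(x, y, ddof=1)[0, 1]
+        se = np.sqrt((np.var(x, ddof=1) * np.var(y, ddof=1) + cov ** 2) / (n - 1))
+        return cov / se
+
+With that in mind, the dataset can be simulated as follows.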
:: + + from hierarch.power import DataSimulator + import scipy.stats as stats + + paramlist = [[0, 2], [stats.norm], [stats.norm]] + hierarchy = [2, 4, 3] + + datagen = DataSimulator(paramlist, random_state=2) + datagen.fit(hierarchy) + data = datagen.generate() + data + ++---+---+---+----------+ +| 0 | 1 | 2 | 3 | ++===+===+===+==========+ +| 1 | 1 | 1 | 0.470264 | ++---+---+---+----------+ +| 1 | 1 | 2 | -0.36477 | ++---+---+---+----------+ +| 1 | 1 | 3 | 1.166621 | ++---+---+---+----------+ +| 1 | 2 | 1 | -0.8333 | ++---+---+---+----------+ +| 1 | 2 | 2 | -0.85157 | ++---+---+---+----------+ +| 1 | 2 | 3 | -1.3149 | ++---+---+---+----------+ +| 1 | 3 | 1 | 0.041895 | ++---+---+---+----------+ +| 1 | 3 | 2 | -0.51226 | ++---+---+---+----------+ +| 1 | 3 | 3 | 0.132225 | ++---+---+---+----------+ +| 1 | 4 | 1 | -3.04865 | ++---+---+---+----------+ +| 1 | 4 | 2 | -2.31464 | ++---+---+---+----------+ +| 1 | 4 | 3 | -3.33374 | ++---+---+---+----------+ +| 2 | 1 | 1 | 4.641172 | ++---+---+---+----------+ +| 2 | 1 | 2 | 3.987742 | ++---+---+---+----------+ +| 2 | 1 | 3 | 4.130278 | ++---+---+---+----------+ +| 2 | 2 | 1 | 3.55467 | ++---+---+---+----------+ +| 2 | 2 | 2 | 2.133408 | ++---+---+---+----------+ +| 2 | 2 | 3 | 3.927347 | ++---+---+---+----------+ +| 2 | 3 | 1 | 3.73128 | ++---+---+---+----------+ +| 2 | 3 | 2 | 0.036135 | ++---+---+---+----------+ +| 2 | 3 | 3 | -0.05483 | ++---+---+---+----------+ +| 2 | 4 | 1 | 1.268975 | ++---+---+---+----------+ +| 2 | 4 | 2 | 3.615265 | ++---+---+---+----------+ +| 2 | 4 | 3 | 2.902522 | ++---+---+---+----------+ + +Performing linear_regression_test and two_sample_test on this dataset should +give very similar p-values. :: + + linear_regression_test(data, treatment_col=0, + bootstraps=1000, permutations='all', + random_state=1) + 0.013714285714285714 + + two_sample_test(data, treatment_col=0, + bootstraps=1000, permutations='all', + random_state=1) + 0.013714285714285714 + +However, unlike two_sample_test, this test can handle any number of conditions. Consider instead +a dataset with four treatment conditions that have a linear relationship. 
:: + + paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]] + hierarchy = [4, 2, 3] + datagen = DataSimulator(paramlist, random_state=2) + datagen.fit(hierarchy) + data = datagen.generate() + data + ++---+---+---+----------+ +| 0 | 1 | 2 | 3 | ++===+===+===+==========+ +| 1 | 1 | 1 | 0.470264 | ++---+---+---+----------+ +| 1 | 1 | 2 | -0.36477 | ++---+---+---+----------+ +| 1 | 1 | 3 | 1.166621 | ++---+---+---+----------+ +| 1 | 2 | 1 | -0.8333 | ++---+---+---+----------+ +| 1 | 2 | 2 | -0.85157 | ++---+---+---+----------+ +| 1 | 2 | 3 | -1.3149 | ++---+---+---+----------+ +| 2 | 1 | 1 | 0.708561 | ++---+---+---+----------+ +| 2 | 1 | 2 | 0.154405 | ++---+---+---+----------+ +| 2 | 1 | 3 | 0.798892 | ++---+---+---+----------+ +| 2 | 2 | 1 | -2.38199 | ++---+---+---+----------+ +| 2 | 2 | 2 | -1.64797 | ++---+---+---+----------+ +| 2 | 2 | 3 | -2.66707 | ++---+---+---+----------+ +| 3 | 1 | 1 | 3.974506 | ++---+---+---+----------+ +| 3 | 1 | 2 | 3.321076 | ++---+---+---+----------+ +| 3 | 1 | 3 | 3.463612 | ++---+---+---+----------+ +| 3 | 2 | 1 | 2.888003 | ++---+---+---+----------+ +| 3 | 2 | 2 | 1.466742 | ++---+---+---+----------+ +| 3 | 2 | 3 | 3.26068 | ++---+---+---+----------+ +| 4 | 1 | 1 | 3.73128 | ++---+---+---+----------+ +| 4 | 1 | 2 | 0.036135 | ++---+---+---+----------+ +| 4 | 1 | 3 | -0.05483 | ++---+---+---+----------+ +| 4 | 2 | 1 | 1.268975 | ++---+---+---+----------+ +| 4 | 2 | 2 | 3.615265 | ++---+---+---+----------+ +| 4 | 2 | 3 | 2.902522 | ++---+---+---+----------+ + +For this dataset, there are 8! / (2!^4) = 2,520 total permutations. We will choose a random +subset of them to compute the p-value. :: + + linear_regression_test(data, treatment_col=0, + bootstraps=100, permutations=1000, + random_state=1) + 0.00767 + +Between these three tests, researchers can address a large variety of experimental designs. Unfortunately, +interaction effects are outside the scope of permutation tests - it is not possible to construct an +exact test for interaction effects in general. However, an asymptotic test for interaction effects +may be implemented in the future. diff --git a/docs/user/importing.rst b/docs/user/importing.rst new file mode 100644 index 0000000..2e852c6 --- /dev/null +++ b/docs/user/importing.rst @@ -0,0 +1,9 @@ +Importing Data +============== + +Hierarch is compatible with pandas DataFrames and numpy arrays. +Pandas is capable of conveniently importing data from a wide variety +of formats, including Excel files. :: + + import pandas as pd + data = pd.read_excel(filepath) \ No newline at end of file diff --git a/docs/user/power.rst b/docs/user/power.rst new file mode 100644 index 0000000..fc0ad68 --- /dev/null +++ b/docs/user/power.rst @@ -0,0 +1,196 @@ +Power Analysis +============== + +Researchers can also use hierarch to determine the appropriate sample size +for a future experiment. hierarch.power provides a class, DataSimulator, +to assist in power analyses. DataSimulator is initialized with a list +specifying the probability distributions generating the data and an optional +random_state for reproducibility. + +In this case, consider an experiment similar to the one above - two treatment +conditions, but the sample size at each level of hierarchy is yet to be +determined. First, you must posit a data-generating process for the analysis. + +Suppose you assume that the column 1 values are normally distributed with +mean 0 and variance 1. 
From past experience, you believe that the column 2
+values follow a right-tailed distribution, so you choose to model it as a
+lognormal distribution with a scale parameter of 0.75. Finally, you decide
+that you want to achieve 80% power for a mean difference equal to one standard
+deviation. You calculate that the summed standard deviation of the two
+distributions you specified is 1.525 and input that as a parameter, as well. ::
+
+    from hierarch.power import DataSimulator
+    import scipy.stats as stats
+
+    parameters = [[0, 1.525], #difference in means due to treatment
+                  [stats.norm, 0, 1], #column 1 distribution - stats.norm(loc=0, scale=1)
+                  [stats.lognorm, 0.75]] #column 2 distribution - stats.lognorm(s = 0.75)
+
+    sim = DataSimulator(parameters, random_state=1)
+
+Next, you choose an experimental design to simulate. Perhaps, like above, you
+decide to start with three samples per treatment condition and three measurements
+within each sample. Calling the .fit() function will ready the DataSimulator to
+produce randomly-generated data according to this experimental scheme. ::
+
+    hierarchy = [2, #treatments
+                 3, #samples
+                 3] #within-sample measurements
+
+    sim.fit(hierarchy)
+
+By calling the .generate() function, DataSimulator uses the prespecified
+parameters to generate a simulated dataset. ::
+
+    print(sim.generate())
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3 |
+===+===+===+==========+
+| 1 | 1 | 1 | 1.014087 |
++---+---+---+----------+
+| 1 | 1 | 2 | 1.891843 |
++---+---+---+----------+
+| 1 | 1 | 3 | 1.660049 |
++---+---+---+----------+
+| 1 | 2 | 1 | 2.068442 |
++---+---+---+----------+
+| 1 | 2 | 2 | 1.843164 |
++---+---+---+----------+
+| 1 | 2 | 3 | 2.328488 |
++---+---+---+----------+
+| 1 | 3 | 1 | 0.906038 |
++---+---+---+----------+
+| 1 | 3 | 2 | 1.215424 |
++---+---+---+----------+
+| 1 | 3 | 3 | 1.027005 |
++---+---+---+----------+
+| 2 | 1 | 1 | 1.788798 |
++---+---+---+----------+
+| 2 | 1 | 2 | 1.252083 |
++---+---+---+----------+
+| 2 | 1 | 3 | 1.024889 |
++---+---+---+----------+
+| 2 | 2 | 1 | 2.986665 |
++---+---+---+----------+
+| 2 | 2 | 2 | 3.254925 |
++---+---+---+----------+
+| 2 | 2 | 3 | 3.436481 |
++---+---+---+----------+
+| 2 | 3 | 1 | 2.784636 |
++---+---+---+----------+
+| 2 | 3 | 2 | 4.610765 |
++---+---+---+----------+
+| 2 | 3 | 3 | 4.099078 |
++---+---+---+----------+
+
+You can use this to set up a simple power analysis. The following
+code performs a hierarchical permutation test with 50,000 total
+permutations (though this is overkill in the 2, 3, 3 case) on each
+of 100 simulated datasets and prints the fraction of them that return
+a significant result, assuming a p-value cutoff of 0.05. ::
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.29
+
+The targeted power is 0.8, so you can fit the DataSimulator with a larger sample
+size.
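+
+If you plan to compare several candidate designs, it can help to wrap this
+loop in a small helper. The function below is just a convenience sketch; the
+name and defaults are arbitrary. ::
+
+    import numpy as np
+    import hierarch as ha
+
+    def simulated_power(sim, hierarchy, loops=100, alpha=0.05):
+        # estimate power as the fraction of simulated datasets
+        # that reject the null at the chosen alpha
+        sim.fit(hierarchy)
+        pvalues = []
+        for i in range(loops):
+            data = sim.generate()
+            pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500,
+                                                    permutations=100))
+        return np.less(pvalues, alpha).sum() / loops
+
+You can run the following code block with different sample sizes until
+you determine the column 1 sample size that achieves at least 80% power.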
::
+
+    sim.fit([2,10,3])
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.81
+
+You note, however, that increasing the number of column 1 samples is much
+more laborious than increasing the number of column 2 samples. For example,
+perhaps the column 1 samples represent mice, while column 2 represents
+multiple measurements of some feature from each mouse's cells. You have
+posited that the slight majority of your observed variance comes from the
+column 2 samples - indeed, in biological samples, within-sample variance
+can be equal to or greater than between-sample variance. After all, that
+is why we make multiple measurements within the same biological sample!
+Given that this is a reasonable assumption, perhaps 80% power can be
+achieved with an experimental design that makes more column 2 measurements. ::
+
+    sim.fit([2,8,30])
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.84
+
+Of course, adding column 2 samples has a much more limited
+influence on power compared to adding column 1 samples - with infinite
+column 2 samples, the standard error for the difference of means is
+still dependent on the variance of the column 1 data-generating process.
+This is illustrated with an excessive example of 300 column 2 samples
+per column 1 sample, which shows no improvement in power over using
+only 30 column 2 samples. ::
+
+    sim.fit([2,8,300])
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.83
+
+On the other hand, adding only four column 1 samples to each treatment group
+(rather than 270 to each column 1 sample) brings the power to 97%.
+
+Finally, to ensure that hierarchical permutation is valid for the posited
+data-generating process, you can do another power analysis under the null
+hypothesis - that there is no difference between groups. To compensate for
+Monte Carlo error, you should increase the number of loops - at 100 loops,
+the error for an event that happens with 5% probability is +/- 2%, but at
+1000 loops, it is only +/- 0.7%. ::
+
+    parameters = [[0, 0], #no difference in means because we are sampling under the null hypothesis
+                 [stats.norm, 0, 1], #column 1 probability distribution
+                 [stats.lognorm, 0.75]] #column 2 probability distribution
+    sim = ha.power.DataSimulator(parameters, random_state=1)
+    sim.fit([2,12,30])
+
+    pvalues = []
+    loops = 1000
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.05
+
+Hierarchical permutation experiences no size distortion for this experimental
+design and is therefore a valid test.
+
+Note: because these power calculations are subject to Monte Carlo error,
+you should consider upping the number of loops if the precise value for
+power is of extreme importance. In nonclinical settings, however, small-scale
+power analyses are sufficient and can be a valuable guide for choosing the
+sample size for your study.
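+
+The Monte Carlo error figures quoted above are binomial standard errors and
+are easy to verify. A quick back-of-the-envelope sketch::
+
+    import numpy as np
+
+    p, loops = 0.05, 100
+    np.sqrt(p * (1 - p) / loops)   # ~0.022 -> the +/- 2% quoted above
+
+    p, loops = 0.05, 1000
+    np.sqrt(p * (1 - p) / loops)   # ~0.007 -> the +/- 0.7% quoted above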
\ No newline at end of file diff --git a/docs/user/usage.rst b/docs/user/usage.rst index 5a8f89f..843e6ee 100644 --- a/docs/user/usage.rst +++ b/docs/user/usage.rst @@ -1,633 +1,11 @@ Usage ===== -Importing Data --------------- -Hierarch is compatible with pandas DataFrames and numpy arrays. -Pandas is capable of conveniently importing data from a wide variety -of formats, including Excel files. :: +.. toctree:: - import pandas as pd - data = pd.read_excel(filepath) + importing.rst + hypothesis.rst + confidence.rst + power.rst -Two-Sample Hypothesis Tests ---------------------------- -Performing a hierarchical permutation test for difference of means is simple. -Consider an imaging experiment with two treatment groups, three coverslips in -each group, and three images (fields of view) within each coverslip. If you have -the data stored in an Excel file, you can use pandas to either directly read the -file or copy it in from the clipboard, as below. :: - import pandas as pd - import numpy as np - import hierarch as ha - - data = pd.read_clipboard() - - print(data) - -+------------+------+-------------+----------+ -| Condition | Well | Measurement | Values | -+============+======+=============+==========+ -| None | 1 | 1 | 5.202258 | -+------------+------+-------------+----------+ -| None | 1 | 2 | 5.136128 | -+------------+------+-------------+----------+ -| None | 1 | 3 | 5.231401 | -+------------+------+-------------+----------+ -| None | 2 | 1 | 5.336643 | -+------------+------+-------------+----------+ -| None | 2 | 2 | 5.287973 | -+------------+------+-------------+----------+ -| None | 2 | 3 | 5.375359 | -+------------+------+-------------+----------+ -| None | 3 | 1 | 5.350692 | -+------------+------+-------------+----------+ -| None | 3 | 2 | 5.465206 | -+------------+------+-------------+----------+ -| None | 3 | 3 | 5.422602 | -+------------+------+-------------+----------+ -| +Treatment | 4 | 1 | 5.695427 | -+------------+------+-------------+----------+ -| +Treatment | 4 | 2 | 5.668457 | -+------------+------+-------------+----------+ -| +Treatment | 4 | 3 | 5.752592 | -+------------+------+-------------+----------+ -| +Treatment | 5 | 1 | 5.583562 | -+------------+------+-------------+----------+ -| +Treatment | 5 | 2 | 5.647895 | -+------------+------+-------------+----------+ -| +Treatment | 5 | 3 | 5.618315 | -+------------+------+-------------+----------+ -| +Treatment | 6 | 1 | 5.642983 | -+------------+------+-------------+----------+ -| +Treatment | 6 | 2 | 5.47072 | -+------------+------+-------------+----------+ -| +Treatment | 6 | 3 | 5.686654 | -+------------+------+-------------+----------+ - -It is important to note that the ordering of the columns from left to right -reflects the experimental design scheme. This is necessary for hierarch -to infer the clustering within your dataset. In case your data is not -ordered properly, pandas makes it easy enough to index your data in the -correct order. :: - - - columns = ['Condition', 'Coverslip', 'Field of View', 'Mean Fluorescence'] - - data[columns] - -Next, you can call two_sample_test from hierarch's stats module, which will -calculate the p-value. You have to specify what column is the treatment -column - in this case, "Condition." Indexing starts at 0, so you input -treatment_col=0. In this case, there are only 6c3 = 20 ways to permute the -treatment labels, so you should specify "all" permutations be used. 
:: - - p_val = ha.stats.two_sample_test(data, treatment_col=0, - bootstraps=500, permutations='all', - random_state=1) - - print('p-value =', p_val) - - #out: p-value = 0.0406 - -There are a number of parameters that can be used to modify two_sample_test. :: - - ha.stats.two_sample_test(data_array, - treatment_col, - compare="means", - skip=None, - bootstraps=100, - permutations=1000, - kind='weights', - return_null=False, - random_state=None) - -**compare**: The default "means" assumes that you are testing for a difference in means, so it uses the Welch t-statistic. For flexibility, two_sample_test can take a test statistic function as an argument. - -**skip**: indicates the indices of columns that should be skipped in the bootstrapping procedure. - -A simple rule of thumb is that columns should be resampled with replacement only if they were originally sampled with replacement (or effectively sampled with replacement). For example, consider an imaging experiment in which you image several fields of view in a well, which each contain several cells. While you can consider the cells sampled with replacement from the well (there are so many more cells in the population that this assumption is fair), the cells are not sampled with replacement from the field of view (as you are measuring ALL cells in each field of view). The 10% condition is a reasonable rule of thumb here - if your sample represents less than 10% of the population, you can treat it as sampled with replacement. - -**bootstraps**: indicates the number of bootstrapped samples to be drawn from data_array. - -Generally, as the number of possible permutations of your data increases, the number of bootstraps should decrease. If the goal of bootstrapping is to include the standard error in the biological samples in the null distribution, only 50-100 bootstraps is sufficient given a large enough set of possible permutations. - -**permutations**: indicates the number of permutations of the treatment label PER bootstrapped sample. - -Inputting "all" will enumerate all of the possible permutations and iterate through them one by one. This is done using a generator, so the permutations are not stored in memory, but is still excessively time consuming for large datasets. - -**kind**: "weights" or "indexes" or "bayesian" specifies the bootstrapping algorithm. "weights" returns an array the same size as the input array, but with the data reweighted according to the Efron bootstrap procedure. "indexes" uses the same algorithm, but returns a reindexed array. "bayesian" also returns a reweighted array, but the weights are allowed to be any real number rather than just integers. - -**return_null**: setting this to True will also return the empirical null distribution as a list. - -**seed**: allows you to specify a random seed for reproducibility. - -Many-Sample Hypothesis Tests - Several Hypotheses -------------------------------------------------- -Researchers may want to perform a series of hypothesis tests to determine -whether there are significant differences between some parameter in three -or more unrelated groups. This is similar to the goal of one-way ANOVA. To -this end, hierarch includes the multi_sample_test function, which performs -multiple two-sample tests in the vein of post-hoc tests after ANOVA. The -researcher can also choose to make a multiple-comparison correction in the -form of the Benjamini-Hochberg procedure, which controls for False Discovery -Rate. - -Consider an experiment with four treatment groups. 
-as follows. ::
-
-    from hierarch.power import DataSimulator
-    from hierarch.stats import multi_sample_test
-    import scipy.stats as stats
-
-    paramlist = [[0, 1, 4, 0], [stats.norm], [stats.norm]]
-    hierarchy = [4, 3, 3]
-
-    datagen = DataSimulator(paramlist, random_state=1)
-    datagen.fit(hierarchy)
-    data = datagen.generate()
-    data
-
-+---+---+---+----------+
-| 0 | 1 | 2 | 3        |
-+===+===+===+==========+
-| 1 | 1 | 1 | -0.39087 |
-+---+---+---+----------+
-| 1 | 1 | 2 | 0.182674 |
-+---+---+---+----------+
-| 1 | 1 | 3 | -0.13654 |
-+---+---+---+----------+
-| 1 | 2 | 1 | 1.420464 |
-+---+---+---+----------+
-| 1 | 2 | 2 | 0.86134  |
-+---+---+---+----------+
-| 1 | 2 | 3 | 0.529161 |
-+---+---+---+----------+
-| 1 | 3 | 1 | -0.45147 |
-+---+---+---+----------+
-| 1 | 3 | 2 | 0.073245 |
-+---+---+---+----------+
-| 1 | 3 | 3 | 0.338579 |
-+---+---+---+----------+
-| 2 | 1 | 1 | -0.57876 |
-+---+---+---+----------+
-| 2 | 1 | 2 | 0.990907 |
-+---+---+---+----------+
-| 2 | 1 | 3 | 0.703567 |
-+---+---+---+----------+
-| 2 | 2 | 1 | -0.80581 |
-+---+---+---+----------+
-| 2 | 2 | 2 | 0.016343 |
-+---+---+---+----------+
-| 2 | 2 | 3 | 1.730584 |
-+---+---+---+----------+
-| 2 | 3 | 1 | 1.024184 |
-+---+---+---+----------+
-| 2 | 3 | 2 | 1.660018 |
-+---+---+---+----------+
-| 2 | 3 | 3 | 1.663697 |
-+---+---+---+----------+
-| 3 | 1 | 1 | 5.580886 |
-+---+---+---+----------+
-| 3 | 1 | 2 | 2.351026 |
-+---+---+---+----------+
-| 3 | 1 | 3 | 3.085442 |
-+---+---+---+----------+
-| 3 | 2 | 1 | 6.62389  |
-+---+---+---+----------+
-| 3 | 2 | 2 | 5.227821 |
-+---+---+---+----------+
-| 3 | 2 | 3 | 5.244181 |
-+---+---+---+----------+
-| 3 | 3 | 1 | 3.850566 |
-+---+---+---+----------+
-| 3 | 3 | 2 | 2.716497 |
-+---+---+---+----------+
-| 3 | 3 | 3 | 4.532037 |
-+---+---+---+----------+
-| 4 | 1 | 1 | 0.403147 |
-+---+---+---+----------+
-| 4 | 1 | 2 | -0.93322 |
-+---+---+---+----------+
-| 4 | 1 | 3 | -0.38909 |
-+---+---+---+----------+
-| 4 | 2 | 1 | -0.04362 |
-+---+---+---+----------+
-| 4 | 2 | 2 | -0.91633 |
-+---+---+---+----------+
-| 4 | 2 | 3 | -0.06985 |
-+---+---+---+----------+
-| 4 | 3 | 1 | 0.642196 |
-+---+---+---+----------+
-| 4 | 3 | 2 | 0.582299 |
-+---+---+---+----------+
-| 4 | 3 | 3 | 0.040421 |
-+---+---+---+----------+
-
-This dataset has been generated such that treatments 1 and 4 have the same mean, while
-treatment 2 represents a slight difference and treatment 3 represents a large difference.
-There are six total comparisons that can be made, which can be performed automatically
-using multi_sample_test as follows. ::
-
-    multi_sample_test(data, treatment_col=0, hypotheses="all",
-                      correction=None, bootstraps=1000,
-                      permutations="all", random_state=111)
-
-    array([[2.0, 3.0, 0.0355],
-           [1.0, 3.0, 0.0394],
-           [3.0, 4.0, 0.0407],
-           [2.0, 4.0, 0.1477],
-           [1.0, 2.0, 0.4022],
-           [1.0, 4.0, 0.4559]], dtype=object)
-
-The first two columns indicate the conditions being compared, while the last column indicates
-the uncorrected p-value. Because there are several hypotheses being tested, it is advisable
-to make a multiple comparisons correction. Currently, hierarch can automatically perform the
-Benjamini-Hochberg procedure, which controls False Discovery Rate. By indicating the "fdr"
-correction, the output array has an additional column showing the q-values, or adjusted p-values.
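-
-The q-values can be reproduced by hand: rank the p-values in ascending order,
-multiply each by (number of tests / rank), then enforce monotonicity from the
-largest p-value downward. The sketch below is illustrative only - hierarch
-performs this adjustment internally. ::
-
-    import numpy as np
-
-    def bh_qvalues(pvals):
-        # Benjamini-Hochberg adjustment: p * m / rank on the sorted p-values
-        pvals = np.asarray(pvals)
-        order = np.argsort(pvals)
-        m = len(pvals)
-        raw = pvals[order] * m / np.arange(1, m + 1)
-        # enforce monotonicity, working down from the largest p-value
-        qvals = np.empty(m)
-        qvals[order] = np.minimum.accumulate(raw[::-1])[::-1]
-        return qvals
-
-    bh_qvalues([0.0355, 0.0394, 0.0407, 0.1477, 0.4022, 0.4559])
-
-    #out: array([0.0814, 0.0814, 0.0814, 0.22155, 0.4559, 0.4559])
-
-Passing correction='fdr' performs the same adjustment automatically.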
-::
-
-    multi_sample_test(data, treatment_col=0, hypotheses="all",
-                      correction='fdr', bootstraps=1000,
-                      permutations="all", random_state=111)
-
-    array([[2.0, 3.0, 0.0355, 0.0814],
-           [1.0, 3.0, 0.0394, 0.0814],
-           [3.0, 4.0, 0.0407, 0.0814],
-           [2.0, 4.0, 0.1477, 0.22155],
-           [1.0, 2.0, 0.4022, 0.4559],
-           [1.0, 4.0, 0.4559, 0.4559]], dtype=object)
-
-Testing more hypotheses necessarily lowers the p-value threshold required to call a result
-significant. However, we are not always interested in performing every comparison - perhaps
-condition 2 is a control that all other conditions are meant to be compared to. The
-comparisons of interest can be specified using a list. ::
-
-    tests = [[2.0, 1.0], [2.0, 3.0], [2.0, 4.0]]
-    multi_sample_test(data, treatment_col=0, hypotheses=tests,
-                      correction='fdr', bootstraps=1000,
-                      permutations="all", random_state=222)
-
-    array([[2.0, 3.0, 0.036, 0.108],
-           [2.0, 4.0, 0.1506, 0.2259],
-           [2.0, 1.0, 0.4036, 0.4036]], dtype=object)
-
-Many-Sample Hypothesis Tests - Single Hypothesis
-------------------------------------------------
-One-way ANOVA and similar tests (like multi_sample_test) are inappropriate when
-you have several samples meant to test a single hypothesis. For example, perhaps
-you have several samples with different concentrations of the same drug treatment.
-In this case, hierarch provides linear_regression_test, which is equivalent to
-performing a hypothesis test on a linear model against the null hypothesis that
-the slope coefficient is equal to 0.
-
-This hypothesis test uses a studentized covariance test statistic - essentially,
-the sample covariance divided by the standard error of the sample covariance. This
-test statistic is approximately normally distributed and in the two-sample case,
-this test gives the same result as two_sample_test.
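-
-To build intuition, here is a rough sketch of that statistic. It ignores the
-finite-sample bias corrections hierarch applies internally, so treat it as
-illustrative rather than hierarch's actual implementation. ::
-
-    import numpy as np
-
-    def studentized_cov(x, y):
-        # sample covariance between x and y
-        n = len(x)
-        cov = ((x - x.mean()) * (y - y.mean())).mean()
-        # delta-method estimate of the variance of the sample covariance
-        var_cov = (((x - x.mean()) ** 2 * (y - y.mean()) ** 2).mean() - cov ** 2) / n
-        return cov / var_cov ** 0.5
-
-First, consider a dataset with two treatment groups, four samples each, and three
-measurements on each sample.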
:: - - from hierarch.power import DataSimulator - import scipy.stats as stats - - paramlist = [[0, 2], [stats.norm], [stats.norm]] - hierarchy = [2, 4, 3] - - datagen = DataSimulator(paramlist, random_state=2) - datagen.fit(hierarchy) - data = datagen.generate() - data - -+---+---+---+----------+ -| 0 | 1 | 2 | 3 | -+===+===+===+==========+ -| 1 | 1 | 1 | 0.470264 | -+---+---+---+----------+ -| 1 | 1 | 2 | -0.36477 | -+---+---+---+----------+ -| 1 | 1 | 3 | 1.166621 | -+---+---+---+----------+ -| 1 | 2 | 1 | -0.8333 | -+---+---+---+----------+ -| 1 | 2 | 2 | -0.85157 | -+---+---+---+----------+ -| 1 | 2 | 3 | -1.3149 | -+---+---+---+----------+ -| 1 | 3 | 1 | 0.041895 | -+---+---+---+----------+ -| 1 | 3 | 2 | -0.51226 | -+---+---+---+----------+ -| 1 | 3 | 3 | 0.132225 | -+---+---+---+----------+ -| 1 | 4 | 1 | -3.04865 | -+---+---+---+----------+ -| 1 | 4 | 2 | -2.31464 | -+---+---+---+----------+ -| 1 | 4 | 3 | -3.33374 | -+---+---+---+----------+ -| 2 | 1 | 1 | 4.641172 | -+---+---+---+----------+ -| 2 | 1 | 2 | 3.987742 | -+---+---+---+----------+ -| 2 | 1 | 3 | 4.130278 | -+---+---+---+----------+ -| 2 | 2 | 1 | 3.55467 | -+---+---+---+----------+ -| 2 | 2 | 2 | 2.133408 | -+---+---+---+----------+ -| 2 | 2 | 3 | 3.927347 | -+---+---+---+----------+ -| 2 | 3 | 1 | 3.73128 | -+---+---+---+----------+ -| 2 | 3 | 2 | 0.036135 | -+---+---+---+----------+ -| 2 | 3 | 3 | -0.05483 | -+---+---+---+----------+ -| 2 | 4 | 1 | 1.268975 | -+---+---+---+----------+ -| 2 | 4 | 2 | 3.615265 | -+---+---+---+----------+ -| 2 | 4 | 3 | 2.902522 | -+---+---+---+----------+ - -Performing linear_regression_test and two_sample_test on this dataset should -give very similar p-values. :: - - linear_regression_test(data, treatment_col=0, - bootstraps=1000, permutations='all', - random_state=1) - 0.013714285714285714 - - two_sample_test(data, treatment_col=0, - bootstraps=1000, permutations='all', - random_state=1) - 0.013714285714285714 - -However, unlike two_sample_test, this test can handle any number of conditions. Consider instead -a dataset with four treatment conditions that have a linear relationship. 
:: - - paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]] - hierarchy = [4, 2, 3] - datagen = DataSimulator(paramlist, random_state=2) - datagen.fit(hierarchy) - data = datagen.generate() - data - -+---+---+---+----------+ -| 0 | 1 | 2 | 3 | -+===+===+===+==========+ -| 1 | 1 | 1 | 0.470264 | -+---+---+---+----------+ -| 1 | 1 | 2 | -0.36477 | -+---+---+---+----------+ -| 1 | 1 | 3 | 1.166621 | -+---+---+---+----------+ -| 1 | 2 | 1 | -0.8333 | -+---+---+---+----------+ -| 1 | 2 | 2 | -0.85157 | -+---+---+---+----------+ -| 1 | 2 | 3 | -1.3149 | -+---+---+---+----------+ -| 2 | 1 | 1 | 0.708561 | -+---+---+---+----------+ -| 2 | 1 | 2 | 0.154405 | -+---+---+---+----------+ -| 2 | 1 | 3 | 0.798892 | -+---+---+---+----------+ -| 2 | 2 | 1 | -2.38199 | -+---+---+---+----------+ -| 2 | 2 | 2 | -1.64797 | -+---+---+---+----------+ -| 2 | 2 | 3 | -2.66707 | -+---+---+---+----------+ -| 3 | 1 | 1 | 3.974506 | -+---+---+---+----------+ -| 3 | 1 | 2 | 3.321076 | -+---+---+---+----------+ -| 3 | 1 | 3 | 3.463612 | -+---+---+---+----------+ -| 3 | 2 | 1 | 2.888003 | -+---+---+---+----------+ -| 3 | 2 | 2 | 1.466742 | -+---+---+---+----------+ -| 3 | 2 | 3 | 3.26068 | -+---+---+---+----------+ -| 4 | 1 | 1 | 3.73128 | -+---+---+---+----------+ -| 4 | 1 | 2 | 0.036135 | -+---+---+---+----------+ -| 4 | 1 | 3 | -0.05483 | -+---+---+---+----------+ -| 4 | 2 | 1 | 1.268975 | -+---+---+---+----------+ -| 4 | 2 | 2 | 3.615265 | -+---+---+---+----------+ -| 4 | 2 | 3 | 2.902522 | -+---+---+---+----------+ - -For this dataset, there are 8! / (2!^4) = 2,520 total permutations. We will choose a random -subset of them to compute the p-value. :: - - linear_regression_test(data, treatment_col=0, - bootstraps=100, permutations=1000, - random_state=1) - 0.00767 - -Between these three tests, researchers can address a large variety of experimental designs. Unfortunately, -interaction effects are outside the scope of permutation tests - it is not possible to construct an -exact test for interaction effects in general. However, an asymptotic test for interaction effects -may be implemented in the future. - -Power Analysis --------------- -Researchers can also use hierarch to determine the appropriate sample size -for a future experiment. hierarch.power provides a class, DataSimulator, -to assist in power analyses. DataSimulator is initialized with a list -specifying the probability distributions generating the data and an optional -random_state for reproducibility. - -In this case, consider an experiment similar to the one above - two treatment -conditions, but the sample size at each level of hierarchy is yet to be -determined. First, you must posit a data-generating process for the analysis. - -Suppose you assume that the column 1 values are normally distributed with -mean 0 and variance 1. From past experience, you believe that the column 2 -values follow a right-tailed distribution, so you choose to model it as a -lognormal distribution with a scale parameter of 0.75. Finally, you decide -that you want to achieve 80% power for a mean difference equal to one standard -deviation. You calculate that the summed standard deviation of the two -distributions you specified is 1.525 and input that as a parameter, as well. 
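-
-Here, the "summed standard deviation" is the square root of the summed variances
-of the two data-generating levels. You can verify the 1.525 figure directly from
-scipy's frozen distributions (a quick check, independent of hierarch). ::
-
-    import scipy.stats as stats
-
-    total_var = stats.norm(0, 1).var() + stats.lognorm(0.75).var()
-    print(total_var ** 0.5)
-
-    #out: 1.5248...
-
-With the effect size in hand, you can specify the simulator's parameters.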
-::
-
-    from hierarch.power import DataSimulator
-    import scipy.stats as stats
-
-    parameters = [[0, 1.525], #difference in means due to treatment
-                  [stats.norm, 0, 1], #column 1 distribution - stats.norm(loc=0, scale=1)
-                  [stats.lognorm, 0.75]] #column 2 distribution - stats.lognorm(s = 0.75)
-
-    sim = DataSimulator(parameters, random_state=1)
-
-Next, you choose an experimental design to simulate. Perhaps, like above, you
-decide to start with three samples per treatment condition and three measurements
-within each sample. Calling the .fit() function will ready the DataSimulator to
-produce randomly-generated data according to this experimental scheme. ::
-
-    hierarchy = [2, #treatments
-                 3, #samples
-                 3] #within-sample measurements
-
-    sim.fit(hierarchy)
-
-By calling the .generate() function, DataSimulator uses the prespecified
-parameters to generate a simulated dataset. ::
-
-    print(sim.generate())
-
-+---+---+---+----------+
-| 0 | 1 | 2 | 3        |
-+===+===+===+==========+
-| 1 | 1 | 1 | 1.014087 |
-+---+---+---+----------+
-| 1 | 1 | 2 | 1.891843 |
-+---+---+---+----------+
-| 1 | 1 | 3 | 1.660049 |
-+---+---+---+----------+
-| 1 | 2 | 1 | 2.068442 |
-+---+---+---+----------+
-| 1 | 2 | 2 | 1.843164 |
-+---+---+---+----------+
-| 1 | 2 | 3 | 2.328488 |
-+---+---+---+----------+
-| 1 | 3 | 1 | 0.906038 |
-+---+---+---+----------+
-| 1 | 3 | 2 | 1.215424 |
-+---+---+---+----------+
-| 1 | 3 | 3 | 1.027005 |
-+---+---+---+----------+
-| 2 | 1 | 1 | 1.788798 |
-+---+---+---+----------+
-| 2 | 1 | 2 | 1.252083 |
-+---+---+---+----------+
-| 2 | 1 | 3 | 1.024889 |
-+---+---+---+----------+
-| 2 | 2 | 1 | 2.986665 |
-+---+---+---+----------+
-| 2 | 2 | 2 | 3.254925 |
-+---+---+---+----------+
-| 2 | 2 | 3 | 3.436481 |
-+---+---+---+----------+
-| 2 | 3 | 1 | 2.784636 |
-+---+---+---+----------+
-| 2 | 3 | 2 | 4.610765 |
-+---+---+---+----------+
-| 2 | 3 | 3 | 4.099078 |
-+---+---+---+----------+
-
-You can use this to set up a simple power analysis. The following
-code performs a hierarchical permutation test with 50,000 total
-permutations (though this is overkill in the 2, 3, 3 case) on each
-of 100 simulated datasets and prints the fraction of them that return
-a significant result, assuming a p-value cutoff of 0.05. ::
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.29
-
-The targeted power is 0.8, so you can fit the DataSimulator with a larger sample
-size. You can run the following code block with different sample sizes until
-you determine the column 1 sample size that achieves at least 80% power. ::
-
-    sim.fit([2,10,3])
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.81
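-
-Since this fit-simulate-test loop recurs for every candidate design below, it
-can be convenient to wrap it in a small helper. Note that empirical_power is a
-convenience sketch for this tutorial, not part of hierarch; the explicit loops
-are kept in the examples below for clarity. ::
-
-    def empirical_power(sim, hierarchy, loops=100, alpha=0.05):
-        # fraction of simulated datasets that reach significance
-        sim.fit(hierarchy)
-        pvalues = [ha.stats.two_sample_test(sim.generate(), 0,
-                                            bootstraps=500, permutations=100)
-                   for i in range(loops)]
-        return np.less(pvalues, alpha).sum() / loops
-
-    empirical_power(sim, [2, 10, 3])  # approximately 0.8, as above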
-You note, however, that increasing the number of column 1 samples is much
-more laborious than increasing the number of column 2 samples. For example,
-perhaps the column 1 samples represent mice, while column 2 represents
-multiple measurements of some feature from each mouse's cells. You have
-posited that the slight majority of your observed variance comes from the
-column 2 samples - indeed, in biological samples, within-sample variance
-can be equal to or greater than between-sample variance. After all, that
-is why we make multiple measurements within the same biological sample!
-Given that this is a reasonable assumption, perhaps 80% power can be
-achieved with an experimental design that makes more column 2 measurements. ::
-
-    sim.fit([2,8,30])
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.84
-
-Of course, adding column 2 samples has a much more limited
-influence on power compared to adding column 1 samples - with infinite
-column 2 samples, the standard error for the difference of means is
-still dependent on the variance of the column 1 data-generating process.
-This is illustrated with an excessive example of 300 column 2 samples
-per column 1 sample, which shows no improvement in power over using
-only 30 column 2 samples. ::
-
-    sim.fit([2,8,300])
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.83
-
-On the other hand, adding only four column 1 samples to each treatment group
-(rather than 270 to each column 1 sample) brings the power to 97%.
-
-Finally, to ensure that hierarchical permutation is valid for the posited
-data-generating process, you can do another power analysis under the null
-hypothesis - that there is no difference between groups. To compensate for
-Monte Carlo error, you should increase the number of loops - at 100 loops,
-the error for an event that happens with 5% probability is +/- 2%, but at
-1000 loops, it is only +/- 0.7%. ::
-
-    parameters = [[0, 0], #no difference in means because we are sampling under the null hypothesis
-                  [stats.norm, 0, 1], #column 1 probability distribution
-                  [stats.lognorm, 0.75]] #column 2 probability distribution
-    sim = ha.power.DataSimulator(parameters, random_state=1)
-    sim.fit([2,12,30])
-
-    pvalues = []
-    loops = 1000
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.05
-
-Hierarchical permutation experiences no size distortion for this experimental
-design and is therefore a valid test.
-
-Note: these power calculations are subject to Monte Carlo error, so you should
-consider increasing the number of loops if the precise value for power is of
-extreme importance. In nonclinical settings, however, small-scale power
-analyses are sufficient and can be a valuable guide for choosing the sample
-size for your study.
\ No newline at end of file
diff --git a/hierarch/internal_functions.py b/hierarch/internal_functions.py
index 2bdbb86..60bd396 100644
--- a/hierarch/internal_functions.py
+++ b/hierarch/internal_functions.py
@@ -432,50 +432,6 @@ def visit(head):
             yield visit(head)
 
 
-def preprocess_data(data):
-    """Performs label encoding without overwriting numerical variables.
-
-    Parameters
-    ----------
-    data : 2D array or pandas DataFrame
-        Data to be encoded.
-
-    Returns
-    -------
-    2D array of float64s
-        An array identical to data, but all elements that cannot be cast
-        to np.float64s replaced with integer values.
- """ - # don't want to overwrite data - if isinstance(data, np.ndarray): - - encoded = data.copy() - - # coerce dataframe to numpy array - elif isinstance(data, pd.DataFrame): - - encoded = data.to_numpy() - - for idx, v in enumerate(encoded.T): - # attempt to cast array as floats - try: - encoded = encoded.astype(np.float64) - # if we can cast the array as floats, encoding is complete - break - - except ValueError: - # if we can't, attempt to cast one column as floats - try: - encoded[:, idx] = encoded[:, idx].astype(np.float64) - # if we still can't, encode that column - except ValueError: - encoded[:, idx] = np.unique(v, return_inverse=True)[1] - # stable sort sort the output by row - encoded = np.unique(encoded, axis=0) - - return encoded - - class GroupbyMean: """Class for performing groupby reductions on numpy arrays. diff --git a/hierarch/power.py b/hierarch/power.py index 9bb375f..b3455c3 100644 --- a/hierarch/power.py +++ b/hierarch/power.py @@ -12,13 +12,6 @@ class DataSimulator: random_state : int or numpy.random.Generator instance, optional Seedable for reproducibility, by default None - Methods - ------- - fit: - Fit the class to a hierarchical structure. - generate: - Generate a simulated dataset. - Examples -------- Each sublist in paramlist can either be an integer or a scipy.stats diff --git a/hierarch/resampling.py b/hierarch/resampling.py index 2641d1a..a34745b 100644 --- a/hierarch/resampling.py +++ b/hierarch/resampling.py @@ -37,13 +37,6 @@ class Bootstrapper: "indexes" generates a set of new indexes for the dataset. Mathematically, this is equivalent to demanding integer weights. - Methods - ------- - fit: - Fit the class to a dataset. - transform: - Generate a bootstrapped sample. - Notes ----- These approaches have different outputs - "weights" and "bayesian" @@ -213,7 +206,9 @@ class Bootstrapper: """ - BOOTSTRAP_ALGORITHMS = tuple(["weights", "indexes", "bayesian"]) + #: ("weights", "indexes", "bayesian) The three possible arguments that + # can be provided to the "kind" keyword argument. + _BOOTSTRAP_ALGORITHMS = tuple(["weights", "indexes", "bayesian"]) def __init__(self, random_state=None, kind="weights"): @@ -222,7 +217,7 @@ def __init__(self, random_state=None, kind="weights"): # this makes it both reproducible and thread-safe enough nb_seed = self.random_generator.integers(low=2 ** 32 - 1) set_numba_random_state(nb_seed) - if kind in self.BOOTSTRAP_ALGORITHMS: + if kind in self._BOOTSTRAP_ALGORITHMS: self.kind = kind else: raise KeyError("Invalid 'kind' argument.") @@ -332,14 +327,7 @@ class Permuter: ---------- random_state : int or numpy.random.Generator instance, optional Seedable for reproducibility, by default None - - Methods - ------- - fit: - Fit the class to a dataset. - transform: - Generate a permuted sample. - + Examples -------- When the column to resample is the first column, Permuter performs an diff --git a/hierarch/stats.py b/hierarch/stats.py index d2068de..58b282b 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -5,7 +5,6 @@ import pandas as pd from hierarch.internal_functions import ( nb_data_grabber, - preprocess_data, GroupbyMean, _compute_interval, bivar_central_moment, @@ -14,6 +13,50 @@ from warnings import warn, simplefilter +def preprocess_data(data): + """Performs label encoding without overwriting numerical variables. + + Parameters + ---------- + data : 2D array or pandas DataFrame + Data to be encoded. 
+
+    Returns
+    -------
+    2D array of float64s
+        An array identical to data, but all elements that cannot be cast
+        to np.float64 are replaced with integer values.
+    """
+    # don't want to overwrite data
+    if isinstance(data, np.ndarray):
+
+        encoded = data.copy()
+
+    # coerce dataframe to numpy array
+    elif isinstance(data, pd.DataFrame):
+
+        encoded = data.to_numpy()
+
+    for idx, v in enumerate(encoded.T):
+        # attempt to cast array as floats
+        try:
+            encoded = encoded.astype(np.float64)
+            # if we can cast the array as floats, encoding is complete
+            break
+
+        except ValueError:
+            # if we can't, attempt to cast one column as floats
+            try:
+                encoded[:, idx] = encoded[:, idx].astype(np.float64)
+            # if we still can't, encode that column
+            except ValueError:
+                encoded[:, idx] = np.unique(v, return_inverse=True)[1]
+    # stable sort the output by row
+    encoded = np.unique(encoded, axis=0)
+
+    return encoded
+
+
 @jit(nopython=True, cache=True)
 def studentized_covariance(x, y):
     """Studentized sample covariance between two variables.
diff --git a/setup.py b/setup.py
index 1b0bc43..d2e8f6c 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 setuptools.setup(
     name="hierarch",
-    version="0.4.0",
+    version="1.0.0",
     author="Rishi Kulkarni",
     author_email="rkulk@stanford.edu",
     description="Hierarchical hypothesis testing library",