diff --git a/.github/workflows/python-runtests-basic.yml b/.github/workflows/python-runtests-basic.yml index 74bb0753..178f01cf 100644 --- a/.github/workflows/python-runtests-basic.yml +++ b/.github/workflows/python-runtests-basic.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 diff --git a/wqio/datacollections.py b/wqio/datacollections.py index 923aa563..6908a4b8 100644 --- a/wqio/datacollections.py +++ b/wqio/datacollections.py @@ -20,10 +20,11 @@ _Stat = namedtuple("_stat", ["stat", "pvalue"]) -def _dist_compare(x, y, stat_comp_func): +def _dist_compare(x, y, stat_comp_func, **test_opts): if (len(x) == len(y)) and numpy.equal(x, y).all(): return _Stat(numpy.nan, numpy.nan) - return stat_comp_func(x, y, alternative="two-sided") + + return stat_comp_func(x, y, **test_opts) class DataCollection: @@ -130,6 +131,8 @@ def __init__( **{self.cencol: dataframe[self.qualcol].isin(self.ndval)} ).reset_index() + self.pbarfxn = tqdm if (self.showpbar and tqdm) else utils.misc.no_op + @cache_readonly def tidy(self): if self.useros: @@ -323,7 +326,7 @@ def mean(self): @cache_readonly def std_dev(self): - return self.generic_stat(numpy.std, statname="std. dev.", use_bootstrap=False) + return self.generic_stat(numpy.std, statname="std. dev.", use_bootstrap=False, ddof=1) def percentile(self, percentile): """Return the percentiles (0 - 100) for the data.""" @@ -342,7 +345,7 @@ def logmean(self): @cache_readonly def logstd_dev(self): return self.generic_stat( - lambda x, axis=0: numpy.std(numpy.log(x), axis=axis), + lambda x, axis=0: numpy.std(numpy.log(x), axis=axis, ddof=1), use_bootstrap=False, statname="Log-std. dev.", ) @@ -359,65 +362,93 @@ def geostd_dev(self): geostd.columns.names = ["station", "Geo-std. dev."] return geostd - @cache_readonly - def shapiro(self): + def shapiro(self, **opts): + """ + Run the Shapiro-Wilk test for normality on the datasets. + + Requires at least 3 observations in each dataset. + + See `scipy.stats.shapiro` for info on kwargs you can pass. + """ return self.generic_stat( stats.shapiro, use_bootstrap=False, has_pvalue=True, statname="shapiro", filterfxn=lambda x: x.shape[0] > 3, + **opts, ) - @cache_readonly - def shapiro_log(self): + def shapiro_log(self, **opts): + """ + Run the Shapiro-Wilk test for normality on log-transformed datasets. + + Requires at least 3 observations in each dataset. + + See `scipy.stats.shapiro` for info on kwargs you can pass. + """ return self.generic_stat( lambda x: stats.shapiro(numpy.log(x)), use_bootstrap=False, has_pvalue=True, filterfxn=lambda x: x.shape[0] > 3, statname="log-shapiro", + **opts, ) - @cache_readonly - def lilliefors(self): + def lilliefors(self, **opts): + """ + Run the Lilliefors test for normality on the datasets. + + Requires at least 3 observations in each dataset. + + See `statsmodels.api.stats.lilliefors` for info on kwargs you can pass. + """ return self.generic_stat( sm.stats.lilliefors, use_bootstrap=False, has_pvalue=True, statname="lilliefors", + **opts, ) - @cache_readonly - def lilliefors_log(self): + def lilliefors_log(self, **opts): + """ + Run the Lilliefors test for normality on the log-transformed datasets. + + Requires at least 3 observations in each dataset. + + See `statsmodels.api.stats.lilliefors` for info on kwargs you can pass. 
+ """ return self.generic_stat( lambda x: sm.stats.lilliefors(numpy.log(x)), use_bootstrap=False, has_pvalue=True, statname="log-lilliefors", + **opts, ) - @cache_readonly - def anderson_darling(self): + def anderson_darling(self, **opts): raise NotImplementedError return self.generic_stat( utils.anderson_darling, use_bootstrap=False, has_pvalue=True, statname="anderson-darling", + **opts, ) - @cache_readonly - def anderson_darling_log(self): + def anderson_darling_log(self, **opts): raise NotImplementedError return self.generic_stat( lambda x: utils.anderson_darling(numpy.log(x)), use_bootstrap=False, has_pvalue=True, statname="log-anderson-darling", + **opts, ) - def comparison_stat(self, statfxn, statname=None, paired=False, **statopts): + def comparison_stat_twoway(self, statfxn, statname=None, paired=False, **statopts): """Generic function to apply comparative hypothesis tests to the groups of the ``DataCollection``. @@ -430,7 +461,7 @@ def comparison_stat(self, statfxn, statname=None, paired=False, **statopts): statname : string, optional Name of the statistic. Included as a column name in the final dataframe. - apired : bool, optional + paired : bool, optional Set to ``True`` if ``statfxn`` requires paired data. **statopts : optional kwargs Additional keyword arguments that will be passed to @@ -455,9 +486,9 @@ def comparison_stat(self, statfxn, statname=None, paired=False, **statopts): >>> dc = DataCollection(df, rescol='res', qualcol='qual', ... stationcol='loc', paramcol='param', ... ndval='<') - >>> mwht = dc.comparison_stat(stats.mannwhitneyu, - ... statname='mann_whitney', - ... alternative='two-sided') + >>> mwht = dc.comparison_stat_twoway(stats.mannwhitneyu, + ... statname='mann_whitney', + ... alternative='two-sided') """ @@ -475,46 +506,134 @@ def comparison_stat(self, statfxn, statname=None, paired=False, **statopts): index_cols = meta_columns + station_columns results = generator( - data, meta_columns, self.stationcol, rescol, statfxn, statname=statname, **statopts + data, + meta_columns, + self.stationcol, + rescol, + statfxn, + statname=statname, + pbarfxn=self.pbarfxn, + **statopts, ) return pandas.DataFrame.from_records(results).set_index(index_cols) - @cache_readonly - def mann_whitney(self): - return self.comparison_stat( - partial(_dist_compare, stat_comp_func=stats.mannwhitneyu), + def comparison_stat_allway(self, statfxn, statname, control=None, **statopts): + results = utils.numutils._group_comp_stat_generator( + self.tidy, + self.groupcols_comparison, + self.stationcol, + self.rescol, + statfxn, + statname=statname, + control=control, + pbarfxn=self.pbarfxn, + **statopts, + ) + return pandas.DataFrame.from_records(results).set_index(self.groupcols_comparison) + + def mann_whitney(self, **opts): + """ + Run the Mann-Whitney U test across datasets. + + See `scipy.stats.mannwhitneyu` for available options. + """ + return self.comparison_stat_twoway( + partial(_dist_compare, stat_comp_func=stats.mannwhitneyu, **opts), statname="mann_whitney", ) - @cache_readonly - def ranksums(self): - return self.comparison_stat(stats.ranksums, statname="rank_sums") + def ranksums(self, **opts): + """ + Run the unpaired Wilcoxon rank-sum test across datasets. - @cache_readonly - def t_test(self): - return self.comparison_stat(stats.ttest_ind, statname="t_test", equal_var=False) + See `scipy.stats.ranksums` for available options. 
+ """ + return self.comparison_stat_twoway(stats.ranksums, statname="rank_sums", **opts) - @cache_readonly - def levene(self): - return self.comparison_stat(stats.levene, statname="levene", center="median") + def t_test(self, **opts): + """ + Run the T-test for independent scores. - @cache_readonly - def wilcoxon(self): - return self.comparison_stat( + See `scipy.stats.ttest_ind` for available options. + """ + return self.comparison_stat_twoway(stats.ttest_ind, statname="t_test", **opts) + + def levene(self, **opts): + """ + Run the Levene test for equal variances + + See `scipy.stats.levene` for available options. + """ + return self.comparison_stat_twoway(stats.levene, statname="levene", **opts) + + def wilcoxon(self, **opts): + """ + Run the paired Wilcoxon rank-sum test across paired dataset. + + See `scipy.stats.wilcoxon` for available options. + """ + return self.comparison_stat_twoway( partial(_dist_compare, stat_comp_func=stats.wilcoxon), statname="wilcoxon", paired=True, + **opts, ) - @cache_readonly - def kendall(self): - return self.comparison_stat(stats.kendalltau, statname="kendalltau", paired=True) + def kendall(self, **opts): + """ + Run the paired Kendall-tau test across paired dataset. - @cache_readonly - def spearman(self): - return self.comparison_stat(stats.spearmanr, statname="spearmanrho", paired=True) + See `scipy.stats.kendalltau` for available options. + """ + return self.comparison_stat_twoway( + stats.kendalltau, statname="kendalltau", paired=True, **opts + ) + + def spearman(self, **opts): + """ + Run the paired Spearman-rho test across paired dataset. + + See `scipy.stats.spearmanr` for available options. + """ + return self.comparison_stat_twoway( + stats.spearmanr, statname="spearmanrho", paired=True, **opts + ) + + def kruskal_wallis(self, **opts): + """ + Run the paired Kruskal-Wallos H-test across paired dataset. + + See `scipy.stats.kruskal` for available options. + """ + return self.comparison_stat_allway(stats.kruskal, statname="K-W H", control=None, **opts) + + def f_test(self, **opts): + """ + One-way ANOVA test across datasets + + See `scipy.stats.f_oneway` for available options. 
+ """ + return self.comparison_stat_allway(stats.f_oneway, statname="f-test", control=None, **opts) + + def tukey_hsd(self) -> tuple[pandas.DataFrame, pandas.DataFrame]: + """ + Tukey Honestly Significant Difference (HSD) test across stations + other groups + for each parameter + """ + hsd = utils.tukey_hsd( + self.tidy, self.rescol, self.stationcol, self.paramcol, *self.othergroups + ) + scores = utils.process_tukey_hsd_scores(hsd, self.stationcol, self.paramcol) + return hsd, scores + + def dunn(self): + """ + Dunn test across the different stations for each pollutant + """ + return self.tidy.groupby(by=[self.paramcol]).apply( + lambda g: utils.dunn_test(g, self.rescol, self.stationcol, *self.othergroups).scores + ) - @cache_readonly def theilslopes(self, logs=False): raise NotImplementedError diff --git a/wqio/features.py b/wqio/features.py index d727d66e..5272ffc5 100644 --- a/wqio/features.py +++ b/wqio/features.py @@ -1599,20 +1599,18 @@ def scatterplot( raise ValueError(f"`eqn_pos` must be on of {list.positions.keys()}") # annotate axes with stats + slope = utils.sig_figs(modelres.params[1], n=3) + icept = utils.sig_figs(modelres.params[0], n=3) ax.annotate( - r"$\log(y) = {} \, \log(x) + {}$".format( - utils.sig_figs(modelres.params[1], n=3), - utils.sig_figs(modelres.params[0], n=3), - ), + rf"$\log(y) = {slope} \, \log(x) + {icept}$", (txt_x, txt_y), xycoords="axes fraction", ) + slope_pval = utils.process_p_vals(modelres.pvalues[1]) + icept_pval = utils.process_p_vals(modelres.pvalues[0]) ax.annotate( - "Slope p-value: {}\nIntercept p-value: {}".format( - utils.process_p_vals(modelres.pvalues[1]), - utils.process_p_vals(modelres.pvalues[0]), - ), + f"Slope p-value: {slope_pval}\nIntercept p-value: {icept_pval}", (txt_x, txt_y - vert_offset), xycoords="axes fraction", ) diff --git a/wqio/tests/__init__.py b/wqio/tests/__init__.py index 179885ce..8018b58f 100644 --- a/wqio/tests/__init__.py +++ b/wqio/tests/__init__.py @@ -1,6 +1,5 @@ import warnings - -from pkg_resources import resource_filename +from importlib import resources from wqio.tests.helpers import requires @@ -12,7 +11,7 @@ @requires(pytest, "pytest") def test(*args): - options = [resource_filename("wqio", "")] + options = [str(resources.files("wqio"))] options.extend(list(args)) return pytest.main(options) diff --git a/wqio/tests/_data/sed.pkl b/wqio/tests/_data/sed.pkl new file mode 100644 index 00000000..b17859de Binary files /dev/null and b/wqio/tests/_data/sed.pkl differ diff --git a/wqio/tests/_data/wq.pkl b/wqio/tests/_data/wq.pkl new file mode 100644 index 00000000..677d3198 Binary files /dev/null and b/wqio/tests/_data/wq.pkl differ diff --git a/wqio/tests/helpers.py b/wqio/tests/helpers.py index 177c1a3d..8587ffd2 100644 --- a/wqio/tests/helpers.py +++ b/wqio/tests/helpers.py @@ -6,13 +6,13 @@ from collections import namedtuple from contextlib import contextmanager from functools import wraps +from importlib import resources from io import StringIO import numpy import pandas import pytest from packaging.version import Version -from pkg_resources import resource_filename def get_img_tolerance(): @@ -154,8 +154,21 @@ def comp_statfxn(x, y): return stat(result, result * 0.25) +def group_statfxn(*x): + stat = namedtuple("teststat", ("statistic", "pvalue")) + result = max([max(_) for _ in x]) + return stat(result, 0.25) + + +def group_statfxn_with_control(*x, control): + stat = namedtuple("teststat", ("statistic", "pvalue")) + x = [*x, control] + result = max([max(_) for _ in x]) + return stat(result, 0.25) + + 
def test_data_path(filename): - path = resource_filename("wqio.tests._data", filename) + path = resources.files("wqio.tests._data").joinpath(filename) return path diff --git a/wqio/tests/test_datacollections.py b/wqio/tests/test_datacollections.py index 19becdb7..7fbcdf14 100644 --- a/wqio/tests/test_datacollections.py +++ b/wqio/tests/test_datacollections.py @@ -59,6 +59,26 @@ def dc(): return dc +@pytest.fixture +def dc_small(): + df = helpers.make_dc_data_complex() + dc = DataCollection( + df, + rescol="res", + qualcol="qual", + stationcol="loc", + paramcol="param", + ndval="<", + othergroups=None, + pairgroups=["state", "bmp"], + useros=True, + filterfxn=lambda g: g.name[1] in ["A", "B", "C"], + bsiter=10000, + ) + + return dc + + @pytest.fixture def dc_noNDs(): df = helpers.make_dc_data_complex() @@ -196,12 +216,12 @@ def test_std_dev(dc): station,Inflow,Outflow,Reference result,std. dev.,std. dev.,std. dev. param,,, - A,3.58649,8.719371,5.527633 - B,12.360099,13.60243,10.759285 - C,0.353755,1.691208,0.493325 - D,4.811938,2.849393,2.248178 - E,2.55038,1.096698,2.789238 - F,34.447565,5.361033,3.398367 + A,3.675059,8.92456,5.671232 + B,12.625938,13.922531,11.054115 + C,0.361364,1.727582,0.503498 + D,4.915433,2.90815,2.303697 + E,2.620266,1.132665,2.861698 + F,35.298251,5.476337,3.502956 """ check_stat(known_csv, dc.std_dev) @@ -260,12 +280,12 @@ def test_logstd_dev(dc): station,Inflow,Outflow,Reference result,Log-std. dev.,Log-std. dev.,Log-std. dev. param,,, - A,1.374026,1.343662,1.225352 - B,1.430381,2.07646,1.662001 - C,0.818504,1.263631,1.057177 - D,1.530871,1.187246,1.277927 - E,1.264403,1.121038,1.474431 - F,2.324063,1.516331,1.701596 + A,1.407957,1.375282,1.257185 + B,1.461146,2.125324,1.707543 + C,0.836108,1.290809,1.078977 + D,1.563796,1.211728,1.309485 + E,1.29905,1.157803,1.512734 + F,2.381456,1.548944,1.753965 """ check_stat(known_csv, dc.logstd_dev) @@ -292,12 +312,12 @@ def test_geostd_dev(dc): station,Inflow,Outflow,Reference Geo-std. dev.,Log-std. dev.,Log-std. dev.,Log-std. dev. 
param,,, - A,3.951225,3.833055,3.405365 - B,4.180294,7.976181,5.269843 - C,2.267105,3.538244,2.878234 - D,4.622199,3.278041,3.589191 - E,3.540977,3.068036,4.368548 - F,10.217099,4.55548,5.48269 + A,4.087597,3.956193,3.515511 + B,4.310897,8.375613,5.515395 + C,2.30737,3.635725,2.941668 + D,4.776921,3.359284,3.704266 + E,3.665813,3.182932,4.539124 + F,10.820642,4.706498,5.777465 """ check_stat(known_csv, dc.geostd_dev) @@ -315,7 +335,7 @@ def test_shapiro(dc): E,1.7e-05,0.654137,0.004896,0.818813,0.000165,0.74917 F,0.0,0.292916,2e-06,0.634671,0.000167,0.713968 """ - check_stat(known_csv, dc.shapiro) + check_stat(known_csv, dc.shapiro()) @helpers.seed @@ -331,7 +351,7 @@ def test_shapiro_log(dc): E,0.955088913,0.479993254,0.95211035,0.523841977,0.963425279,0.61430341 F,0.97542423,0.847370088,0.982230783,0.933124721,0.966197193,0.749036908 """ - check_stat(known_csv, dc.shapiro_log) + check_stat(known_csv, dc.shapiro_log()) @helpers.seed @@ -347,7 +367,7 @@ def test_lilliefors(dc): E,0.341398,0.001,0.239314,0.016156,0.233773,0.005575 F,0.419545,0.001,0.331315,0.001,0.284249,0.001 """ - check_stat(known_csv, dc.lilliefors) + check_stat(known_csv, dc.lilliefors()) @helpers.seed @@ -363,19 +383,19 @@ def test_lilliefors_log(dc): E,0.135066,0.471868,0.145523,0.477979,0.091649,0.928608 F,0.144208,0.306945,0.084633,0.927419,0.085869,0.980029 """ - check_stat(known_csv, dc.lilliefors_log) + check_stat(known_csv, dc.lilliefors_log()) @helpers.seed def test_anderson_darling(dc): with helpers.raises(NotImplementedError): - _ = dc.anderson_darling + _ = dc.anderson_darling() @helpers.seed def test_anderson_darling_log(dc): with helpers.raises(NotImplementedError): - _ = dc.anderson_darling_log + _ = dc.anderson_darling_log() @helpers.seed @@ -403,35 +423,35 @@ def test_mann_whitney(dc): F,Outflow,303.0,,236.0,0.2505911218,,0.4045186043 F,Reference,185.0,172.0,,0.8601783903,0.4045186043 """ - check_stat(known_csv, dc.mann_whitney, comp=True) + check_stat(known_csv, dc.mann_whitney(), comp=True) @helpers.seed def test_t_test(dc): known_csv = """\ - ,,pvalue,pvalue,pvalue,t_test,t_test,t_test - loc_2,,Inflow,Outflow,Reference,Inflow,Outflow,Reference - param,loc_1,,,,,, - A,Inflow,,0.2178424157,0.4563196599,,-1.2604458127,-0.7539785777 - A,Outflow,0.2178424157,,0.5240147979,1.2604458127,,0.643450194 - A,Reference,0.4563196599,0.5240147979,,0.7539785777,-0.643450194, - B,Inflow,,0.8430007638,0.3898358794,,0.1992705833,0.869235357 - B,Outflow,0.8430007638,,0.5491097882,-0.1992705833,,0.6043850808 - B,Reference,0.3898358794,0.5491097882,,-0.869235357,-0.6043850808, - C,Inflow,,0.1847386316,0.8191392537,,-1.3639360123,-0.2300373632 - C,Outflow,0.1847386316,,0.2179907667,1.3639360123,,1.2615982727 - C,Reference,0.8191392537,0.2179907667,,0.2300373632,-1.2615982727, - D,Inflow,,0.5484265023,0.344783812,,0.6056706932,0.9582600001 - D,Outflow,0.5484265023,,0.6299742693,-0.6056706932,,0.4851636024 - D,Reference,0.344783812,0.6299742693,,-0.9582600001,-0.4851636024, - E,Inflow,,0.2304569921,0.6770414622,,1.2287029977,-0.4198288251 - E,Outflow,0.2304569921,,0.1023435465,-1.2287029977,,-1.6935358498 - E,Reference,0.6770414622,0.1023435465,,0.4198288251,1.6935358498, - F,Inflow,,0.422008391,0.3549979666,,0.8190789273,0.9463539528 - F,Outflow,0.422008391,,0.4988994144,-0.8190789273,,0.6826435968 - F,Reference,0.3549979666,0.4988994144,,-0.9463539528,-0.6826435968 + ,,t_test,t_test,t_test,pvalue,pvalue,pvalue + loc_2,,Inflow,Outflow,Reference,Inflow,Outflow,Reference + param,loc_1,,,,,, + 
A,Inflow,,-1.239311,-0.761726,,0.222278,0.450806 + A,Outflow,1.239311,,0.630254,0.222278,,0.532113 + A,Reference,0.761726,-0.630254,,0.450806,0.532113, + B,Inflow,,0.200136,0.855661,,0.842296,0.397158 + B,Outflow,-0.200136,,0.594193,0.842296,,0.555814 + B,Reference,-0.855661,-0.594193,,0.397158,0.555814, + C,Inflow,,-1.363936,-0.228508,,0.179225,0.820243 + C,Outflow,1.363936,,1.283982,0.179225,,0.205442 + C,Reference,0.228508,-1.283982,,0.820243,0.205442, + D,Inflow,,0.61178,0.917349,,0.543631,0.364076 + D,Outflow,-0.61178,,0.475391,0.543631,,0.63686 + D,Reference,-0.917349,-0.475391,,0.364076,0.63686, + E,Inflow,,1.156605,-0.418858,,0.255738,0.677741 + E,Outflow,-1.156605,,-1.558039,0.255738,,0.128485 + E,Reference,0.418858,1.558039,,0.677741,0.128485, + F,Inflow,,0.874261,0.851029,,0.386833,0.400379 + F,Outflow,-0.874261,,0.63432,0.386833,,0.529575 + F,Reference,-0.851029,-0.63432,,0.400379,0.529575, """ - check_stat(known_csv, dc.t_test, comp=True) + check_stat(known_csv, dc.t_test(), comp=True) @helpers.seed @@ -459,7 +479,7 @@ def test_levene(dc): F,Outflow,0.841984453,,0.25881357,0.363948105,,0.613802541 F,Reference,0.734809611,0.25881357,,0.396999375,0.613802541, """ - check_stat(known_csv, dc.levene, comp=True) + check_stat(known_csv, dc.levene(), comp=True) @helpers.seed @@ -488,7 +508,7 @@ def test_wilcoxon(dc): F,Reference,22.0,28.0,,0.952765,0.656642, """ with pytest.warns(UserWarning): - check_stat(known_csv, dc.wilcoxon, comp=True) + check_stat(known_csv, dc.wilcoxon(), comp=True) @helpers.seed @@ -516,7 +536,7 @@ def test_ranksums(dc): F,Outflow,0.2459307,,0.3971011,1.1602902,,0.8468098 F,Reference,0.8486619,0.3971011,,0.190826,-0.8468098, """ - check_stat(known_csv, dc.ranksums, comp=True) + check_stat(known_csv, dc.ranksums(), comp=True) @helpers.seed @@ -545,7 +565,7 @@ def test_kendall(dc): F,Outflow,0.0,,0.388889,1.0,,0.063 F,Reference,0.07231,0.388889,,0.763851,0.063, """ - check_stat(known_csv, dc.kendall, comp=True) + check_stat(known_csv, dc.kendall(), comp=True) @helpers.seed @@ -573,13 +593,69 @@ def test_spearman(dc): F,Outflow,0.9645303744,,0.0560590794,-0.0106277141,,0.5028571429 F,Reference,0.6759971848,0.0560590794,,0.1348767061,0.5028571429 """ - check_stat(known_csv, dc.spearman, comp=True) + check_stat(known_csv, dc.spearman(), comp=True) + + +@helpers.seed +def test_kruskal_wallis(dc): + result = dc.kruskal_wallis() + _expected = [ + {"K-W H": 1.798578, "pvalue": 0.406859, "param": "A"}, + {"K-W H": 5.421338, "pvalue": 0.066492, "param": "B"}, + {"K-W H": 0.29261, "pvalue": 0.863894, "param": "C"}, + {"K-W H": 0.39957, "pvalue": 0.818907, "param": "D"}, + {"K-W H": 0.585441, "pvalue": 0.746231, "param": "E"}, + {"K-W H": 1.483048, "pvalue": 0.476387, "param": "F"}, + ] + expected = pandas.DataFrame(_expected).set_index("param") + pandas.testing.assert_frame_equal(result, expected) + + +@helpers.seed +def test_f_test(dc): + result = dc.f_test() + _expected = [ + {"f-test": 0.861339, "pvalue": 0.427754, "param": "A"}, + {"f-test": 0.343445, "pvalue": 0.710663, "param": "B"}, + {"f-test": 1.653142, "pvalue": 0.198833, "param": "C"}, + {"f-test": 0.526782, "pvalue": 0.592927, "param": "D"}, + {"f-test": 1.114655, "pvalue": 0.335738, "param": "E"}, + {"f-test": 0.738629, "pvalue": 0.482134, "param": "F"}, + ] + expected = pandas.DataFrame(_expected).set_index("param") + pandas.testing.assert_frame_equal(result, expected) + + +def test_tukey_hsd_smoke_test(dc): + hsd, scores = dc.tukey_hsd() + assert isinstance(hsd, pandas.DataFrame) + assert isinstance(scores, 
pandas.DataFrame) + + assert hsd.index.names == ["loc 1", "loc 2", "param"] + assert hsd.columns.tolist() == [ + "HSD Stat", + "p-value", + "CI-Low", + "CI-High", + "is_diff", + "sign_of_diff", + "score", + ] + + assert scores.index.names == ["param"] + assert scores.columns.tolist() == ["Inflow", "Outflow", "Reference"] + + +def test_dunn(dc): + dunnres = dc.dunn() + assert dunnres.index.tolist() == list("ABCDEF") + assert dunnres.columns.tolist() == ["Inflow", "Outflow", "Reference"] @helpers.seed def test_theilslopes(dc): with helpers.raises(NotImplementedError): - _ = dc.theilslopes + _ = dc.theilslopes() def test_inventory(dc): @@ -765,7 +841,7 @@ def test_selectDatasets(dc): def test_dist_compare_wrapper(x, all_same, func): y = [5, 5, 5, 5, 5] with mock.patch.object(stats, func.__name__) as _test: - result = _dist_compare(x, y, _test) + result = _dist_compare(x, y, _test, alternative="two-sided") if all_same: assert numpy.isnan(result.stat) assert numpy.isnan(result.pvalue) diff --git a/wqio/tests/utils_tests/test_numutils.py b/wqio/tests/utils_tests/test_numutils.py index b7ae2275..82ae2740 100644 --- a/wqio/tests/utils_tests/test_numutils.py +++ b/wqio/tests/utils_tests/test_numutils.py @@ -2,6 +2,7 @@ from collections import namedtuple from io import StringIO from textwrap import dedent +from typing import Any, Literal import numpy import numpy.testing as nptest @@ -9,6 +10,8 @@ import pandas.testing as pdtest import pytest import statsmodels.api as sm +from numpy._typing._array_like import NDArray +from pandas import DataFrame from scipy import stats from wqio.tests import helpers @@ -92,7 +95,7 @@ def test_process_p_vals(fxn, pval, expected, error_to_raise): (1.01, (None, None), ValueError), ], ) -def test_translate_p_vals(pval, expected, as_emoji, error_to_raise): +def test_translate_p_vals(pval, expected, as_emoji: bool, error_to_raise): with helpers.raises(error_to_raise): result = numutils.translate_p_vals(pval, as_emoji=as_emoji) assert result == expected[as_emoji] @@ -125,7 +128,7 @@ def test_anderson_darling(): @pytest.mark.parametrize("which", ["good", "bad"]) -def test_processAndersonDarlingResults(which): +def test_processAndersonDarlingResults(which: Literal["good"] | Literal["bad"]): fieldnames = ["statistic", "critical_values", "significance_level"] AndersonResult = namedtuple("AndersonResult", fieldnames) ARs = { @@ -215,7 +218,7 @@ def units_norm_data(): return raw, expected -def test_normalize_units(units_norm_data): +def test_normalize_units(units_norm_data: tuple[DataFrame, DataFrame]): unitsmap = {"ug/L": 1e-6, "mg/L": 1e-3, "g/L": 1e0} targetunits = {"Lead, Total": "ug/L", "Cadmium, Total": "mg/L"} @@ -226,7 +229,7 @@ def test_normalize_units(units_norm_data): pdtest.assert_frame_equal(result, expected) -def test_normalize_units_bad_targetunits(units_norm_data): +def test_normalize_units_bad_targetunits(units_norm_data: tuple[DataFrame, DataFrame]): unitsmap = {"ug/L": 1e-6, "mg/L": 1e-3, "g/L": 1e0} targetunits = {"Lead, Total": "ug/L"} @@ -243,7 +246,7 @@ def test_normalize_units_bad_targetunits(units_norm_data): ) -def test_normalize_units_bad_normalization(units_norm_data): +def test_normalize_units_bad_normalization(units_norm_data: tuple[DataFrame, DataFrame]): unitsmap = {"mg/L": 1e-3, "g/L": 1e0} targetunits = {"Lead, Total": "ug/L", "Cadmium, Total": "mg/L"} @@ -260,7 +263,7 @@ def test_normalize_units_bad_normalization(units_norm_data): ) -def test_normalize_units_bad_conversion(units_norm_data): +def 
test_normalize_units_bad_conversion(units_norm_data: tuple[DataFrame, DataFrame]): unitsmap = {"ug/L": 1e-6, "mg/L": 1e-3, "g/L": 1e0} targetunits = {"Lead, Total": "ng/L", "Cadmium, Total": "mg/L"} @@ -292,7 +295,7 @@ def test_test_pH2concentration(pH, expected, error): @helpers.seed @pytest.mark.parametrize("error", [None, ValueError]) -def test_compute_theilslope_default(error): +def test_compute_theilslope_default(error: types.NoneType | type[ValueError]): with helpers.raises(error): y = helpers.getTestROSData()["res"].values x = numpy.arange(len(y) - 1) if error else None @@ -443,7 +446,7 @@ def fit_data(): (None, "junk", ValueError), ], ) -def test_fit_line(fit_data, fitlogs, fitprobs, error): +def test_fit_line(fit_data: dict[str, NDArray[Any]], fitlogs, fitprobs, error): xy = { (None, None): (fit_data["zscores"], fit_data["data"]), ("y", None): (fit_data["zscores"], fit_data["data"]), @@ -483,13 +486,13 @@ def test_fit_line(fit_data, fitlogs, fitprobs, error): assert isinstance(res, sm.regression.linear_model.RegressionResultsWrapper) -def test_fit_line_through_origin(fit_data): +def test_fit_line_through_origin(fit_data: dict[str, NDArray[Any]]): x, y = fit_data["zscores"], fit_data["data"] x_, y_, res = numutils.fit_line(x, y, through_origin=True) assert res.params[0] == 0 -def test_fit_line_with_xhat(fit_data): +def test_fit_line_with_xhat(fit_data: dict[str, NDArray[Any]]): x, y = fit_data["zscores"], fit_data["data"] x_, y_, res = numutils.fit_line(x, y, xhat=[-2, -1, 0, 1, 2]) expected = [-0.566018, 4.774419, 10.114857, 15.455295, 20.795733] @@ -685,6 +688,50 @@ def test__comp_stat_generator_single_group_col(): ) +@pytest.mark.parametrize( + ("control", "statfxn"), + [(None, helpers.group_statfxn), ("Reference", helpers.group_statfxn_with_control)], +) +def test__group_comp_stat_generator(control, statfxn): + df = helpers.make_dc_data_complex().reset_index().loc[lambda df: df["state"].isin(["CA", "OR"])] + gen = numutils._group_comp_stat_generator( + df, ["bmp", "state"], "loc", "res", statfxn, statname="max_of_max", control=control + ) + + assert isinstance(gen, types.GeneratorType) + result = pandas.DataFrame(gen).sort_values(by=["bmp", "state"]).reset_index(drop=True) + + expected_vals = [ + 10.50949742, + 14.77713121, + 23.29366747, + 25.31733815, + 15.0478646, + 163.01000855, + 81.9363735, + 12.89370847, + 16.76919938, + 5.25222615, + 8.75794985, + 24.71916248, + 13.87664484, + 15.05442994, + ] + expected = ( + pandas.DataFrame( + { + "bmp": list(map(str, range(1, 8))) * 2, + "state": (["CA"] * 7) + (["OR"] * 7), + "max_of_max": expected_vals, + "pvalue": [0.25] * 14, + } + ) + .sort_values(by=["bmp", "state"]) + .reset_index(drop=True) + ) + pandas.testing.assert_frame_equal(result, expected, rtol=0.01) + + def test__paired_stat_generator(): df = helpers.make_dc_data_complex().unstack(level="loc") gen = numutils._paired_stat_generator(df, ["param"], "loc", "res", helpers.comp_statfxn) @@ -755,3 +802,122 @@ def test_remove_outliers(): x = numpy.random.normal(0, 4, size=37) assert numutils.remove_outliers(x).shape == expected_shape + + +def test_tukey_hsd_functions(): + expected_records = [ + { + "chemical_name": "Copper", + "Loc_0": -2.0, + "Loc_1": 6.0, + "Loc_2": -2.0, + "Loc_3": -4.0, + "Loc_4": 3.0, + "Loc_5": -1.0, + "Loc_6": 0.0, + }, + { + "chemical_name": "Di(2-ethylhexyl)phthalate", + "Loc_0": 3.0, + "Loc_1": 5.0, + "Loc_2": -2.0, + "Loc_3": -2.0, + "Loc_4": -2.0, + "Loc_5": -1.0, + "Loc_6": -1.0, + }, + { + "chemical_name": "Indeno(1,2,3-cd)pyrene", + "Loc_0": 
2.0, + "Loc_1": 0.0, + "Loc_2": 6.0, + "Loc_3": -2.0, + "Loc_4": -2.0, + "Loc_5": -4.0, + "Loc_6": 0.0, + }, + { + "chemical_name": "Lead", + "Loc_0": 0.0, + "Loc_1": 6.0, + "Loc_2": -2.0, + "Loc_3": -3.0, + "Loc_4": 4.0, + "Loc_5": -3.0, + "Loc_6": -2.0, + }, + { + "chemical_name": "Phenanthrene", + "Loc_0": 1.0, + "Loc_1": 0.0, + "Loc_2": 1.0, + "Loc_3": -3.0, + "Loc_4": 0.0, + "Loc_5": 0.0, + "Loc_6": 1.0, + }, + { + "chemical_name": "Pyrene", + "Loc_0": 0.0, + "Loc_1": -1.0, + "Loc_2": 3.0, + "Loc_3": -2.0, + "Loc_4": -2.0, + "Loc_5": -2.0, + "Loc_6": 4.0, + }, + { + "chemical_name": "Total Suspended Solids", + "Loc_0": -1.0, + "Loc_1": -1.0, + "Loc_2": -1.0, + "Loc_3": -1.0, + "Loc_4": -1.0, + "Loc_5": -1.0, + "Loc_6": 6.0, + }, + { + "chemical_name": "Zinc", + "Loc_0": 0.0, + "Loc_1": 1.0, + "Loc_2": -1.0, + "Loc_3": -6.0, + "Loc_4": -1.0, + "Loc_5": 4.0, + "Loc_6": 3.0, + }, + ] + expected = pandas.DataFrame(expected_records).set_index("chemical_name") + wq = pandas.read_pickle(helpers.test_data_path("wq.pkl")) + hsd = numutils.tukey_hsd(wq, "res", "location", "chemical_name") + result = numutils.process_tukey_hsd_scores(hsd, "location", "chemical_name") + pandas.testing.assert_frame_equal(result, expected, check_names=False) + + +def test_rank_stats(): + copper = pandas.read_pickle(helpers.test_data_path("sed.pkl")).loc[ + lambda df: df["chemical_name"] == "Copper" + ] + result = numutils.rank_stats(copper, "res", "location") + expected = pandas.DataFrame( + [ + {"location": "Loc_0", "count": 8, "sum": 178.0, "mean": 22.25}, + {"location": "Loc_1", "count": 11, "sum": 459.5, "mean": 41.77272727272727}, + {"location": "Loc_2", "count": 10, "sum": 248.0, "mean": 24.8}, + {"location": "Loc_3", "count": 11, "sum": 78.0, "mean": 7.090909090909091}, + {"location": "Loc_4", "count": 9, "sum": 503.0, "mean": 55.888888888888886}, + {"location": "Loc_5", "count": 12, "sum": 424.5, "mean": 35.375}, + ] + ) + pandas.testing.assert_frame_equal(result, expected) + + +def test_dunn_scores(): + copper = pandas.read_pickle(helpers.test_data_path("sed.pkl")).loc[ + lambda df: df["chemical_name"] == "Copper" + ] + dunnres = numutils.dunn_test(copper, "res", "location") + expected = pandas.Series( + data=[-1, 1, -1, -3, 3, 1], index=["Loc_0", "Loc_1", "Loc_2", "Loc_3", "Loc_4", "Loc_5"] + ) + pandas.testing.assert_series_equal(dunnres.scores, expected, check_names=False) diff --git a/wqio/utils/numutils.py b/wqio/utils/numutils.py index d6d3a89e..f3c39f44 100644 --- a/wqio/utils/numutils.py +++ b/wqio/utils/numutils.py @@ -1,16 +1,20 @@ import itertools from collections import namedtuple +from collections.abc import Callable from textwrap import dedent import numpy +import pandas import statsmodels.api as sm from probscale.algo import _estimate_from_fit from scipy import stats +from scipy.stats._hypotests import TukeyHSDResult from wqio import validate from wqio.utils import misc TheilStats = namedtuple("TheilStats", ("slope", "intercept", "low_slope", "high_slope")) +DunnResult = namedtuple("DunnResult", ("rank_stats", "results", "scores")) def sig_figs(x, n, expthresh=5, tex=False, pval=False, forceint=False): @@ -338,8 +342,9 @@ def normalize_units( if normalization.isnull().any(): nulls = df[normalization.isnull()][unitcol].unique() - msg += "Some normalization factors could not be mapped to the {} column ({})\n".format( - unitcol, nulls + msg += ( + "Some normalization factors could not " + f"be mapped to the {unitcol} column ({nulls})\n" ) if conversion.isnull().any(): @@ -625,7 +630,14 @@ def 
remove_outliers(x, factor=1.5): def _comp_stat_generator( - df, groupcols, pivotcol, rescol, statfxn, statname=None, pbarfxn=None, **statopts + df: pandas.DataFrame, + groupcols: list[str], + pivotcol: str, + rescol: str, + statfxn: Callable, + statname: str = "stat", + pbarfxn: Callable = misc.no_op, + **statopts, ): """Generator of records containing results of comparitive statistical functions. @@ -657,8 +669,6 @@ def _comp_stat_generator( comp_stats : pandas.DataFrame """ - if not pbarfxn: - pbarfxn = misc.no_op groupcols = validate.at_least_empty_list(groupcols) if statname is None: @@ -682,8 +692,48 @@ def _comp_stat_generator( yield row +def _group_comp_stat_generator( + df: pandas.DataFrame, + groupcols: list[str], + pivotcol: str, + rescol: str, + statfxn: Callable, + pbarfxn: Callable = misc.no_op, + statname: str = "stat", + control: str | None = None, + **statopts, +): + groupcols = validate.at_least_empty_list(groupcols) + + for names, main_group in pbarfxn(df.groupby(by=groupcols)): + subsets = { + pivot: subgroup[rescol].to_numpy() + for pivot, subgroup in main_group.groupby(by=pivotcol) + } + + _pivots = list(subsets.keys()) + _samples = list(subsets.values()) + + if control is not None: + control_sample = _samples.pop(_pivots.index(control)) + result = statfxn(*_samples, control=control_sample, **statopts) + + else: + result = statfxn(*_samples, **statopts) + + row = {**dict(zip(groupcols, names)), statname: result.statistic, "pvalue": result.pvalue} + yield row + + def _paired_stat_generator( - df, groupcols, pivotcol, rescol, statfxn, statname=None, pbarfxn=None, **statopts + df: pandas.DataFrame, + groupcols: list[str], + pivotcol: str, + rescol: str, + statfxn: Callable, + statname: str = "stat", + pbarfxn: Callable = misc.no_op, + **statopts, ): """Generator of records containing results of comparitive statistical functions specifically for paired data. 
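Reviewer note: the new `_group_comp_stat_generator` above yields one record per combination of `groupcols`, splitting each group along `pivotcol` into one sample per level and passing those samples to `statfxn` (optionally popping one level out as the `control` sample). A hedged sketch of how it is consumed, mirroring the new tests; the frame and column names here are made up:

    import pandas
    from scipy import stats
    from wqio.utils import numutils

    df = pandas.DataFrame({
        "param": ["A"] * 6 + ["B"] * 6,
        "loc": ["In", "In", "Out", "Out", "Ref", "Ref"] * 2,
        "res": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0],
    })

    # yields e.g. {"param": "A", "K-W H": <statistic>, "pvalue": <p>}
    records = numutils._group_comp_stat_generator(
        df, ["param"], "loc", "res", stats.kruskal, statname="K-W H"
    )
    results = pandas.DataFrame.from_records(records).set_index("param")

This is the same record shape that `DataCollection.comparison_stat_allway` assembles before setting its index.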
@@ -715,8 +765,6 @@
     comp_stats : pandsa.DataFrame
 
     """
-    if not pbarfxn:
-        pbarfxn = misc.no_op
     groupcols = validate.at_least_empty_list(groupcols)
 
     if statname is None:
@@ -740,3 +788,195 @@
         stat = statfxn(x, y, **statopts)
         row.update({statname: stat[0], "pvalue": stat.pvalue})
         yield row
+
+
+def _tukey_res_to_df(
+    names: dict[int, str], hsd_res: TukeyHSDResult, group_prefix: str
+) -> pandas.DataFrame:
+    """Converts Scipy's TukeyHSDResult to a dataframe
+
+    Parameters
+    ----------
+    names : dict of {int: str}
+        Labels of the groups present in the Tukey HSD results,
+        keyed by their position in the result matrices
+    hsd_res : TukeyHSDResult
+        Tukey HSD result to be converted to a dataframe
+    group_prefix : str
+        Prefix that describes the nature of the groups (e.g., "Loc")
+
+    Returns
+    -------
+    hsd_df : pandas.DataFrame
+
+    """
+    # the confidence intervals are the same for every pair, so compute them once
+    ci_bands = hsd_res.confidence_interval()
+    rows = []
+    for i, n1 in enumerate(names):
+        for j, n2 in enumerate(names):
+            if i != j:
+                row = {
+                    f"{group_prefix} 1": names[n1],
+                    f"{group_prefix} 2": names[n2],
+                    "HSD Stat": hsd_res.statistic[i, j],
+                    "p-value": hsd_res.pvalue[i, j],
+                    "CI-Low": ci_bands.low[i, j],
+                    "CI-High": ci_bands.high[i, j],
+                }
+
+                rows.append(row)
+
+    df = pandas.DataFrame(rows).set_index([f"{group_prefix} 1", f"{group_prefix} 2"])
+    return df
+
+
+def tukey_hsd(
+    df: pandas.DataFrame,
+    rescol: str,
+    compcol: str,
+    paramcol: str,
+    *othergroups: str,
+):
+    """
+    Run the Tukey HSD test on a dataframe based on groupings
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    rescol : str
+        Name of the column that contains the values of interest
+    compcol : str
+        Name of the column that defines the groups to be compared
+        (i.e., treatment vs control)
+    paramcol : str
+        Name of the column that contains the measured parameter
+    *othergroups : str
+        Names of any other columns that need to be considered when
+        defining the groups.
+
+    Returns
+    -------
+    hsd_df : pandas.DataFrame
+
+    """
+    groupcols = [paramcol, *othergroups]
+    scores = []
+    for name, g in df.groupby(by=groupcols):
+        locs = {loc: subg[rescol].values for loc, subg in g.groupby(compcol) if subg.shape[0] > 1}
+        subset_names = {n: loc for n, loc in enumerate(locs)}
+        res = stats.tukey_hsd(*[v for v in locs.values()])
+        df_res = _tukey_res_to_df(subset_names, res, group_prefix=compcol)
+
+        keys = {g: n for g, n in zip(groupcols, name)}
+        scores.append(
+            df_res.assign(
+                is_diff=lambda df: df["p-value"].lt(0.05).astype(int),
+                sign_of_diff=lambda df: numpy.sign(df["HSD Stat"]).astype(int),
+                score=lambda df: df["is_diff"] * df["sign_of_diff"],
+                **keys,
+            ).set_index(groupcols, append=True)
+        )
+
+    return pandas.concat(scores, ignore_index=False, axis="index")
+
+
+def process_tukey_hsd_scores(
+    hsd_df: pandas.DataFrame, compcol: str, paramcol: str
+) -> pandas.DataFrame:
+    """
+    Converts a Tukey HSD results dataframe into scores that describe
+    the magnitude of each group's values relative to the other groups.
+
+    Generally speaking:
+      * -7 to -5 -> significantly lower
+      * -5 to -3 -> moderately lower
+      * -3 to -1 -> slightly lower
+      * -1 to +1 -> neutral
+      * +1 to +3 -> slightly higher
+      * +3 to +5 -> moderately higher
+      * +5 to +7 -> significantly higher
+
+    Parameters
+    ----------
+    hsd_df : pandas.DataFrame
+        Dataframe returned by `tukey_hsd`
+    compcol : str
+        Name of the column that defines the groups being compared
+    paramcol : str
+        Name of the column that contains the measured parameter
+
+    Returns
+    -------
+    scores : pandas.DataFrame
+
+    """
+    return (
+        hsd_df["score"]
+        .unstack(level=f"{compcol} 2")
+        .fillna(0)
+        .groupby(level=paramcol, as_index=False, group_keys=False)
+        .apply(lambda g: g.sum(axis="columns"))
+        .unstack(level=f"{compcol} 1")
+    )
+
+
+def rank_stats(
+    df: pandas.DataFrame,
+    rescol: str,
+    compcol: str,
+    *othergroups: str,
+) -> pandas.DataFrame:
+    """
+    Compute basic rank statistics (count, mean, sum) of a dataframe
+    based on logical groupings. Assumes a single parameter is in the
+    dataframe.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    rescol : str
+        Name of the column that contains the values of interest
+    compcol : str
+        Name of the column that defines the groups to be compared
+        (i.e., treatment vs control)
+    *othergroups : str
+        Names of any other columns that need to be considered when
+        defining the groups.
+
+    Returns
+    -------
+    rank_stats : pandas.DataFrame
+
+    """
+
+    return (
+        df.assign(rank=lambda df: df[rescol].rank())
+        .groupby([compcol, *othergroups])["rank"]
+        .agg(["count", "sum", "mean"])
+        .reset_index()
+    )
+
+
+def _dunn_result(rs: pandas.DataFrame, group_prefix: str):
+    # critical two-sided z-score at alpha = 0.001667
+    threshold = stats.norm.isf(0.001667 / 2)
+    N = rs["count"].sum()
+    results = (
+        rs.join(rs, how="cross", lsuffix="_1", rsuffix="_2")
+        .assign(
+            diff=lambda df: df["mean_1"] - df["mean_2"],
+            scale=lambda df: ((N * (N + 1) / 12) * (1 / df["count_1"] + 1 / df["count_2"])) ** 0.5,
+            dunn_z=lambda df: df["diff"] / df["scale"],
+            score=lambda df: numpy.where(
+                numpy.abs(df["dunn_z"]) <= threshold, 0, numpy.sign(df["dunn_z"])
+            ).astype(int),
+        )
+        .loc[:, [f"{group_prefix}_1", f"{group_prefix}_2", "dunn_z", "score"]]
+    )
+
+    return results
+
+
+def _dunn_scores(dr: pandas.DataFrame, group_prefix: str) -> pandas.Series:
+    scores = dr.pivot(index=f"{group_prefix}_2", columns=f"{group_prefix}_1", values="score").sum()
+    return scores
+
+
+def dunn_test(df: pandas.DataFrame, rescol: str, compcol: str, *othergroups: str) -> DunnResult:
+    rs = rank_stats(df, rescol, compcol, *othergroups)
+    results = _dunn_result(rs, compcol)
+    scores = _dunn_scores(results, compcol)
+    return DunnResult(rs, results, scores)
diff --git a/wqio/viz.py b/wqio/viz.py
index 5dbc4e42..16f85e3a 100644
--- a/wqio/viz.py
+++ b/wqio/viz.py
@@ -200,7 +200,7 @@ def jointplot(
     return jg
 
 
-def whiskers_and_fliers(x, q1=None, q3=None, transformout=None):
+def whiskers_and_fliers(x, q1=None, q3=None, flierfactor=1.5, transformout=None):
     """Computes extent of whiskers and fliers on optionally
     transformed data for box and whisker plots.
 
@@ -210,6 +210,8 @@
     x : array-like
         Sequence of optionally transformed data.
     q1, q3 : floats, optional
        First and third quartiles of the optionally transformed data.
+    flierfactor : float, optional (default=1.5)
+        Multiplier of the IQR that sets the fences beyond which
+        points are flagged as fliers
     transformout : callable, optional
        Function to un-transform the results back into the original
        space of the data.
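Reviewer note: the new `flierfactor` argument scales the IQR fences computed in the hunk that follows. A standalone sketch of that arithmetic (the data is made up):

    import numpy

    numpy.random.seed(0)
    x = numpy.sort(numpy.random.lognormal(size=37))
    q1, q3 = numpy.percentile(x, [25, 75])
    iqr = q3 - q1

    for flierfactor in (1.5, 3.0):  # 1.5 reproduces the old hard-coded fences
        loval = q1 - flierfactor * iqr
        hival = q3 + flierfactor * iqr
        fliers = x[(x < loval) | (x > hival)]
        print(flierfactor, fliers.size)  # a larger factor flags fewer (or as many) fliers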
@@ -243,12 +245,12 @@ def transformout(x): iqr = q3 - q1 # get low extreme - loval = q1 - (1.5 * iqr) + loval = q1 - (flierfactor * iqr) whislo = numpy.compress(x >= loval, x) whislo = q1 if len(whislo) == 0 or numpy.min(whislo) > q1 else numpy.min(whislo) # get high extreme - hival = q3 + (1.5 * iqr) + hival = q3 + (flierfactor * iqr) whishi = numpy.compress(x <= hival, x) whishi = q3 if len(whishi) == 0 or numpy.max(whishi) < q3 else numpy.max(whishi)
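Closing reviewer note: the Dunn additions in `numutils` compose as `rank_stats` -> `_dunn_result` -> `_dunn_scores`, bundled by `dunn_test` into the new `DunnResult` namedtuple. A hedged usage sketch against a made-up single-parameter frame:

    import pandas
    from wqio.utils import numutils

    df = pandas.DataFrame({
        "location": ["Loc_0"] * 5 + ["Loc_1"] * 5,
        "res": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
    })

    res = numutils.dunn_test(df, "res", "location")
    res.rank_stats  # per-group count/sum/mean of the pooled ranks
    res.results     # pairwise Dunn z-scores with -1/0/+1 significance scores
    res.scores      # per-group sums of those scores, as used by DataCollection.dunn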