Skip to content

Commit

Permalink
run ruff, isort, etc
Browse files Browse the repository at this point in the history
  • Loading branch information
phobson committed Feb 19, 2024
1 parent 524f86e commit 5ef99c6
Show file tree
Hide file tree
Showing 29 changed files with 414 additions and 648 deletions.
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.0
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
language_version: python3

- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
hooks:
- id: pyupgrade
args:
- --py310-plus

- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
name: absolufy-imports
18 changes: 18 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
line-length = 100

[lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
## flake8-bugbear
# "B",
# flake8-simplify
"SIM",
]

[format]
indent-style = "space"
10 changes: 10 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@ markers =

[pep8]
max-line-length = 100

[isort]
profile=black
src_paths=wqio
# sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY"]
# # profile = "black"
# skip_gitignore = true
# force_to_top = ["true"]
# default_section = "THIRDPARTY"
# known_firstparty = ["wqio"]
7 changes: 3 additions & 4 deletions wqio/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from wqio import utils


_logger = logging.getLogger(__name__)

fitestimate = namedtuple(
Expand All @@ -18,7 +17,7 @@


def _acceleration(data):
""" Compute the acceleration statistic.
"""Compute the acceleration statistic.
Parameters
----------
Expand All @@ -39,11 +38,11 @@ def _acceleration(data):
sumsqr_resids = max(((data.mean() - data) ** 2).sum(), 1e-12)

# compute and return the acceleration
return sumcube_resids / (6 * sumsqr_resids ** 1.5)
return sumcube_resids / (6 * sumsqr_resids**1.5)


def _make_boot_index(elements, niter):
""" Generate an array of bootstrap sample sets
"""Generate an array of bootstrap sample sets
Parameters
----------
Expand Down
74 changes: 20 additions & 54 deletions wqio/datacollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,19 @@
from functools import partial

import numpy
from scipy import stats
import pandas
import statsmodels.api as sm
from scipy import stats
from statsmodels.tools.decorators import cache_readonly

try:
from tqdm import tqdm
except ImportError: # pragma: no cover
tqdm = None

from wqio import utils
from wqio import bootstrap
from wqio import bootstrap, utils, validate
from wqio.features import Dataset, Location
from wqio.ros import ROS
from wqio import validate
from wqio.features import Location, Dataset


_Stat = namedtuple("_stat", ["stat", "pvalue"])

Expand All @@ -29,7 +26,7 @@ def _dist_compare(x, y, stat_comp_func):
return stat_comp_func(x, y, alternative="two-sided")


class DataCollection(object):
class DataCollection:
"""Generalized water quality comparison object.
Parameters
Expand Down Expand Up @@ -92,7 +89,6 @@ def __init__(
bsiter=10000,
showpbar=True,
):

# cache for all of the properties
self._cache = {}

Expand Down Expand Up @@ -203,7 +199,7 @@ def generic_stat(
statname=None,
has_pvalue=False,
filterfxn=None,
**statopts
**statopts,
):
"""Generic function to estimate a statistic and its CIs.
Expand Down Expand Up @@ -277,11 +273,7 @@ def fxn(x):

return pandas.Series(values, index=statnames)

groups = (
self.tidy.groupby(by=self.groupcols)
.filter(filterfxn)
.groupby(by=self.groupcols)
)
groups = self.tidy.groupby(by=self.groupcols).filter(filterfxn).groupby(by=self.groupcols)

if tqdm and self.showpbar:
tqdm.pandas(desc="Computing stats")
Expand All @@ -299,9 +291,7 @@ def fxn(x):
@cache_readonly
def count(self):
return (
self.generic_stat(
lambda x: x.shape[0], use_bootstrap=False, statname="Count"
)
self.generic_stat(lambda x: x.shape[0], use_bootstrap=False, statname="Count")
.fillna(0)
.astype(int)
)
Expand Down Expand Up @@ -339,7 +329,7 @@ def percentile(self, percentile):
"""Return the percentiles (0 - 100) for the data."""
return self.generic_stat(
lambda x: numpy.percentile(x, percentile),
statname="pctl {}".format(percentile),
statname=f"pctl {percentile}",
use_bootstrap=False,
)

Expand Down Expand Up @@ -485,13 +475,7 @@ def comparison_stat(self, statfxn, statname=None, paired=False, **statopts):
index_cols = meta_columns + station_columns

results = generator(
data,
meta_columns,
self.stationcol,
rescol,
statfxn,
statname=statname,
**statopts
data, meta_columns, self.stationcol, rescol, statfxn, statname=statname, **statopts
)
return pandas.DataFrame.from_records(results).set_index(index_cols)

Expand Down Expand Up @@ -524,15 +508,11 @@ def wilcoxon(self):

@cache_readonly
def kendall(self):
return self.comparison_stat(
stats.kendalltau, statname="kendalltau", paired=True
)
return self.comparison_stat(stats.kendalltau, statname="kendalltau", paired=True)

@cache_readonly
def spearman(self):
return self.comparison_stat(
stats.spearmanr, statname="spearmanrho", paired=True
)
return self.comparison_stat(stats.spearmanr, statname="spearmanrho", paired=True)

@cache_readonly
def theilslopes(self, logs=False):
Expand All @@ -542,9 +522,7 @@ def theilslopes(self, logs=False):
def locations(self):
_locations = []
groups = (
self.data.groupby(by=self.groupcols)
.filter(self.filterfxn)
.groupby(by=self.groupcols)
self.data.groupby(by=self.groupcols).filter(self.filterfxn).groupby(by=self.groupcols)
)
cols = [self._raw_rescol, self.qualcol]
for names, data in groups:
Expand All @@ -569,7 +547,7 @@ def locations(self):
return _locations

def datasets(self, loc1, loc2):
""" Generate ``Dataset`` objects from the raw data of the
"""Generate ``Dataset`` objects from the raw data of the
``DataCollection``.
Data are first grouped by ``self.groupcols`` and
Expand Down Expand Up @@ -627,7 +605,7 @@ def _filter_collection(collection, squeeze, **kwargs):
return items

def selectLocations(self, squeeze=False, **conditions):
""" Select ``Location`` objects meeting specified criteria
"""Select ``Location`` objects meeting specified criteria
from the ``DataCollection``.
Parameters
Expand Down Expand Up @@ -663,13 +641,11 @@ def selectLocations(self, squeeze=False, **conditions):
"""

locations = self._filter_collection(
self.locations.copy(), squeeze=squeeze, **conditions
)
locations = self._filter_collection(self.locations.copy(), squeeze=squeeze, **conditions)
return locations

def selectDatasets(self, loc1, loc2, squeeze=False, **conditions):
""" Select ``Dataset`` objects meeting specified criteria
"""Select ``Dataset`` objects meeting specified criteria
from the ``DataCollection``.
Parameters
Expand Down Expand Up @@ -709,9 +685,7 @@ def selectDatasets(self, loc1, loc2, squeeze=False, **conditions):
{'param': 'A'}
"""

datasets = self._filter_collection(
self.datasets(loc1, loc2), squeeze=squeeze, **conditions
)
datasets = self._filter_collection(self.datasets(loc1, loc2), squeeze=squeeze, **conditions)
return datasets

def n_unique(self, column):
Expand All @@ -728,7 +702,7 @@ def n_unique(self, column):
)

def stat_summary(self, percentiles=None, groupcols=None, useros=True):
""" A generic, high-level summary of the data collection.
"""A generic, high-level summary of the data collection.
Parameters
----------
Expand All @@ -745,16 +719,8 @@ def stat_summary(self, percentiles=None, groupcols=None, useros=True):
"""

if useros:
col = self.roscol
else:
col = self.rescol

if groupcols is None:
groupcols = self.groupcols
else:
groupcols = validate.at_least_empty_list(groupcols)

col = self.roscol if useros else self.rescol
groupcols = validate.at_least_empty_list(groupcols)
ptiles = percentiles or [0.1, 0.25, 0.5, 0.75, 0.9]
summary = (
self.tidy.groupby(by=groupcols)
Expand Down
15 changes: 7 additions & 8 deletions wqio/datasets.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import os
from zipfile import ZipFile
from urllib import request
from pathlib import Path
from urllib import request
from zipfile import ZipFile

from wqio import validate


def download(dataset, year=None, redownload=True, data_dir=None):
fname = validate.dataset(dataset)

if year is None:
tag = "master"
else:
tag = "v{:d}".format(year)
tag = "master" if year is None else f"v{year:d}"

url_template = "https://github.com/Geosyntec/water-quality-datasets/blob/{tag:s}/data/{fname:s}?raw=true"
url_template = (
"https://github.com/Geosyntec/water-quality-datasets/blob/{tag:s}/data/{fname:s}?raw=true"
)
src_url = url_template.format(tag=tag, fname=fname)

if data_dir is None:
Expand All @@ -31,4 +30,4 @@ def download(dataset, year=None, redownload=True, data_dir=None):
with ZipFile(dst_path, "r") as zip_ref:
zip_ref.extractall(data_dir)

return dst_path.parent / "{}.csv".format(dst_path.stem)
return dst_path.parent / f"{dst_path.stem}.csv"
Loading

0 comments on commit 5ef99c6

Please sign in to comment.