Skip to content

Commit

Permalink
run ruff, isort, etc
Browse files Browse the repository at this point in the history
  • Loading branch information
phobson committed Feb 19, 2024
1 parent 524f86e commit 5ef99c6
Show file tree
Hide file tree
Showing 29 changed files with 414 additions and 648 deletions.
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.0
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
language_version: python3

- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
hooks:
- id: pyupgrade
args:
- --py310-plus

- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
name: absolufy-imports
18 changes: 18 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
line-length = 100

[lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
## flake8-bugbear
# "B",
# flake8-simplify
"SIM",
]

[format]
indent-style = "space"
10 changes: 10 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@ markers =

[pep8]
max-line-length = 100

[isort]
profile=black
src_paths=wqio
# sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY"]
# # profile = "black"
# skip_gitignore = true
# force_to_top = ["true"]
# default_section = "THIRDPARTY"
# known_firstparty = ["wqio"]
7 changes: 3 additions & 4 deletions wqio/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from wqio import utils


_logger = logging.getLogger(__name__)

fitestimate = namedtuple(
Expand All @@ -18,7 +17,7 @@


def _acceleration(data):
""" Compute the acceleration statistic.
"""Compute the acceleration statistic.
Parameters
----------
Expand All @@ -39,11 +38,11 @@ def _acceleration(data):
sumsqr_resids = max(((data.mean() - data) ** 2).sum(), 1e-12)

# compute and return the acceleration
return sumcube_resids / (6 * sumsqr_resids ** 1.5)
return sumcube_resids / (6 * sumsqr_resids**1.5)


def _make_boot_index(elements, niter):
""" Generate an array of bootstrap sample sets
"""Generate an array of bootstrap sample sets
Parameters
----------
Expand Down
74 changes: 20 additions & 54 deletions wqio/datacollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,19 @@
from functools import partial

import numpy
from scipy import stats
import pandas
import statsmodels.api as sm
from scipy import stats
from statsmodels.tools.decorators import cache_readonly

try:
from tqdm import tqdm
except ImportError: # pragma: no cover
tqdm = None

from wqio import utils
from wqio import bootstrap
from wqio import bootstrap, utils, validate
from wqio.features import Dataset, Location
from wqio.ros import ROS
from wqio import validate
from wqio.features import Location, Dataset


_Stat = namedtuple("_stat", ["stat", "pvalue"])

Expand All @@ -29,7 +26,7 @@ def _dist_compare(x, y, stat_comp_func):
return stat_comp_func(x, y, alternative="two-sided")


class DataCollection(object):
class DataCollection:
"""Generalized water quality comparison object.
Parameters
Expand Down Expand Up @@ -92,7 +89,6 @@ def __init__(
bsiter=10000,
showpbar=True,
):

# cache for all of the properties
self._cache = {}

Expand Down Expand Up @@ -203,7 +199,7 @@ def generic_stat(
statname=None,
has_pvalue=False,
filterfxn=None,
**statopts
**statopts,
):
"""Generic function to estimate a statistic and its CIs.
Expand Down Expand Up @@ -277,11 +273,7 @@ def fxn(x):

return pandas.Series(values, index=statnames)

groups = (
self.tidy.groupby(by=self.groupcols)
.filter(filterfxn)
.groupby(by=self.groupcols)
)
groups = self.tidy.groupby(by=self.groupcols).filter(filterfxn).groupby(by=self.groupcols)

if tqdm and self.showpbar:
tqdm.pandas(desc="Computing stats")
Expand All @@ -299,9 +291,7 @@ def fxn(x):
@cache_readonly
def count(self):
return (
self.generic_stat(
lambda x: x.shape[0], use_bootstrap=False, statname="Count"
)
self.generic_stat(lambda x: x.shape[0], use_bootstrap=False, statname="Count")
.fillna(0)
.astype(int)
)
Expand Down Expand Up @@ -339,7 +329,7 @@ def percentile(self, percentile):
"""Return the percentiles (0 - 100) for the data."""
return self.generic_stat(
lambda x: numpy.percentile(x, percentile),
statname="pctl {}".format(percentile),
statname=f"pctl {percentile}",
use_bootstrap=False,
)

Expand Down Expand Up @@ -485,13 +475,7 @@ def comparison_stat(self, statfxn, statname=None, paired=False, **statopts):
index_cols = meta_columns + station_columns

results = generator(
data,
meta_columns,
self.stationcol,
rescol,
statfxn,
statname=statname,
**statopts
data, meta_columns, self.stationcol, rescol, statfxn, statname=statname, **statopts
)
return pandas.DataFrame.from_records(results).set_index(index_cols)

Expand Down Expand Up @@ -524,15 +508,11 @@ def wilcoxon(self):

@cache_readonly
def kendall(self):
return self.comparison_stat(
stats.kendalltau, statname="kendalltau", paired=True
)
return self.comparison_stat(stats.kendalltau, statname="kendalltau", paired=True)

@cache_readonly
def spearman(self):
return self.comparison_stat(
stats.spearmanr, statname="spearmanrho", paired=True
)
return self.comparison_stat(stats.spearmanr, statname="spearmanrho", paired=True)

@cache_readonly
def theilslopes(self, logs=False):
Expand All @@ -542,9 +522,7 @@ def theilslopes(self, logs=False):
def locations(self):
_locations = []
groups = (
self.data.groupby(by=self.groupcols)
.filter(self.filterfxn)
.groupby(by=self.groupcols)
self.data.groupby(by=self.groupcols).filter(self.filterfxn).groupby(by=self.groupcols)
)
cols = [self._raw_rescol, self.qualcol]
for names, data in groups:
Expand All @@ -569,7 +547,7 @@ def locations(self):
return _locations

def datasets(self, loc1, loc2):
""" Generate ``Dataset`` objects from the raw data of the
"""Generate ``Dataset`` objects from the raw data of the
``DataCollection``.
Data are first grouped by ``self.groupcols`` and
Expand Down Expand Up @@ -627,7 +605,7 @@ def _filter_collection(collection, squeeze, **kwargs):
return items

def selectLocations(self, squeeze=False, **conditions):
""" Select ``Location`` objects meeting specified criteria
"""Select ``Location`` objects meeting specified criteria
from the ``DataCollection``.
Parameters
Expand Down Expand Up @@ -663,13 +641,11 @@ def selectLocations(self, squeeze=False, **conditions):
"""

locations = self._filter_collection(
self.locations.copy(), squeeze=squeeze, **conditions
)
locations = self._filter_collection(self.locations.copy(), squeeze=squeeze, **conditions)
return locations

def selectDatasets(self, loc1, loc2, squeeze=False, **conditions):
""" Select ``Dataset`` objects meeting specified criteria
"""Select ``Dataset`` objects meeting specified criteria
from the ``DataCollection``.
Parameters
Expand Down Expand Up @@ -709,9 +685,7 @@ def selectDatasets(self, loc1, loc2, squeeze=False, **conditions):
{'param': 'A'}
"""

datasets = self._filter_collection(
self.datasets(loc1, loc2), squeeze=squeeze, **conditions
)
datasets = self._filter_collection(self.datasets(loc1, loc2), squeeze=squeeze, **conditions)
return datasets

def n_unique(self, column):
Expand All @@ -728,7 +702,7 @@ def n_unique(self, column):
)

def stat_summary(self, percentiles=None, groupcols=None, useros=True):
""" A generic, high-level summary of the data collection.
"""A generic, high-level summary of the data collection.
Parameters
----------
Expand All @@ -745,16 +719,8 @@ def stat_summary(self, percentiles=None, groupcols=None, useros=True):
"""

if useros:
col = self.roscol
else:
col = self.rescol

if groupcols is None:
groupcols = self.groupcols
else:
groupcols = validate.at_least_empty_list(groupcols)

col = self.roscol if useros else self.rescol
groupcols = validate.at_least_empty_list(groupcols)
ptiles = percentiles or [0.1, 0.25, 0.5, 0.75, 0.9]
summary = (
self.tidy.groupby(by=groupcols)
Expand Down
15 changes: 7 additions & 8 deletions wqio/datasets.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import os
from zipfile import ZipFile
from urllib import request
from pathlib import Path
from urllib import request
from zipfile import ZipFile

from wqio import validate


def download(dataset, year=None, redownload=True, data_dir=None):
fname = validate.dataset(dataset)

if year is None:
tag = "master"
else:
tag = "v{:d}".format(year)
tag = "master" if year is None else f"v{year:d}"

url_template = "https://github.com/Geosyntec/water-quality-datasets/blob/{tag:s}/data/{fname:s}?raw=true"
url_template = (
"https://github.com/Geosyntec/water-quality-datasets/blob/{tag:s}/data/{fname:s}?raw=true"
)
src_url = url_template.format(tag=tag, fname=fname)

if data_dir is None:
Expand All @@ -31,4 +30,4 @@ def download(dataset, year=None, redownload=True, data_dir=None):
with ZipFile(dst_path, "r") as zip_ref:
zip_ref.extractall(data_dir)

return dst_path.parent / "{}.csv".format(dst_path.stem)
return dst_path.parent / f"{dst_path.stem}.csv"
Loading

0 comments on commit 5ef99c6

Please sign in to comment.