From 5ef99c6493ddf9c5081457d587d709240b9db429 Mon Sep 17 00:00:00 2001 From: Paul Hobson Date: Mon, 19 Feb 2024 10:33:04 -0800 Subject: [PATCH] run ruff, isort, etc --- .pre-commit-config.yaml | 29 ++++++ ruff.toml | 18 ++++ setup.cfg | 10 +++ wqio/bootstrap.py | 7 +- wqio/datacollections.py | 74 +++++---------- wqio/datasets.py | 15 ++-- wqio/features.py | 92 +++++++------------ wqio/hydro.py | 114 +++++++----------------- wqio/ros.py | 50 ++++------- wqio/samples.py | 34 +++---- wqio/tests/__init__.py | 5 +- wqio/tests/helpers.py | 39 ++++---- wqio/tests/test_bootstrap.py | 11 +-- wqio/tests/test_datacollections.py | 36 ++++---- wqio/tests/test_features.py | 97 +++++++------------- wqio/tests/test_hydro.py | 24 ++--- wqio/tests/test_ros.py | 35 +++----- wqio/tests/test_samples.py | 10 +-- wqio/tests/test_theil.py | 9 +- wqio/tests/test_viz.py | 29 ++---- wqio/tests/utils_tests/test_misc.py | 28 +++--- wqio/tests/utils_tests/test_numutils.py | 55 ++++-------- wqio/theil.py | 8 +- wqio/utils/__init__.py | 6 +- wqio/utils/dateutils.py | 13 ++- wqio/utils/misc.py | 41 ++++----- wqio/utils/numutils.py | 90 ++++++++----------- wqio/validate.py | 14 +-- wqio/viz.py | 69 +++++--------- 29 files changed, 414 insertions(+), 648 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 ruff.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4a2462b7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.2.0 + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format + + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + language_version: python3 + + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + args: + - --py310-plus + + - repo: https://github.com/MarcoGorelli/absolufy-imports + rev: v0.3.1 + hooks: + - id: absolufy-imports + name: absolufy-imports diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..d53498b4 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,18 @@ +line-length = 100 + +[lint] +select = [ + # pycodestyle + "E", + # Pyflakes + "F", + # pyupgrade + "UP", + ## flake8-bugbear + # "B", + # flake8-simplify + "SIM", +] + +[format] +indent-style = "space" diff --git a/setup.cfg b/setup.cfg index d9114c3d..e50eaa67 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,3 +18,13 @@ markers = [pep8] max-line-length = 100 + +[isort] +profile=black +src_paths=wqio +# sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY"] +# # profile = "black" +# skip_gitignore = true +# force_to_top = ["true"] +# default_section = "THIRDPARTY" +# known_firstparty = ["wqio"] diff --git a/wqio/bootstrap.py b/wqio/bootstrap.py index 43be6098..a105f9b3 100644 --- a/wqio/bootstrap.py +++ b/wqio/bootstrap.py @@ -7,7 +7,6 @@ from wqio import utils - _logger = logging.getLogger(__name__) fitestimate = namedtuple( @@ -18,7 +17,7 @@ def _acceleration(data): - """ Compute the acceleration statistic. + """Compute the acceleration statistic. 
+    """Compute the acceleration statistic.
Parameters ---------- @@ -39,11 +38,11 @@ def _acceleration(data): sumsqr_resids = max(((data.mean() - data) ** 2).sum(), 1e-12) # compute and return the acceleration - return sumcube_resids / (6 * sumsqr_resids ** 1.5) + return sumcube_resids / (6 * sumsqr_resids**1.5) def _make_boot_index(elements, niter): - """ Generate an array of bootstrap sample sets + """Generate an array of bootstrap sample sets Parameters ---------- diff --git a/wqio/datacollections.py b/wqio/datacollections.py index eb4ad495..93ea6add 100644 --- a/wqio/datacollections.py +++ b/wqio/datacollections.py @@ -3,9 +3,9 @@ from functools import partial import numpy -from scipy import stats import pandas import statsmodels.api as sm +from scipy import stats from statsmodels.tools.decorators import cache_readonly try: @@ -13,12 +13,9 @@ except ImportError: # pragma: no cover tqdm = None -from wqio import utils -from wqio import bootstrap +from wqio import bootstrap, utils, validate +from wqio.features import Dataset, Location from wqio.ros import ROS -from wqio import validate -from wqio.features import Location, Dataset - _Stat = namedtuple("_stat", ["stat", "pvalue"]) @@ -29,7 +26,7 @@ def _dist_compare(x, y, stat_comp_func): return stat_comp_func(x, y, alternative="two-sided") -class DataCollection(object): +class DataCollection: """Generalized water quality comparison object. Parameters @@ -92,7 +89,6 @@ def __init__( bsiter=10000, showpbar=True, ): - # cache for all of the properties self._cache = {} @@ -203,7 +199,7 @@ def generic_stat( statname=None, has_pvalue=False, filterfxn=None, - **statopts + **statopts, ): """Generic function to estimate a statistic and its CIs. @@ -277,11 +273,7 @@ def fxn(x): return pandas.Series(values, index=statnames) - groups = ( - self.tidy.groupby(by=self.groupcols) - .filter(filterfxn) - .groupby(by=self.groupcols) - ) + groups = self.tidy.groupby(by=self.groupcols).filter(filterfxn).groupby(by=self.groupcols) if tqdm and self.showpbar: tqdm.pandas(desc="Computing stats") @@ -299,9 +291,7 @@ def fxn(x): @cache_readonly def count(self): return ( - self.generic_stat( - lambda x: x.shape[0], use_bootstrap=False, statname="Count" - ) + self.generic_stat(lambda x: x.shape[0], use_bootstrap=False, statname="Count") .fillna(0) .astype(int) ) @@ -339,7 +329,7 @@ def percentile(self, percentile): """Return the percentiles (0 - 100) for the data.""" return self.generic_stat( lambda x: numpy.percentile(x, percentile), - statname="pctl {}".format(percentile), + statname=f"pctl {percentile}", use_bootstrap=False, ) @@ -485,13 +475,7 @@ def comparison_stat(self, statfxn, statname=None, paired=False, **statopts): index_cols = meta_columns + station_columns results = generator( - data, - meta_columns, - self.stationcol, - rescol, - statfxn, - statname=statname, - **statopts + data, meta_columns, self.stationcol, rescol, statfxn, statname=statname, **statopts ) return pandas.DataFrame.from_records(results).set_index(index_cols) @@ -524,15 +508,11 @@ def wilcoxon(self): @cache_readonly def kendall(self): - return self.comparison_stat( - stats.kendalltau, statname="kendalltau", paired=True - ) + return self.comparison_stat(stats.kendalltau, statname="kendalltau", paired=True) @cache_readonly def spearman(self): - return self.comparison_stat( - stats.spearmanr, statname="spearmanrho", paired=True - ) + return self.comparison_stat(stats.spearmanr, statname="spearmanrho", paired=True) @cache_readonly def theilslopes(self, logs=False): @@ -542,9 +522,7 @@ def theilslopes(self, logs=False): def 
locations(self): _locations = [] groups = ( - self.data.groupby(by=self.groupcols) - .filter(self.filterfxn) - .groupby(by=self.groupcols) + self.data.groupby(by=self.groupcols).filter(self.filterfxn).groupby(by=self.groupcols) ) cols = [self._raw_rescol, self.qualcol] for names, data in groups: @@ -569,7 +547,7 @@ def locations(self): return _locations def datasets(self, loc1, loc2): - """ Generate ``Dataset`` objects from the raw data of the + """Generate ``Dataset`` objects from the raw data of the ``DataColletion``. Data are first grouped by ``self.groupcols`` and @@ -627,7 +605,7 @@ def _filter_collection(collection, squeeze, **kwargs): return items def selectLocations(self, squeeze=False, **conditions): - """ Select ``Location`` objects meeting specified criteria + """Select ``Location`` objects meeting specified criteria from the ``DataColletion``. Parameters @@ -663,13 +641,11 @@ def selectLocations(self, squeeze=False, **conditions): """ - locations = self._filter_collection( - self.locations.copy(), squeeze=squeeze, **conditions - ) + locations = self._filter_collection(self.locations.copy(), squeeze=squeeze, **conditions) return locations def selectDatasets(self, loc1, loc2, squeeze=False, **conditions): - """ Select ``Dataset`` objects meeting specified criteria + """Select ``Dataset`` objects meeting specified criteria from the ``DataColletion``. Parameters @@ -709,9 +685,7 @@ def selectDatasets(self, loc1, loc2, squeeze=False, **conditions): {'param': 'A'} """ - datasets = self._filter_collection( - self.datasets(loc1, loc2), squeeze=squeeze, **conditions - ) + datasets = self._filter_collection(self.datasets(loc1, loc2), squeeze=squeeze, **conditions) return datasets def n_unique(self, column): @@ -728,7 +702,7 @@ def n_unique(self, column): ) def stat_summary(self, percentiles=None, groupcols=None, useros=True): - """ A generic, high-level summary of the data collection. + """A generic, high-level summary of the data collection. 
Parameters ---------- @@ -745,16 +719,8 @@ def stat_summary(self, percentiles=None, groupcols=None, useros=True): """ - if useros: - col = self.roscol - else: - col = self.rescol - - if groupcols is None: - groupcols = self.groupcols - else: - groupcols = validate.at_least_empty_list(groupcols) - + col = self.roscol if useros else self.rescol + groupcols = validate.at_least_empty_list(groupcols) ptiles = percentiles or [0.1, 0.25, 0.5, 0.75, 0.9] summary = ( self.tidy.groupby(by=groupcols) diff --git a/wqio/datasets.py b/wqio/datasets.py index f2d7924e..88d7a433 100644 --- a/wqio/datasets.py +++ b/wqio/datasets.py @@ -1,7 +1,7 @@ import os -from zipfile import ZipFile -from urllib import request from pathlib import Path +from urllib import request +from zipfile import ZipFile from wqio import validate @@ -9,12 +9,11 @@ def download(dataset, year=None, redownload=True, data_dir=None): fname = validate.dataset(dataset) - if year is None: - tag = "master" - else: - tag = "v{:d}".format(year) + tag = "master" if year is None else f"v{year:d}" - url_template = "https://github.com/Geosyntec/water-quality-datasets/blob/{tag:s}/data/{fname:s}?raw=true" + url_template = ( + "https://github.com/Geosyntec/water-quality-datasets/blob/{tag:s}/data/{fname:s}?raw=true" + ) src_url = url_template.format(tag=tag, fname=fname) if data_dir is None: @@ -31,4 +30,4 @@ def download(dataset, year=None, redownload=True, data_dir=None): with ZipFile(dst_path, "r") as zip_ref: zip_ref.extractall(data_dir) - return dst_path.parent / "{}.csv".format(dst_path.stem) + return dst_path.parent / f"{dst_path.stem}.csv" diff --git a/wqio/features.py b/wqio/features.py index 72d0e3da..a0beef85 100644 --- a/wqio/features.py +++ b/wqio/features.py @@ -1,18 +1,14 @@ import numpy -from scipy import stats -from matplotlib import pyplot import pandas -import statsmodels.api as sm -from statsmodels.tools.decorators import cache_readonly import seaborn +import statsmodels.api as sm +from matplotlib import pyplot from probscale.algo import _estimate_from_fit +from scipy import stats +from statsmodels.tools.decorators import cache_readonly -from wqio import utils -from wqio import bootstrap +from wqio import bootstrap, utils, validate, viz from wqio.ros import ROS -from wqio import validate -from wqio import viz - # meta data mappings based on station station_names = { @@ -27,8 +23,8 @@ colors = {"Influent": palette[0], "Effluent": palette[1], "Reference Flow": palette[2]} -class Location(object): - """ Object providing convenient access to statistical and +class Location: + """Object providing convenient access to statistical and graphical methods for summarizing a single set of water quality observations for a single pollutant. 
@@ -179,9 +175,7 @@ def __init__( self.ndvals = ndval # original data and quantity - self.raw_data = dataframe.assign( - **{self.cencol: dataframe[qualcol].isin(self.ndvals)} - ) + self.raw_data = dataframe.assign(**{self.cencol: dataframe[qualcol].isin(self.ndvals)}) self._dataframe = None self._data = None @@ -192,9 +186,7 @@ def dataframe(self): **{self.cencol: lambda df: df[self.qualcol].isin(self.ndvals)} ) if self.useros: - ros = ROS( - df=df, result=self.rescol, censorship=self.cencol, as_array=False - ) + ros = ROS(df=df, result=self.rescol, censorship=self.cencol, as_array=False) self._dataframe = ros[["final", self.cencol]] else: self._dataframe = df[[self.rescol, self.cencol]] @@ -318,16 +310,12 @@ def min(self): @cache_readonly def min_detect(self): if self.hasData: - return self.raw_data[self.rescol][ - ~self.raw_data[self.qualcol].isin(self.ndvals) - ].min() + return self.raw_data[self.rescol][~self.raw_data[self.qualcol].isin(self.ndvals)].min() @cache_readonly def min_DL(self): if self.hasData: - return self.raw_data[self.rescol][ - self.raw_data[self.qualcol].isin(self.ndvals) - ].min() + return self.raw_data[self.rescol][self.raw_data[self.qualcol].isin(self.ndvals)].min() @cache_readonly def max(self): @@ -438,9 +426,7 @@ def boxplot_stats(self, log=True, bacteria=False): transformout=numpy.exp, ) else: - wnf = viz.whiskers_and_fliers( - self.data, self.pctl25, self.pctl75, transformout=None - ) + wnf = viz.whiskers_and_fliers(self.data, self.pctl25, self.pctl75, transformout=None) bxpstats.update(wnf) return [bxpstats] @@ -460,7 +446,7 @@ def boxplot( patch_artist=False, xlims=None, ): - """ Draws a boxplot and whisker on a matplotlib figure + """Draws a boxplot and whisker on a matplotlib figure Parameters ---------- @@ -544,9 +530,9 @@ def probplot( clearYLabels=False, rotateticklabels=True, bestfit=False, - **plotopts + **plotopts, ): - """ Draws a probability plot on a matplotlib figure + """Draws a probability plot on a matplotlib figure Parameters ---------- @@ -623,9 +609,9 @@ def statplot( xlabel=None, axtype="prob", patch_artist=False, - **plotopts + **plotopts, ): - """ Creates a two-axis figure with a boxplot & probability plot. + """Creates a two-axis figure with a boxplot & probability plot. Parameters ---------- @@ -686,12 +672,7 @@ def statplot( ) self.probplot( - ax=ax2, - yscale=yscale, - axtype=axtype, - ylabel=None, - clearYLabels=True, - **plotopts + ax=ax2, yscale=yscale, axtype=axtype, ylabel=None, clearYLabels=True, **plotopts ) ax1.yaxis.tick_left() @@ -702,7 +683,7 @@ def statplot( def verticalScatter( self, ax=None, pos=1, ylabel=None, yscale="log", ignoreROS=True, markersize=6 ): - """ Draws a clustered & jittered scatter plot of the data + """Draws a clustered & jittered scatter plot of the data Parameters ---------- @@ -773,8 +754,8 @@ def verticalScatter( return fig -class Dataset(object): - """ Dataset: object for comparings two Location objects +class Dataset: + """Dataset: object for comparings two Location objects Parameters ---------- @@ -797,7 +778,6 @@ class Dataset(object): # not the other way around. 
This will allow Dataset.influent = None # by passing in a dataframe where df.shape[0] == 0 def __init__(self, influent, effluent, useros=True, name=None): - # basic attributes self.influent = influent self.effluent = effluent @@ -847,12 +827,10 @@ def _paired_stats(self): return self._non_paired_stats and self.paired_data.shape[0] > 20 def __repr__(self): - x = "\n N influent {0}\n N effluent = {1}".format( - self.influent.N, self.effluent.N - ) + x = f"\n N influent {self.influent.N}\n N effluent = {self.effluent.N}" if self.definition is not None: for k, v in self.definition.items(): - x = "{0}\n {1} = {2}".format(x, k.title(), v) + x = f"{x}\n {k.title()} = {v}" return x @property @@ -1075,9 +1053,7 @@ def _mannwhitney_stats(self): @cache_readonly def _kendall_stats(self): if self._paired_stats: - return stats.kendalltau( - self.paired_data.inflow.res, self.paired_data.outflow.res - ) + return stats.kendalltau(self.paired_data.inflow.res, self.paired_data.outflow.res) @cache_readonly def _spearman_stats(self): @@ -1171,7 +1147,7 @@ def boxplot( offset=0.5, patch_artist=False, ): - """ Adds a boxplot to a matplotlib figure + """Adds a boxplot to a matplotlib figure Parameters ---------- @@ -1268,7 +1244,7 @@ def probplot( rotateticklabels=True, bestfit=False, ): - """ Adds probability plots to a matplotlib figure + """Adds probability plots to a matplotlib figure Parameters ---------- @@ -1316,7 +1292,7 @@ def probplot( xlabels = { "pp": "Theoretical percentiles", "qq": "Theoretical quantiles", - "prob": "Non-exceedance probability (\%)", + "prob": r"Non-exceedance probability (\%)", } ax.set_xlabel(xlabels[axtype]) @@ -1408,7 +1384,7 @@ def statplot( return fig def jointplot(self, hist=False, kde=True, rug=True, **scatter_kws): - """ Create a joint distribution plot for the dataset + """Create a joint distribution plot for the dataset Parameters ---------- @@ -1466,9 +1442,9 @@ def scatterplot( eqn_pos="lower right", equal_scales=True, fitopts=None, - **markeropts + **markeropts, ): - """ Creates an influent/effluent scatter plot + """Creates an influent/effluent scatter plot Parameters ---------- @@ -1506,9 +1482,7 @@ def scatterplot( ax.set_yscale(yscale) # common symbology - commonopts = dict( - linestyle="none", markeredgewidth=0.5, markersize=6, zorder=10 - ) + commonopts = dict(linestyle="none", markeredgewidth=0.5, markersize=6, zorder=10) # plot the ROSd'd result, if requested if useros: @@ -1622,9 +1596,7 @@ def scatterplot( try: txt_x, txt_y = positions.get(eqn_pos.lower()) except KeyError: - raise ValueError( - "`eqn_pos` must be on of {}".format(list.positions.keys()) - ) + raise ValueError(f"`eqn_pos` must be on of {list.positions.keys()}") # annotate axes with stats ax.annotate( diff --git a/wqio/hydro.py b/wqio/hydro.py index 23e31d8c..f10ca3e7 100644 --- a/wqio/hydro.py +++ b/wqio/hydro.py @@ -1,18 +1,13 @@ import warnings import numpy -from matplotlib import pyplot -from matplotlib import dates -from matplotlib import gridspec -import seaborn import pandas - -from wqio import utils -from wqio import viz -from wqio import validate - +import seaborn +from matplotlib import dates, gridspec, pyplot from pandas.plotting import register_matplotlib_converters +from wqio import utils, validate, viz + register_matplotlib_converters() SEC_PER_MINUTE = 60.0 @@ -133,9 +128,7 @@ def parse_storm_events( data.resample(freq) .agg(agg_dict) .loc[:, lambda df: df.columns.isin(cols_to_use)] - .assign( - __wet=lambda df: numpy.any(df[water_columns] > 0, axis=1) & ~df[baseflowcol] - ) + 
.assign(__wet=lambda df: numpy.any(df[water_columns] > 0, axis=1) & ~df[baseflowcol]) .assign(__windiff=lambda df: _wet_window_diff(df["__wet"], ie_periods)) .pipe(_wet_first_row, "__wet", "__windiff") .assign(__event_start=lambda df: df["__windiff"] == 1) @@ -156,8 +149,8 @@ def parse_storm_events( return res -class Storm(object): - """ Object representing a storm event +class Storm: + """Object representing a storm event Parameters ---------- @@ -189,7 +182,6 @@ def __init__( freqMinutes=5, volume_conversion=1, ): - self.inflowcol = inflowcol self.outflowcol = outflowcol self.precipcol = precipcol @@ -200,7 +192,7 @@ def __init__( # basic data self.data = dataframe[dataframe[stormcol] == self.stormnumber].copy() - self.hydrofreq_label = "{0} min".format(self.freqMinutes) + self.hydrofreq_label = f"{self.freqMinutes} min" # tease out start/stop info self.start = self.data.index[0] @@ -216,9 +208,7 @@ def __init__( prev_storm_mask = dataframe[stormcol] == self.stormnumber - 1 previous_end = dataframe[prev_storm_mask].index[-1] antecedent_timedelta = self.start - previous_end - self.antecedent_period_days = ( - antecedent_timedelta.total_seconds() / SEC_PER_DAY - ) + self.antecedent_period_days = antecedent_timedelta.total_seconds() / SEC_PER_DAY else: self.antecedent_period_days = numpy.nan @@ -316,9 +306,7 @@ def inflow(self): def outflow(self): if self._outflow is None: if self.outflowcol is not None: - self._outflow = self.data[self.data[self.outflowcol] > 0][ - self.outflowcol - ] + self._outflow = self.data[self.data[self.outflowcol] > 0][self.outflowcol] else: self._outflow = numpy.array([]) return self._outflow @@ -388,9 +376,7 @@ def _peak_depth(self): @property def peak_precip_intensity(self): if self._peak_precip_intensity is None and self.has_precip: - self._peak_precip_intensity = ( - self._peak_depth * MIN_PER_HOUR / self.freqMinutes - ) + self._peak_precip_intensity = self._peak_depth * MIN_PER_HOUR / self.freqMinutes return self._peak_precip_intensity @property @@ -414,17 +400,13 @@ def total_precip_depth(self): @property def total_inflow_volume(self): if self._total_inflow_volume is None and self.has_inflow: - self._total_inflow_volume = ( - self.data[self.inflowcol].sum() * self.volume_conversion - ) + self._total_inflow_volume = self.data[self.inflowcol].sum() * self.volume_conversion return self._total_inflow_volume @property def total_outflow_volume(self): if self._total_outflow_volume is None and self.has_outflow: - self._total_outflow_volume = ( - self.data[self.outflowcol].sum() * self.volume_conversion - ) + self._total_outflow_volume = self.data[self.outflowcol].sum() * self.volume_conversion return self._total_outflow_volume @property @@ -486,7 +468,6 @@ def peak_lag_hours(self): and self.peak_outflow_time is not None and self.peak_inflow_time is not None ): - time_delta = self.peak_outflow_time - self.peak_inflow_time self._peak_lag_hours = time_delta.total_seconds() / SEC_PER_HOUR return self._peak_lag_hours @@ -514,7 +495,7 @@ def summary_dict(self): return self._summary_dict def is_small(self, minprecip=0.0, mininflow=0.0, minoutflow=0.0): - """ Determines whether a storm can be considered "small". + """Determines whether a storm can be considered "small". 
Parameters ---------- @@ -530,18 +511,9 @@ def is_small(self, minprecip=0.0, mininflow=0.0, minoutflow=0.0): """ storm_is_small = ( - ( - self.total_precip_depth is not None - and self.total_precip_depth < minprecip - ) - or ( - self.total_inflow_volume is not None - and self.total_inflow_volume < mininflow - ) - or ( - self.total_outflow_volume is not None - and self.total_outflow_volume < minoutflow - ) + (self.total_precip_depth is not None and self.total_precip_depth < minprecip) + or (self.total_inflow_volume is not None and self.total_inflow_volume < mininflow) + or (self.total_outflow_volume is not None and self.total_outflow_volume < minoutflow) ) return storm_is_small @@ -549,7 +521,7 @@ def _get_event_time(self, column, bound): index_map = {"start": 0, "end": -1} quantity = self.data[self.data[column] > 0] if quantity.shape[0] == 0: - warnings.warn("Storm has no {}".format(column), UserWarning) + warnings.warn(f"Storm has no {column}", UserWarning) else: return quantity.index[index_map[bound]] @@ -558,13 +530,9 @@ def _get_max_quantity(self, column): def _compute_centroid(self, column): # ordinal time index of storm - time_idx = [ - dates.date2num(idx.to_pydatetime()) for idx in self.data.index.tolist() - ] + time_idx = [dates.date2num(idx.to_pydatetime()) for idx in self.data.index.tolist()] - centroid = numpy.sum(self.data[column] * time_idx) / numpy.sum( - self.data[column] - ) + centroid = numpy.sum(self.data[column] * time_idx) / numpy.sum(self.data[column]) if numpy.isnan(centroid): return None @@ -572,7 +540,6 @@ def _compute_centroid(self, column): return pandas.Timestamp(dates.num2date(centroid)).tz_convert(None) def _plot_centroids(self, ax, yfactor=0.5): - artists = [] labels = [] y_val = yfactor * ax.get_ylim()[1] @@ -631,10 +598,8 @@ def _plot_centroids(self, ax, yfactor=0.5): return artists, labels - def plot_hydroquantity( - self, quantity, ax=None, label=None, otherlabels=None, artists=None - ): - """ Draws a hydrologic quantity to a matplotlib axes. + def plot_hydroquantity(self, quantity, ax=None, label=None, otherlabels=None, artists=None): + """Draws a hydrologic quantity to a matplotlib axes. Parameters ---------- @@ -675,7 +640,7 @@ def plot_hydroquantity( try: meta = self.meta[quantity] except KeyError: - raise KeyError("{} not available".format(quantity)) + raise KeyError(f"{quantity} not available") # plot the data self.data[quantity].fillna(0).plot( @@ -724,9 +689,7 @@ def summaryPlot( None """ fig = pyplot.figure(**figopts) - gs = gridspec.GridSpec( - nrows=2, ncols=1, height_ratios=[1, axratio], hspace=0.12 - ) + gs = gridspec.GridSpec(nrows=2, ncols=1, height_ratios=[1, axratio], hspace=0.12) rainax = fig.add_subplot(gs[0]) rainax.yaxis.set_major_locator(pyplot.MaxNLocator(5)) flowax = fig.add_subplot(gs[1], sharex=rainax) @@ -801,8 +764,8 @@ def summaryPlot( return fig, artists, labels -class HydroRecord(object): - """ Class representing an entire hydrologic record. +class HydroRecord: + """Class representing an entire hydrologic record. 
Parameters ---------- @@ -861,7 +824,6 @@ def __init__( stormclass=None, lowmem=False, ): - # validate input if precipcol is None and inflowcol is None and outflowcol is None: msg = "`hydrodata` must have at least a precip or in/outflow column" @@ -957,16 +919,13 @@ def storm_stats(self): "Centroid Lag Hours", ] if self._storm_stats is None: - storm_stats = pandas.DataFrame( - [self.storms[sn].summary_dict for sn in self.storms] - ) + storm_stats = pandas.DataFrame([self.storms[sn].summary_dict for sn in self.storms]) self._storm_stats = storm_stats[col_order] return self._storm_stats.sort_values(by=["Storm Number"]).reset_index(drop=True) def _define_storms(self, debug=False): - parsed = parse_storm_events( self._raw_data, self.intereventHours, @@ -981,7 +940,7 @@ def _define_storms(self, debug=False): return parsed def getStormFromTimestamp(self, timestamp, lookback_hours=0, smallstorms=False): - """ Get the storm associdated with a give (sample) date + """Get the storm associdated with a give (sample) date Parameters ---------- @@ -1019,12 +978,7 @@ def getStormFromTimestamp(self, timestamp, lookback_hours=0, smallstorms=False): storms = self.data.loc[lookback_time:timestamp, [self.stormcol]] storms = storms[storms > 0].dropna() - if storms.shape[0] == 0: - # no storm - storm_number = None - else: - # storm w/i the lookback period - storm_number = int(storms.iloc[-1]) + storm_number = None if storms.shape[0] == 0 else int(storms.iloc[-1]) # return storm_number and storms if smallstorms: @@ -1033,7 +987,7 @@ def getStormFromTimestamp(self, timestamp, lookback_hours=0, smallstorms=False): return storm_number, self.storms.get(storm_number, None) def histogram(self, valuecol, bins, **factoropts): - """ Plot a faceted, categorical histogram of storms. + """Plot a faceted, categorical histogram of storms. Parameters ---------- @@ -1060,9 +1014,9 @@ def histogram(self, valuecol, bins, **factoropts): return fg -class DrainageArea(object): +class DrainageArea: def __init__(self, total_area=1.0, imp_area=1.0, bmp_area=0.0): - """ A simple object representing the drainage area of a BMP. + """A simple object representing the drainage area of a BMP. Units are not enforced, so keep them consistent yourself. The calculations available assume that the area of the BMP and the @@ -1116,7 +1070,5 @@ def simple_method(self, storm_depth, volume_conversion=1.0, annual_factor=1.0): bmp_conversion = self.bmp_area * volume_conversion # total runoff based on actual storm depth - runoff_volume = ( - drainage_conversion * annual_factor + bmp_conversion - ) * storm_depth + runoff_volume = (drainage_conversion * annual_factor + bmp_conversion) * storm_depth return runoff_volume diff --git a/wqio/ros.py b/wqio/ros.py index 6550bc31..9ed53c75 100644 --- a/wqio/ros.py +++ b/wqio/ros.py @@ -1,13 +1,12 @@ -import warnings import logging +import warnings import numpy -from scipy import stats import pandas +from scipy import stats from wqio import utils - _logger = logging.getLogger(__name__) @@ -64,7 +63,7 @@ def _ros_sort(df, result, censorship, log=True, warn=False): def cohn_numbers(df, result, censorship): - """ + r""" Computes the Cohn numbers for the detection limits in the dataset. The Cohn Numbers are: @@ -100,8 +99,7 @@ def cohn_numbers(df, result, censorship): """ def nuncen_above(row): - """ A, the number of uncensored obs above the given threshold. 
- """ + """A, the number of uncensored obs above the given threshold.""" # index of results above the lower_dl DL above = df[result] >= row["lower_dl"] @@ -116,7 +114,7 @@ def nuncen_above(row): return df[above & below & detect].shape[0] def nobs_below(row): - """ B, the number of observations (cen & uncen) below the given + """B, the number of observations (cen & uncen) below the given threshold """ @@ -140,7 +138,7 @@ def nobs_below(row): return LTE_censored + LT_uncensored def ncen_equal(row): - """ C, the number of censored observations at the given + """C, the number of censored observations at the given threshold. """ @@ -150,15 +148,15 @@ def ncen_equal(row): return censored_below.sum() def set_upper_limit(cohn): - """ Sets the upper_dl DL for each row of the Cohn dataframe. """ + """Sets the upper_dl DL for each row of the Cohn dataframe.""" if cohn.shape[0] > 1: return cohn["lower_dl"].shift(-1).fillna(value=numpy.inf) else: return [numpy.inf] def compute_PE(A, B): - """ Computes the probability of excedance for each row of the - Cohn dataframe. """ + """Computes the probability of excedance for each row of the + Cohn dataframe.""" N = len(A) PE = numpy.empty(N, dtype="float64") PE[-1] = 0.0 @@ -186,11 +184,7 @@ def compute_PE(A, B): .assign(nobs_below=lambda df: df.apply(nobs_below, axis=1)) .assign(ncen_equal=lambda df: df.apply(ncen_equal, axis=1)) .reindex(range(DLs.shape[0] + 1)) - .assign( - prob_exceedance=lambda df: compute_PE( - df["nuncen_above"], df["nobs_below"] - ) - ) + .assign(prob_exceedance=lambda df: compute_PE(df["nuncen_above"], df["nobs_below"])) ) else: @@ -208,7 +202,7 @@ def compute_PE(A, B): def _detection_limit_index(res, cohn): - """ Helper function to create an array of indices for the detection + """Helper function to create an array of indices for the detection limits (cohn) corresponding to each data point. Parameters @@ -262,9 +256,7 @@ def _ros_group_rank(df, dl_idx, censorship): """ ranks = ( - df.assign(rank=1) - .groupby(by=[dl_idx, censorship])["rank"] - .transform(lambda g: g.cumsum()) + df.assign(rank=1).groupby(by=[dl_idx, censorship])["rank"].transform(lambda g: g.cumsum()) ) return ranks @@ -364,7 +356,7 @@ def plotting_positions(df, censorship, cohn): def _ros_estimate(df, result, censorship, transform_in, transform_out): - """ Computed the estimated censored from the best-fit line of a + """Computed the estimated censored from the best-fit line of a probability plot of the uncensored values. 
Parameters @@ -467,9 +459,7 @@ def _do_ros( modeled = ( df.pipe(_ros_sort, result=result, censorship=censorship, log=log, warn=warn) .assign( - det_limit_index=lambda df: df[result].apply( - _detection_limit_index, args=(cohn,) - ) + det_limit_index=lambda df: df[result].apply(_detection_limit_index, args=(cohn,)) ) .assign(rank=lambda df: _ros_group_rank(df, "det_limit_index", censorship)) .assign(plot_pos=lambda df: plotting_positions(df, censorship, cohn)) @@ -478,16 +468,12 @@ def _do_ros( ) if floor: - modeled = modeled.assign( - final=modeled["final"].where(lambda x: x >= floor, floor) - ) + modeled = modeled.assign(final=modeled["final"].where(lambda x: x >= floor, floor)) return modeled -def is_valid_to_ros( - df, censorship, max_fraction_censored=0.8, min_uncensored=2, as_obj=False -): +def is_valid_to_ros(df, censorship, max_fraction_censored=0.8, min_uncensored=2, as_obj=False): # basic counts/metrics of the dataset N_observations = df.shape[0] N_censored = df[censorship].astype(int).sum() @@ -611,9 +597,7 @@ def ROS( # substitute w/ fraction of the DLs if there's insufficient # uncensored data else: - final = numpy.where( - df[censorship], df[result] * substitution_fraction, df[result] - ) + final = numpy.where(df[censorship], df[result] * substitution_fraction, df[result]) output = df.assign(final=final)[[result, censorship, "final"]] # convert to an array if necessary diff --git a/wqio/samples.py b/wqio/samples.py index ee045db7..790f81b9 100644 --- a/wqio/samples.py +++ b/wqio/samples.py @@ -1,17 +1,16 @@ -from matplotlib import pyplot -import seaborn import pandas +import seaborn +from matplotlib import pyplot +from pandas.plotting import register_matplotlib_converters from wqio import utils -from pandas.plotting import register_matplotlib_converters - register_matplotlib_converters() -class Parameter(object): +class Parameter: def __init__(self, name, units, usingTex=False): - """ Class representing a single analytical parameter (pollutant). + """Class representing a single analytical parameter (pollutant). (Input) Parameters ------------------ @@ -57,7 +56,7 @@ def usingTex(self, value): raise ValueError("`usingTex` must be of type `bool`") def paramunit(self, usecomma=False): - """ Creates a string representation of the parameter and units. + """Creates a string representation of the parameter and units. Parameters ---------- @@ -67,10 +66,7 @@ def paramunit(self, usecomma=False): format is " ()". 
""" - if usecomma: - paramunit = "{0}, {1}" - else: - paramunit = "{0} ({1})" + paramunit = "{0}, {1}" if usecomma else "{0} ({1})" n = self.name u = self.units @@ -78,13 +74,13 @@ def paramunit(self, usecomma=False): return paramunit.format(n, u) def __repr__(self): - return " ({})".format(self.paramunit(usecomma=False)) + return f" ({self.paramunit(usecomma=False)})" def __str__(self): - return " ({})".format(self.paramunit(usecomma=False)) + return f" ({self.paramunit(usecomma=False)})" -class SampleMixin(object): +class SampleMixin: def __init__( self, dataframe, @@ -97,7 +93,6 @@ def __init__( dlcol="DL", unitscol="units", ): - self._wqdata = dataframe self._startime = pandas.Timestamp(starttime) self._endtime = pandas.Timestamp(endtime) @@ -185,10 +180,7 @@ def yfactor(self, value): def plot_ts(self, ax, isFocus=True, asrug=False): if self.sample_ts is not None: - if isFocus: - alpha = 0.75 - else: - alpha = 0.35 + alpha = 0.75 if isFocus else 0.35 ymax = ax.get_ylim()[-1] yposition = [self.yfactor * ymax] * len(self.sample_ts) @@ -233,7 +225,7 @@ def plot_ts(self, ax, isFocus=True, asrug=False): class CompositeSample(SampleMixin): - """ Class for composite samples """ + """Class for composite samples""" @property def label(self): @@ -266,7 +258,7 @@ def sample_ts(self): class GrabSample(SampleMixin): - """ Class for grab (discrete) samples """ + """Class for grab (discrete) samples""" @property def label(self): diff --git a/wqio/tests/__init__.py b/wqio/tests/__init__.py index ee77a97a..179885ce 100644 --- a/wqio/tests/__init__.py +++ b/wqio/tests/__init__.py @@ -1,7 +1,8 @@ -from pkg_resources import resource_filename import warnings -from .helpers import requires +from pkg_resources import resource_filename + +from wqio.tests.helpers import requires try: import pytest diff --git a/wqio/tests/helpers.py b/wqio/tests/helpers.py index 694ad776..83099849 100644 --- a/wqio/tests/helpers.py +++ b/wqio/tests/helpers.py @@ -1,19 +1,18 @@ +import difflib import distutils -import sys -import subprocess -import re import os -import difflib -from functools import wraps -from pkg_resources import resource_filename -from io import StringIO +import re +import subprocess +import sys from collections import namedtuple from contextlib import contextmanager +from functools import wraps +from io import StringIO import numpy import pandas - import pytest +from pkg_resources import resource_filename def get_img_tolerance(): @@ -21,7 +20,7 @@ def get_img_tolerance(): def seed(func): - """ Decorator to seed the RNG before any function. 
""" + """Decorator to seed the RNG before any function.""" @wraps(func) def wrapper(*args, **kwargs): @@ -52,9 +51,7 @@ def outer_wrapper(function): @wraps(function) def inner_wrapper(*args, **kwargs): if module is None: - raise RuntimeError( - "{} required for `{}`".format(modulename, function.__name__) - ) + raise RuntimeError(f"{modulename} required for `{function.__name__}`") else: return function(*args, **kwargs) @@ -123,9 +120,7 @@ def make_dc_data_complex(dropsome=True): ) xtab = ( - pandas.DataFrame(index=index, columns=["res"]) - .unstack(level="param") - .unstack(level="state") + pandas.DataFrame(index=index, columns=["res"]).unstack(level="param").unstack(level="state") ) xtab_rows = xtab.shape[0] @@ -196,17 +191,15 @@ def compare_versions(utility="latex"): # pragma: no cover if present: present = distutils.version.LooseVersion(present) required = distutils.version.LooseVersion(required) - if present >= required: - return True - else: - return False + return present >= required + else: return False def _show_package_info(package, name): # pragma: no cover packagedir = os.path.dirname(package.__file__) - print("%s version %s is installed in %s" % (name, package.__version__, packagedir)) + print(f"{name} version {package.__version__} is installed in {packagedir}") def _show_system_info(): # pragma: no cover @@ -249,11 +242,9 @@ def byte2str(b): return b try: - s = subprocess.Popen( - ["tex", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) + s = subprocess.Popen(["tex", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) line = byte2str(s.stdout.readlines()[0]) - pattern = "3\.1\d+" + pattern = r"3\.1\d+" match = re.search(pattern, line) v = match.group(0) return v diff --git a/wqio/tests/test_bootstrap.py b/wqio/tests/test_bootstrap.py index 4284d279..8497baf7 100644 --- a/wqio/tests/test_bootstrap.py +++ b/wqio/tests/test_bootstrap.py @@ -1,10 +1,9 @@ -import pytest -import numpy.testing as nptest -from wqio.tests import helpers - import numpy +import numpy.testing as nptest +import pytest from wqio import bootstrap +from wqio.tests import helpers @pytest.fixture @@ -42,9 +41,7 @@ def test_bootstrappers(testdata, bootstrapper, known_ci): def test_fit(uselog): N = 10 x = numpy.arange(1, N + 1, dtype=float) - y = numpy.array( - [4.527, 3.519, 9.653, 8.036, 10.805, 14.329, 13.508, 11.822, 13.281, 10.410] - ) + y = numpy.array([4.527, 3.519, 9.653, 8.036, 10.805, 14.329, 13.508, 11.822, 13.281, 10.410]) bsfit = bootstrap.fit( x, y, numpy.polyfit, niter=2000, xlog=uselog, ylog=uselog, deg=1, full=False ) diff --git a/wqio/tests/test_datacollections.py b/wqio/tests/test_datacollections.py index 81777b7b..bfb5dbef 100644 --- a/wqio/tests/test_datacollections.py +++ b/wqio/tests/test_datacollections.py @@ -1,20 +1,18 @@ from distutils.version import LooseVersion -from textwrap import dedent from io import StringIO +from textwrap import dedent +from unittest import mock import numpy -import scipy -from scipy import stats import pandas - -from unittest import mock -import pytest import pandas.testing as pdtest -from wqio.tests import helpers +import pytest +import scipy +from scipy import stats -from wqio.features import Location, Dataset from wqio.datacollections import DataCollection, _dist_compare - +from wqio.features import Dataset, Location +from wqio.tests import helpers OLD_SCIPY = LooseVersion(scipy.version.version) < LooseVersion("0.19") @@ -642,7 +640,9 @@ def test_inventory_noNDs(dc_noNDs): ) expected = pandas.read_csv(known_csv, index_col=[0, 
1]).astype(int) pdtest.assert_frame_equal( - expected, dc_noNDs.inventory.astype(int), check_names=False, + expected, + dc_noNDs.inventory.astype(int), + check_names=False, ) @@ -751,17 +751,17 @@ def test_selectLocations_squeeze_True_None(dc): # since the test_selectLocations* tests stress _filter_collection # enough, we'll mock it out for datasets: def test_selectDatasets(dc): - with mock.patch.object(dc, "_filter_collection") as _fc: - with mock.patch.object(dc, "datasets", return_value=["A", "B"]) as _ds: - dc.selectDatasets("Inflow", "Reference", foo="A", bar="C") - _ds.assert_called_once_with("Inflow", "Reference") - _fc.assert_called_once_with(["A", "B"], foo="A", bar="C", squeeze=False) + with ( + mock.patch.object(dc, "_filter_collection") as _fc, + mock.patch.object(dc, "datasets", return_value=["A", "B"]) as _ds, + ): + dc.selectDatasets("Inflow", "Reference", foo="A", bar="C") + _ds.assert_called_once_with("Inflow", "Reference") + _fc.assert_called_once_with(["A", "B"], foo="A", bar="C", squeeze=False) @pytest.mark.parametrize("func", [stats.mannwhitneyu, stats.wilcoxon]) -@pytest.mark.parametrize( - ("x", "all_same"), [([5, 5, 5, 5, 5], True), ([5, 6, 7, 7, 8], False)] -) +@pytest.mark.parametrize(("x", "all_same"), [([5, 5, 5, 5, 5], True), ([5, 6, 7, 7, 8], False)]) def test_dist_compare_wrapper(x, all_same, func): y = [5, 5, 5, 5, 5] with mock.patch.object(stats, func.__name__) as _test: diff --git a/wqio/tests/test_features.py b/wqio/tests/test_features.py index 0dc3c3c2..cc8c222b 100644 --- a/wqio/tests/test_features.py +++ b/wqio/tests/test_features.py @@ -1,14 +1,12 @@ from distutils.version import LooseVersion -import pytest import numpy.testing as nptest -from wqio.tests import helpers - -import scipy import pandas +import pytest +import scipy -from wqio.features import Location, Dataset - +from wqio.features import Dataset, Location +from wqio.tests import helpers OLD_SCIPY = LooseVersion(scipy.version.version) < LooseVersion("0.19") TOLERANCE = 0.05 @@ -195,13 +193,9 @@ def test_location_anderson(location, attr, index): } result = expected[attr][location.useros][index] if index in [0, 4]: - nptest.assert_approx_equal( - getattr(location, attr)[index], result, significant=5 - ) + nptest.assert_approx_equal(getattr(location, attr)[index], result, significant=5) elif index != 3: - nptest.assert_array_almost_equal( - getattr(location, attr)[index], result, decimal=5 - ) + nptest.assert_array_almost_equal(getattr(location, attr)[index], result, decimal=5) @pytest.fixture @@ -294,33 +288,24 @@ def test_wilcoxon(dataset): known_wilcoxon_p = known_wilcoxon_stats[1] assert hasattr(dataset, "wilcoxon_z") - nptest.assert_allclose( - dataset.wilcoxon_z, known_wilcoxon_z, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.wilcoxon_z, known_wilcoxon_z, rtol=TOLERANCE) assert hasattr(dataset, "wilcoxon_p") - nptest.assert_allclose( - dataset.wilcoxon_p, known_wilcoxon_p, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.wilcoxon_p, known_wilcoxon_p, rtol=TOLERANCE) assert hasattr(dataset, "_wilcoxon_stats") - nptest.assert_allclose( - dataset._wilcoxon_stats, known_wilcoxon_stats, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset._wilcoxon_stats, known_wilcoxon_stats, rtol=TOLERANCE) + def test_mannwhitney(dataset): known_mannwhitney_stats = (927.0, 2.251523e-04) known_mannwhitney_u = known_mannwhitney_stats[0] known_mannwhitney_p = known_mannwhitney_stats[1] assert hasattr(dataset, "mannwhitney_u") - nptest.assert_allclose( - dataset.mannwhitney_u, known_mannwhitney_u, 
rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.mannwhitney_u, known_mannwhitney_u, rtol=TOLERANCE) assert hasattr(dataset, "mannwhitney_p") - nptest.assert_allclose( - dataset.mannwhitney_p, known_mannwhitney_p, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.mannwhitney_p, known_mannwhitney_p, rtol=TOLERANCE) assert hasattr(dataset, "_mannwhitney_stats") nptest.assert_allclose( @@ -329,42 +314,35 @@ def test_mannwhitney(dataset): rtol=TOLERANCE, ) + @pytest.mark.xfail(OLD_SCIPY, reason="Scipy < 0.19") def test_kendall(dataset): known_kendall_stats = (1.00, 5.482137e-17) known_kendall_tau = known_kendall_stats[0] known_kendall_p = known_kendall_stats[1] assert hasattr(dataset, "kendall_tau") - nptest.assert_allclose( - dataset.kendall_tau, known_kendall_tau, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.kendall_tau, known_kendall_tau, rtol=TOLERANCE) assert hasattr(dataset, "kendall_p") - nptest.assert_allclose( - dataset.kendall_p, known_kendall_p, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.kendall_p, known_kendall_p, rtol=TOLERANCE) assert hasattr(dataset, "_kendall_stats") - nptest.assert_allclose( - dataset._kendall_stats, known_kendall_stats, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset._kendall_stats, known_kendall_stats, rtol=TOLERANCE) + def test_spearman(dataset): known_spearman_stats = (1.0, 0.0) known_spearman_rho = known_spearman_stats[0] known_spearman_p = known_spearman_stats[1] assert hasattr(dataset, "spearman_rho") - nptest.assert_allclose( - dataset.spearman_rho, known_spearman_rho, atol=0.0001 - ) + nptest.assert_allclose(dataset.spearman_rho, known_spearman_rho, atol=0.0001) assert hasattr(dataset, "spearman_p") nptest.assert_allclose(dataset.spearman_p, known_spearman_p, atol=0.0001) assert hasattr(dataset, "_spearman_stats") - nptest.assert_allclose( - dataset._spearman_stats, known_spearman_stats, atol=0.0001 - ) + nptest.assert_allclose(dataset._spearman_stats, known_spearman_stats, atol=0.0001) + def test_theil(dataset): known_theil_stats = (1.0, -4.5, 1.0, 1.0) @@ -373,41 +351,25 @@ def test_theil(dataset): known_theil_loslope = known_theil_stats[2] known_theil_medslope = known_theil_stats[3] assert hasattr(dataset, "theil_medslope") - nptest.assert_allclose( - dataset.theil_medslope, known_theil_medslope, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.theil_medslope, known_theil_medslope, rtol=TOLERANCE) assert hasattr(dataset, "theil_intercept") - nptest.assert_allclose( - dataset.theil_intercept, known_theil_intercept, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.theil_intercept, known_theil_intercept, rtol=TOLERANCE) assert hasattr(dataset, "theil_loslope") - nptest.assert_allclose( - dataset.theil_loslope, known_theil_loslope, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.theil_loslope, known_theil_loslope, rtol=TOLERANCE) assert hasattr(dataset, "theil_hislope") - nptest.assert_allclose( - dataset.theil_hislope, known_theil_hislope, rtol=TOLERANCE - ) + nptest.assert_allclose(dataset.theil_hislope, known_theil_hislope, rtol=TOLERANCE) assert hasattr(dataset, "_theil_stats") - nptest.assert_almost_equal( - dataset._theil_stats["medslope"], known_theil_stats[0], decimal=4 - ) + nptest.assert_almost_equal(dataset._theil_stats["medslope"], known_theil_stats[0], decimal=4) - nptest.assert_almost_equal( - dataset._theil_stats["intercept"], known_theil_stats[1], decimal=4 - ) + nptest.assert_almost_equal(dataset._theil_stats["intercept"], known_theil_stats[1], decimal=4) - nptest.assert_almost_equal( - 
dataset._theil_stats["loslope"], known_theil_stats[2], decimal=4 - ) + nptest.assert_almost_equal(dataset._theil_stats["loslope"], known_theil_stats[2], decimal=4) - nptest.assert_almost_equal( - dataset._theil_stats["hislope"], known_theil_stats[3], decimal=4 - ) + nptest.assert_almost_equal(dataset._theil_stats["hislope"], known_theil_stats[3], decimal=4) assert not dataset._theil_stats["is_inverted"] @@ -415,14 +377,15 @@ def test_theil(dataset): assert "estimate_error" in list(dataset._theil_stats.keys()) - def test_medianCIsOverlap(dataset): known_medianCIsOverlap = False assert known_medianCIsOverlap == dataset.medianCIsOverlap + def test__repr__normal(dataset): dataset.__repr__ + def test_repr__None(dataset): dataset.definition = None dataset.__repr__ diff --git a/wqio/tests/test_hydro.py b/wqio/tests/test_hydro.py index 86bc0d48..01d9b81f 100644 --- a/wqio/tests/test_hydro.py +++ b/wqio/tests/test_hydro.py @@ -1,13 +1,11 @@ -import pytest -import pandas.testing as pdtest -from wqio.tests import helpers - import numpy import pandas +import pandas.testing as pdtest +import pytest from matplotlib import pyplot from wqio import hydro - +from wqio.tests import helpers BASELINE_IMAGES = "_baseline_images/hydro_tests" TOLERANCE = helpers.get_img_tolerance() @@ -213,9 +211,7 @@ def test_HydroRecord_attr(hr_simple_fixture): assert isinstance(hr_simple_fixture.data, pandas.DataFrame) assert isinstance(hr_simple_fixture.data.index, pandas.DatetimeIndex) - assert sorted(hr_simple_fixture.data.columns.tolist()) == sorted( - expected_std_columns - ) + assert sorted(hr_simple_fixture.data.columns.tolist()) == sorted(expected_std_columns) assert isinstance(hr_simple_fixture.all_storms, dict) @@ -354,9 +350,7 @@ def test_HydroRecord_storm_stats(hr_simple_fixture): @pytest.mark.parametrize("smallstorms", [True, False]) def test_getStormFromTimestamp_(hr_simple_fixture, value, smallstorms, error): with helpers.raises(error): - sn, storm = hr_simple_fixture.getStormFromTimestamp( - value, smallstorms=smallstorms - ) + sn, storm = hr_simple_fixture.getStormFromTimestamp(value, smallstorms=smallstorms) assert sn == 2 if not smallstorms: assert storm is None @@ -499,9 +493,7 @@ def test_Storm_peak_outflow(basic_storm): def test_Storm_peak_precip_intensity_time(basic_storm): assert hasattr(basic_storm, "peak_precip_intensity_time") ts = pandas.Timestamp("2013-05-19 08:00") - assert basic_storm.peak_precip_intensity_time.strftime("%X %x") == ts.strftime( - "%X %x" - ) + assert basic_storm.peak_precip_intensity_time.strftime("%X %x") == ts.strftime("%X %x") def test_Storm_peak_inflow_time(basic_storm): @@ -644,9 +636,7 @@ def test_da_bmp_area(drainage_area): ) def test_simple_method(drainage_area, conversion, factor, expected): depth = 1 - result = drainage_area.simple_method( - depth, annual_factor=factor, volume_conversion=conversion - ) + result = drainage_area.simple_method(depth, annual_factor=factor, volume_conversion=conversion) area = drainage_area.total_area + drainage_area.bmp_area storm_volume = depth * conversion * factor * area assert abs(expected - result) < 0.001 diff --git a/wqio/tests/test_ros.py b/wqio/tests/test_ros.py index 4ae3433f..fa3cadba 100644 --- a/wqio/tests/test_ros.py +++ b/wqio/tests/test_ros.py @@ -1,15 +1,14 @@ -from textwrap import dedent from io import StringIO - -import pytest -import numpy.testing as nptest -import pandas.testing as pdtest -from wqio.tests import helpers +from textwrap import dedent import numpy +import numpy.testing as nptest import pandas +import 
pandas.testing as pdtest +import pytest from wqio import ros +from wqio.tests import helpers @pytest.fixture @@ -559,9 +558,7 @@ def test_cohn_numbers_baseline(basic_data, expected_cohn): def test_cohn_numbers_no_NDs(basic_data): - result = ros.cohn_numbers( - basic_data.assign(qual=False), result="conc", censorship="qual" - ) + result = ros.cohn_numbers(basic_data.assign(qual=False), result="conc", censorship="qual") assert result.shape == (0, 6) @@ -1345,15 +1342,9 @@ class HalfDLs_80pctNDs: cohn = pandas.DataFrame( { - "nuncen_above": numpy.array( - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, numpy.nan] - ), - "nobs_below": numpy.array( - [6.0, 7.0, 8.0, 9.0, 12.0, 13.0, 14.0, 15.0, numpy.nan] - ), - "ncen_equal": numpy.array( - [6.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, numpy.nan] - ), + "nuncen_above": numpy.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, numpy.nan]), + "nobs_below": numpy.array([6.0, 7.0, 8.0, 9.0, 12.0, 13.0, 14.0, 15.0, numpy.nan]), + "ncen_equal": numpy.array([6.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, numpy.nan]), "prob_exceedance": numpy.array([0.1667] * 8 + [0.0]), } ) @@ -1480,9 +1471,7 @@ def test_ros_from_literature(as_arrays, case): else: result = ros.ROS(case.rescol, case.cencol, df=case.df) - nptest.assert_array_almost_equal( - sorted(result), sorted(case.values), decimal=case.decimal - ) + nptest.assert_array_almost_equal(sorted(result), sorted(case.values), decimal=case.decimal) @pytest.mark.parametrize( @@ -1502,6 +1491,4 @@ def test_ros_from_literature(as_arrays, case): def test_cohn_from_literature(case): cols = ["nuncen_above", "nobs_below", "ncen_equal", "prob_exceedance"] result = ros.cohn_numbers(case.df, case.rescol, case.cencol) - pdtest.assert_frame_equal( - result[cols].round(5), case.cohn[cols].round(5), atol=1e-4 - ) + pdtest.assert_frame_equal(result[cols].round(5), case.cohn[cols].round(5), atol=1e-4) diff --git a/wqio/tests/test_samples.py b/wqio/tests/test_samples.py index 2dc8b198..fa7e70bb 100644 --- a/wqio/tests/test_samples.py +++ b/wqio/tests/test_samples.py @@ -1,11 +1,9 @@ -import pytest -from wqio.tests import helpers - import pandas +import pytest from matplotlib import pyplot from wqio import samples - +from wqio.tests import helpers BASELINE_IMAGES = "_baseline_images/samples_tests" TOLERANCE = helpers.get_img_tolerance() @@ -33,9 +31,7 @@ def grab_sample(sample_data): endtime = "2013-02-24 16:59" freq = None - wqs = samples.GrabSample( - sample_data, starttime, endtime=endtime, samplefreq=freq, storm=None - ) + wqs = samples.GrabSample(sample_data, starttime, endtime=endtime, samplefreq=freq, storm=None) wqs.marker = "D" wqs.markersize = 8 diff --git a/wqio/tests/test_theil.py b/wqio/tests/test_theil.py index 2ee5f8c5..59c5c8ff 100644 --- a/wqio/tests/test_theil.py +++ b/wqio/tests/test_theil.py @@ -1,10 +1,9 @@ import numpy - -import pytest import numpy.testing as nptest -from wqio.tests import helpers +import pytest from wqio import theil +from wqio.tests import helpers from wqio.utils import TheilStats @@ -262,9 +261,7 @@ def test_TheilSenFit_errors(ts): ] ), } - nptest.assert_array_almost_equal( - ts.errors, expected[(ts.log_infl, ts.log_effl)], decimal=5 - ) + nptest.assert_array_almost_equal(ts.errors, expected[(ts.log_infl, ts.log_effl)], decimal=5) def test_TheilSenFit_MAD(ts): diff --git a/wqio/tests/test_viz.py b/wqio/tests/test_viz.py index 428277b6..697f8736 100644 --- a/wqio/tests/test_viz.py +++ b/wqio/tests/test_viz.py @@ -1,16 +1,13 @@ -import pytest -import numpy.testing as nptest - import numpy +import numpy.testing 
as nptest import pandas -from matplotlib import pyplot +import pytest import seaborn +from matplotlib import pyplot -from wqio import viz -from wqio import utils +from wqio import utils, viz from wqio.tests import helpers - BASELINE_IMAGES = "_baseline_images/viz_tests" TOLERANCE = helpers.get_img_tolerance() seaborn.set(style="ticks") @@ -711,17 +708,13 @@ def test_jointplot_defaultlabels(jp_data): def test_jointplot_xlabeled(jp_data): - jg2 = viz.jointplot( - x="B", y="C", data=jp_data, one2one=False, color="g", xlabel="Quantity B" - ) + jg2 = viz.jointplot(x="B", y="C", data=jp_data, one2one=False, color="g", xlabel="Quantity B") assert jg2.ax_joint.get_xlabel() == "Quantity B" return jg2.fig def test_jointplot_ylabeled(jp_data): - jg3 = viz.jointplot( - x="B", y="C", data=jp_data, one2one=False, color="r", ylabel="Quantity C" - ) + jg3 = viz.jointplot(x="B", y="C", data=jp_data, one2one=False, color="r", ylabel="Quantity C") assert jg3.ax_joint.get_ylabel() == "Quantity C" return jg3.fig @@ -767,7 +760,7 @@ def test_jointplot_one2one_zerominFalse(jp_data): [ ("linear", utils.no_op, utils.no_op), ("natlog", numpy.log, numpy.exp), - ("log10", numpy.log10, lambda x: 10 ** x), + ("log10", numpy.log10, lambda x: 10**x), ], ) @pytest.mark.parametrize("key", ["whishi", "whislo", "fliers"]) @@ -891,9 +884,7 @@ def test_probplot_qq(plot_data): @pytest.mark.mpl_image_compare(baseline_dir=BASELINE_IMAGES, tolerance=TOLERANCE) def test_probplot_pp(plot_data): fig, ax = pyplot.subplots() - scatter_kws = dict( - color="b", linestyle="--", markeredgecolor="g", markerfacecolor="none" - ) + scatter_kws = dict(color="b", linestyle="--", markeredgecolor="g", markerfacecolor="none") fig = viz.probplot( plot_data, ax=ax, @@ -968,7 +959,5 @@ def test_categorical_histogram_simple(cat_hist_data): @pytest.mark.mpl_image_compare(baseline_dir=BASELINE_IMAGES, tolerance=TOLERANCE) def test_categorical_histogram_complex(cat_hist_data): bins = numpy.arange(5, 35, 5) - fig = viz.categorical_histogram( - cat_hist_data, "depth", bins, hue="year", row="has_outflow" - ) + fig = viz.categorical_histogram(cat_hist_data, "depth", bins, hue="year", row="has_outflow") return fig.fig diff --git a/wqio/tests/utils_tests/test_misc.py b/wqio/tests/utils_tests/test_misc.py index f53d3531..586bb89d 100644 --- a/wqio/tests/utils_tests/test_misc.py +++ b/wqio/tests/utils_tests/test_misc.py @@ -1,16 +1,14 @@ from functools import partial -from textwrap import dedent from io import StringIO - - -import pytest -import pandas.testing as pdtest +from textwrap import dedent import numpy import pandas +import pandas.testing as pdtest +import pytest -from wqio.utils import misc from wqio.tests import helpers +from wqio.utils import misc @pytest.fixture @@ -26,25 +24,23 @@ def basic_data(): @pytest.fixture def multiindex_df(): - index = pandas.MultiIndex.from_product( - [["A", "B", "C"], ["mg/L"]], names=["loc", "units"] - ) + index = pandas.MultiIndex.from_product([["A", "B", "C"], ["mg/L"]], names=["loc", "units"]) return pandas.DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["a", "b"]) -class mockDataset(object): +class mockDataset: def __init__(self, inflow, outflow): self.inflow = mockLocation(inflow) self.outflow = mockLocation(outflow) -class mockLocation(object): +class mockLocation: def __init__(self, data): self.data = data self.stats = mockSummary(data) -class mockSummary(object): +class mockSummary: def __init__(self, data): self.N = len(data) self.max = max(data) @@ -54,7 +50,7 @@ def __init__(self, data): def 
test_add_column_level(basic_data): known_cols = pandas.MultiIndex.from_tuples( - [(u"test", u"A"), (u"test", u"B"), (u"test", u"C"), (u"test", u"D")] + [("test", "A"), ("test", "B"), ("test", "C"), ("test", "D")] ) newdata = misc.add_column_level(basic_data, "test", "testlevel") assert known_cols.tolist() == newdata.columns.tolist() @@ -84,9 +80,7 @@ def test_flatten_columns(multiindex_df, basic_data): expected = ["A_mg/L", "B_mg/L", "C_mg/L"] flat = misc.flatten_columns(multiindex_df.T) assert flat.columns.tolist() == expected - assert ( - misc.flatten_columns(basic_data).columns.tolist() == basic_data.columns.tolist() - ) + assert misc.flatten_columns(basic_data).columns.tolist() == basic_data.columns.tolist() def test_expand_columns(): @@ -211,7 +205,7 @@ def test_categorize_columns(): def test_classifier(value, units, expected): bins = numpy.arange(5, 36, 5) if units is not None: - expected = "{} {}".format(expected, units) + expected = f"{expected} {units}" result = misc.classifier(value, bins, units=units) assert result == expected diff --git a/wqio/tests/utils_tests/test_numutils.py b/wqio/tests/utils_tests/test_numutils.py index b98c9ade..251c8d74 100644 --- a/wqio/tests/utils_tests/test_numutils.py +++ b/wqio/tests/utils_tests/test_numutils.py @@ -3,16 +3,15 @@ from io import StringIO from textwrap import dedent -import pytest -import numpy.testing as nptest -import pandas.testing as pdtest -from wqio.tests import helpers - import numpy -from scipy import stats +import numpy.testing as nptest import pandas +import pandas.testing as pdtest +import pytest import statsmodels.api as sm +from scipy import stats +from wqio.tests import helpers from wqio.utils import numutils @@ -297,9 +296,7 @@ def test_compute_theilslope_default(error): with helpers.raises(error): y = helpers.getTestROSData()["res"].values x = numpy.arange(len(y) - 1) if error else None - assert tuple(numutils.compute_theilslope(y, x)) == stats.mstats.theilslopes( - y, x - ) + assert tuple(numutils.compute_theilslope(y, x)) == stats.mstats.theilslopes(y, x) @pytest.fixture @@ -511,9 +508,7 @@ def test_checkIntervalOverlap_single(oneway, expected): assert result == expected -@pytest.mark.parametrize( - ("oneway", "expected"), [(True, [0, 0, 1]), (False, [0, 1, 1])] -) +@pytest.mark.parametrize(("oneway", "expected"), [(True, [0, 0, 1]), (False, [0, 1, 1])]) def test_checkIntervalOverlap(oneway, expected): x = numpy.array([[1, 2], [1, 4], [1, 3]]) y = numpy.array([[3, 4], [2, 3], [2, 4]]) @@ -534,21 +529,13 @@ def test_checkIntervalOverlap(oneway, expected): ], ) def test_winsorize_dataframe(opts, expected_key): - x = numpy.array( - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - ) + x = numpy.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]) - w_05 = numpy.array( - [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19] - ) + w_05 = numpy.array([1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19]) - w_10 = numpy.array( - [2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 18] - ) + w_10 = numpy.array([2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 18]) - w_20 = numpy.array( - [4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16] - ) + w_20 = numpy.array([4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16]) w_05_20 = numpy.array( [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16] @@ -569,9 +556,7 @@ def 
test_winsorize_dataframe(opts, expected_key): def test__comp_stat_generator(): df = helpers.make_dc_data().reset_index() - gen = numutils._comp_stat_generator( - df, ["param", "bmp"], "loc", "res", helpers.comp_statfxn - ) + gen = numutils._comp_stat_generator(df, ["param", "bmp"], "loc", "res", helpers.comp_statfxn) assert isinstance(gen, types.GeneratorType) result = pandas.DataFrame(gen) @@ -628,9 +613,9 @@ def test__comp_stat_generator(): ], } pdtest.assert_frame_equal( - pandas.DataFrame( - expected, index=[0, 1, 2, 3, 4, 331, 332, 333, 334, 335] - ).sort_index(axis="columns"), + pandas.DataFrame(expected, index=[0, 1, 2, 3, 4, 331, 332, 333, 334, 335]).sort_index( + axis="columns" + ), pandas.concat([result.head(), result.tail()]).sort_index(axis="columns"), ) @@ -693,18 +678,16 @@ def test__comp_stat_generator_single_group_col(): ], } pdtest.assert_frame_equal( - pandas.DataFrame( - expected, index=[0, 1, 2, 3, 4, 43, 44, 45, 46, 47] - ).sort_index(axis="columns"), + pandas.DataFrame(expected, index=[0, 1, 2, 3, 4, 43, 44, 45, 46, 47]).sort_index( + axis="columns" + ), pandas.concat([result.head(), result.tail()]).sort_index(axis="columns"), ) def test__paired_stat_generator(): df = helpers.make_dc_data_complex().unstack(level="loc") - gen = numutils._paired_stat_generator( - df, ["param"], "loc", "res", helpers.comp_statfxn - ) + gen = numutils._paired_stat_generator(df, ["param"], "loc", "res", helpers.comp_statfxn) assert isinstance(gen, types.GeneratorType) result = pandas.DataFrame(gen).sort_index(axis="columns") diff --git a/wqio/theil.py b/wqio/theil.py index b9a4874f..adbee184 100644 --- a/wqio/theil.py +++ b/wqio/theil.py @@ -4,9 +4,9 @@ from wqio import utils -class TheilSenFit(object): +class TheilSenFit: def __init__(self, infl, effl, log_infl=False, log_effl=False, **theil_opts): - """ Theil-Sen Fit object + """Theil-Sen Fit object Parameters ---------- @@ -97,9 +97,7 @@ def med_estimate(self): @property def errors(self): - return self._effl_trans_in(self.effluent_data) - self._effl_trans_in( - self.med_estimate - ) + return self._effl_trans_in(self.effluent_data) - self._effl_trans_in(self.med_estimate) @property def MAD(self): diff --git a/wqio/utils/__init__.py b/wqio/utils/__init__.py index 9256b1b6..457fac3f 100644 --- a/wqio/utils/__init__.py +++ b/wqio/utils/__init__.py @@ -1,3 +1,3 @@ -from .misc import * -from .dateutils import * -from .numutils import * +from wqio.utils.dateutils import * # noqa +from wqio.utils.misc import * # noqa +from wqio.utils.numutils import * # noqa diff --git a/wqio/utils/dateutils.py b/wqio/utils/dateutils.py index cab7f074..636473a0 100644 --- a/wqio/utils/dateutils.py +++ b/wqio/utils/dateutils.py @@ -6,12 +6,11 @@ from wqio import validate from wqio.utils import misc - _logger = logging.getLogger(__name__) def getSeason(date): - """ Defines the season from a given date. + """Defines the season from a given date. 
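+    """Defines the season from a given date.

A hedged usage sketch for `getSeason`, for orientation while reviewing this hunk;
the season labels and cutoff dates live outside the lines shown here, so the
expected values in the comments are assumptions:

    import pandas

    from wqio.utils import dateutils

    # hypothetical dates; the return values are assumed to be season labels
    # (the actual date-to-season mapping is defined outside this hunk)
    dateutils.getSeason(pandas.Timestamp("2016-01-15"))  # presumably "winter"
    dateutils.getSeason(pandas.Timestamp("2016-07-15"))  # presumably "summer"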
Parameters ---------- @@ -52,7 +51,7 @@ def getSeason(date): def makeTimestamp(row, datecol="sampledate", timecol="sampletime", issuewarnings=False): - """ Makes a pandas.Timestamp from separate date/time columns + """Makes a pandas.Timestamp from separate date/time columns Parameters ---------- @@ -86,7 +85,7 @@ def makeTimestamp(row, datecol="sampledate", timecol="sampletime", issuewarnings date = fallback_datetime.date() if issuewarnings: # pragma: no cover misc.log_or_warn( - "Using fallback date from {}".format(row[datecol]), + f"Using fallback date from {row[datecol]}", UserWarning if issuewarnings else None, logger=_logger, ) @@ -104,19 +103,19 @@ def makeTimestamp(row, datecol="sampledate", timecol="sampletime", issuewarnings time = fallback_datetime.time() if issuewarnings: # pragma: no cover misc.log_or_warn( - "Using fallback time from {}".format(row[timecol]), + f"Using fallback time from {row[timecol]}", UserWarning if issuewarnings else None, logger=_logger, ) - dtstring = "{} {}".format(date, time) + dtstring = f"{date} {time}" tstamp = pandas.Timestamp(dtstring) return tstamp def getWaterYear(date): - """ Returns the water year of a given date + """Returns the water year of a given date Parameters ---------- diff --git a/wqio/utils/misc.py b/wqio/utils/misc.py index 53889424..0aadc07c 100644 --- a/wqio/utils/misc.py +++ b/wqio/utils/misc.py @@ -1,6 +1,6 @@ +import warnings from copy import copy from functools import wraps -import warnings import numpy import pandas @@ -11,7 +11,7 @@ def head_tail(df, N=5): def add_column_level(df, levelvalue, levelname): - """ Adds a second level to the column-index if a dataframe. + """Adds a second level to the column-index if a dataframe. Parameters ---------- @@ -50,7 +50,7 @@ def add_column_level(df, levelvalue, levelname): def swap_column_levels(df, level_1, level_2, sort=True): - """ Swaps columns levels in a dataframe with multi-level columns + """Swaps columns levels in a dataframe with multi-level columns Parameters ---------- @@ -101,7 +101,7 @@ def swap_column_levels(df, level_1, level_2, sort=True): def flatten_columns(df: pandas.DataFrame, sep: str = "_"): - """ Completely flattens a multi-level column index + """Completely flattens a multi-level column index Parameters ---------- @@ -155,13 +155,11 @@ def expand_columns(df, names, sep="_"): """ newcols = df.columns.str.split(sep, expand=True) - return df.set_axis(newcols, axis="columns").rename_axis( - names, axis="columns" - ) + return df.set_axis(newcols, axis="columns").rename_axis(names, axis="columns") def redefine_index_level(df, levelname, value, criteria=None, dropold=True): - """ Redefine a index values in a dataframe. + """Redefine a index values in a dataframe. Parameters ---------- @@ -214,14 +212,14 @@ def categorize_columns(df, *columns): newdf = df.copy() for c in columns: if newdf[c].dtype != object: - raise ValueError("column {} is not an object type".format(c)) + raise ValueError(f"column {c} is not an object type") newdf[c] = newdf[c].astype("category") return newdf def nested_getattr(baseobject, attribute): - """ Returns the value of an attribute of an object that is nested + """Returns the value of an attribute of an object that is nested several layers deep. 
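The mock fixtures in test_misc.py above (`mockDataset` holding `mockLocation`
holding `mockSummary`) hint at how `nested_getattr` is exercised; a small
sketch, assuming the `attribute` argument is a dotted path — the class names
here are invented stand-ins:

    from wqio.utils import misc

    class Stats:
        N = 3  # plays the role of mockSummary.N

    class Location:
        stats = Stats()  # plays the role of mockLocation.stats

    # assumption: nested_getattr walks dotted attribute paths
    assert misc.nested_getattr(Location(), "stats.N") == 3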
Parameters @@ -254,7 +252,7 @@ def nested_getattr(baseobject, attribute): def stringify(value, fmt, attribute=None): - """ Weird wrapper to format attributes of objects as strings + """Weird wrapper to format attributes of objects as strings Parameters ---------- @@ -330,17 +328,17 @@ def classifier(value, bins, units=None): # below the lower edge elif value <= min(bins): - output = "<{}".format(min(bins)) + output = f"<{min(bins)}" # above the upper edge elif value > max(bins): - output = ">{}".format(max(bins)) + output = f">{max(bins)}" # everything else with the range of bins else: for left, right in zip(bins[:-1], bins[1:]): if left < value <= right: - output = "{} - {}".format(left, right) + output = f"{left} - {right}" break # add the units (or don't) @@ -383,7 +381,7 @@ def unique_categories(classifier, bins): def pop_many(some_dict, *args): - """ Pop several key-values out of a dictionary and return a copy + """Pop several key-values out of a dictionary and return a copy Parameters ---------- @@ -411,7 +409,7 @@ def pop_many(some_dict, *args): def selector(default, *cond_results): - """ Thin wrapper around numpy.select with a more convenient API (maybe). + """Thin wrapper around numpy.select with a more convenient API (maybe). Parameters ---------- @@ -445,7 +443,7 @@ def no_op(value): def assign_multilevel_column(df, val_or_fxn, *collevels): - """ Dataframe-pipeable function to assign new multi-level columns + """Dataframe-pipeable function to assign new multi-level columns Parameters ---------- @@ -474,7 +472,7 @@ def assign_multilevel_column(df, val_or_fxn, *collevels): def symbolize_bools(df, true_symbol, false_symbol, other_symbol=None, join_char=None): - """ Symbolize boolean values in a dataframe with strings + """Symbolize boolean values in a dataframe with strings Parameters ---------- @@ -523,8 +521,7 @@ def symbolize_bools(df, true_symbol, false_symbol, other_symbol=None, join_char= def log_df_shape(logger): # pragma: no cover - """ Decorator to log the shape of a dataframe before and after a function. - """ + """Decorator to log the shape of a dataframe before and after a function.""" def decorate(func): @wraps(func) @@ -532,9 +529,7 @@ def wrapper(*args, **kwargs): shape_init = args[0].shape new_df = func(*args, **kwargs) shape_final = new_df.shape - logger.debug( - f"{func.__name__}: dataframe shape = {shape_init} -> {shape_final}." - ) + logger.debug(f"{func.__name__}: dataframe shape = {shape_init} -> {shape_final}.") return new_df return wrapper diff --git a/wqio/utils/numutils.py b/wqio/utils/numutils.py index 9e71f1e1..6f8df47b 100644 --- a/wqio/utils/numutils.py +++ b/wqio/utils/numutils.py @@ -1,21 +1,20 @@ import itertools -from textwrap import dedent from collections import namedtuple +from textwrap import dedent import numpy -from scipy import stats import statsmodels.api as sm from probscale.algo import _estimate_from_fit +from scipy import stats from wqio import validate from wqio.utils import misc - TheilStats = namedtuple("TheilStats", ("slope", "intercept", "low_slope", "high_slope")) def sigFigs(x, n, expthresh=5, tex=False, pval=False, forceint=False): - """ Formats a number with the correct number of sig figs. + """Formats a number with the correct number of sig figs. 
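+    """Formats a number with the correct number of sig figs.

The rounding logic in this hunk pins down a few concrete outputs of `sigFigs`;
a quick sketch of the behavior as written:

    from wqio.utils import numutils

    # n - 1 - order <= 0 here, so the value rounds at a negative decimal place
    assert numutils.sigFigs(1234.56, 3) == "1,230"

    # small magnitudes keep n significant digits via the "{0:,.%df}" branch
    assert numutils.sigFigs(0.00257, 2) == "0.0026"

    # the pval short-circuit fires before any rounding happens
    assert numutils.sigFigs(0.0001, 3, pval=True) == "<0.001"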
Parameters
 ----------
@@ -55,7 +54,6 @@ def sigFigs(x, n, expthresh=5, tex=False, pval=False, forceint=False):
 
     # check on the number provided
     elif x is not None and not numpy.isinf(x) and not numpy.isnan(x):
-
         # check on the sigFigs
         if n < 1:
             raise ValueError("number of sig figs must be greater than zero!")
@@ -63,10 +61,10 @@ def sigFigs(x, n, expthresh=5, tex=False, pval=False, forceint=False):
         elif pval and x < 0.001:
             out = "<0.001"
             if tex:
-                out = "${}$".format(out)
+                out = f"${out}$"
 
         elif forceint:
-            out = "{:,.0f}".format(x)
+            out = f"{x:,.0f}"
 
         # logic to do all of the rounding
         elif x != 0.0:
@@ -76,7 +74,7 @@ def sigFigs(x, n, expthresh=5, tex=False, pval=False, forceint=False):
             decimal_places = int(n - 1 - order)
 
             if decimal_places <= 0:
-                out = "{0:,.0f}".format(round(x, decimal_places))
+                out = f"{round(x, decimal_places):,.0f}"
 
             else:
                 fmt = "{0:,.%df}" % decimal_places
@@ -86,7 +84,7 @@ def sigFigs(x, n, expthresh=5, tex=False, pval=False, forceint=False):
             decimal_places = n - 1
             if tex:
                 fmt = r"$%%0.%df \times 10 ^ {%d}$" % (decimal_places, order)
-                out = fmt % round(x / 10 ** order, decimal_places)
+                out = fmt % round(x / 10**order, decimal_places)
             else:
                 fmt = "{0:.%de}" % decimal_places
                 out = fmt.format(x)
@@ -102,7 +100,7 @@ def sigFigs(x, n, expthresh=5, tex=False, pval=False, forceint=False):
 
 def formatResult(result, qualifier, sigfigs=3):
-    """ Formats a results with its qualifier
+    """Formats a result with its qualifier
 
     Parameters
     ----------
@@ -125,11 +123,11 @@ def formatResult(result, qualifier, sigfigs=3):
 
     """
 
-    return "{}{}".format(qualifier, sigFigs(result, sigfigs))
+    return f"{qualifier}{sigFigs(result, sigfigs)}"
 
 
 def process_p_vals(pval):
-    """ Processes p-values into nice strings to reporting. When the
+    """Processes p-values into nice strings for reporting. When the
     p-values are less than 0.001, "<0.001" is returned. Otherwise, a
     string with three decimal places is returned.
 
@@ -149,7 +147,7 @@ def process_p_vals(pval):
     elif 0 < pval < 0.001:
         out = formatResult(0.001, "<", sigfigs=1)
     elif pval > 1 or pval < 0:
-        raise ValueError("p-values must be between 0 and 1 (not {})".format(pval))
+        raise ValueError(f"p-values must be between 0 and 1 (not {pval})")
     else:
         out = "%0.3f" % pval
 
@@ -157,7 +155,7 @@ def process_p_vals(pval):
 
 def translate_p_vals(pval, as_emoji=True):
-    """ Translates ambiguous p-values into more meaningful emoji.
+    """Translates ambiguous p-values into more meaningful emoji.
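The branches of `process_p_vals` above fully determine its output; these
expectations follow directly from the code in this hunk:

    from wqio.utils import numutils

    assert numutils.process_p_vals(None) == "NA"
    assert numutils.process_p_vals(0.0005) == "<0.001"  # 0 < p < 0.001
    assert numutils.process_p_vals(0.0123) == "0.012"   # "%0.3f" % p
    # numutils.process_p_vals(1.5) would raise ValueError (p outside [0, 1])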
Parameters ---------- @@ -182,7 +180,7 @@ def translate_p_vals(pval, as_emoji=True): elif 0.1 < pval <= 1: interpreted = r"(╯°□°)╯︵ ┻━┻" if as_emoji else "nope" else: - raise ValueError("p-values must be between 0 and 1 (not {})".format(pval)) + raise ValueError(f"p-values must be between 0 and 1 (not {pval})") return interpreted @@ -223,7 +221,7 @@ def anderson_darling(data): def processAndersonDarlingResults(ad_results): - """ Return a nice string of Anderson-Darling test results + """Return a nice string of Anderson-Darling test results Parameters ---------- @@ -239,11 +237,11 @@ def processAndersonDarlingResults(ad_results): AD, crit, sig = ad_results try: - ci = 100 - sig[AD > crit][-1] - return "%0.1f%%" % (ci,) + ci = 100 - sig[crit < AD][-1] + return f"{ci:0.1f}%" except IndexError: ci = 100 - sig[0] - return "<%0.1f%%" % (ci,) + return f"<{ci:0.1f}%" def _anderson_darling_p_vals(ad_results, n_points): @@ -269,15 +267,15 @@ def _anderson_darling_p_vals(ad_results, n_points): """ AD, crit, sig = ad_results - AD_star = AD * (1 + 0.75 / n_points + 2.25 / n_points ** 2) + AD_star = AD * (1 + 0.75 / n_points + 2.25 / n_points**2) if AD_star >= 0.6: - p = numpy.exp(1.2397 - (5.709 * AD_star) + (0.0186 * AD_star ** 2)) + p = numpy.exp(1.2397 - (5.709 * AD_star) + (0.0186 * AD_star**2)) elif 0.34 <= AD_star < 0.6: - p = numpy.exp(0.9177 - (4.279 * AD_star) - (1.38 * AD_star ** 2)) + p = numpy.exp(0.9177 - (4.279 * AD_star) - (1.38 * AD_star**2)) elif 0.2 < AD_star < 0.34: - p = 1 - numpy.exp(-8.318 + (42.796 * AD_star) - (59.938 * AD_star ** 2)) + p = 1 - numpy.exp(-8.318 + (42.796 * AD_star) - (59.938 * AD_star**2)) else: - p = 1 - numpy.exp(-13.436 + (101.14 * AD_star) - (223.73 * AD_star ** 2)) + p = 1 - numpy.exp(-13.436 + (101.14 * AD_star) - (223.73 * AD_star**2)) return p @@ -336,9 +334,7 @@ def normalize_units( msg = "" if target.isnull().any(): nulls = df[target.isnull()][paramcol].unique() - msg += "Some target units could not be mapped to the {} column ({})\n".format( - paramcol, nulls - ) + msg += f"Some target units could not be mapped to the {paramcol} column ({nulls})\n" if normalization.isnull().any(): nulls = df[normalization.isnull()][unitcol].unique() @@ -348,23 +344,19 @@ def normalize_units( if conversion.isnull().any(): nulls = target[conversion.isnull()] - msg += "Some conversion factors could not be mapped to the target units ({})".format( - nulls - ) + msg += f"Some conversion factors could not be mapped to the target units ({nulls})" if len(msg) > 0: raise ValueError(msg) # convert results - normalized = df.assign( - **{rescol: df[rescol] * normalization / conversion, unitcol: target} - ) + normalized = df.assign(**{rescol: df[rescol] * normalization / conversion, unitcol: target}) return normalized def pH2concentration(pH, *args): - """ Converts pH values to proton concentrations in mg/L + """Converts pH values to proton concentrations in mg/L Parameters ---------- @@ -398,13 +390,11 @@ def pH2concentration(pH, *args): def compute_theilslope(y, x=None, alpha=0.95, percentile=50): - """ Adapted from stats.mstats.theilslopes so that we can tweak the + f""" Adapted from stats.mstats.theilslopes so that we can tweak the `percentile` parameter. https://goo.gl/nxPF54 - {} - """.format( - dedent(stats.mstats.theilslopes.__doc__) - ) + {dedent(stats.mstats.theilslopes.__doc__)} + """ # We copy both x and y so we can use _find_repeats. 
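    # NOTE: neither the old `""" ... """.format(...)` expression nor the new
    # f-string above is a plain string literal, so Python stores nothing in
    # compute_theilslope.__doc__ in either version. A hedged sketch of one
    # way to actually keep the borrowed scipy docstring would be to assign
    # it after the function definition:
    #
    #     compute_theilslope.__doc__ = (
    #         "Adapted from stats.mstats.theilslopes so that we can tweak the\n"
    #         "`percentile` parameter. https://goo.gl/nxPF54\n\n"
    #         + dedent(stats.mstats.theilslopes.__doc__)
    #     )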
y = numpy.array(y).flatten() @@ -413,7 +403,7 @@ def compute_theilslope(y, x=None, alpha=0.95, percentile=50): else: x = numpy.array(x, dtype=float).flatten() if len(x) != len(y): - raise ValueError("Incompatible lengths (%s != %s)" % (len(y), len(x))) + raise ValueError(f"Incompatible lengths ({len(y)} != {len(x)})") # Compute sorted slopes only when deltax > 0 deltax = x[:, numpy.newaxis] - x @@ -454,10 +444,8 @@ def compute_theilslope(y, x=None, alpha=0.95, percentile=50): return TheilStats(outslope, outinter, delta[0], delta[1]) -def fit_line( - x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None, through_origin=False -): - """ Fits a line to x-y data in various forms (raw, log, prob scales) +def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None, through_origin=False): + """Fits a line to x-y data in various forms (raw, log, prob scales) Parameters ---------- @@ -541,7 +529,7 @@ def fit_line( def checkIntervalOverlap(interval1, interval2, oneway=False, axis=None): - """ Checks if two numeric intervals overlaps. + """Checks if two numeric intervals overlaps. Parameters ---------- @@ -572,13 +560,11 @@ def checkIntervalOverlap(interval1, interval2, oneway=False, axis=None): if oneway: return first_check else: - return first_check | checkIntervalOverlap( - interval2, interval1, oneway=True, axis=axis - ) + return first_check | checkIntervalOverlap(interval2, interval1, oneway=True, axis=axis) def winsorize_dataframe(df, **limits): - """ Winsorizes columns in a dataframe + """Winsorizes columns in a dataframe Parameters ---------- @@ -608,7 +594,7 @@ def winsorize_dataframe(df, **limits): def remove_outliers(x, factor=1.5): - """ Removes outliers from an array based on a scaling of the + """Removes outliers from an array based on a scaling of the interquartile range (IQR). Parameters @@ -641,7 +627,7 @@ def remove_outliers(x, factor=1.5): def _comp_stat_generator( df, groupcols, pivotcol, rescol, statfxn, statname=None, pbarfxn=None, **statopts ): - """ Generator of records containing results of comparitive + """Generator of records containing results of comparitive statistical functions. Parameters @@ -699,7 +685,7 @@ def _comp_stat_generator( def _paired_stat_generator( df, groupcols, pivotcol, rescol, statfxn, statname=None, pbarfxn=None, **statopts ): - """ Generator of records containing results of comparitive + """Generator of records containing results of comparitive statistical functions specifically for paired data. Parameters diff --git a/wqio/validate.py b/wqio/validate.py index cb999bf5..91a35268 100644 --- a/wqio/validate.py +++ b/wqio/validate.py @@ -1,8 +1,8 @@ import os import numpy -from matplotlib import pyplot import pandas +from matplotlib import pyplot def dataset(fname): @@ -16,7 +16,7 @@ def dataset(fname): def timestamp(datelike): - """ Converts datetime-like objects to pandas.Timestamp. + """Converts datetime-like objects to pandas.Timestamp. Pretty miuch a direct pass through, but give a slighly more informative error message. @@ -35,14 +35,14 @@ def timestamp(datelike): try: tstamp = pandas.Timestamp(datelike) except ValueError: - msg = "{} could not be coerced into a pandas.Timestamp".format(datelike) + msg = f"{datelike} could not be coerced into a pandas.Timestamp" raise ValueError(msg) return tstamp def axes(ax, fallback="new"): - """ Checks if a value if an Axes. If None, a new one is created or + """Checks if a value if an Axes. If None, a new one is created or the 'current' one is found. 
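Before the validation and plotting helpers below, a usage sketch for
`checkIntervalOverlap` from the hunk above; the arrays and expected results
mirror test_checkIntervalOverlap earlier in this patch:

    import numpy
    import numpy.testing as nptest

    from wqio.utils import numutils

    x = numpy.array([[1, 2], [1, 4], [1, 3]])
    y = numpy.array([[3, 4], [2, 3], [2, 4]])

    # oneway=True performs only the first directional check
    nptest.assert_array_equal(
        numutils.checkIntervalOverlap(x, y, oneway=True, axis=1), [0, 0, 1]
    )

    # oneway=False also runs the symmetric check, per the recursive call above
    nptest.assert_array_equal(
        numutils.checkIntervalOverlap(x, y, oneway=False, axis=1), [0, 1, 1]
    )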
Parameters @@ -81,7 +81,7 @@ def axes(ax, fallback="new"): def single_value_in_index(df, index_level): - """ Confirms that a given level of a dataframe's index only has + """Confirms that a given level of a dataframe's index only has one unique value. Useful for confirming consistent units. Raises error if level is not a single value. Returns unique value of the index level. @@ -103,7 +103,7 @@ def single_value_in_index(df, index_level): index = numpy.unique(df.index.get_level_values(index_level).tolist()) if index.shape != (1,): - raise ValueError('index level "{}" is not unique.'.format(index_level)) + raise ValueError(f'index level "{index_level}" is not unique.') return index[0] @@ -123,7 +123,7 @@ def at_least_empty_dict(value, **kwargs): if value is None or value == "": value = {} elif not isinstance(value, dict): - raise ValueError("{} cannot be a dictionary".format(value)) + raise ValueError(f"{value} cannot be a dictionary") else: value = value.copy() diff --git a/wqio/viz.py b/wqio/viz.py index e1b11070..4f5d205f 100644 --- a/wqio/viz.py +++ b/wqio/viz.py @@ -1,18 +1,16 @@ from functools import partial import numpy -from matplotlib import pyplot -from matplotlib import ticker -from pandas.api.types import CategoricalDtype -import seaborn import probscale +import seaborn +from matplotlib import pyplot, ticker +from pandas.api.types import CategoricalDtype -from wqio import utils -from wqio import validate +from wqio import utils, validate def rotateTickLabels(ax, rotation, which, rotation_mode="anchor", ha="right"): - """ Rotates the ticklabels of a matplotlib Axes + """Rotates the ticklabels of a matplotlib Axes Parameters ---------- @@ -54,11 +52,11 @@ def rotateTickLabels(ax, rotation, which, rotation_mode="anchor", ha="right"): def log_formatter(use_1x=True, threshold=5): def _formatter(tick, pos=None, use_1x=True, threshold=3): - """ Formats log axes as `1 x 10^N` when N > 4 or N < -4. 
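Since `log_formatter` wraps `_formatter` in a `ticker.FuncFormatter` (see the
`return` just below), it plugs into matplotlib in the usual way; a brief sketch:

    from matplotlib import pyplot

    from wqio import viz

    fig, ax = pyplot.subplots()
    ax.set_yscale("log")
    ax.set_ylim(1e-7, 1e7)

    # ticks between 1 and 10**threshold render as plain "10,000"-style
    # labels; ticks beyond the threshold use the exponential forms below
    ax.yaxis.set_major_formatter(viz.log_formatter(use_1x=True, threshold=5))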
""" + """Formats log axes as `1 x 10^N` when N > 4 or N < -4.""" - if 10 ** threshold >= tick > 1: - tick = "{:,d}".format(int(tick)) - elif tick > 10 ** threshold or tick < 10 ** (-1 * threshold): + if 10**threshold >= tick > 1: + tick = f"{int(tick):,d}" + elif tick > 10**threshold or tick < 10 ** (-1 * threshold): if use_1x: tick = r"$1 \times 10 ^ {%d}$" % int(numpy.log10(tick)) else: @@ -70,10 +68,8 @@ def _formatter(tick, pos=None, use_1x=True, threshold=3): return ticker.FuncFormatter(func) -def gridlines( - ax, xlabel=None, ylabel=None, xscale=None, yscale=None, xminor=True, yminor=True -): - """ Standard formatting for gridlines on a matplotlib Axes +def gridlines(ax, xlabel=None, ylabel=None, xscale=None, yscale=None, xminor=True, yminor=True): + """Standard formatting for gridlines on a matplotlib Axes Parameters ---------- @@ -144,7 +140,7 @@ def jointplot( zeromin=True, one2one=True, ): - """ Plots the joint distribution of two variables via seaborn + """Plots the joint distribution of two variables via seaborn Parameters ---------- @@ -167,9 +163,7 @@ def jointplot( jg : seaborn.JointGrid """ - jg = seaborn.jointplot( - x=x, y=y, color=color, data=data, marginal_kws=dict(rug=True, kde=True) - ) + jg = seaborn.jointplot(x=x, y=y, color=color, data=data, marginal_kws=dict(rug=True, kde=True)) if xlabel is None: xlabel = jg.ax_joint.get_xlabel() @@ -205,7 +199,7 @@ def jointplot( def whiskers_and_fliers(x, q1=None, q3=None, transformout=None): - """ Computes extent of whiskers and fliers on optionally transformed + """Computes extent of whiskers and fliers on optionally transformed data for box and whisker plots. Parameters @@ -249,18 +243,12 @@ def transformout(x): # get low extreme loval = q1 - (1.5 * iqr) whislo = numpy.compress(x >= loval, x) - if len(whislo) == 0 or numpy.min(whislo) > q1: - whislo = q1 - else: - whislo = numpy.min(whislo) + whislo = q1 if len(whislo) == 0 or numpy.min(whislo) > q1 else numpy.min(whislo) # get high extreme hival = q3 + (1.5 * iqr) whishi = numpy.compress(x <= hival, x) - if len(whishi) == 0 or numpy.max(whishi) < q3: - whishi = q3 - else: - whishi = numpy.max(whishi) + whishi = q3 if len(whishi) == 0 or numpy.max(whishi) < q3 else numpy.max(whishi) wnf["fliers"] = numpy.hstack( [ @@ -285,7 +273,6 @@ def boxplot( patch_artist=True, showmean=False, ): - """ Draws a boxplot on an axes @@ -328,9 +315,7 @@ def boxplot( elif numpy.isscalar(position): position = [position] - meanprops = dict( - marker=marker, markersize=6, markerfacecolor=color, markeredgecolor="Black" - ) + meanprops = dict(marker=marker, markersize=6, markerfacecolor=color, markeredgecolor="Black") flierprops = dict( marker=marker, @@ -346,9 +331,7 @@ def boxplot( if patch_artist: medianprops = dict(linewidth=1.00, color="k", linestyle="-", zorder=5) - boxprops = dict( - edgecolor="k", facecolor=color, linewidth=0.75, zorder=4, alpha=0.5 - ) + boxprops = dict(edgecolor="k", facecolor=color, linewidth=0.75, zorder=4, alpha=0.5) else: medianprops = dict(linewidth=1.00, color=color, linestyle="-", zorder=3) @@ -385,7 +368,7 @@ def probplot( line_kws=None, return_results=False, ): - """ Probability, percentile, and quantile plots. + """Probability, percentile, and quantile plots. 
Parameters ---------- @@ -441,7 +424,7 @@ def probplot( def _connect_spines(left_ax, right_ax, left_y, right_y, linestyle="solid", **line_kwds): - """ Connects the y-spines between two Axes + """Connects the y-spines between two Axes Parameters ---------- @@ -466,9 +449,7 @@ def _connect_spines(left_ax, right_ax, left_y, right_y, linestyle="solid", **lin import mpl_toolkits.axes_grid1.inset_locator as inset left_trans = mtrans.blended_transform_factory(left_ax.transData, left_ax.transAxes) - right_trans = mtrans.blended_transform_factory( - right_ax.transData, right_ax.transAxes - ) + right_trans = mtrans.blended_transform_factory(right_ax.transData, right_ax.transAxes) left_data_trans = left_ax.transScale + left_ax.transLimits right_data_trans = right_ax.transScale + right_ax.transLimits @@ -490,10 +471,8 @@ def _connect_spines(left_ax, right_ax, left_y, right_y, linestyle="solid", **lin return connector -def parallel_coordinates( - dataframe, hue, cols=None, palette=None, showlegend=True, **subplot_kws -): - """ Produce a parallel coordinates plot from a dataframe. +def parallel_coordinates(dataframe, hue, cols=None, palette=None, showlegend=True, **subplot_kws): + """Produce a parallel coordinates plot from a dataframe. Parameters ---------- @@ -559,7 +538,7 @@ def parallel_coordinates( def categorical_histogram(df, valuecol, bins, classifier=None, **factoropts): - """ Plot a faceted, categorical histogram. + """Plot a faceted, categorical histogram. Parameters ----------
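For completeness, a usage sketch of `categorical_histogram` mirroring
test_categorical_histogram_complex at the top of this patch; the toy dataframe
is an invented stand-in for the test's `cat_hist_data` fixture, with column
names inferred from that test:

    import numpy
    import pandas

    from wqio import viz

    rng = numpy.random.default_rng(0)
    cat_hist_data = pandas.DataFrame(
        {
            "depth": rng.uniform(5, 30, size=100),
            "year": rng.choice([2010, 2011, 2012], size=100),
            "has_outflow": rng.choice([True, False], size=100),
        }
    )

    bins = numpy.arange(5, 35, 5)

    # returns a seaborn grid object; the test hands fg.fig to the image comparison
    fg = viz.categorical_histogram(cat_hist_data, "depth", bins, hue="year", row="has_outflow")
    fg.fig.savefig("depth-histogram.png")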