Clean up dependencies #9

Merged: 2 commits, May 7, 2024
6 changes: 3 additions & 3 deletions noxfile.py
@@ -144,7 +144,7 @@ def mypy(session: Session) -> None:
"""Type-check using mypy."""
args = session.posargs or ["src", "tests"]
session.install(".")
session.install("mypy", "pytest")
session.install("mypy", "pytest", "click")
session.run("mypy", *args)
if not session.posargs:
session.run("mypy", f"--python-executable={sys.executable}", "noxfile.py")
@@ -154,7 +154,7 @@ def mypy(session: Session) -> None:
 def tests(session: Session) -> None:
     """Run the test suite."""
     session.install(".")
-    session.install("coverage[toml]", "pytest", "pygments")
+    session.install("coverage[toml]", "pytest", "pygments", "click")
     try:
         session.run(
             "coverage",
@@ -188,7 +188,7 @@ def coverage(session: Session) -> None:
 def typeguard(session: Session) -> None:
     """Runtime type checking using Typeguard."""
     session.install(".")
-    session.install("pytest", "typeguard", "pygments")
+    session.install("pytest", "typeguard", "pygments", "click")
     session.run("pytest", f"--typeguard-packages={package}", *session.posargs)


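Each nox session runs in an isolated virtualenv, so tools the checks import must be installed into the session explicitly; with click moved out of the runtime dependencies (see pyproject.toml below), the sessions now install it themselves. A minimal sketch of the pattern, with an illustrative session name and package list rather than the project's full noxfile:

    import nox


    @nox.session(python=["3.10", "3.11", "3.12"])
    def tests(session: nox.Session) -> None:
        """Run the test suite in an isolated virtualenv."""
        session.install(".")  # the package itself, with its runtime dependencies
        session.install("pytest", "click")  # dev-only tools, no longer runtime deps
        session.run("pytest", *session.posargs)
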
820 changes: 381 additions & 439 deletions poetry.lock

Large diffs are not rendered by default.

27 changes: 15 additions & 12 deletions pyproject.toml
@@ -14,19 +14,16 @@ classifiers = ["Development Status :: 4 - Beta"]
 Changelog = "https://github.com/statisticsnorway/ssb-timeseries/releases"
 
 [tool.poetry.dependencies]
-python = ">=3.10,<3.13"
+python = ">=3.10,<4.0"
 dapla-toolbelt = ">=1.3.2"
-pandas = "^2.1.1"
-pytest = "^7.4.3"
-ssb-klass-python = "^0.0.7"
-pyarrow = "^14.0.0"
-google-cloud-logging = "^3.8.0"
-pytz = "^2023.3.post1"
-polars = "^0.19.18"
-duckdb = "^0.10.0"
-bigtree = "^0.17.0"
-click = "^8.1.7"
-typing-extensions = "^4.11.0"
+pandas = ">=2.1.1"
+ssb-klass-python = ">=0.0.7"
+pyarrow = ">=14.0.0"
+pytz = ">=2023.3.post1"
+bigtree = ">=0.17.0"
+typing-extensions = ">=4.11.0"
+python-dateutil = ">=2.9.0.post0"
+numpy = ">=1.26.4"
 
 [tool.poetry.group.dev.dependencies]
 pygments = ">=2.10.0"
@@ -46,6 +43,8 @@ sphinx-click = ">=3.0.2"
 typeguard = ">=2.13.3"
 xdoctest = { extras = ["colors"], version = ">=0.15.10" }
 myst-parser = { version = ">=0.16.1" }
+deptry = ">=0.16.1"
+click = ">=8.1.7"
 
 [tool.pytest.ini_options]
 pythonpath = ["src"]
@@ -73,6 +72,7 @@ warn_unreachable = true
 pretty = true
 show_column_numbers = true
 show_error_context = true
+disallow_any_generics = false
 
 [tool.ruff]
 force-exclude = true # Apply excludes to pre-commit
@@ -141,6 +141,9 @@ classmethod-decorators = ["classmethod", "validator", "root_validator", "pydantic.validator"]
"S101", # asserts are encouraged in pytest
]

[tool.deptry.per_rule_ignores]
DEP001 = ["ssb_timeseries", "nox", "nox_poetry"] # packages available by default

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
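
For context on the constraint changes above: Poetry's caret operator caps a dependency below its next major version, while the new bare floors accept any newer release. A small illustration using the packaging library (not one of this project's dependencies):

    from packaging.specifiers import SpecifierSet

    caret = SpecifierSet(">=2.1.1,<3.0.0")  # what pandas = "^2.1.1" expands to
    floor = SpecifierSet(">=2.1.1")         # what pandas = ">=2.1.1" allows

    print("2.2.0" in caret, "2.2.0" in floor)  # True True
    print("3.0.2" in caret, "3.0.2" in floor)  # False True: floors admit new majors
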
38 changes: 25 additions & 13 deletions src/ssb_timeseries/dataset.py
@@ -6,7 +6,7 @@
 from typing import no_type_check
 
 import numpy as np
-import pandas as pd  # type: ignore[import-untyped]
+import pandas as pd
 from typing_extensions import Self
 
 from ssb_timeseries import io
@@ -367,11 +367,14 @@ def __getitem__(
"""Access Dataset.data.columns via Dataset[ list[column_names] | pattern | tags].

Arguments:
criteria: (str | dict) Either a string pattern or a dict of tags.
criteria: Either a string pattern or a dict of tags.
kwargs: If criteria is empty, this is passed to filter().

Returns:
Self | None

Raises:
TypeError: If filter() returns another type than Dataset.
"""
# pattern: str = "", regex: str = "", tags: dict = {}):
# Dataset[...] should return a Dataset object (?) with only the requested items (columns).
@@ -384,14 +387,18 @@ def __getitem__(
         # Or, is there a trick using dataframe views?
         # --->
         if criteria and isinstance(criteria, str):
-            return self.filter(pattern=criteria)
+            result = self.filter(pattern=criteria)
         elif criteria and isinstance(criteria, dict):
-            return self.filter(tags=criteria)
+            result = self.filter(tags=criteria)
         elif kwargs:
             ts_logger.debug(f"DATASET.__getitem__(:\n\t{kwargs} ")
-            return self.filter(**kwargs)
+            result = self.filter(**kwargs)
         else:
             return None
+        if isinstance(result, Dataset):
+            return result  # type: ignore[return-value]
+        else:
+            raise TypeError("Dataset.filter() did not return a Dataset type.")
 
     def plot(self, *args: Any, **kwargs: Any) -> Any:
         """Plot dataset data.
@@ -400,7 +407,7 @@ def plot(self, *args: Any, **kwargs: Any) -> Any:
"""
xlabels = self.datetime_columns()[0]
ts_logger.debug(f"Dataset.plot({args!r}, {kwargs!r}) x-labels {xlabels}")
return self.data.plot(
return self.data.plot( # type: ignore[call-overload]
xlabels,
*args,
legend=len(self.data.columns) < 9,
@@ -452,28 +459,33 @@ def groupby(
         period_index = pd.PeriodIndex(self.data[datetime_columns[0]], freq=freq)
         ts_logger.debug(f"DATASET {self.name}: period index\n{period_index}.")
 
+        # Fix for case when **kwargs contains numeric_only
+        if "numeric_only" in kwargs:
+            kwargs.pop("numeric_only")
+        numeric_only_value = True
+
         match func:
             case "mean":
-                out = self.data.groupby(period_index).mean(
-                    *args, numeric_only=True, **kwargs
+                out = self.data.groupby(period_index).mean(  # type: ignore[misc]
+                    *args, numeric_only=numeric_only_value, **kwargs
                 )
             case "sum":
-                out = self.data.groupby(period_index).sum(
-                    *args, numeric_only=True, **kwargs
+                out = self.data.groupby(period_index).sum(  # type: ignore[misc]
+                    *args, numeric_only=numeric_only_value, **kwargs
                 )
             case "auto":
                 # TODO: QA on exact logic / use "real" metadata
                 # in particular, how to check metadata and blend df1 and df2 values as appropriate
                 # (this implementation is just to show how it can be done)
                 # QUESTION: do we need a default for "other" series / what should it be?
-                df1 = self.data.groupby(period_index).mean(
-                    *args, numeric_only=True, **kwargs
+                df1 = self.data.groupby(period_index).mean(  # type: ignore[misc]
+                    *args, numeric_only=numeric_only_value, **kwargs
                 )
                 ts_logger.debug(f"groupby\n{df1}.")
 
                 df2 = (
                     self.data.groupby(period_index)
-                    .sum(*args, numeric_only=True, **kwargs)
+                    .sum(*args, numeric_only=numeric_only_value, **kwargs)  # type: ignore[misc]
                     .filter(regex="mendgde|volum|vekt")
                 )
                 ts_logger.warning(f"groupby\n{df2}.")
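
The numeric_only handling above prevents a caller from passing numeric_only through **kwargs, which would collide with the keyword the method sets itself. A standalone sketch of the same guard on toy data (column and function names are made up):

    import pandas as pd


    def mean_by_quarter(df: pd.DataFrame, date_col: str, **kwargs) -> pd.DataFrame:
        # Mirror the fix: drop any caller-supplied numeric_only, then force True,
        # so mean() never receives the keyword twice.
        kwargs.pop("numeric_only", None)
        period_index = pd.PeriodIndex(df[date_col], freq="Q")
        return df.groupby(period_index).mean(numeric_only=True, **kwargs)


    df = pd.DataFrame(
        {
            "valid_at": pd.date_range("2024-01-01", periods=6, freq="MS"),
            "volum": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        }
    )
    print(mean_by_quarter(df, "valid_at", numeric_only=False))  # no TypeError
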
27 changes: 19 additions & 8 deletions src/ssb_timeseries/meta.py
@@ -3,6 +3,8 @@
 Ideally, this functionality should live elsewhere, in ssb-python-klass and other meta data libraries. Likely subject to refactoring later.
 """
 
+import io
+
 import bigtree
 import pandas as pd
 from klass import get_classification
@@ -72,7 +74,7 @@ def __init__(
         self.definition = {"name": root_name}
         if isinstance(id_or_path, int):
             # TO DO: handle versions of KLASS
-            klass = get_classification(id_or_path).get_codes().data
+            klass = get_classification(str(id_or_path)).get_codes().data
             self.entities = add_root_node(
                 klass, {"code": "0", "parentCode": None, "name": root_name}
             )
@@ -116,9 +118,11 @@ def __eq__(self, other: Self) -> bool:
         o_entities = other.entities[fields_to_compare].reset_index(drop=True)
 
         ts_logger.debug(
-            f"comparing:\n{s_entities.to_string()}\n...and:\n{s_entities.to_string()}"
+            f"comparing:\n{s_entities.to_string()}\n...and:\n{o_entities.to_string()}"
         )
-        ts_logger.debug(f".info:\n{s_entities.info()}\n...and:\n{s_entities.info()}")
+        ts_logger.debug(
+            f".info:\n{_df_info_as_string(s_entities)}\n...and:\n{_df_info_as_string(o_entities)}"
+        )
         entities_equal = all(s_entities == o_entities)
 
         return trees_equal and entities_equal
@@ -146,15 +150,15 @@ def save(self, path: PathStr) -> None:
         self.entities.to_json(path_or_buf=path)
 
 
-def add_root_node(df: pd.DataFrame, root_node: dict) -> pd.DataFrame:
+def add_root_node(df: pd.DataFrame, root_node: dict[str, str | None]) -> pd.DataFrame:
     """Prepend root node row to taxonomy dataframe."""
-    new_row = dict((c, None) for c in df.columns)
-    for k in root_node.keys():
+    new_row = {c: None for c in df.columns}
+    for k in root_node:
         new_row[k] = root_node[k]
     df.rename(columns={"name": "fullName"})
     df["parentCode"] = df["parentCode"].fillna(value=root_node["code"])
-    df.loc[-1] = root_node
-    df.index = df.index + 1
+    root_df = pd.DataFrame(root_node, index=[0])
+    df = pd.concat([root_df, df], ignore_index=True)
     df.sort_index(inplace=True)
     return df
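
The new implementation prepends the root row by building a one-row frame and concatenating, instead of mutating via df.loc[-1] and shifting the index. A minimal standalone illustration on a toy taxonomy (column names follow the KLASS shape used above):

    import pandas as pd

    df = pd.DataFrame({"code": ["1", "2"], "parentCode": [None, "1"], "name": ["a", "b"]})
    root_node = {"code": "0", "parentCode": None, "name": "root"}

    df["parentCode"] = df["parentCode"].fillna(value=root_node["code"])  # orphans -> root
    root_df = pd.DataFrame(root_node, index=[0])
    df = pd.concat([root_df, df], ignore_index=True)  # root row ends up first
    print(df)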

Expand Down Expand Up @@ -275,3 +279,10 @@ def to_str(self, attributes: list(str) = None, separator: str = "_") -> list[str
     def __repr__(self) -> str:
         """Return initialization for a copy of the series tag object: SeriesTags(name={self.name}, versioning={self.versioning}, temporality={self.temporality}, tags={self.tags})."""
         return f"SeriesTags(name={self.name}, versioning={self.versioning}, temporality={self.temporality}, tags={self.tags})"
+
+
+def _df_info_as_string(df: pd.DataFrame) -> str:
+    """Returns the content of df.info() as a string."""
+    with io.StringIO() as buffer:
+        df.info(buf=buffer)
+        return buffer.getvalue()
2 changes: 1 addition & 1 deletion src/ssb_timeseries/sample_data.py
@@ -114,7 +114,7 @@ def create_df(
"T": "minutes",
"S": "seconds",
}
valid_to = valid_from + pd.DateOffset(**{freq_lookup[freq]: interval})
valid_to = valid_from + pd.DateOffset(**{freq_lookup[freq]: interval}) # type: ignore

# BUGFIX: If *lists receives strings, permutations will be over chars by chars
# Kombiner listene til en enkelt liste av lister
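
On the type: ignore above: mypy cannot check keyword names that are built dynamically through ** unpacking, even though the call is fine at runtime. A short illustration of the same freq_lookup pattern (the interval value here is made up):

    import pandas as pd

    freq_lookup = {"D": "days", "H": "hours", "T": "minutes", "S": "seconds"}
    valid_from = pd.Timestamp("2024-01-01")
    valid_to = valid_from + pd.DateOffset(**{freq_lookup["T"]: 5})
    print(valid_to)  # 2024-01-01 00:05:00
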
6 changes: 3 additions & 3 deletions tests/test_meta_tagging.py
@@ -148,7 +148,7 @@ def test_find_data_using_single_metadata_attribute(
     expected_matches = ["a_p_z", "a_q_z", "a_r_z"]
 
     ts_logger.debug(
-        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"
+        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"  # type: ignore
     )
     assert isinstance(x_attr_A_equals_a, Dataset)
     assert sorted(x_attr_A_equals_a.numeric_columns()) == sorted(expected_matches)
@@ -196,7 +196,7 @@ def test_find_data_using_multiple_metadata_attributes(
     expected_matches = ["a_q_z"]
 
     ts_logger.debug(
-        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"
+        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"  # type: ignore
     )
     assert isinstance(x_attr_A_equals_a, Dataset)
     assert sorted(x_attr_A_equals_a.numeric_columns()) == sorted(expected_matches)
@@ -245,7 +245,7 @@ def test_find_data_using_metadata_criteria_with_single_attribute_and_multiple_values(
     expected_matches = ["a_p_z", "a_q_z", "a_r_z", "b_p_z", "b_q_z", "b_r_z"]
 
     ts_logger.debug(
-        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"
+        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"  # type: ignore
     )
     assert isinstance(x_attr_A_equals_a, Dataset)
     assert sorted(x_attr_A_equals_a.numeric_columns()) == sorted(expected_matches)