Fix some mypy errors
arneso-ssb committed May 7, 2024
1 parent 8661f1c commit 3284df2
Showing 6 changed files with 50 additions and 26 deletions.
2 changes: 1 addition & 1 deletion noxfile.py
@@ -142,7 +142,7 @@ def precommit(session: Session) -> None:
 @session(python=python_versions)
 def mypy(session: Session) -> None:
     """Type-check using mypy."""
-    args = session.posargs or ["src", "tests"]
+    args = session.posargs or ["src", "tests", "click"]
     session.install(".")
     session.install("mypy", "pytest")
     session.run("mypy", *args)
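Note on the session defaults: `session.posargs or [...]` means positional arguments passed on the command line replace the default target list entirely. A minimal sketch of that fallback logic (names are illustrative only):

    def pick_args(posargs: list[str]) -> list[str]:
        # An empty posargs list is falsy, so the defaults are used;
        # any explicit arguments override the defaults wholesale.
        return posargs or ["src", "tests", "click"]

    assert pick_args([]) == ["src", "tests", "click"]
    assert pick_args(["src"]) == ["src"]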
1 change: 1 addition & 0 deletions pyproject.toml
@@ -72,6 +72,7 @@ warn_unreachable = true
 pretty = true
 show_column_numbers = true
 show_error_context = true
+disallow_any_generics = false

 [tool.ruff]
 force-exclude = true # Apply excludes to pre-commit
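`disallow_any_generics = false` relaxes mypy's check that every generic type carries explicit type parameters. A minimal illustration of what the setting toggles:

    def tail(values: list) -> list:  # bare `list`, no type parameters
        # disallow_any_generics = true  -> error: Missing type parameters
        #                                  for generic type "list"  [type-arg]
        # disallow_any_generics = false -> accepted; the parameters are
        #                                  treated as implicit Any
        return values[1:]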
38 changes: 25 additions & 13 deletions src/ssb_timeseries/dataset.py
@@ -6,7 +6,7 @@
 from typing import no_type_check

 import numpy as np
-import pandas as pd  # type: ignore[import-untyped]
+import pandas as pd
 from typing_extensions import Self

 from ssb_timeseries import io
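Dropping the ignore comment on the pandas import only type-checks cleanly if mypy can find type information for pandas, e.g. from the pandas-stubs package (an assumption; the dependency change is not shown in this diff). Without stubs, mypy reports:

    import pandas as pd  # without stubs or a py.typed marker, mypy emits:
    # error: Skipping analyzing "pandas": module is installed, but missing
    # library stubs or py.typed marker  [import-untyped]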
@@ -367,11 +367,14 @@ def __getitem__(
         """Access Dataset.data.columns via Dataset[ list[column_names] | pattern | tags].

         Arguments:
-            criteria: (str | dict) Either a string pattern or a dict of tags.
+            criteria: Either a string pattern or a dict of tags.
             kwargs: If criteria is empty, this is passed to filter().

         Returns:
             Self | None

+        Raises:
+            TypeError: If filter() returns another type than Dataset.
+
         """
         # pattern: str = "", regex: str = "", tags: dict = {}):
         # Dataset[...] should return a Dataset object (?) with only the requested items (columns).
@@ -384,14 +387,18 @@ def __getitem__(
         # Or, is there a trick using dataframe views?
         # --->
         if criteria and isinstance(criteria, str):
-            return self.filter(pattern=criteria)
+            result = self.filter(pattern=criteria)
         elif criteria and isinstance(criteria, dict):
-            return self.filter(tags=criteria)
+            result = self.filter(tags=criteria)
         elif kwargs:
             ts_logger.debug(f"DATASET.__getitem__(:\n\t{kwargs} ")
-            return self.filter(**kwargs)
+            result = self.filter(**kwargs)
         else:
             return None
+        if isinstance(result, Dataset):
+            return result  # type: ignore[return-value]
+        else:
+            raise TypeError("Dataset.filter() did not return a Dataset type.")

     def plot(self, *args: Any, **kwargs: Any) -> Any:
         """Plot dataset data.
@@ -400,7 +407,7 @@ def plot(self, *args: Any, **kwargs: Any) -> Any:
         """
         xlabels = self.datetime_columns()[0]
         ts_logger.debug(f"Dataset.plot({args!r}, {kwargs!r}) x-labels {xlabels}")
-        return self.data.plot(
+        return self.data.plot(  # type: ignore[call-overload]
             xlabels,
             *args,
             legend=len(self.data.columns) < 9,
@@ -452,28 +459,33 @@ def groupby(
         period_index = pd.PeriodIndex(self.data[datetime_columns[0]], freq=freq)
         ts_logger.debug(f"DATASET {self.name}: period index\n{period_index}.")

+        # Fix for case when **kwargs contains numeric_only
+        if "numeric_only" in kwargs:
+            kwargs.pop("numeric_only")
+        numeric_only_value = True
+
         match func:
             case "mean":
-                out = self.data.groupby(period_index).mean(
-                    *args, numeric_only=True, **kwargs
+                out = self.data.groupby(period_index).mean(  # type: ignore[misc]
+                    *args, numeric_only=numeric_only_value, **kwargs
                 )
             case "sum":
-                out = self.data.groupby(period_index).sum(
-                    *args, numeric_only=True, **kwargs
+                out = self.data.groupby(period_index).sum(  # type: ignore[misc]
+                    *args, numeric_only=numeric_only_value, **kwargs
                 )
             case "auto":
                 # TODO: QA on exact logic / use "real" metadata
                 # in particular, how to check metadata and blend df1 and df2 values as appropriate
                 # (this implementation is just to show how it can be done)
                 # QUESTION: do we need a default for "other" series / what should it be?
-                df1 = self.data.groupby(period_index).mean(
-                    *args, numeric_only=True, **kwargs
+                df1 = self.data.groupby(period_index).mean(  # type: ignore[misc]
+                    *args, numeric_only=numeric_only_value, **kwargs
                 )
                 ts_logger.debug(f"groupby\n{df1}.")

                 df2 = (
                     self.data.groupby(period_index)
-                    .sum(*args, numeric_only=True, **kwargs)
+                    .sum(*args, numeric_only=numeric_only_value, **kwargs)  # type: ignore[misc]
                     .filter(regex="mendgde|volum|vekt")
                 )
                 ts_logger.warning(f"groupby\n{df2}.")
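On the `numeric_only` handling above: popping a caller-supplied `numeric_only` before passing the explicit keyword prevents a "got multiple values for keyword argument" TypeError at runtime. A self-contained sketch of the pattern, with hypothetical names:

    def aggregate(**kwargs: bool) -> dict[str, bool]:
        # Strip any caller-supplied numeric_only so the explicit keyword
        # below cannot collide with the one unpacked from **kwargs.
        kwargs.pop("numeric_only", None)
        numeric_only_value = True
        return dict(numeric_only=numeric_only_value, **kwargs)

    assert aggregate(numeric_only=False) == {"numeric_only": True}
    assert aggregate(dropna=True) == {"numeric_only": True, "dropna": True}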
27 changes: 19 additions & 8 deletions src/ssb_timeseries/meta.py
@@ -3,6 +3,8 @@
 Ideally, this functionality should live elsewhere, in ssb-python-klass and other meta data libraries. Likely subject to refactoring later.
 """

+import io
+
 import bigtree
 import pandas as pd
 from klass import get_classification
@@ -72,7 +74,7 @@ def __init__(
         self.definition = {"name": root_name}
         if isinstance(id_or_path, int):
             # TO DO: handle versions of KLASS
-            klass = get_classification(id_or_path).get_codes().data
+            klass = get_classification(str(id_or_path)).get_codes().data
             self.entities = add_root_node(
                 klass, {"code": "0", "parentCode": None, "name": root_name}
             )
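The `str(...)` cast aligns the call with klass's `get_classification`, which expects the classification id as a string, while `id_or_path` is an int on this branch. A hedged usage sketch (the id 703 is purely illustrative):

    from klass import get_classification

    classification_id = 703  # hypothetical KLASS id held as an int
    codes = get_classification(str(classification_id)).get_codes().data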
@@ -116,9 +118,11 @@ def __eq__(self, other: Self) -> bool:
         o_entities = other.entities[fields_to_compare].reset_index(drop=True)

         ts_logger.debug(
-            f"comparing:\n{s_entities.to_string()}\n...and:\n{s_entities.to_string()}"
+            f"comparing:\n{s_entities.to_string()}\n...and:\n{o_entities.to_string()}"
         )
-        ts_logger.debug(f".info:\n{s_entities.info()}\n...and:\n{s_entities.info()}")
+        ts_logger.debug(
+            f".info:\n{_df_info_as_string(s_entities)}\n...and:\n{_df_info_as_string(o_entities)}"
+        )
         entities_equal = all(s_entities == o_entities)

         return trees_equal and entities_equal
@@ -146,15 +150,15 @@ def save(self, path: PathStr) -> None:
         self.entities.to_json(path_or_buf=path)


-def add_root_node(df: pd.DataFrame, root_node: dict) -> pd.DataFrame:
+def add_root_node(df: pd.DataFrame, root_node: dict[str, str | None]) -> pd.DataFrame:
     """Prepend root node row to taxonomy dataframe."""
-    new_row = dict((c, None) for c in df.columns)
-    for k in root_node.keys():
+    new_row = {c: None for c in df.columns}
+    for k in root_node:
         new_row[k] = root_node[k]
     df.rename(columns={"name": "fullName"})
     df["parentCode"] = df["parentCode"].fillna(value=root_node["code"])
-    df.loc[-1] = root_node
-    df.index = df.index + 1
+    root_df = pd.DataFrame(root_node, index=[0])
+    df = pd.concat([root_df, df], ignore_index=True)
     df.sort_index(inplace=True)
     return df
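The `pd.concat` rewrite avoids `df.loc[-1] = root_node`, which assigns a dict to a label and trips mypy; it is also the idiomatic pandas way to prepend a row. A minimal standalone sketch:

    import pandas as pd

    df = pd.DataFrame({"code": ["1", "2"], "parentCode": ["0", "1"]})
    root_node = {"code": "0", "parentCode": None}

    root_df = pd.DataFrame(root_node, index=[0])
    df = pd.concat([root_df, df], ignore_index=True)
    assert df.loc[0, "code"] == "0"  # the root row now sits first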

@@ -275,3 +279,10 @@ def to_str(self, attributes: list(str) = None, separator: str = "_") -> list[str
     def __repr__(self) -> str:
         """Return initialization for a copy of the series tag object: SeriesTags(name={self.name}, versioning={self.versioning}, temporality={self.temporality}, tags={self.tags})."""
         return f"SeriesTags(name={self.name}, versioning={self.versioning}, temporality={self.temporality}, tags={self.tags})"
+
+
+def _df_info_as_string(df: pd.DataFrame) -> str:
+    """Returns the content of df.info() as a string."""
+    with io.StringIO() as buffer:
+        df.info(buf=buffer)
+        return buffer.getvalue()
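This helper exists because `DataFrame.info()` prints to stdout and returns None, so interpolating it into an f-string logs the text "None"; writing into a StringIO buffer captures the summary as text. Usage sketch:

    import io
    import pandas as pd

    df = pd.DataFrame({"x": [1, 2, 3]})
    with io.StringIO() as buffer:
        df.info(buf=buffer)
        summary = buffer.getvalue()  # dtype/column summary as a string
    print(summary)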
2 changes: 1 addition & 1 deletion src/ssb_timeseries/sample_data.py
@@ -114,7 +114,7 @@ def create_df(
         "T": "minutes",
         "S": "seconds",
     }
-    valid_to = valid_from + pd.DateOffset(**{freq_lookup[freq]: interval})
+    valid_to = valid_from + pd.DateOffset(**{freq_lookup[freq]: interval})  # type: ignore

     # BUGFIX: If *lists receives strings, permutations will be char by char
     # Combine the lists into a single list of lists
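The ignore is needed because unpacking a dict built with a dynamic key erases the keyword name, so mypy cannot check it against `pd.DateOffset`'s signature. A minimal reproduction:

    import pandas as pd

    freq_lookup = {"D": "days", "H": "hours"}
    freq, interval = "D", 7

    # mypy cannot verify that the unpacked key is a valid keyword here:
    offset = pd.DateOffset(**{freq_lookup[freq]: interval})  # type: ignore
    assert pd.Timestamp("2024-01-01") + offset == pd.Timestamp("2024-01-08")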
6 changes: 3 additions & 3 deletions tests/test_meta_tagging.py
@@ -148,7 +148,7 @@ def test_find_data_using_single_metadata_attribute(
     expected_matches = ["a_p_z", "a_q_z", "a_r_z"]

     ts_logger.debug(
-        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"
+        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"  # type: ignore
     )
     assert isinstance(x_attr_A_equals_a, Dataset)
     assert sorted(x_attr_A_equals_a.numeric_columns()) == sorted(expected_matches)
@@ -196,7 +196,7 @@ def test_find_data_using_multiple_metadata_attributes(
     expected_matches = ["a_q_z"]

     ts_logger.debug(
-        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"
+        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"  # type: ignore
     )
     assert isinstance(x_attr_A_equals_a, Dataset)
     assert sorted(x_attr_A_equals_a.numeric_columns()) == sorted(expected_matches)
@@ -245,7 +245,7 @@ def test_find_data_using_metadata_criteria_with_single_attribute_and_multiple_va
     expected_matches = ["a_p_z", "a_q_z", "a_r_z", "b_p_z", "b_q_z", "b_r_z"]

     ts_logger.debug(
-        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"
+        f"x_attr_A_equals_a: \n\t{x_attr_A_equals_a.series()}\n vs expected:\n\t{expected_matches}"  # type: ignore
     )
     assert isinstance(x_attr_A_equals_a, Dataset)
     assert sorted(x_attr_A_equals_a.numeric_columns()) == sorted(expected_matches)
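An alternative to the blanket ignores above: since every test asserts `isinstance(x_attr_A_equals_a, Dataset)` anyway, moving the assert before the debug call would let mypy narrow an optional result and drop the ignore. A generic, self-contained sketch of that narrowing:

    from typing import Optional

    def find(pattern: str) -> Optional[str]:  # stand-in for a filter() call
        return pattern or None

    match = find("a")
    assert isinstance(match, str)  # mypy narrows Optional[str] to str here
    print(match.upper())           # no "type: ignore" needed after narrowing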
