diff --git a/setup.cfg b/setup.cfg index 5be562b..bb5eded 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,9 +8,9 @@ long_description = file: README.md long_description_content_type = text/markdown classifiers = Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Operating System :: OS Independent Development Status :: 4 - Beta @@ -21,7 +21,7 @@ url = https://pyaro.readthedocs.io [options] -python_version = >=3.9 +python_version = >=3.10 install_requires = numpy importlib-metadata >= 3.6; python_version < "3.10" @@ -33,9 +33,9 @@ test_require = tox:tox [tox:tox] min_version = 4.0 env_list = + py312 py311 py310 - py39 depends = pandas diff --git a/src/pyaro/timeseries/AutoFilterReaderEngine.py b/src/pyaro/timeseries/AutoFilterReaderEngine.py index fba17be..5bd2dd5 100644 --- a/src/pyaro/timeseries/AutoFilterReaderEngine.py +++ b/src/pyaro/timeseries/AutoFilterReaderEngine.py @@ -7,7 +7,7 @@ from .Filter import VariableNameFilter, Filter, filters, FilterFactory -class UnkownFilterException(Exception): +class UnknownFilterException(Exception): pass @@ -26,20 +26,15 @@ class AutoFilterReader(Reader): """ @classmethod - def supported_filters(cls) -> [Filter]: + def supported_filters(cls) -> list[Filter]: """Get the default list of implemented filters. :return: list of filters """ - filts = [] - for ( - f - ) in "variables,stations,countries,bounding_boxes,time_bounds,flags,time_variable_station".split( + supported = "variables,stations,countries,bounding_boxes,duplicates,time_bounds,flags,time_variable_station".split( "," - ): - filts.append(filters.get(f)) - - return filts + ) + return [filters.get(name) for name in supported] def _set_filters(self, filters): supported = set() @@ -53,12 +48,12 @@ def _set_filters(self, filters): filters = filtlist for filt in filters: if filt.__class__ not in supported: - raise UnkownFilterException( + raise UnknownFilterException( f"Filter {filt.__class__} not supported in {supported}." ) self._filters = filters - def _get_filters(self) -> [Filter]: + def _get_filters(self) -> list[Filter]: """Get a list of filters actually set during initialization of this object. :return: list of filters @@ -116,7 +111,7 @@ def reader_class(self) -> Reader: """ pass - def supported_filters(self) -> [Filter]: + def supported_filters(self) -> list[Filter]: """The supported filters by this Engine. Maps to the Readers supported_filters. :return: a list of filters diff --git a/src/pyaro/timeseries/Filter.py b/src/pyaro/timeseries/Filter.py index 15e2666..fb048f7 100644 --- a/src/pyaro/timeseries/Filter.py +++ b/src/pyaro/timeseries/Filter.py @@ -43,7 +43,9 @@ def name(self) -> str: :return: a string to be used by FilterFactory """ - def filter_data(self, data: Data, stations: [Station], variables: [str]) -> Data: + def filter_data( + self, data: Data, stations: list[Station], variables: list[str] + ) -> Data: """Filtering of data :param data: Data of e.g. a Reader.data(varname) call @@ -61,7 +63,7 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]: """ return stations - def filter_variables(self, variables: [str]) -> [str]: + def filter_variables(self, variables: list[str]) -> list[str]: """Filtering of variables :param variables: List of variables, e.g. from a Reader.variables() call @@ -203,8 +205,8 @@ class VariableNameFilter(Filter): def __init__( self, reader_to_new: dict[str, str] = {}, - include: [str] = [], - exclude: [str] = [], + include: list[str] = [], + exclude: list[str] = [], ): """Create a new variable name filter. @@ -251,7 +253,7 @@ def filter_data(self, data, stations, variables): data._set_variable(self._reader_to_new.get(data.variable, data.variable)) return data - def filter_variables(self, variables: [str]) -> [str]: + def filter_variables(self, variables: list[str]) -> list[str]: """change variable name and reduce variables applying include and exclude parameters :param variables: variable names as in the reader @@ -308,7 +310,7 @@ def filter_data_idx(self, data: Data, stations: dict[str, Station], variables: s @registered_filter class StationFilter(StationReductionFilter): - def __init__(self, include: [str] = [], exclude: [str] = []): + def __init__(self, include: list[str] = [], exclude: list[str] = []): self._include = set(include) self._exclude = set(exclude) return @@ -333,7 +335,7 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]: @registered_filter class CountryFilter(StationReductionFilter): - def __init__(self, include: [str] = [], exclude: [str] = []): + def __init__(self, include: list[str] = [], exclude: list[str] = []): """Filter countries by ISO2 names (capitals!) :param include: countries to include, defaults to [], meaning all countries @@ -371,8 +373,8 @@ class BoundingBoxFilter(StationReductionFilter): def __init__( self, - include: [(float, float, float, float)] = [], - exclude: [(float, float, float, float)] = [], + include: list[(float, float, float, float)] = [], + exclude: list[(float, float, float, float)] = [], ): """Filter using geographical bounding-boxes. Coordinates should be given in the range [-180,180] (degrees_east) for longitude and [-90,90] (degrees_north) for latitude. @@ -460,7 +462,7 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]: @registered_filter class FlagFilter(DataIndexFilter): - def __init__(self, include: [Flag] = [], exclude: [Flag] = []): + def __init__(self, include: list[Flag] = [], exclude: list[Flag] = []): """Filter data by Flags :param include: flags to include, defaults to [], meaning all flags @@ -498,12 +500,12 @@ class TimeBoundsException(Exception): class TimeBoundsFilter(DataIndexFilter): def __init__( self, - start_include: [(str, str)] = [], - start_exclude: [(str, str)] = [], - startend_include: [(str, str)] = [], - startend_exclude: [(str, str)] = [], - end_include: [(str, str)] = [], - end_exclude: [(str, str)] = [], + start_include: list[(str, str)] = [], + start_exclude: list[(str, str)] = [], + startend_include: list[(str, str)] = [], + startend_exclude: list[(str, str)] = [], + end_include: list[(str, str)] = [], + end_exclude: list[(str, str)] = [], ): """Filter data by start and/or end-times of the measurements. Each timebound consists of a bound-start and bound-end (both included). Timestamps are given as YYYY-MM-DD HH:MM:SS @@ -527,7 +529,7 @@ def __init__( def name(self): return "time_bounds" - def _str_list_to_datetime_list(self, tuple_list: [(str, str)]): + def _str_list_to_datetime_list(self, tuple_list: list[(str, str)]): retlist = [] for start, end in tuple_list: start_dt = datetime.strptime(start, self.time_format) @@ -539,7 +541,7 @@ def _str_list_to_datetime_list(self, tuple_list: [(str, str)]): retlist.append((start_dt, end_dt)) return retlist - def _datetime_list_to_str_list(self, tuple_list) -> [(str, str)]: + def _datetime_list_to_str_list(self, tuple_list) -> list[(str, str)]: retlist = [] for start_dt, end_dt in tuple_list: retlist.append( @@ -579,7 +581,7 @@ def has_envelope(self): or len(self._end_include) ) - def envelope(self) -> (datetime, datetime): + def envelope(self) -> tuple[datetime, datetime]: """Get the earliest and latest time possible for this filter. :return: earliest start and end-time (approximately) @@ -709,6 +711,37 @@ def filter_data_idx(self, data: Data, stations: dict[str, Station], variables: s return idx +@registered_filter +class DuplicateFilter(DataIndexFilter): + default_keys = ["stations", "start_times", "end_times"] + + def __init__(self, duplicate_keys: list[str] | None = None): + """remove duplicates from the data. By default, data with common + station, start_time, end_time are consider duplicates. Only one of the duplicates + is kept. + + :param duplicate_keys: list of data-fields/columns, defaults to None, being the same + as ["stations", "start_times", "end_times"] + """ + self._keys = duplicate_keys + + def init_kwargs(self): + if self._keys is None: + return {} + else: + return {"duplicate_keys": self._keys} + + def name(self): + return "duplicates" + + def filter_data_idx(self, data: Data, stations: dict[str, Station], variables: str): + if self._keys is None: + xkeys = self.default_keys + else: + xkeys = self._keys + return np.unique(data[xkeys], return_index=True)[1] + + if __name__ == "__main__": for name, fil in filters._filters.items(): assert name == fil.name() diff --git a/tests/test_CSVTimeSeriesReader.py b/tests/test_CSVTimeSeriesReader.py index eaa01d5..976b204 100644 --- a/tests/test_CSVTimeSeriesReader.py +++ b/tests/test_CSVTimeSeriesReader.py @@ -329,6 +329,21 @@ def test_variables_filter(self): self.assertEqual(ts.data(newsox).variable, newsox) pass + def test_duplicate_filter(self): + engine = pyaro.list_timeseries_engines()["csv_timeseries"] + with engine.open( + self.multifile_dir + "/csvReader_testdata2.csv", + filters={"duplicates": {"duplicate_keys": None}}, + ) as ts: + self.assertEqual(len(ts.data("NOx")), 8) + with engine.open( + self.multifile_dir + "/csvReader_testdata2.csv", + filters={ + "duplicates": {"duplicate_keys": ["stations", "start_times", "values"]} + }, + ) as ts: + self.assertEqual(len(ts.data("NOx")), 10) + def test_filterFactory(self): filters = pyaro.timeseries.filters.list() print(filters["variables"]) diff --git a/tests/testdata/datadir/csvReader_testdata2.csv b/tests/testdata/datadir/csvReader_testdata2.csv index cd0c3d1..83c718a 100644 --- a/tests/testdata/datadir/csvReader_testdata2.csv +++ b/tests/testdata/datadir/csvReader_testdata2.csv @@ -1,6 +1,6 @@ NOx,station1,8,58,9.050856687678724,Gg,1998-01-01 00:00:00,1998-01-02 00:00:00,Rural -NOx,station1,8,58,14.281596629076395,Gg,1998-01-02 00:00:00,1998-01-03 00:00:00,Rural -NOx,station1,8,58,15.28751469312069,Gg,1998-01-03 00:00:00,1998-01-04 00:00:00,Rural +NOx,station1,8,58,14.281596629076395,Gg,1998-01-01 00:00:00,1998-01-02 00:00:00,Rural +NOx,station1,8,58,15.28751469312069,Gg,1998-01-01 00:00:00,1998-01-02 00:00:00,Rural NOx,station1,8,58,7.9515805412807055,Gg,1998-01-04 00:00:00,1998-01-05 00:00:00,Rural NOx,station1,8,58,13.455696217690287,Gg,1998-01-05 00:00:00,1998-01-06 00:00:00,Rural NOx,station1,8,58,12.07610491450203,Gg,1998-01-06 00:00:00,1998-01-07 00:00:00,Rural