From 178b00346bffedfcf9bff041284b3398e0b11d06 Mon Sep 17 00:00:00 2001 From: Daniel Heinesen Date: Thu, 5 Sep 2024 13:46:54 +0200 Subject: [PATCH 1/2] Adds better definition of filters. Adds tqdm to read_pyaro --- pyaerocom/io/pyaro/pyaro_config.py | 2 +- pyaerocom/io/pyaro/read_pyaro.py | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pyaerocom/io/pyaro/pyaro_config.py b/pyaerocom/io/pyaro/pyaro_config.py index 3ee96e529..4593d9315 100644 --- a/pyaerocom/io/pyaro/pyaro_config.py +++ b/pyaerocom/io/pyaro/pyaro_config.py @@ -29,7 +29,7 @@ class PyaroConfig(BaseModel): name: str data_id: str filename_or_obj_or_url: str - filters: dict[str, dict[str, list[str]]] + filters: dict[str, dict[str, list[str]] | dict[str, list[tuple]]] name_map: dict[str, str] | None = None # no Unit conversion option ########################## diff --git a/pyaerocom/io/pyaro/read_pyaro.py b/pyaerocom/io/pyaro/read_pyaro.py index 41cfff05f..b44f08249 100644 --- a/pyaerocom/io/pyaro/read_pyaro.py +++ b/pyaerocom/io/pyaro/read_pyaro.py @@ -4,6 +4,8 @@ from copy import deepcopy from typing import NewType +from tqdm import tqdm + import numpy as np from pyaro import list_timeseries_engines, open_timeseries from pyaro.timeseries import Data, Reader, Station @@ -97,7 +99,9 @@ class PyaroToUngriddedData: _DATAERRINDEX = 8 # col where errors can be stored _DATAFLAGINDEX = 9 # can be used to store flags _STOPTIMEINDEX = 10 # can be used to store stop time of acq. - _TRASHINDEX = 11 # index where invalid data can be moved to (e.g. when outliers are removed) + _TRASHINDEX = ( + 11 # index where invalid data can be moved to (e.g. when outliers are removed) + ) # List of keys needed by every station from Pyaro. Used to find extra metadata STATION_KEYS = ( @@ -161,9 +165,7 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat metadata_idx = 0 for var, var_data in pyaro_data.items(): size = var_size[var] - for i in range( - 0, size - ): # The 1 start is a temp fix for the empty first row of the current Data implementation from pyaro + for i in tqdm(range(size), disable=None): data_line = var_data[i] current_station = data_line["stations"] @@ -190,7 +192,9 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat # Fills meta_idx if station_idx[current_station][ts_type] not in meta_idx: - meta_idx[station_idx[current_station][ts_type]] = {v: [] for v in vars} + meta_idx[station_idx[current_station][ts_type]] = { + v: [] for v in vars + } meta_idx[station_idx[current_station][ts_type]][var].append(idx) @@ -200,7 +204,9 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat for station_id in meta_idx: new_meta_idx[station_id] = {} for var_id in meta_idx[station_id]: - new_meta_idx[station_id][var_id] = np.array(meta_idx[station_id][var_id]) + new_meta_idx[station_id][var_id] = np.array( + meta_idx[station_id][var_id] + ) self.data._data = data_array self.data.meta_idx = new_meta_idx @@ -289,7 +295,9 @@ def _add_ts_type_to_metadata( for idx in new_metadata: station_name = new_metadata[idx]["station_name"] ts_type = str(ts_types[station_name]) - new_metadata[idx]["ts_type"] = ts_type if ts_type is not None else "undefined" + new_metadata[idx]["ts_type"] = ( + ts_type if ts_type is not None else "undefined" + ) return new_metadata def get_variables(self) -> list[str]: From 80a44e1a15cb8517c8d036423151f16cc6c6c6f5 Mon Sep 17 00:00:00 2001 From: Daniel Heinesen Date: Thu, 5 Sep 2024 13:54:03 +0200 Subject: [PATCH 2/2] Ruff linting --- pyaerocom/io/pyaro/read_pyaro.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/pyaerocom/io/pyaro/read_pyaro.py b/pyaerocom/io/pyaro/read_pyaro.py index b44f08249..bbe77fbad 100644 --- a/pyaerocom/io/pyaro/read_pyaro.py +++ b/pyaerocom/io/pyaro/read_pyaro.py @@ -99,9 +99,7 @@ class PyaroToUngriddedData: _DATAERRINDEX = 8 # col where errors can be stored _DATAFLAGINDEX = 9 # can be used to store flags _STOPTIMEINDEX = 10 # can be used to store stop time of acq. - _TRASHINDEX = ( - 11 # index where invalid data can be moved to (e.g. when outliers are removed) - ) + _TRASHINDEX = 11 # index where invalid data can be moved to (e.g. when outliers are removed) # List of keys needed by every station from Pyaro. Used to find extra metadata STATION_KEYS = ( @@ -192,9 +190,7 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat # Fills meta_idx if station_idx[current_station][ts_type] not in meta_idx: - meta_idx[station_idx[current_station][ts_type]] = { - v: [] for v in vars - } + meta_idx[station_idx[current_station][ts_type]] = {v: [] for v in vars} meta_idx[station_idx[current_station][ts_type]][var].append(idx) @@ -204,9 +200,7 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat for station_id in meta_idx: new_meta_idx[station_id] = {} for var_id in meta_idx[station_id]: - new_meta_idx[station_id][var_id] = np.array( - meta_idx[station_id][var_id] - ) + new_meta_idx[station_id][var_id] = np.array(meta_idx[station_id][var_id]) self.data._data = data_array self.data.meta_idx = new_meta_idx @@ -295,9 +289,7 @@ def _add_ts_type_to_metadata( for idx in new_metadata: station_name = new_metadata[idx]["station_name"] ts_type = str(ts_types[station_name]) - new_metadata[idx]["ts_type"] = ( - ts_type if ts_type is not None else "undefined" - ) + new_metadata[idx]["ts_type"] = ts_type if ts_type is not None else "undefined" return new_metadata def get_variables(self) -> list[str]: