From d35cc601cf92e7d2e9cd3535fb71a48c0dd7bb5c Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Mon, 4 Jul 2022 14:10:50 +0200 Subject: [PATCH 1/7] move testdata-minimal scripts out of package --- {pyaerocom/scripts => scripts}/testdata-minimal/README.md | 0 {pyaerocom/scripts => scripts}/testdata-minimal/TM5_subset.sh | 0 .../scripts => scripts}/testdata-minimal/calc_example_coldata.py | 0 .../scripts => scripts}/testdata-minimal/create_subset_ebas.py | 0 .../testdata-minimal/create_subsets_aeronet.py | 0 .../scripts => scripts}/testdata-minimal/create_subsets_emep.sh | 0 .../scripts => scripts}/testdata-minimal/create_subsets_ghost.py | 0 {pyaerocom/scripts => scripts}/testdata-minimal/ebas_files.json | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename {pyaerocom/scripts => scripts}/testdata-minimal/README.md (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/TM5_subset.sh (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/calc_example_coldata.py (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/create_subset_ebas.py (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/create_subsets_aeronet.py (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/create_subsets_emep.sh (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/create_subsets_ghost.py (100%) rename {pyaerocom/scripts => scripts}/testdata-minimal/ebas_files.json (100%) diff --git a/pyaerocom/scripts/testdata-minimal/README.md b/scripts/testdata-minimal/README.md similarity index 100% rename from pyaerocom/scripts/testdata-minimal/README.md rename to scripts/testdata-minimal/README.md diff --git a/pyaerocom/scripts/testdata-minimal/TM5_subset.sh b/scripts/testdata-minimal/TM5_subset.sh similarity index 100% rename from pyaerocom/scripts/testdata-minimal/TM5_subset.sh rename to scripts/testdata-minimal/TM5_subset.sh diff --git a/pyaerocom/scripts/testdata-minimal/calc_example_coldata.py b/scripts/testdata-minimal/calc_example_coldata.py similarity index 100% rename from pyaerocom/scripts/testdata-minimal/calc_example_coldata.py rename to scripts/testdata-minimal/calc_example_coldata.py diff --git a/pyaerocom/scripts/testdata-minimal/create_subset_ebas.py b/scripts/testdata-minimal/create_subset_ebas.py similarity index 100% rename from pyaerocom/scripts/testdata-minimal/create_subset_ebas.py rename to scripts/testdata-minimal/create_subset_ebas.py diff --git a/pyaerocom/scripts/testdata-minimal/create_subsets_aeronet.py b/scripts/testdata-minimal/create_subsets_aeronet.py similarity index 100% rename from pyaerocom/scripts/testdata-minimal/create_subsets_aeronet.py rename to scripts/testdata-minimal/create_subsets_aeronet.py diff --git a/pyaerocom/scripts/testdata-minimal/create_subsets_emep.sh b/scripts/testdata-minimal/create_subsets_emep.sh similarity index 100% rename from pyaerocom/scripts/testdata-minimal/create_subsets_emep.sh rename to scripts/testdata-minimal/create_subsets_emep.sh diff --git a/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py b/scripts/testdata-minimal/create_subsets_ghost.py similarity index 100% rename from pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py rename to scripts/testdata-minimal/create_subsets_ghost.py diff --git a/pyaerocom/scripts/testdata-minimal/ebas_files.json b/scripts/testdata-minimal/ebas_files.json similarity index 100% rename from pyaerocom/scripts/testdata-minimal/ebas_files.json rename to scripts/testdata-minimal/ebas_files.json From d55fc88e59068936df0e23c6046b094ff278eeb3 Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Mon, 4 Jul 2022 15:07:19 +0200 Subject: [PATCH 2/7] import from tests.fixtures.data_access --- .../testdata-minimal/calc_example_coldata.py | 19 ++----- .../testdata-minimal/create_subset_ebas.py | 36 +++---------- .../create_subsets_aeronet.py | 17 ++++--- .../testdata-minimal/create_subsets_ghost.py | 50 ++++++++----------- 4 files changed, 45 insertions(+), 77 deletions(-) diff --git a/scripts/testdata-minimal/calc_example_coldata.py b/scripts/testdata-minimal/calc_example_coldata.py index a5090544a..4b0b687aa 100755 --- a/scripts/testdata-minimal/calc_example_coldata.py +++ b/scripts/testdata-minimal/calc_example_coldata.py @@ -1,27 +1,18 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- import matplotlib.pyplot as plt import pyaerocom as pya +from tests.fixtures.data_access import TestData +from tests.fixtures.tm5 import CHECK_PATHS plt.close("all") -from pathlib import Path -import pyaerocom.testdata_access as td -from pyaerocom.conftest import CHECK_PATHS +OUTBASE = TestData("coldata").path +OUTBASE.mkdir(exist_ok=True) -tda = td.TestDataAccess() - -TESTDATADIR = Path(tda.testdatadir) - -OUTBASE = TESTDATADIR.joinpath("coldata") - -if not OUTBASE.exists(): - OUTBASE.mkdir() - -fpath = TESTDATADIR.joinpath(CHECK_PATHS["tm5aod"]) +fpath = TestData(CHECK_PATHS.tm5aod).path if not fpath.exists(): raise Exception("Unexpected error, please debug") mod = pya.GriddedData(fpath) diff --git a/scripts/testdata-minimal/create_subset_ebas.py b/scripts/testdata-minimal/create_subset_ebas.py index 67270ee54..fee35ab6d 100755 --- a/scripts/testdata-minimal/create_subset_ebas.py +++ b/scripts/testdata-minimal/create_subset_ebas.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -"""simple script to generate a small enough test data set for the EBAS obs network +""" +Simple script to generate a small enough test data set for the EBAS obs network Works only if the user has access to the standard EBAS data path at Met Norway """ @@ -12,40 +11,21 @@ import simplejson import pyaerocom as pya +from tests.fixtures.data_access import TestData -# import pyaerocom.access_testdata as td -from pyaerocom.access_testdata import AccessTestData - -# from getpass import getuser -# -# if getuser() == 'jonasg': -# ebas_local = os.path.join(pya.const.OUTPUTDIR, 'data/obsdata/EBASMultiColumn/data') -# assert os.path.exists(ebas_local) -# else: -# ebas_local=None - +OUTBASE = TestData("testdata-minimal/obsdata/EBASMultiColumn").path +SCRIPT_BASE_DIR = TestData("testdata-minimal/scripts").path -tda = AccessTestData() - -TESTDATADIR = tda.basedir - -OUTBASE = Path(TESTDATADIR).joinpath("testdata-minimal/obsdata/EBASMultiColumn") -SCRIPT_BASE_DIR = Path(TESTDATADIR).joinpath("testdata-minimal/scripts") - -FILES_DEST = OUTBASE.joinpath("data") +FILES_DEST = OUTBASE / "data" UPDATE = True UPDATE_EXISTING = False SEARCH_PROBLEM_FILES = False NAME = "EBASMC" -# if ebas_local is not None: -# FILES_SRC = ebas_local -# else: EBAS_BASE_DIR = "/lustre/storeA/project/aerocom/aerocom1/AEROCOM_OBSDATA/EBASMultiColumn/data/" -assert os.path.exists(EBAS_BASE_DIR) - -JSON_FILE = SCRIPT_BASE_DIR.joinpath("ebas_files.json") +assert Path(EBAS_BASE_DIR).is_dir(), f"missing {EBAS_BASE_DIR}" +JSON_FILE = SCRIPT_BASE_DIR / "ebas_files.json" # ------------------------------------------------------------ # add some files with known problems diff --git a/scripts/testdata-minimal/create_subsets_aeronet.py b/scripts/testdata-minimal/create_subsets_aeronet.py index e3f9ab38c..07336d286 100755 --- a/scripts/testdata-minimal/create_subsets_aeronet.py +++ b/scripts/testdata-minimal/create_subsets_aeronet.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ -Goal +Minimal Aeronet subset for testing purposes """ import os @@ -12,11 +11,10 @@ import numpy as np import pyaerocom as pya +from tests.fixtures.data_access import TestData -OUTBASE = Path(pya.const._TESTDATADIR).joinpath("obsdata") - -if not OUTBASE.exists(): - OUTBASE.mkdir() +OUTBASE = TestData("obsdata").path +OUTBASE.mkdir(exist_ok=True) MIN_NUM_VALID = 300 @@ -36,8 +34,9 @@ ] revision_files = {} -if __name__ == "__main__": + +def main(): loaded = {} for name, varlist in NETWORKS.items(): reader = pya.io.ReadUngridded() @@ -125,3 +124,7 @@ len(filelist), name, os.path.dirname(filelist[0]) ) ) + + +if __name__ == "__main__": + main() diff --git a/scripts/testdata-minimal/create_subsets_ghost.py b/scripts/testdata-minimal/create_subsets_ghost.py index 46d3e77a1..615d6a892 100644 --- a/scripts/testdata-minimal/create_subsets_ghost.py +++ b/scripts/testdata-minimal/create_subsets_ghost.py @@ -1,27 +1,22 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ Create minimal testdataset for GHOST reader - -Created on Fri Feb 26 09:17:09 2021 - -@author: jonasg """ -import os +from pathlib import Path import matplotlib.pyplot as plt - -plt.close("all") import xarray as xr import pyaerocom as pya +from tests.fixtures.data_access import TestData -path_in = os.path.join(pya.const.OUTPUTDIR, "data/obsdata/GHOST/data") +plt.close("all") -path_out = os.path.join(pya.const.OUTPUTDIR, "testdata-minimal/obsdata/GHOST/data") +path_in = Path(pya.const.OUTPUTDIR) / "data/obsdata/GHOST/data" +path_out = TestData("obsdata/GHOST/data").path -assert os.path.exists(path_in) -assert os.path.exists(path_out) +assert path_in.is_dir(), f"missing {path_in}" +assert path_out.is_dir(), f"missing {path_out}" datasets = ["EEA_AQ_eReporting", "EBAS"] @@ -32,36 +27,35 @@ filename = lambda var, date: f"{var}_{date}.nc" -files_out = [] for dsname in datasets: for freq in freqs: - indir = os.path.join(path_in, dsname, freq) - assert os.path.exists(indir) - outdir = os.path.join(path_out, dsname, freq) - os.makedirs(outdir, exist_ok=True) - assert os.path.exists(outdir) + indir = path_in / dsname / freq + assert indir.is_dir(), f"missing {indir}" + + outdir = path_out / dsname / freq + outdir.mkdir(exist_ok=True) for var in varis: if var == "pm10": dates = datesfiles numst = 3 - numts = None if freq == "daily" else 3 - else: - dates = [datesfiles[0]] + dates = datesfiles[0:1] numst = 1 numts = 3 for date in dates: - dir_in = os.path.join(indir, var) - dir_out = os.path.join(outdir, var) - os.makedirs(dir_out, exist_ok=True) - assert os.path.exists(dir_in) + dir_in = indir / var + assert dir_in.is_dir(), f"missing {dir_in}" + + dir_out = outdir / var + dir_out.mkdir(exist_ok=True) + fname = filename(var, date) - file_in = os.path.join(dir_in, fname) - file_out = os.path.join(dir_out, fname) + file_in = dir_in / fname + file_out = dir_out / fname print(file_in) print(file_out) - assert os.path.exists(file_in) + assert file_in.exists, f"missing {file_in}" ds = xr.open_dataset(file_in) subset = ds.isel(station=slice(0, numst)) From bbd2d7b27ef68dcc7ed091f20c21cf5e6fe0bf61 Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Mon, 4 Jul 2022 15:07:53 +0200 Subject: [PATCH 3/7] clean up testdata-minimal scripts --- scripts/testdata-minimal/README.md | 15 ++-- .../testdata-minimal/calc_example_coldata.py | 41 +++++------ .../testdata-minimal/create_subsets_ghost.py | 68 +++++++------------ 3 files changed, 56 insertions(+), 68 deletions(-) mode change 100644 => 100755 scripts/testdata-minimal/create_subsets_ghost.py diff --git a/scripts/testdata-minimal/README.md b/scripts/testdata-minimal/README.md index 5cdc98968..bc781582a 100644 --- a/scripts/testdata-minimal/README.md +++ b/scripts/testdata-minimal/README.md @@ -1,4 +1,5 @@ # Scripts for test dataset creation of pyaerocom + This directory consists of scripts to create the minimal test dataset needed for automatic testing and continuous integration of pyaerocom. The scripts need access to Met Norway's internal file storage and are therefore @@ -8,8 +9,7 @@ they are included in the main pyaerocom gihub repository anyway. The minimal test data created from these scripts will usually go to the subdirectory `~/MyPyaerocom/testdata-minimal` Example model and observation data can be found in sub-directories `modeldata` and `obsdata`, respectively. -At this time only `create_subset_ebas.py` is running with the -latest version of pyaerocom +At this time only `create_subset_ebas.py` is running with the latest version of pyaerocom. ## Data usage guidelines @@ -18,31 +18,34 @@ The data is generally NOT intended to be downloaded and used. If you download th general data policy terms and restrictions of each provided dataset apply. These will be listed in the following. ### AERONET data + See: [https://aeronet.gsfc.nasa.gov/new_web/data_usage.html](https://aeronet.gsfc.nasa.gov/new_web/data_usage.html) ### EBAS data + See: [https://ebas.nilu.no/](https://ebas.nilu.no/) Under "Data policy". ### Model data -- TM5 :Courtesy of Twan van Noije (KNMI) +- TM5: Courtesy of Twan van Noije (KNMI) ### Satellite data - MODIS: start with the [MODIS landing page](https://modis.gsfc.nasa.gov/data/) ## Updating testdata for CI + **Note:** The test data has to be updated by hand for CI to pickup the changes! Howto for that: -``` + +``` bash cd ~/MyPyaerocom mkdir -p ~/tmp tar -cvzf ~/tmp/testdata-minimal.tar.gz testdata-minimal ``` + The resulting file `~/tmp/testdata-minimal.tar.gz` then needs to be copied to the right place. Please ask your fellow developers in case you do not know how to do that. - - diff --git a/scripts/testdata-minimal/calc_example_coldata.py b/scripts/testdata-minimal/calc_example_coldata.py index 4b0b687aa..ee935db6b 100755 --- a/scripts/testdata-minimal/calc_example_coldata.py +++ b/scripts/testdata-minimal/calc_example_coldata.py @@ -1,38 +1,39 @@ #!/usr/bin/env python3 -import matplotlib.pyplot as plt - import pyaerocom as pya from tests.fixtures.data_access import TestData from tests.fixtures.tm5 import CHECK_PATHS -plt.close("all") - - OUTBASE = TestData("coldata").path OUTBASE.mkdir(exist_ok=True) -fpath = TestData(CHECK_PATHS.tm5aod).path -if not fpath.exists(): - raise Exception("Unexpected error, please debug") -mod = pya.GriddedData(fpath) -obs = pya.io.ReadAeronetSunV3("AeronetSunV3L2Subset.daily").read("od550aer") +def main(): + + path = TestData(CHECK_PATHS.tm5aod).path + assert path.exists(), f"missing {path}" + + mod = pya.GriddedData(path) + obs = pya.io.ReadAeronetSunV3("AeronetSunV3L2Subset.daily").read("od550aer") -coldata = pya.colocation.colocate_gridded_ungridded(mod, obs) + coldata = pya.colocation.colocate_gridded_ungridded(mod, obs) + coldata.to_netcdf(OUTBASE) + print(coldata.calc_statistics()) -coldata.to_netcdf(OUTBASE) + coldata.plot_coordinates() -print(coldata.calc_statistics()) + mod = mod.sel(latitude=(0, 3), longitude=(0, 4)) + cgg = pya.colocation.colocate_gridded_gridded(mod, mod) + cgg.data = cgg.data[:, :3] -coldata.plot_coordinates() + cgg.plot_scatter() + cgg.to_netcdf(OUTBASE) -mod = mod.sel(latitude=(0, 3), longitude=(0, 4)) -cgg = pya.colocation.colocate_gridded_gridded(mod, mod) -cgg.data = cgg.data[:, :3] + pya.plot.mapping.plot_nmb_map_colocateddata(cgg) -cgg.plot_scatter() -cgg.to_netcdf(OUTBASE) +if __name__ == "__main__": + import matplotlib.pyplot as plt -pya.plot.mapping.plot_nmb_map_colocateddata(cgg) + plt.close("all") + main() diff --git a/scripts/testdata-minimal/create_subsets_ghost.py b/scripts/testdata-minimal/create_subsets_ghost.py old mode 100644 new mode 100755 index 615d6a892..6296cfa20 --- a/scripts/testdata-minimal/create_subsets_ghost.py +++ b/scripts/testdata-minimal/create_subsets_ghost.py @@ -2,16 +2,14 @@ """ Create minimal testdataset for GHOST reader """ +from itertools import product from pathlib import Path -import matplotlib.pyplot as plt import xarray as xr import pyaerocom as pya from tests.fixtures.data_access import TestData -plt.close("all") - path_in = Path(pya.const.OUTPUTDIR) / "data/obsdata/GHOST/data" path_out = TestData("obsdata/GHOST/data").path @@ -25,42 +23,28 @@ varis = ["pm10", "sconco3"] datesfiles = ["201810", "201911", "201912"] -filename = lambda var, date: f"{var}_{date}.nc" - -for dsname in datasets: - for freq in freqs: - indir = path_in / dsname / freq - assert indir.is_dir(), f"missing {indir}" - - outdir = path_out / dsname / freq - outdir.mkdir(exist_ok=True) - for var in varis: - if var == "pm10": - dates = datesfiles - numst = 3 - numts = None if freq == "daily" else 3 - else: - dates = datesfiles[0:1] - numst = 1 - numts = 3 - for date in dates: - dir_in = indir / var - assert dir_in.is_dir(), f"missing {dir_in}" - - dir_out = outdir / var - dir_out.mkdir(exist_ok=True) - - fname = filename(var, date) - file_in = dir_in / fname - file_out = dir_out / fname - print(file_in) - print(file_out) - assert file_in.exists, f"missing {file_in}" - - ds = xr.open_dataset(file_in) - subset = ds.isel(station=slice(0, numst)) - if numts is not None: - subset = subset.isel(time=slice(0, numts)) - - subset.to_netcdf(file_out) - print("Saved") +for dsname, freq, var in product(datasets, freqs, varis): + if var == "pm10": + dates = datesfiles + numst = 3 + numts = None if freq == "daily" else 3 + else: + dates = datesfiles[0:1] + numst = 1 + numts = 3 + for date in dates: + file_in = path_in / dsname / freq / var / f"{var}_{date}.nc" + assert file_in.exists(), f"missing {file_in}" + + file_out = path_out / file_in.relative_to(path_in) + file_out.parent.mkdir(exist_ok=True, parents=True) + print(file_in) + print(file_out) + + ds = xr.open_dataset(file_in) + ds = ds.isel(station=slice(0, numst)) + if numts is not None: + ds = ds.isel(time=slice(0, numts)) + + ds.to_netcdf(file_out) + print("Saved") From f5229dbce4f49bc20ec7a87efb091b0b8a996ceb Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Wed, 6 Jul 2022 11:23:51 +0200 Subject: [PATCH 4/7] implement create_subsets_emep in python --- .../testdata-minimal/create_subsets_emep.py | 46 +++++++++++++++++++ .../testdata-minimal/create_subsets_emep.sh | 28 ----------- 2 files changed, 46 insertions(+), 28 deletions(-) create mode 100755 scripts/testdata-minimal/create_subsets_emep.py delete mode 100644 scripts/testdata-minimal/create_subsets_emep.sh diff --git a/scripts/testdata-minimal/create_subsets_emep.py b/scripts/testdata-minimal/create_subsets_emep.py new file mode 100755 index 000000000..3ddc7e4ff --- /dev/null +++ b/scripts/testdata-minimal/create_subsets_emep.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +from pathlib import Path + +import xarray as xr + +from tests.fixtures.mscw_ctm import EMEP_DATA_PATH + +SRC_DATA_PATH = Path("/lustre/storeB/project/fou/kl/emep/ModelRuns") +SRC_DATA_PATH /= "2019_REPORTING/EMEP01_L20EC_rv4_33.2017" + +VARIABLES = ["SURF_ug_O3", "SURF_ppb_O3", "SURF_ug_PM10_rh50", "SURF_ug_PM25_rh50", "SURF_ug_NO2"] +LAT, LON = slice(50, 52), slice(10, 12) + +PATHS = ( + EMEP_DATA_PATH / "Base_day.nc", + EMEP_DATA_PATH / "Base_month.nc", + EMEP_DATA_PATH / "Base_fullrun.nc", +) + + +def reduce_dims(ds: xr.Dataset) -> xr.Dataset: + """crop domain and remove "unlimited" from time coordinate""" + del ds.encoding["unlimited_dims"] + return ds.isel(lon=LON, lat=LAT) + + +def atomic_write(ds: xr.Dataset, path: Path, **kwargs) -> None: + """write dataset to a netcdf file atomically""" + tmp = path.with_suffix(".tmp") + try: + ds.to_netcdf(tmp, **kwargs) + tmp.rename(path) + finally: + tmp.unlink(missing_ok=True) + + +def main(): + for path in PATHS: + ds = xr.open_dataset(SRC_DATA_PATH / path.name)[VARIABLES].pipe(reduce_dims) + atomic_write(ds, path) + + +if __name__ == "__main__": + main() diff --git a/scripts/testdata-minimal/create_subsets_emep.sh b/scripts/testdata-minimal/create_subsets_emep.sh deleted file mode 100644 index aeffe1dcd..000000000 --- a/scripts/testdata-minimal/create_subsets_emep.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -x - -INFILEPATH="/lustre/storeB/project/fou/kl/emep/ModelRuns/2019_REPORTING/EMEP01_L20EC_rv4_33.2017/Base_fullrun.nc" -VARIABLES="SURF_ug_O3,SURF_ppb_O3,SURF_ug_PM10_rh50,SURF_ug_PM25_rh50,SURF_ug_NO2" -TMPFILE="./tmp.nc" -LAT=50,52 -LON=10,12 - -ncks -d lat,"$LAT" -d lon,"$LON" -v "$VARIABLES" "$INFILEPATH" "$TMPFILE" - -# netcdf files with dimension set to unlimited takes up a lot of space. -# dump the file, change UNLIMITED to an integer and regenerate the file -OUTFILEPATH="./Base_fullrun.nc" -ncdump "$TMPFILE"| sed -e "s/UNLIMITED/1/" | ncgen -o "$OUTFILEPATH" -rm "$TMPFILE" - - -INFILEPATH="/lustre/storeB/project/fou/kl/emep/ModelRuns/2019_REPORTING/EMEP01_L20EC_rv4_33.2017/Base_month.nc" -ncks -d time,0,2 -d lat,"$LAT" -d lon,"$LON" -v "$VARIABLES" "$INFILEPATH" "$TMPFILE" -OUTFILEPATH="./Base_month.nc" -ncdump "$TMPFILE"| sed -e "s/UNLIMITED/3/" | ncgen -o "$OUTFILEPATH" -rm "$TMPFILE" - -INFILEPATH="/lustre/storeB/project/fou/kl/emep/ModelRuns/2019_REPORTING/EMEP01_L20EC_rv4_33.2017/Base_day.nc" -ncks -d time,0,2 -d lat,"$LAT" -d lon,"$LON" -v "$VARIABLES" "$INFILEPATH" "$TMPFILE" -OUTFILEPATH="./Base_day.nc" -ncdump "$TMPFILE"| sed -e "s/UNLIMITED/3/" | ncgen -o "$OUTFILEPATH" -rm "$TMPFILE" From cc9b8ecf5cf9a55d1fbcddd855795bd95723fb4b Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Wed, 6 Jul 2022 13:24:17 +0200 Subject: [PATCH 5/7] implement TM5_subset in python --- scripts/testdata-minimal/TM5_subset.sh | 8 ---- .../testdata-minimal/create_subsets_tm5.py | 43 +++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) delete mode 100755 scripts/testdata-minimal/TM5_subset.sh create mode 100755 scripts/testdata-minimal/create_subsets_tm5.py diff --git a/scripts/testdata-minimal/TM5_subset.sh b/scripts/testdata-minimal/TM5_subset.sh deleted file mode 100755 index b0e537e9f..000000000 --- a/scripts/testdata-minimal/TM5_subset.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -x -# Extract a few lat/lon points to decrease filesize -LON=20,30 -LAT=20,30 - -ncks -d lat,"$LAT" -d lon,"$LON" aerocom3_TM5-met2010_AP3-CTRL2019_abs550aer_Column_2010_daily.nc ./aerocom3_TM5-met2010_AP3-CTRL2019_abs550aer_Column_2010_daily.nc - -ncks -d lat,"$LAT" -d lon,"$LON" aerocom3_TM5-met2010_AP3-CTRL2019_od550aer_Column_2010_daily.nc ./aerocom3_TM5-met2010_AP3-CTRL2019_od550aer_Column_2010_daily.nc diff --git a/scripts/testdata-minimal/create_subsets_tm5.py b/scripts/testdata-minimal/create_subsets_tm5.py new file mode 100755 index 000000000..08b408e81 --- /dev/null +++ b/scripts/testdata-minimal/create_subsets_tm5.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +from pathlib import Path + +import xarray as xr + +from tests.fixtures.tm5 import TM5_DATA_PATH + +SRC_DATA_PATH = Path("/lustre/storeA/project/aerocom/aerocom-users-database") +SRC_DATA_PATH /= "AEROCOM-PHASE-III-2019/TM5-met2010_AP3-CTRL2019/renamed" + +LON, LAT = slice(20, 30), slice(20, 30) + +PATHS = { + TM5_DATA_PATH / "aerocom3_TM5-met2010_AP3-CTRL2019_abs550aer_Column_2010_daily.nc", + TM5_DATA_PATH / "aerocom3_TM5-met2010_AP3-CTRL2019_od550aer_Column_2010_daily.nc", +} + + +def reduce_dims(ds: xr.Dataset) -> xr.Dataset: + """crop domain""" + return ds.isel(lon=LON, lat=LAT) + + +def atomic_write(ds: xr.Dataset, path: Path, **kwargs) -> None: + """write dataset to a netcdf file atomically""" + tmp = path.with_suffix(".tmp") + try: + ds.to_netcdf(tmp, **kwargs) + tmp.rename(path) + finally: + tmp.unlink(missing_ok=True) + + +def main(): + for path in PATHS: + ds = xr.open_dataset(SRC_DATA_PATH / path.name).pipe(reduce_dims) + atomic_write(ds, path) + + +if __name__ == "__main__": + main() From 37b67de9941837eb345c9fe2f785b6cd0cd515a4 Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Thu, 7 Jul 2022 11:40:15 +0200 Subject: [PATCH 6/7] remove EEA_AQ_eReporting from testdata-minimal --- scripts/testdata-minimal/create_subsets_ghost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/testdata-minimal/create_subsets_ghost.py b/scripts/testdata-minimal/create_subsets_ghost.py index 6296cfa20..5ddcef294 100755 --- a/scripts/testdata-minimal/create_subsets_ghost.py +++ b/scripts/testdata-minimal/create_subsets_ghost.py @@ -16,7 +16,7 @@ assert path_in.is_dir(), f"missing {path_in}" assert path_out.is_dir(), f"missing {path_out}" -datasets = ["EEA_AQ_eReporting", "EBAS"] +datasets = ["EBAS"] freqs = ["hourly", "daily"] From 3f6fbb8137be2f31535c47521d4767faccb4cf96 Mon Sep 17 00:00:00 2001 From: Alvaro Valdebenito Date: Thu, 7 Jul 2022 12:58:44 +0200 Subject: [PATCH 7/7] combine testdata-minimal scripts into single CLI --- scripts/testdata-minimal/README.md | 21 +++++++- scripts/testdata-minimal/__init__.py | 0 scripts/testdata-minimal/__main__.py | 13 +++++ .../{create_subsets_aeronet.py => aeronet.py} | 19 +++---- .../{calc_example_coldata.py => coldata.py} | 32 ++++++------ .../testdata-minimal/create_subsets_ghost.py | 50 ------------------- .../{create_subset_ebas.py => ebas.py} | 26 +++++----- .../{create_subsets_emep.py => emep.py} | 11 ++-- scripts/testdata-minimal/ghost.py | 50 +++++++++++++++++++ .../{create_subsets_tm5.py => tm5.py} | 23 +++------ 10 files changed, 128 insertions(+), 117 deletions(-) create mode 100644 scripts/testdata-minimal/__init__.py create mode 100644 scripts/testdata-minimal/__main__.py rename scripts/testdata-minimal/{create_subsets_aeronet.py => aeronet.py} (93%) mode change 100755 => 100644 rename scripts/testdata-minimal/{calc_example_coldata.py => coldata.py} (52%) mode change 100755 => 100644 delete mode 100755 scripts/testdata-minimal/create_subsets_ghost.py rename scripts/testdata-minimal/{create_subset_ebas.py => ebas.py} (93%) mode change 100755 => 100644 rename scripts/testdata-minimal/{create_subsets_emep.py => emep.py} (83%) mode change 100755 => 100644 create mode 100644 scripts/testdata-minimal/ghost.py rename scripts/testdata-minimal/{create_subsets_tm5.py => tm5.py} (61%) mode change 100755 => 100644 diff --git a/scripts/testdata-minimal/README.md b/scripts/testdata-minimal/README.md index bc781582a..b09deed85 100644 --- a/scripts/testdata-minimal/README.md +++ b/scripts/testdata-minimal/README.md @@ -9,7 +9,26 @@ they are included in the main pyaerocom gihub repository anyway. The minimal test data created from these scripts will usually go to the subdirectory `~/MyPyaerocom/testdata-minimal` Example model and observation data can be found in sub-directories `modeldata` and `obsdata`, respectively. -At this time only `create_subset_ebas.py` is running with the latest version of pyaerocom. +``` bash +python -m scripts.testdata-minimal --help +``` + +``` man +Usage: python -m scripts.testdata-minimal [OPTIONS] COMMAND [ARGS]... + + Crete minimal test datasets for pyaerocom + +Options: + --help Show this message and exit. + +Commands: + Aeronet minimal Aeronet dataset + Colocated collocated data example + EBAS minimal EBAS dataset + EMEP minimal EMEP dataset + GHOST minimal GHOST dataset + TM5 minimal TM5 dataset +``` ## Data usage guidelines diff --git a/scripts/testdata-minimal/__init__.py b/scripts/testdata-minimal/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/testdata-minimal/__main__.py b/scripts/testdata-minimal/__main__.py new file mode 100644 index 000000000..aa0ec1d67 --- /dev/null +++ b/scripts/testdata-minimal/__main__.py @@ -0,0 +1,13 @@ +import typer + +from . import aeronet, coldata, ebas, emep, ghost, tm5 + +main = typer.Typer(help="Crete minimal test datasets for pyaerocom", add_completion=False) +main.command(name="Aeronet")(aeronet.main) +main.command(name="Colocated")(coldata.main) +main.command(name="EBAS")(ebas.main) +main.command(name="EMEP")(emep.main) +main.command(name="GHOST")(ghost.main) +main.command(name="TM5")(tm5.main) + +main() diff --git a/scripts/testdata-minimal/create_subsets_aeronet.py b/scripts/testdata-minimal/aeronet.py old mode 100755 new mode 100644 similarity index 93% rename from scripts/testdata-minimal/create_subsets_aeronet.py rename to scripts/testdata-minimal/aeronet.py index 07336d286..214dc8225 --- a/scripts/testdata-minimal/create_subsets_aeronet.py +++ b/scripts/testdata-minimal/aeronet.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 """ Minimal Aeronet subset for testing purposes """ @@ -9,12 +8,10 @@ from pathlib import Path import numpy as np +import typer import pyaerocom as pya -from tests.fixtures.data_access import TestData - -OUTBASE = TestData("obsdata").path -OUTBASE.mkdir(exist_ok=True) +from tests.fixtures.data_access import DataForTests MIN_NUM_VALID = 300 @@ -36,7 +33,11 @@ revision_files = {} -def main(): +def main( + out_path: Path = typer.Argument(DataForTests("obsdata").path, exists=True, dir_okay=True) +): + """minimal Aeronet dataset""" + loaded = {} for name, varlist in NETWORKS.items(): reader = pya.io.ReadUngridded() @@ -94,7 +95,7 @@ def main(): for name, data in loaded.items(): data_id = IDS[name] - outdir = OUTBASE.joinpath(data_id) + outdir = out_path / data_id # make sure to remove old data if outdir.exists(): print("REMOVING EXISTING DATA FOR {}".format(data_id)) @@ -124,7 +125,3 @@ def main(): len(filelist), name, os.path.dirname(filelist[0]) ) ) - - -if __name__ == "__main__": - main() diff --git a/scripts/testdata-minimal/calc_example_coldata.py b/scripts/testdata-minimal/coldata.py old mode 100755 new mode 100644 similarity index 52% rename from scripts/testdata-minimal/calc_example_coldata.py rename to scripts/testdata-minimal/coldata.py index ee935db6b..938121187 --- a/scripts/testdata-minimal/calc_example_coldata.py +++ b/scripts/testdata-minimal/coldata.py @@ -1,23 +1,26 @@ -#!/usr/bin/env python3 +from pathlib import Path + +import typer import pyaerocom as pya -from tests.fixtures.data_access import TestData +from tests.fixtures.data_access import DataForTests from tests.fixtures.tm5 import CHECK_PATHS -OUTBASE = TestData("coldata").path -OUTBASE.mkdir(exist_ok=True) - +MOD_PATH = DataForTests(CHECK_PATHS.tm5aod).path +OUT_PATH = DataForTests("coldata").path -def main(): - path = TestData(CHECK_PATHS.tm5aod).path - assert path.exists(), f"missing {path}" +def main( + mod_path: Path = typer.Argument(MOD_PATH, exists=True, dir_okay=True), + out_path: Path = typer.Argument(OUT_PATH, exists=True, dir_okay=True), +): + """collocated data example""" - mod = pya.GriddedData(path) + mod = pya.GriddedData(mod_path) obs = pya.io.ReadAeronetSunV3("AeronetSunV3L2Subset.daily").read("od550aer") coldata = pya.colocation.colocate_gridded_ungridded(mod, obs) - coldata.to_netcdf(OUTBASE) + coldata.to_netcdf(out_path) print(coldata.calc_statistics()) coldata.plot_coordinates() @@ -27,13 +30,6 @@ def main(): cgg.data = cgg.data[:, :3] cgg.plot_scatter() - cgg.to_netcdf(OUTBASE) + cgg.to_netcdf(out_path) pya.plot.mapping.plot_nmb_map_colocateddata(cgg) - - -if __name__ == "__main__": - import matplotlib.pyplot as plt - - plt.close("all") - main() diff --git a/scripts/testdata-minimal/create_subsets_ghost.py b/scripts/testdata-minimal/create_subsets_ghost.py deleted file mode 100755 index 5ddcef294..000000000 --- a/scripts/testdata-minimal/create_subsets_ghost.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3 -""" -Create minimal testdataset for GHOST reader -""" -from itertools import product -from pathlib import Path - -import xarray as xr - -import pyaerocom as pya -from tests.fixtures.data_access import TestData - -path_in = Path(pya.const.OUTPUTDIR) / "data/obsdata/GHOST/data" -path_out = TestData("obsdata/GHOST/data").path - -assert path_in.is_dir(), f"missing {path_in}" -assert path_out.is_dir(), f"missing {path_out}" - -datasets = ["EBAS"] - -freqs = ["hourly", "daily"] - -varis = ["pm10", "sconco3"] -datesfiles = ["201810", "201911", "201912"] - -for dsname, freq, var in product(datasets, freqs, varis): - if var == "pm10": - dates = datesfiles - numst = 3 - numts = None if freq == "daily" else 3 - else: - dates = datesfiles[0:1] - numst = 1 - numts = 3 - for date in dates: - file_in = path_in / dsname / freq / var / f"{var}_{date}.nc" - assert file_in.exists(), f"missing {file_in}" - - file_out = path_out / file_in.relative_to(path_in) - file_out.parent.mkdir(exist_ok=True, parents=True) - print(file_in) - print(file_out) - - ds = xr.open_dataset(file_in) - ds = ds.isel(station=slice(0, numst)) - if numts is not None: - ds = ds.isel(time=slice(0, numts)) - - ds.to_netcdf(file_out) - print("Saved") diff --git a/scripts/testdata-minimal/create_subset_ebas.py b/scripts/testdata-minimal/ebas.py old mode 100755 new mode 100644 similarity index 93% rename from scripts/testdata-minimal/create_subset_ebas.py rename to scripts/testdata-minimal/ebas.py index fee35ab6d..9530b9413 --- a/scripts/testdata-minimal/create_subset_ebas.py +++ b/scripts/testdata-minimal/ebas.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 """ Simple script to generate a small enough test data set for the EBAS obs network Works only if the user has access to the standard EBAS data path at Met Norway @@ -6,15 +5,17 @@ import os import shutil +from importlib import resources from pathlib import Path import simplejson +import typer import pyaerocom as pya -from tests.fixtures.data_access import TestData +from tests.fixtures.data_access import DataForTests -OUTBASE = TestData("testdata-minimal/obsdata/EBASMultiColumn").path -SCRIPT_BASE_DIR = TestData("testdata-minimal/scripts").path +OUTBASE = DataForTests("obsdata/EBASMultiColumn").path +SCRIPT_BASE_DIR = DataForTests("scripts").path FILES_DEST = OUTBASE / "data" @@ -24,7 +25,6 @@ NAME = "EBASMC" EBAS_BASE_DIR = "/lustre/storeA/project/aerocom/aerocom1/AEROCOM_OBSDATA/EBASMultiColumn/data/" -assert Path(EBAS_BASE_DIR).is_dir(), f"missing {EBAS_BASE_DIR}" JSON_FILE = SCRIPT_BASE_DIR / "ebas_files.json" # ------------------------------------------------------------ @@ -53,9 +53,10 @@ def check_outdated(filedir): files_invalid = [] files_valid = [] - with open(JSON_FILE, "r") as f: + with resources.path(__package__, JSON_FILE.name) as path: + shutil.copy(path, JSON_FILE) - data = simplejson.load(f) + data = simplejson.loads(JSON_FILE.read_text()) for var, stats in data.items(): for stat, files in stats.items(): @@ -135,12 +136,11 @@ def get_files_var_statnum(data, var, statnum): return files -def main(): +def main(ebas_path: Path = typer.Argument(EBAS_BASE_DIR, exists=True, dir_okay=True)): + """minimal EBAS dataset""" - # reader = pya.io.ReadUngridded(NAME, data_dir=EBAS_BASE_DIR) - reader = pya.io.ReadUngridded( - NAME, - ) + # reader = pya.io.ReadUngridded(NAME, data_dir=ebas_path) + reader = pya.io.ReadUngridded(NAME) r_lowlev = reader.get_lowlevel_reader(NAME) # r_lowlev._dataset_path = ebas_local @@ -226,7 +226,7 @@ def main(): print("NOTHING WILL BE COPIED TO TEST DATA") else: - src = Path(EBAS_BASE_DIR).joinpath("data") + src = ebas_path / "data" print(f"updating test data @ {r_lowlev.DATASET_PATH}") # copy revision file diff --git a/scripts/testdata-minimal/create_subsets_emep.py b/scripts/testdata-minimal/emep.py old mode 100755 new mode 100644 similarity index 83% rename from scripts/testdata-minimal/create_subsets_emep.py rename to scripts/testdata-minimal/emep.py index 3ddc7e4ff..e4a95c7fa --- a/scripts/testdata-minimal/create_subsets_emep.py +++ b/scripts/testdata-minimal/emep.py @@ -1,8 +1,8 @@ -#!/usr/bin/env python3 from __future__ import annotations from pathlib import Path +import typer import xarray as xr from tests.fixtures.mscw_ctm import EMEP_DATA_PATH @@ -36,11 +36,8 @@ def atomic_write(ds: xr.Dataset, path: Path, **kwargs) -> None: tmp.unlink(missing_ok=True) -def main(): +def main(emep_path: Path = typer.Argument(SRC_DATA_PATH, exists=True, dir_okay=True)): + """minimal EMEP dataset""" for path in PATHS: - ds = xr.open_dataset(SRC_DATA_PATH / path.name)[VARIABLES].pipe(reduce_dims) + ds = xr.open_dataset(emep_path / path.name)[VARIABLES].pipe(reduce_dims) atomic_write(ds, path) - - -if __name__ == "__main__": - main() diff --git a/scripts/testdata-minimal/ghost.py b/scripts/testdata-minimal/ghost.py new file mode 100644 index 000000000..2aef202c2 --- /dev/null +++ b/scripts/testdata-minimal/ghost.py @@ -0,0 +1,50 @@ +""" +Create minimal testdataset for GHOST reader +""" +from itertools import product +from pathlib import Path + +import typer +import xarray as xr + +import pyaerocom as pya +from tests.fixtures.data_access import DataForTests + +PATH_IN = Path(pya.const.OUTPUTDIR) / "data/obsdata/GHOST/data" +PATH_OUT = DataForTests("obsdata/GHOST/data").path +DATASETS = ["EBAS"] +FREQS = ["hourly", "daily"] +VARS = ["pm10", "sconco3"] +DATES = ["201810", "201911", "201912"] + + +def main( + path_in: Path = typer.Argument(PATH_IN, exists=True, dir_okay=True), + path_out: Path = typer.Argument(PATH_OUT, exists=True, dir_okay=True), +): + """minimal GHOST dataset""" + for dsname, freq, var in product(DATASETS, FREQS, VARS): + if var == "pm10": + dates = DATES + numst = 3 + numts = None if freq == "daily" else 3 + else: + dates = DATES[0:1] + numst = 1 + numts = 3 + for date in dates: + file_in = path_in / dsname / freq / var / f"{var}_{date}.nc" + assert file_in.exists(), f"missing {file_in}" + + file_out = path_out / file_in.relative_to(path_in) + file_out.parent.mkdir(exist_ok=True, parents=True) + print(file_in) + print(file_out) + + ds = xr.open_dataset(file_in) + ds = ds.isel(station=slice(0, numst)) + if numts is not None: + ds = ds.isel(time=slice(0, numts)) + + ds.to_netcdf(file_out) + print("Saved") diff --git a/scripts/testdata-minimal/create_subsets_tm5.py b/scripts/testdata-minimal/tm5.py old mode 100755 new mode 100644 similarity index 61% rename from scripts/testdata-minimal/create_subsets_tm5.py rename to scripts/testdata-minimal/tm5.py index 08b408e81..f4282e594 --- a/scripts/testdata-minimal/create_subsets_tm5.py +++ b/scripts/testdata-minimal/tm5.py @@ -1,12 +1,14 @@ -#!/usr/bin/env python3 from __future__ import annotations from pathlib import Path +import typer import xarray as xr from tests.fixtures.tm5 import TM5_DATA_PATH +from .emep import atomic_write + SRC_DATA_PATH = Path("/lustre/storeA/project/aerocom/aerocom-users-database") SRC_DATA_PATH /= "AEROCOM-PHASE-III-2019/TM5-met2010_AP3-CTRL2019/renamed" @@ -23,21 +25,8 @@ def reduce_dims(ds: xr.Dataset) -> xr.Dataset: return ds.isel(lon=LON, lat=LAT) -def atomic_write(ds: xr.Dataset, path: Path, **kwargs) -> None: - """write dataset to a netcdf file atomically""" - tmp = path.with_suffix(".tmp") - try: - ds.to_netcdf(tmp, **kwargs) - tmp.rename(path) - finally: - tmp.unlink(missing_ok=True) - - -def main(): +def main(tm5_path: Path = typer.Argument(SRC_DATA_PATH, exists=True, dir_okay=True)): + """minimal TM5 dataset""" for path in PATHS: - ds = xr.open_dataset(SRC_DATA_PATH / path.name).pipe(reduce_dims) + ds = xr.open_dataset(tm5_path / path.name).pipe(reduce_dims) atomic_write(ds, path) - - -if __name__ == "__main__": - main()