Skip to content

Commit

Permalink
clean up testdata-minimal scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
avaldebe committed Jul 6, 2022
1 parent d55fc88 commit bbd2d7b
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 68 deletions.
15 changes: 9 additions & 6 deletions scripts/testdata-minimal/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Scripts for test dataset creation of pyaerocom

This directory consists of scripts to create the minimal test dataset needed
for automatic testing and continuous integration of pyaerocom. The scripts need access to Met Norway's
internal file storage and are therefore
Expand All @@ -8,8 +9,7 @@ they are included in the main pyaerocom GitHub repository anyway.
The minimal test data created from these scripts will usually go to the subdirectory `~/MyPyaerocom/testdata-minimal`
Example model and observation data can be found in sub-directories `modeldata` and `obsdata`, respectively.

At this time only `create_subset_ebas.py` is running with the
latest version of pyaerocom
At this time only `create_subset_ebas.py` is running with the latest version of pyaerocom.

## Data usage guidelines

Expand All @@ -18,31 +18,34 @@ The data is generally NOT intended to be downloaded and used. If you download th
general data policy terms and restrictions of each provided dataset apply. These will be listed in the following.

### AERONET data

See: [https://aeronet.gsfc.nasa.gov/new_web/data_usage.html](https://aeronet.gsfc.nasa.gov/new_web/data_usage.html)

### EBAS data

See: [https://ebas.nilu.no/](https://ebas.nilu.no/)

Under "Data policy".

### Model data

- TM5 :Courtesy of Twan van Noije (KNMI)
- TM5: Courtesy of Twan van Noije (KNMI)

### Satellite data

- MODIS: start with the [MODIS landing page](https://modis.gsfc.nasa.gov/data/)

## Updating testdata for CI

**Note:** The test data has to be updated by hand for CI to pick up the changes!

How to do that:
```

``` bash
cd ~/MyPyaerocom
mkdir -p ~/tmp
tar -cvzf ~/tmp/testdata-minimal.tar.gz testdata-minimal
```

The resulting file `~/tmp/testdata-minimal.tar.gz` then needs to be copied to the right place.
Please ask your fellow developers in case you do not know how to do that.


41 changes: 21 additions & 20 deletions scripts/testdata-minimal/calc_example_coldata.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,39 @@
#!/usr/bin/env python3

import matplotlib.pyplot as plt

import pyaerocom as pya
from tests.fixtures.data_access import TestData
from tests.fixtures.tm5 import CHECK_PATHS

plt.close("all")


OUTBASE = TestData("coldata").path
OUTBASE.mkdir(exist_ok=True)

fpath = TestData(CHECK_PATHS.tm5aod).path
if not fpath.exists():
raise Exception("Unexpected error, please debug")
mod = pya.GriddedData(fpath)

obs = pya.io.ReadAeronetSunV3("AeronetSunV3L2Subset.daily").read("od550aer")
def main():

path = TestData(CHECK_PATHS.tm5aod).path
assert path.exists(), f"missing {path}"

mod = pya.GriddedData(path)
obs = pya.io.ReadAeronetSunV3("AeronetSunV3L2Subset.daily").read("od550aer")

coldata = pya.colocation.colocate_gridded_ungridded(mod, obs)
coldata = pya.colocation.colocate_gridded_ungridded(mod, obs)
coldata.to_netcdf(OUTBASE)
print(coldata.calc_statistics())

coldata.to_netcdf(OUTBASE)
coldata.plot_coordinates()

print(coldata.calc_statistics())
mod = mod.sel(latitude=(0, 3), longitude=(0, 4))
cgg = pya.colocation.colocate_gridded_gridded(mod, mod)
cgg.data = cgg.data[:, :3]

coldata.plot_coordinates()
cgg.plot_scatter()
cgg.to_netcdf(OUTBASE)

mod = mod.sel(latitude=(0, 3), longitude=(0, 4))
cgg = pya.colocation.colocate_gridded_gridded(mod, mod)
cgg.data = cgg.data[:, :3]
pya.plot.mapping.plot_nmb_map_colocateddata(cgg)

cgg.plot_scatter()

cgg.to_netcdf(OUTBASE)
if __name__ == "__main__":
import matplotlib.pyplot as plt

pya.plot.mapping.plot_nmb_map_colocateddata(cgg)
plt.close("all")
main()
68 changes: 26 additions & 42 deletions scripts/testdata-minimal/create_subsets_ghost.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,14 @@
"""
Create minimal testdataset for GHOST reader
"""
from itertools import product
from pathlib import Path

import matplotlib.pyplot as plt
import xarray as xr

import pyaerocom as pya
from tests.fixtures.data_access import TestData

plt.close("all")

path_in = Path(pya.const.OUTPUTDIR) / "data/obsdata/GHOST/data"
path_out = TestData("obsdata/GHOST/data").path

Expand All @@ -25,42 +23,28 @@
varis = ["pm10", "sconco3"]
datesfiles = ["201810", "201911", "201912"]

filename = lambda var, date: f"{var}_{date}.nc"

for dsname in datasets:
for freq in freqs:
indir = path_in / dsname / freq
assert indir.is_dir(), f"missing {indir}"

outdir = path_out / dsname / freq
outdir.mkdir(exist_ok=True)
for var in varis:
if var == "pm10":
dates = datesfiles
numst = 3
numts = None if freq == "daily" else 3
else:
dates = datesfiles[0:1]
numst = 1
numts = 3
for date in dates:
dir_in = indir / var
assert dir_in.is_dir(), f"missing {dir_in}"

dir_out = outdir / var
dir_out.mkdir(exist_ok=True)

fname = filename(var, date)
file_in = dir_in / fname
file_out = dir_out / fname
print(file_in)
print(file_out)
assert file_in.exists, f"missing {file_in}"

ds = xr.open_dataset(file_in)
subset = ds.isel(station=slice(0, numst))
if numts is not None:
subset = subset.isel(time=slice(0, numts))

subset.to_netcdf(file_out)
print("Saved")
for dsname, freq, var in product(datasets, freqs, varis):
if var == "pm10":
dates = datesfiles
numst = 3
numts = None if freq == "daily" else 3
else:
dates = datesfiles[0:1]
numst = 1
numts = 3
for date in dates:
file_in = path_in / dsname / freq / var / f"{var}_{date}.nc"
assert file_in.exists(), f"missing {file_in}"

file_out = path_out / file_in.relative_to(path_in)
file_out.parent.mkdir(exist_ok=True, parents=True)
print(file_in)
print(file_out)

ds = xr.open_dataset(file_in)
ds = ds.isel(station=slice(0, numst))
if numts is not None:
ds = ds.isel(time=slice(0, numts))

ds.to_netcdf(file_out)
print("Saved")

0 comments on commit bbd2d7b

Please sign in to comment.