Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update testdata-minimal scripts #691

Draft
wants to merge 8 commits into
base: main-dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions pyaerocom/scripts/testdata-minimal/TM5_subset.sh

This file was deleted.

47 changes: 0 additions & 47 deletions pyaerocom/scripts/testdata-minimal/calc_example_coldata.py

This file was deleted.

28 changes: 0 additions & 28 deletions pyaerocom/scripts/testdata-minimal/create_subsets_emep.sh

This file was deleted.

72 changes: 0 additions & 72 deletions pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Scripts for test dataset creation of pyaerocom

This directory consists of scripts to create the minimal test dataset needed
for automatic testing and continuous integration of pyaerocom. The scripts need access to Met Norway's
internal file storage and are therefore
Expand All @@ -8,8 +9,7 @@ they are included in the main pyaerocom GitHub repository anyway.
The minimal test data created from these scripts will usually go to the subdirectory `~/MyPyaerocom/testdata-minimal`
Example model and observation data can be found in sub-directories `modeldata` and `obsdata`, respectively.

At this time only `create_subset_ebas.py` is running with the
latest version of pyaerocom
At this time only `create_subset_ebas.py` is running with the latest version of pyaerocom.

## Data usage guidelines

Expand All @@ -18,31 +18,34 @@ The data is generally NOT intended to be downloaded and used. If you download th
general data policy terms and restrictions of each provided dataset apply. These will be listed in the following.

### AERONET data

See: [https://aeronet.gsfc.nasa.gov/new_web/data_usage.html](https://aeronet.gsfc.nasa.gov/new_web/data_usage.html)

### EBAS data

See: [https://ebas.nilu.no/](https://ebas.nilu.no/)

Under "Data policy".

### Model data

- TM5 :Courtesy of Twan van Noije (KNMI)
- TM5: Courtesy of Twan van Noije (KNMI)

### Satellite data

- MODIS: start with the [MODIS landing page](https://modis.gsfc.nasa.gov/data/)

## Updating testdata for CI

**Note:** The test data has to be updated by hand for CI to pick up the changes!

How to do that:
```

``` bash
cd ~/MyPyaerocom
mkdir -p ~/tmp
tar -cvzf ~/tmp/testdata-minimal.tar.gz testdata-minimal
```

The resulting file `~/tmp/testdata-minimal.tar.gz` then needs to be copied to the right place.
Please ask your fellow developers in case you do not know how to do that.


39 changes: 39 additions & 0 deletions scripts/testdata-minimal/calc_example_coldata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env python3

import pyaerocom as pya
from tests.fixtures.data_access import TestData
from tests.fixtures.tm5 import CHECK_PATHS

# Output location for the colocated-data files written by main().
# NOTE(review): presumably TestData(...).path is a pathlib.Path inside the
# local test-data directory -- confirm against tests.fixtures.data_access.
OUTBASE = TestData("coldata").path
# Runs at import time; exist_ok=True makes repeated runs a no-op here.
OUTBASE.mkdir(exist_ok=True)


def main():
    """Create the example colocated-data files and diagnostic plots.

    Steps:

    1. Load the gridded model file referenced by ``CHECK_PATHS.tm5aod``
       and the ``AeronetSunV3L2Subset.daily`` observations (``od550aer``).
    2. Colocate model against observations, write the result to
       ``OUTBASE``, print its statistics and plot the coordinates.
    3. Colocate a small lat/lon subset of the model with itself, trim the
       result, plot it and write it to ``OUTBASE`` as well.

    Raises:
        FileNotFoundError: if the model test file does not exist.
    """
    path = TestData(CHECK_PATHS.tm5aod).path
    if not path.exists():
        # Explicit raise instead of ``assert``: assertions are removed when
        # Python runs with -O, which would hide the missing-file problem.
        raise FileNotFoundError(f"missing {path}")

    mod = pya.GriddedData(path)
    obs = pya.io.ReadAeronetSunV3("AeronetSunV3L2Subset.daily").read("od550aer")

    # Gridded model vs. ungridded observations.
    coldata = pya.colocation.colocate_gridded_ungridded(mod, obs)
    coldata.to_netcdf(OUTBASE)
    print(coldata.calc_statistics())

    coldata.plot_coordinates()

    # Gridded vs. gridded example: the model subset is colocated with itself.
    mod = mod.sel(latitude=(0, 3), longitude=(0, 4))
    cgg = pya.colocation.colocate_gridded_gridded(mod, mod)
    # Keep only the first three entries along the second axis
    # (presumably to keep the stored example small -- TODO confirm).
    cgg.data = cgg.data[:, :3]

    cgg.plot_scatter()
    cgg.to_netcdf(OUTBASE)

    pya.plot.mapping.plot_nmb_map_colocateddata(cgg)


if __name__ == "__main__":
    # Script entry point: close any open matplotlib figures, then run main().
    from matplotlib import pyplot

    pyplot.close("all")
    main()
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""simple script to generate a small enough test data set for the EBAS obs network
"""
Simple script to generate a small enough test data set for the EBAS obs network
Works only if the user has access to the standard EBAS data path at Met Norway
"""

Expand All @@ -12,40 +11,21 @@
import simplejson

import pyaerocom as pya
from tests.fixtures.data_access import TestData

# import pyaerocom.access_testdata as td
from pyaerocom.access_testdata import AccessTestData

# from getpass import getuser
#
# if getuser() == 'jonasg':
# ebas_local = os.path.join(pya.const.OUTPUTDIR, 'data/obsdata/EBASMultiColumn/data')
# assert os.path.exists(ebas_local)
# else:
# ebas_local=None

OUTBASE = TestData("testdata-minimal/obsdata/EBASMultiColumn").path
SCRIPT_BASE_DIR = TestData("testdata-minimal/scripts").path

tda = AccessTestData()

TESTDATADIR = tda.basedir

OUTBASE = Path(TESTDATADIR).joinpath("testdata-minimal/obsdata/EBASMultiColumn")
SCRIPT_BASE_DIR = Path(TESTDATADIR).joinpath("testdata-minimal/scripts")

FILES_DEST = OUTBASE.joinpath("data")
FILES_DEST = OUTBASE / "data"

UPDATE = True
UPDATE_EXISTING = False
SEARCH_PROBLEM_FILES = False
NAME = "EBASMC"

# if ebas_local is not None:
# FILES_SRC = ebas_local
# else:
EBAS_BASE_DIR = "/lustre/storeA/project/aerocom/aerocom1/AEROCOM_OBSDATA/EBASMultiColumn/data/"
assert os.path.exists(EBAS_BASE_DIR)

JSON_FILE = SCRIPT_BASE_DIR.joinpath("ebas_files.json")
assert Path(EBAS_BASE_DIR).is_dir(), f"missing {EBAS_BASE_DIR}"
JSON_FILE = SCRIPT_BASE_DIR / "ebas_files.json"

# ------------------------------------------------------------
# add some files with known problems
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Goal
Minimal Aeronet subset for testing purposes
"""

import os
Expand All @@ -12,11 +11,10 @@
import numpy as np

import pyaerocom as pya
from tests.fixtures.data_access import TestData

OUTBASE = Path(pya.const._TESTDATADIR).joinpath("obsdata")

if not OUTBASE.exists():
OUTBASE.mkdir()
OUTBASE = TestData("obsdata").path
OUTBASE.mkdir(exist_ok=True)

MIN_NUM_VALID = 300

Expand All @@ -36,8 +34,9 @@
]

revision_files = {}
if __name__ == "__main__":


def main():
loaded = {}
for name, varlist in NETWORKS.items():
reader = pya.io.ReadUngridded()
Expand Down Expand Up @@ -125,3 +124,7 @@
len(filelist), name, os.path.dirname(filelist[0])
)
)


if __name__ == "__main__":
main()
Loading