Skip to content

Commit

Permalink
modified init: paths in config file now point to the users home folder
Browse files Browse the repository at this point in the history
  • Loading branch information
ajwdewit committed Dec 8, 2023
1 parent 49a984f commit ebacae6
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 74 deletions.
54 changes: 29 additions & 25 deletions agera5tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from . import util

__version__ = "2.0.10"
__version__ = "2.0.11"

def setup_logging(config, has_filesystem):
"""sets up the logging system for both logging to file and to console.
Expand Down Expand Up @@ -107,41 +107,45 @@ def read_config(mk_paths=True):
:return:a DotMap object with the configuration
"""

default_config = True
has_config = False
if "AGERA5TOOLS_CONFIG" in os.environ:
agera5t_config = Path(os.environ["AGERA5TOOLS_CONFIG"]).absolute()
default_config = False
print(f"using config from {agera5t_config}")
has_config = True
else:
agera5t_config = Path(__file__).parent / "agera5tools.yaml"
msg = "No config found: Using default AGERA5TOOLS configuration!"
msg = "No config found, use `agera5tools init` to generate one!"
click.echo(msg)
print(f"using config from {agera5t_config}")

try:
with open(agera5t_config) as fp:
r = yaml.safe_load(fp)
except Exception as e:
msg = f"Failed to read AGERA5Tools configuration from {agera5t_config}"
click.echo(msg)
sys.exit()

c = DotMap(r, _dynamic=False)
# Update config values into proper objects
c.region.boundingbox = util.BoundingBox(**c.region.boundingbox)
c.data_storage.netcdf_path = Path(c.data_storage.netcdf_path)
c.data_storage.tmp_path = Path(c.data_storage.tmp_path)
c.data_storage.csv_path = Path(c.data_storage.csv_path)
c.logging.log_path = Path(c.logging.log_path)
if mk_paths:
c.data_storage.tmp_path.mkdir(exist_ok=True, parents=True)
c.data_storage.csv_path.mkdir(exist_ok=True, parents=True)
c.logging.log_path.mkdir(exist_ok=True, parents=True)
c = None
if has_config:
try:
with open(agera5t_config) as fp:
r = yaml.safe_load(fp)
except Exception as e:
msg = f"Failed to read AGERA5Tools configuration from {agera5t_config}"
click.echo(msg)
sys.exit()

c = DotMap(r, _dynamic=False)
# Update config values into proper objects
c.region.boundingbox = util.BoundingBox(**c.region.boundingbox)
c.data_storage.netcdf_path = Path(c.data_storage.netcdf_path)
c.data_storage.tmp_path = Path(c.data_storage.tmp_path)
c.data_storage.csv_path = Path(c.data_storage.csv_path)
c.logging.log_path = Path(c.logging.log_path)
if mk_paths:
c.data_storage.netcdf_path.mkdir(exist_ok=True, parents=True)
c.data_storage.tmp_path.mkdir(exist_ok=True, parents=True)
c.data_storage.csv_path.mkdir(exist_ok=True, parents=True)
c.logging.log_path.mkdir(exist_ok=True, parents=True)

return c

has_filesystem = False if "READTHEDOCS" in os.environ else True
config = read_config(mk_paths=has_filesystem)
setup_logging(config, has_filesystem)
if config:
setup_logging(config, has_filesystem)


from .dump_grid import dump_grid
Expand Down
10 changes: 5 additions & 5 deletions agera5tools/agera5tools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
version: 0.1
logging:
# Details for the log. Log levels follow conventions of the python logging framework.
log_path: /tmp/agera5/logs
log_path: /USERHOME/agera5/logs
log_fname: agera5tools.log
log_level_console: WARNING
log_level_file: INFO
Expand Down Expand Up @@ -57,16 +57,16 @@ database:
# SQLAlchemy database URL: https://docs.sqlalchemy.org/en/20/core/engines.html
# Note that the URL may contain the database password in plain text which is a security
# risk.
dsn: sqlite:////tmp/agera5/agera5.db
dsn: sqlite:////USERHOME/agera5/agera5.db
agera5_table_name: weather_grid_agera5
grid_table_name: grid_agera5
chunk_size: 10000
data_storage:
# Storage path for NetCDF files, CSV files and temporary storage.
netcdf_path: /tmp/agera5/ncfiles/
netcdf_path: /USERHOME/agera5/ncfiles/
keep_netcdf: yes
tmp_path: /tmp/agera5/tmp
csv_path: /tmp/agera5/csv
tmp_path: /USERHOME/agera5/tmp
csv_path: /USERHOME/agera5/csv
variables:
# Select which variables should be downloaded from the CDS
Temperature_Air_2m_Mean_24h: yes
Expand Down
7 changes: 4 additions & 3 deletions agera5tools/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from . import config
from . import __version__

selected_variables = [varname for varname, selected in config.variables.items() if selected]

@click.group()
@click.version_option(version=__version__, prog_name="agera5tools")
Expand Down Expand Up @@ -52,6 +51,7 @@ def cmd_extract_point(longitude, latitude, startdate, enddate, output=None):
f"not within the boundingbox of this setup"))
sys.exit()
startdate, enddate = check_date_range(startdate, enddate)
selected_variables = [varname for varname, selected in config.variables.items() if selected]
df = extract_point(selected_variables, point, startdate, enddate)
if output is not None:
output = Path(output)
Expand Down Expand Up @@ -132,8 +132,9 @@ def cmd_init():
- Filling the grid table with the reference grid.
"""
try:
init()
print(f"AgERA5tools successfully initialized!.")
success = init()
if success:
print(f"AgERA5tools successfully initialized!.")
# except RuntimeError as e:
# print(f"AgERA5tools failed to initialize: {e}")
except KeyboardInterrupt:
Expand Down
5 changes: 3 additions & 2 deletions agera5tools/dump_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

CMD_MODE = True if os.environ["CMD_MODE"] == "1" else False

selected_variables = [varname for varname, selected in config.variables.items() if selected]

def dump(day, bbox, add_gridid=False):
"""Converts the data for all AgERA5 variables for given day to a pandas dataframe.
Expand All @@ -31,6 +30,7 @@ def dump(day, bbox, add_gridid=False):
else:
raise RuntimeError(msg)

selected_variables = [varname for varname, selected in config.variables.items() if selected]
fnames = create_agera5_fnames(config.data_storage.netcdf_path, selected_variables, day)
ds = xr.open_mfdataset(fnames)
ds = ds.sel(lon=slice(bbox.lon_min, bbox.lon_max), lat=slice(bbox.lat_max, bbox.lat_min))
Expand All @@ -48,7 +48,7 @@ def clip(day, bbox, add_gridid=False):
:param day: the date for which to clip
:param bbox: a BoundingBox object
:param add_idgrid: Add a grid ID (True) or not (False - default)
:param add_gridid: Add a grid ID (True) or not (False - default)
:return: an xarray dataset containing all select AgERA5 variables for the given bounding box and day
"""
in_bbox = config.region.boundingbox.region_in_bbox(bbox)
Expand All @@ -60,6 +60,7 @@ def clip(day, bbox, add_gridid=False):
else:
raise RuntimeError(msg)

selected_variables = [varname for varname, selected in config.variables.items() if selected]
fnames = create_agera5_fnames(config.data_storage.netcdf_path, selected_variables, day)
ds = xr.open_mfdataset(fnames)
if add_gridid:
Expand Down
3 changes: 1 addition & 2 deletions agera5tools/extract_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
CMD_MODE = True if os.environ["CMD_MODE"] == "1" else False
from .util import create_target_fname, convert_to_celsius
from . import config
selected_variables = [varname for varname, selected in config.variables.items() if selected]


def extract_point(point, startday, endday):
Expand All @@ -19,7 +18,7 @@ def extract_point(point, startday, endday):
:param endday: the end date
:return: a dataframe with AgERA5 meteo variables
"""

selected_variables = [varname for varname, selected in config.variables.items() if selected]
df_final = pd.DataFrame()
for day in pd.date_range(startday, endday):
fnames = [create_target_fname(v, day, config.data_storage.netcdf_path) for v in selected_variables]
Expand Down
129 changes: 103 additions & 26 deletions agera5tools/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,51 +10,104 @@
- The start year to begin downloading AgERA5
"""
import sys, os
import platform
import time
from pathlib import Path
import shutil
import logging
from types import SimpleNamespace

import click
import sqlalchemy as sa

from . import config
from .dump_grid import dump_grid
from .util import chunker
from .util import chunker, get_user_home


def make_paths():
    """Create the storage and logging directories named in the configuration.

    The directories are created in this order: NetCDF path, temporary path,
    CSV path and log path. Missing parent directories are created as well and
    directories that already exist are left untouched.
    """
    locations = (
        ("data_storage", "netcdf_path"),
        ("data_storage", "tmp_path"),
        ("data_storage", "csv_path"),
        ("logging", "log_path"),
    )
    for section, option in locations:
        # Look each path up just before creating it, so a missing setting
        # only surfaces after the preceding directories have been made.
        directory = getattr(getattr(config, section), option)
        directory.mkdir(parents=True, exist_ok=True)


def read_CDS_config(path):
    """Read the recognised settings from a ``.cdsapirc`` style file.

    Every line that contains a colon is split at its first colon into a name
    and a value. Surrounding whitespace is removed from both parts, so a line
    written as ``key : 123:abc`` is recognised just like ``key: 123:abc``.
    Lines without a colon and names other than ``url``, ``key`` and ``verify``
    are ignored.

    :param path: location of the file to read
    :return: a SimpleNamespace with one string attribute for each of ``url``,
        ``key`` and ``verify`` found in the file; settings that do not occur
        in the file are absent from the namespace
    """
    wanted = ("url", "key", "verify")
    settings = {}
    with open(path) as f:
        for line in f:
            # Split on the first colon only: the value itself may contain
            # colons (the URL scheme, the "uid:key" pair).
            name, sep, value = line.partition(":")
            if not sep:
                continue
            name = name.strip()
            if name in wanted:
                settings[name] = value.strip()
    return SimpleNamespace(**settings)


def check_CDS_credentials(config, CDS_config):
    """Check if the UID/key in the current .cdsapirc file (CDS_config) match the
    ones provided in the YAML file (config).

    The ``key`` entry of CDS_config is expected to look like ``<uid>:<key>``;
    spaces in it are ignored. An entry that is missing or that holds no colon
    cannot be compared and is reported as a mismatch instead of raising.

    :param config: configuration object providing ``cdsapi.uid`` and ``cdsapi.key``
    :param CDS_config: object whose ``key`` attribute holds the ``<uid>:<key>``
        string read from the .cdsapirc file
    :return: True when both UID and key are equal, False otherwise
    """
    stored = getattr(CDS_config, "key", None)
    if stored is None:
        # The .cdsapirc file had no "key" line at all.
        return False

    # Split at the first colon only, so a key that itself contains a colon
    # is compared in full rather than breaking the unpacking.
    uid_file, sep, key_file = str(stored).replace(" ", "").partition(":")
    if not sep:
        return False

    return (uid_file, key_file) == (str(config.cdsapi.uid), str(config.cdsapi.key))



def set_CDSAPI_credentials():
"""Sets the credentials for the Copernicus Climate Data Store.
"""
home = Path.home()
cdsapirc = home / ".cdsapirc"
credentials = (f"url: {config.cdsapi.url}\n"
f"key: {config.cdsapi.uid}:{config.cdsapi.key}\n"
"verify: 1\n")

click.echo(f"Checking credentials for the Copernicus Climate Data Store.")
if not cdsapirc.exists():
credentials = (f"url: {config.cdsapi.url}\n"
f"key: {config.cdsapi.uid}:{config.cdsapi.key}\n"
"verify: 1\n")
with open(cdsapirc, "w") as fp:
fp.write(credentials)
click.echo(f"Succesfully created .cdsapirc file at {cdsapirc}")
click.echo(f"Successfully created .cdsapirc file at {cdsapirc}")
else:
click.echo(f"The .cdsapirc file already exists at {cdsapirc}")
CDS_config = read_CDS_config(cdsapirc)
matches = check_CDS_credentials(config, CDS_config)
if matches:
click.echo("OK: Credentials in .cdsapirc file match with the ones in agera5tools.yaml.")
else:
msg = "WARNING: Credentials in .cdsapirc file do NOT match with ones in agera5tools.yaml."
click.echo(msg)
r = click.confirm(f"Generate a new .cdsapirc file?")
if r:
with open(cdsapirc, "w") as fp:
fp.write(credentials)
click.echo(f"Successfully created .cdsapirc file at {cdsapirc}")
else:
click.echo("Leaving current .cdsapirc file as is.")


def create_AgERA5_config():
"""Create a config file for AgERA5tools in the current folder.
"""
if "AGERA5TOOLS_CONFIG" in os.environ:
# Config already defined, we do not need a new one
return

template_agera5t_config = Path(__file__).parent / "agera5tools.yaml"
agera5_conf = Path.cwd() / "agera5tools.yaml"
if agera5_conf.exists():
r = click.confirm(f"the file '{agera5_conf}' already exists, overwrite?")
if r is False:
return
return False

# Write a new config file, but first replace the /USERHOME/ with the users
# actual home directory
home = get_user_home() + "/"
template_agera5t_config = Path(__file__).parent / "agera5tools.yaml"
agera5t_config = open(template_agera5t_config).read()
agera5t_config = agera5t_config.replace("/USERHOME/", home)
with open(agera5_conf, "w") as fp:
fp.write(agera5t_config)

shutil.copy(template_agera5t_config, agera5_conf)
click.echo(f"Successfully created agera5tools config file at: {agera5_conf}")
msg = f"Successfully created agera5tools config file at: \n {agera5_conf}"
click.echo(msg)

return True


def fill_grid_table():
Expand All @@ -78,16 +131,20 @@ def fill_grid_table():
recs = df.to_dict(orient="records")
nrecs_written = 0
t1 = time.time()
with engine.begin() as DBconn:
ins = tbl.insert()
for chunk in chunker(recs, config.database.chunk_size):
DBconn.execute(ins, chunk)
nrecs_written += len(chunk)
msg = f"Written {nrecs_written} from total {len(recs)} records to database."
logger.info(msg)
msg = f"Written grid definition to database in {time.time() - t1} seconds."
logger.info(msg)

try:
with engine.begin() as DBconn:
ins = tbl.insert()
for chunk in chunker(recs, config.database.chunk_size):
DBconn.execute(ins, chunk)
nrecs_written += len(chunk)
msg = f"Written {nrecs_written} from total {len(recs)} records to database."
logger.info(msg)
msg = f"Written grid definition to database in {time.time() - t1} seconds."
logger.info(msg)
except sa.exc.IntegrityError as e:
msg = f"Grid definition already exists in grid table! No records written."
click.echo(msg)
logger.info(msg)


def build_database():
Expand Down Expand Up @@ -120,12 +177,32 @@ def build_database():
sys.exit()

def init():
create_AgERA5_config()
click.confirm(("\nIf this is the first time you run `init` you probably want to abort now and "
"inspect/update your configuration file first. Continue?"), abort=True)

if "AGERA5TOOLS_CONFIG" not in os.environ:
first_time = create_AgERA5_config()
if first_time:
msg = ("\nYou just created a new configuration file time. Now carry out the following steps:\n"
"1) inspect/update your configuration file first and update the paths for data storage. "
"Currently all paths point to your home folder, which may not be suitable.\n"
"2) Set the AGERA5TOOLS_CONFIG environment variable to the location of the "
"configuration file.\n"
"3) Next rerun `init` to finalize the initialization\n")
else:
msg = ("\nExisting configuration file was found. Now carry out the following steps:\n"
"1) inspect/update your configuration file first and update the paths for data storage. "
"Currently all paths point to your home folder, which may not be suitable.\n"
"2) Set the AGERA5TOOLS_CONFIG environment variable to the location of the "
"configuration file\n"
"3) Next rerun `init` to finalize the initialization\n")
click.echo(msg)
return False

set_CDSAPI_credentials()
make_paths()
build_database()
fill_grid_table()

return True

if __name__ == "__main__":
init()
3 changes: 1 addition & 2 deletions agera5tools/mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
from .build import unpack_cds_download, convert_ncfiles_to_dataframe, df_to_csv, df_to_database
from . import config

selected_variables = [varname for varname, selected in config.variables.items() if selected]


def find_days_in_database():
"""Finds the available days in the AgERA5 database by querying the time-series on the
Expand Down Expand Up @@ -106,6 +104,7 @@ def mirror(to_csv=True):
:param to_csv: Flag indicating if a compressed CSV file should be written.
"""
logger = logging.getLogger(__name__)
selected_variables = [varname for varname, selected in config.variables.items() if selected]
days = find_days_to_update()
if days:
logger.info(f"Found following days for updating AgERA5: {days}")
Expand Down
Loading

0 comments on commit ebacae6

Please sign in to comment.