Commit
write profile json 1st draft
lewisblake committed Jul 21, 2023
1 parent 06cc5b8 commit 2e67107
Showing 2 changed files with 168 additions and 128 deletions.
31 changes: 29 additions & 2 deletions pyaerocom/aeroval/coldatatojson_engine.py
@@ -23,6 +23,8 @@
get_timeseries_file_name,
init_regions_web,
update_regions_json,
process_profile_data,
get_profile_filename,
add_profile_entry_json,
)
from pyaerocom.exceptions import AeroValConfigError, TemporalResolutionError

@@ -213,7 +215,7 @@ def process_coldata(self, coldata: ColocatedData):
add_trends,
trends_min_yrs,
)
breakpoint()

for freq, hm_data in hm_all.items():
fname = get_heatmap_filename(freq)

@@ -282,10 +284,35 @@ def process_coldata(self, coldata: ColocatedData):
# writes json file
_write_stationdata_json(ts_data_weekly_reg, outdir)

if (
"vertical_layer" in coldata.data.attrs
):  # LB: will need some sort of additional flag to deal with the two colocation level types
logger.info("Processing profile data for visualization")

for regid in regnames:
profile_viz = process_profile_data(
data,
regid,
use_country,
periods,
seasons,
)

fname = get_profile_filename(regid, obs_name, var_name_web)

add_profile_entry_json(fname, data, profile_viz, periods, seasons)
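# Note (assumption, for illustration): coldata.data.attrs["vertical_layer"] is expected to be a
# dict-like entry describing the colocated layer, with at least an "end" bound in km (e.g.
# {"start": 0, "end": 1}); add_profile_entry_json reads that "end" value when it extends the
# "z" axis of the per-region profile file, while process_profile_data supplies the regional-mean
# obs/model values for every period and season.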


logger.info(
f"Finished computing json files for {model_name} ({model_var}) vs. "
f"{obs_name} ({obs_var})"
)

dt = time() - t00
logger.info(f"Time expired (TOTAL): {dt:.2f} s")
logger.info(f"Time expired: {dt:.2f} s")
265 changes: 139 additions & 126 deletions pyaerocom/aeroval/coldatatojson_helpers.py
@@ -1160,7 +1160,7 @@ def _process_heatmap_data(
except AeroValTrendsError as e:
msg = f"Failed to calculate trends, and will skip. This was due to {e}"
logger.warning(msg)
breakpoint()

subset = subset.filter_region(
region_id=regid, check_country_meta=use_country
)
@@ -1365,128 +1365,141 @@ def _start_stop_from_periods(periods):
return start_stop(start, stop + 1)


# def get_profile_filename(region, obs_name, var_name_web):
# return f"{region}_{obs_name}_{var_name_web}.json"


# def process_profile_data(
# data,
# region_ids,
# use_weights,
# use_country,
# meta_glob,
# periods,
# seasons,
# add_trends,
# trends_min_yrs,
# ):
# # basically need to do something like process_heatmap_data
# output = {}
# stats_dummy = _init_stats_dummy()
# for freq, coldata in data.items():
# # output[freq] = hm_freq = {}
# for regid, regname in region_ids.items():
# # hm_freq[regname] = {}
# for per in periods:
# for season in seasons:
# use_dummy = coldata is None
# perstr = f"{per}-{season}"
# if use_dummy:
# stats = stats_dummy
# else:
# try:
# subset = _select_period_season_coldata(coldata, per, season)

# trends_successful = False
# if add_trends and freq != "daily":
# # Calculates the start and stop years. min_yrs have a test value of 7 years. Should be set in cfg
# (start, stop) = _get_min_max_year_periods([per])

# if stop - start >= trends_min_yrs:
# try:
# subset_time_series = subset.get_regional_timeseries(
# regid, check_country_meta=use_country
# )

# # (obs_trend, mod_trend) = _make_trends_from_timeseries(
# # subset_time_series["obs"],
# # subset_time_series["mod"],
# # freq,
# # season,
# # start,
# # stop,
# # trends_min_yrs,
# # )

# # trends_successful = True
# except as e:
# msg = f"Failed to access subset, and will skip. This was due to {e}"
# logger.warning(msg)

# subset = subset.filter_region(
# region_id=regid, check_country_meta=use_country
# )

# output["obs"][freq] = np.nanmean(subset.data[0, :, :])
# output["mod"][freq] = np.nanmean(subset.data[1, :, :])
# # stats = _get_extended_stats(subset, use_weights)

# # if add_trends and freq != "daily" and trends_successful:
# # # The whole trends dicts are placed in the stats dict
# # stats["obs_trend"] = obs_trend
# # stats["mod_trend"] = mod_trend

# except (DataCoverageError, TemporalResolutionError) as e:
# output["obs"][freq] = np.nan
# output["mod"][freq] = np.nan

# return output


# def add_profile_entry_json(profile_file, coldata, period, season):
# if os.path.exists(profile_file):
# current = read_json(profile_file)
# else:
# current = {}
# # if not var_name_web in current:
# # current[var_name_web] = {}
# # ov = current[var_name_web]
# model_name = coldata.obs_name
# if not model_name in current:
# current[model_name] = {}
# # on = ov[obs_name]

# if not "z" in current[model_name]:
# current[model_name]["z"] = [
# 0
# ] # initalize with 0 # LB: try writing this to a list and see is simple_json complains
# current[model_name]["z"].append(coldata.data.attrs["vertical_layer"]["end"])

# if not "obs" in current[model_name]:
# current[model_name]["obs"] = {}

# if not coldata.ts_type in current[model_name]["obs"]:
# current[model_name]["obs"][coldata.ts_type] = {}

# if not "mod" in current[model_name]:
# current[model_name]["mod"][coldata.ts_type] = {}

# if not "metadata" in current[model_name]:
# # should be same for all. hardcoded because no way to pass this all along now
# current["metadata"] = {
# "z_unit": "km",
# "z_description": "Altitude ASL",
# "z_long_description": "Altitude Above Sea Level"
# "unit": "km-1", #coldata.meta["var_units"][0], # list with two elemetns, get one. pyaerocm will try to get into units of obs, so should be this one but check later

# }

# # current[obs_name]["obs"][coldata.ts_type]
# # if not vert_code in on:
# # on[vert_code] = {}
# # ovc = on[vert_code]
# # if not model_name in ovc:
# # ovc[model_name] = {}
# # mn = ovc[model_name]
# # mn[model_var] = result
# # write_json(current, profile_file, ignore_nan=True)
def get_profile_filename(region, obs_name, var_name_web):
return f"{region}_{obs_name}_{var_name_web}.json"


def process_profile_data(
    data,
    region_id,
    use_country,
    periods,
    seasons,
):
    # Basically needs to do something like _process_heatmap_data, but for vertical profiles:
    # one regional-mean value per frequency and per period/season.
    output = {"obs": {}, "mod": {}}
    for freq, coldata in data.items():
        if freq not in output["obs"]:
            output["obs"][freq] = {}
        if freq not in output["mod"]:
            output["mod"][freq] = {}

        for per in periods:
            for season in seasons:
                use_dummy = coldata is None
                perstr = f"{per}-{season}"
                if use_dummy:
                    output["obs"][freq][perstr] = np.nan
                    output["mod"][freq][perstr] = np.nan
                else:
                    try:
                        subset = _select_period_season_coldata(coldata, per, season)

                        # TODO: trend computation (as done in _process_heatmap_data) is not yet
                        # wired up for profiles.
                        subset_time_series = subset.get_regional_timeseries(
                            region_id, check_country_meta=use_country
                        )

                        # Note: explicit region filtering (subset.filter_region) should already be
                        # covered by get_regional_timeseries above, but double-check.
                        output["obs"][freq][perstr] = np.nanmean(subset_time_series["obs"])
                        output["mod"][freq][perstr] = np.nanmean(subset_time_series["mod"])
                    except Exception as e:
                        msg = f"Failed to access subset timeseries, and will skip. This was due to {e}"
                        logger.warning(msg)

                        output["obs"][freq][perstr] = np.nan
                        output["mod"][freq][perstr] = np.nan

    return output
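# Rough shape of the returned dict (illustration; keys follow f"{per}-{season}" and the
# numeric values are placeholders):
# {
#     "obs": {"monthly": {"<period>-<season>": 0.023, ...}, ...},
#     "mod": {"monthly": {"<period>-<season>": 0.021, ...}, ...},
# }
# i.e. one regional-mean value (or NaN) per frequency and per period/season.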


def add_profile_entry_json(profile_file, data, profile_viz, periods, seasons):
    if os.path.exists(profile_file):
        current = read_json(profile_file)
    else:
        current = {}
    for freq, coldata in data.items():
        model_name = coldata.model_name
        if model_name not in current:
            current[model_name] = {}

        if "z" not in current[model_name]:
            # initialize with 0  # LB: try writing this as a list and see if simplejson complains
            current[model_name]["z"] = [0]
        current[model_name]["z"].append(coldata.data.attrs["vertical_layer"]["end"])

        if "obs" not in current[model_name]:
            current[model_name]["obs"] = {}

        if freq not in current[model_name]["obs"]:
            current[model_name]["obs"][freq] = {}

        if "mod" not in current[model_name]:
            current[model_name]["mod"] = {}

        if freq not in current[model_name]["mod"]:
            current[model_name]["mod"][freq] = {}

        for per in periods:
            for season in seasons:
                perstr = f"{per}-{season}"

                if perstr not in current[model_name]["obs"][freq]:
                    current[model_name]["obs"][freq][perstr] = []
                if perstr not in current[model_name]["mod"][freq]:
                    current[model_name]["mod"][freq][perstr] = []

                # one regional-mean value is appended per vertical layer (i.e. per call)
                current[model_name]["obs"][freq][perstr].append(profile_viz["obs"][freq][perstr])
                current[model_name]["mod"][freq][perstr].append(profile_viz["mod"][freq][perstr])

        if "metadata" not in current:
            # should be the same for all entries; hardcoded because there is no way to pass
            # this along right now
            current["metadata"] = {
                "z_unit": "km",
                "z_description": "Altitude ASL",
                "z_long_description": "Altitude Above Sea Level",
                # coldata.meta["var_units"] is a list with two elements; take the obs one.
                # pyaerocom will try to convert into the units of the obs, so it should be
                # this one, but check later.
                "unit": "km-1",
            }

    write_json(current, profile_file, ignore_nan=True)
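# Rough shape of the JSON file written above (illustration; the model name is hypothetical and
# "<period>-<season>" stands for keys built as f"{per}-{season}"):
# {
#     "SomeModel": {
#         "z": [0, 1.0, ...],  # initial 0 plus one "vertical_layer" end per call
#         "obs": {"monthly": {"<period>-<season>": [0.023, ...]}},
#         "mod": {"monthly": {"<period>-<season>": [0.021, ...]}},
#     },
#     "metadata": {"z_unit": "km", "z_description": "Altitude ASL",
#                  "z_long_description": "Altitude Above Sea Level", "unit": "km-1"},
# }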
