Commit
write profile json 1st draft
lewisblake committed Jul 21, 2023
1 parent 06cc5b8 commit 2e67107
Showing 2 changed files with 168 additions and 128 deletions.
31 changes: 29 additions & 2 deletions pyaerocom/aeroval/coldatatojson_engine.py
@@ -23,6 +23,8 @@
get_timeseries_file_name,
init_regions_web,
update_regions_json,
process_profile_data,
get_profile_filename,
add_profile_entry_json,
)
from pyaerocom.exceptions import AeroValConfigError, TemporalResolutionError

@@ -213,7 +215,7 @@ def process_coldata(self, coldata: ColocatedData):
add_trends,
trends_min_yrs,
)
breakpoint()

for freq, hm_data in hm_all.items():
fname = get_heatmap_filename(freq)

@@ -282,10 +284,35 @@ def process_coldata(self, coldata: ColocatedData):
# writes json file
_write_stationdata_json(ts_data_weekly_reg, outdir)

if (
"vertical_layer" in coldata.data.attrs
):  # LB: will need some sort of additional flag to deal with the two colocation level types
logger.info("Processing profile data for visualization")

for regid in regnames:
profile_viz = process_profile_data(
data,
regid,
use_country,
periods,
seasons,
)

fname = get_profile_filename(regid, obs_name, var_name_web)

add_profile_entry_json(fname, data, profile_viz, periods, seasons)
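# Note (assumption, for illustration): coldata.data.attrs["vertical_layer"] is expected to be a
# dict-like entry describing the colocated layer, with at least an "end" bound in km (e.g.
# {"start": 0, "end": 1}); add_profile_entry_json reads that "end" value when it extends the
# "z" axis of the per-region profile file, while process_profile_data supplies the regional-mean
# obs/model values for every period and season.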


logger.info(
f"Finished computing json files for {model_name} ({model_var}) vs. "
f"{obs_name} ({obs_var})"
)

dt = time() - t00
logger.info(f"Time expired (TOTAL): {dt:.2f} s")
logger.info(f"Time expired: {dt:.2f} s")
265 changes: 139 additions & 126 deletions pyaerocom/aeroval/coldatatojson_helpers.py
@@ -1160,7 +1160,7 @@ def _process_heatmap_data(
except AeroValTrendsError as e:
msg = f"Failed to calculate trends, and will skip. This was due to {e}"
logger.warning(msg)
breakpoint()

subset = subset.filter_region(
region_id=regid, check_country_meta=use_country
)
@@ -1365,128 +1365,141 @@ def _start_stop_from_periods(periods):
return start_stop(start, stop + 1)


# def get_profile_filename(region, obs_name, var_name_web):
# return f"{region}_{obs_name}_{var_name_web}.json"


# def process_profile_data(
# data,
# region_ids,
# use_weights,
# use_country,
# meta_glob,
# periods,
# seasons,
# add_trends,
# trends_min_yrs,
# ):
# # basically need to do something like process_heatmap_data
# output = {}
# stats_dummy = _init_stats_dummy()
# for freq, coldata in data.items():
# # output[freq] = hm_freq = {}
# for regid, regname in region_ids.items():
# # hm_freq[regname] = {}
# for per in periods:
# for season in seasons:
# use_dummy = coldata is None
# perstr = f"{per}-{season}"
# if use_dummy:
# stats = stats_dummy
# else:
# try:
# subset = _select_period_season_coldata(coldata, per, season)

# trends_successful = False
# if add_trends and freq != "daily":
# # Calculates the start and stop years. min_yrs have a test value of 7 years. Should be set in cfg
# (start, stop) = _get_min_max_year_periods([per])

# if stop - start >= trends_min_yrs:
# try:
# subset_time_series = subset.get_regional_timeseries(
# regid, check_country_meta=use_country
# )

# # (obs_trend, mod_trend) = _make_trends_from_timeseries(
# # subset_time_series["obs"],
# # subset_time_series["mod"],
# # freq,
# # season,
# # start,
# # stop,
# # trends_min_yrs,
# # )

# # trends_successful = True
# except as e:
# msg = f"Failed to access subset, and will skip. This was due to {e}"
# logger.warning(msg)

# subset = subset.filter_region(
# region_id=regid, check_country_meta=use_country
# )

# output["obs"][freq] = np.nanmean(subset.data[0, :, :])
# output["mod"][freq] = np.nanmean(subset.data[1, :, :])
# # stats = _get_extended_stats(subset, use_weights)

# # if add_trends and freq != "daily" and trends_successful:
# # # The whole trends dicts are placed in the stats dict
# # stats["obs_trend"] = obs_trend
# # stats["mod_trend"] = mod_trend

# except (DataCoverageError, TemporalResolutionError) as e:
# output["obs"][freq] = np.nan
# output["mod"][freq] = np.nan

# return output


# def add_profile_entry_json(profile_file, coldata, period, season):
# if os.path.exists(profile_file):
# current = read_json(profile_file)
# else:
# current = {}
# # if not var_name_web in current:
# # current[var_name_web] = {}
# # ov = current[var_name_web]
# model_name = coldata.obs_name
# if not model_name in current:
# current[model_name] = {}
# # on = ov[obs_name]

# if not "z" in current[model_name]:
# current[model_name]["z"] = [
# 0
# ] # initalize with 0 # LB: try writing this to a list and see is simple_json complains
# current[model_name]["z"].append(coldata.data.attrs["vertical_layer"]["end"])

# if not "obs" in current[model_name]:
# current[model_name]["obs"] = {}

# if not coldata.ts_type in current[model_name]["obs"]:
# current[model_name]["obs"][coldata.ts_type] = {}

# if not "mod" in current[model_name]:
# current[model_name]["mod"][coldata.ts_type] = {}

# if not "metadata" in current[model_name]:
# # should be same for all. hardcoded because no way to pass this all along now
# current["metadata"] = {
# "z_unit": "km",
# "z_description": "Altitude ASL",
# "z_long_description": "Altitude Above Sea Level"
# "unit": "km-1", #coldata.meta["var_units"][0], # list with two elemetns, get one. pyaerocm will try to get into units of obs, so should be this one but check later

# }

# # current[obs_name]["obs"][coldata.ts_type]
# # if not vert_code in on:
# # on[vert_code] = {}
# # ovc = on[vert_code]
# # if not model_name in ovc:
# # ovc[model_name] = {}
# # mn = ovc[model_name]
# # mn[model_var] = result
# # write_json(current, profile_file, ignore_nan=True)
def get_profile_filename(region, obs_name, var_name_web):
return f"{region}_{obs_name}_{var_name_web}.json"


def process_profile_data(
    data,
    region_id,
    use_country,
    periods,
    seasons,
):
    # Basically needs to do something like _process_heatmap_data, but for vertical profiles:
    # one regional-mean value per frequency and per period/season.
    output = {"obs": {}, "mod": {}}
    for freq, coldata in data.items():
        if freq not in output["obs"]:
            output["obs"][freq] = {}
        if freq not in output["mod"]:
            output["mod"][freq] = {}

        for per in periods:
            for season in seasons:
                use_dummy = coldata is None
                perstr = f"{per}-{season}"
                if use_dummy:
                    output["obs"][freq][perstr] = np.nan
                    output["mod"][freq][perstr] = np.nan
                else:
                    try:
                        subset = _select_period_season_coldata(coldata, per, season)

                        # TODO: trend computation (as done in _process_heatmap_data) is not yet
                        # wired up for profiles.
                        subset_time_series = subset.get_regional_timeseries(
                            region_id, check_country_meta=use_country
                        )

                        # Note: explicit region filtering (subset.filter_region) should already be
                        # covered by get_regional_timeseries above, but double-check.
                        output["obs"][freq][perstr] = np.nanmean(subset_time_series["obs"])
                        output["mod"][freq][perstr] = np.nanmean(subset_time_series["mod"])
                    except Exception as e:
                        msg = f"Failed to access subset timeseries, and will skip. This was due to {e}"
                        logger.warning(msg)

                        output["obs"][freq][perstr] = np.nan
                        output["mod"][freq][perstr] = np.nan

    return output
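# Rough shape of the returned dict (illustration; keys follow f"{per}-{season}" and the
# numeric values are placeholders):
# {
#     "obs": {"monthly": {"<period>-<season>": 0.023, ...}, ...},
#     "mod": {"monthly": {"<period>-<season>": 0.021, ...}, ...},
# }
# i.e. one regional-mean value (or NaN) per frequency and per period/season.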


def add_profile_entry_json(profile_file, data, profile_viz, periods, seasons):
    if os.path.exists(profile_file):
        current = read_json(profile_file)
    else:
        current = {}
    for freq, coldata in data.items():
        model_name = coldata.model_name
        if model_name not in current:
            current[model_name] = {}

        if "z" not in current[model_name]:
            # initialize with 0  # LB: try writing this as a list and see if simplejson complains
            current[model_name]["z"] = [0]
        current[model_name]["z"].append(coldata.data.attrs["vertical_layer"]["end"])

        if "obs" not in current[model_name]:
            current[model_name]["obs"] = {}

        if freq not in current[model_name]["obs"]:
            current[model_name]["obs"][freq] = {}

        if "mod" not in current[model_name]:
            current[model_name]["mod"] = {}

        if freq not in current[model_name]["mod"]:
            current[model_name]["mod"][freq] = {}

        for per in periods:
            for season in seasons:
                perstr = f"{per}-{season}"

                if perstr not in current[model_name]["obs"][freq]:
                    current[model_name]["obs"][freq][perstr] = []
                if perstr not in current[model_name]["mod"][freq]:
                    current[model_name]["mod"][freq][perstr] = []

                # one regional-mean value is appended per vertical layer (i.e. per call)
                current[model_name]["obs"][freq][perstr].append(profile_viz["obs"][freq][perstr])
                current[model_name]["mod"][freq][perstr].append(profile_viz["mod"][freq][perstr])

        if "metadata" not in current:
            # should be the same for all entries; hardcoded because there is no way to pass
            # this along right now
            current["metadata"] = {
                "z_unit": "km",
                "z_description": "Altitude ASL",
                "z_long_description": "Altitude Above Sea Level",
                # coldata.meta["var_units"] is a list with two elements; take the obs one.
                # pyaerocom will try to convert into the units of the obs, so it should be
                # this one, but check later.
                "unit": "km-1",
            }

    write_json(current, profile_file, ignore_nan=True)
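# Rough shape of the JSON file written above (illustration; the model name is hypothetical and
# "<period>-<season>" stands for keys built as f"{per}-{season}"):
# {
#     "SomeModel": {
#         "z": [0, 1.0, ...],  # initial 0 plus one "vertical_layer" end per call
#         "obs": {"monthly": {"<period>-<season>": [0.023, ...]}},
#         "mod": {"monthly": {"<period>-<season>": [0.021, ...]}},
#     },
#     "metadata": {"z_unit": "km", "z_description": "Altitude ASL",
#                  "z_long_description": "Altitude Above Sea Level", "unit": "km-1"},
# }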
