From ef2428b8f2ae00a07943d17e1bea47b9d0eb1c53 Mon Sep 17 00:00:00 2001 From: blychs Date: Wed, 20 Nov 2024 18:19:09 -0700 Subject: [PATCH 01/33] region select for custom regions --- melodies_monet/util/region_select.py | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 melodies_monet/util/region_select.py diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py new file mode 100644 index 00000000..4d02a29a --- /dev/null +++ b/melodies_monet/util/region_select.py @@ -0,0 +1,33 @@ +import xarray as xr +import regionmask + +def create_custom_mask(data, mask_info) -> xr.Dataset: + """Creates a 3D mask using regionmask. + + Parameters + ---------- + data : xr.Dataset + Dataset with the data that the user wishes to mask. + mask_info : list[list[float, float]] | dict[str, list[list[float, float]]] + List containing the vertices of the polygon to mask. + + Returns + ------- + xr.Dataset + mask for input data + """ + if not (isinstance(mask_info, list) or isinstance(mask_info, dict)): + raise f"mask_info type={type(mask_info)}, not valid for create_custom_mask." + elif isinstance(mask_info, list): + regions = regionmask.Regions([mask_info]) + else: + all_regions = [] + abbrevs = [] + for rname, rpolygon in mask_info.items(): + abbrevs.append(rname) + all_regions.append(rpolygon) + regions = regionmask.Regions(all_regions, abbrevs=abbrevs, name='custom_regions') + return regions + + + From da339195732c23f78bbdfd96ee74613106a564b6 Mon Sep 17 00:00:00 2001 From: blychs Date: Fri, 22 Nov 2024 16:22:42 -0700 Subject: [PATCH 02/33] Regionmask tool --- melodies_monet/util/region_select.py | 90 +++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 8 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 4d02a29a..bb2e4a32 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -1,8 +1,21 @@ -import xarray as xr +""" +Masking arbitrary regions with regionmask +""" + +import warnings +from ast import literal_eval + +import geopandas as gp import regionmask -def create_custom_mask(data, mask_info) -> xr.Dataset: - """Creates a 3D mask using regionmask. +try: + import pooch +except ImportError: + pooch = None + + +def create_custom_mask(data, mask_info): + """Creates mask using regionmask. Parameters ---------- @@ -16,9 +29,9 @@ def create_custom_mask(data, mask_info) -> xr.Dataset: xr.Dataset mask for input data """ - if not (isinstance(mask_info, list) or isinstance(mask_info, dict)): - raise f"mask_info type={type(mask_info)}, not valid for create_custom_mask." - elif isinstance(mask_info, list): + if not isinstance(mask_info, (list, dict)): + raise TypeError(f"mask_info type={type(mask_info)}, not valid for create_custom_mask.") + if isinstance(mask_info, list): regions = regionmask.Regions([mask_info]) else: all_regions = [] @@ -26,8 +39,69 @@ def create_custom_mask(data, mask_info) -> xr.Dataset: for rname, rpolygon in mask_info.items(): abbrevs.append(rname) all_regions.append(rpolygon) - regions = regionmask.Regions(all_regions, abbrevs=abbrevs, name='custom_regions') - return regions + regions = regionmask.Regions(all_regions, abbrevs=abbrevs, name="custom_regions") + region_mask = regions.mask(data) + return region_mask + + +def create_predefined_mask(data, name_regiontype): + """Creates mask using regionmask. + + Parameters + ---------- + data : xr.Dataset + Dataset with the data that the user wishes to mask. + name_regiontype : str + name of regionmask defined regions (e.g., "srex", "giorgi") + Returns + ------- + xr.Dataset + mask for input data + """ + regions = literal_eval(f"regionmask.defined_regions.{name_regiontype}") + region_mask = regions.mask(data) + return region_mask + + +def create_shapefile_mask(data, mask_path=None, mask_url=None, **kwargs): + """Creates mask from shapefile using regionmask and geopandas. + + Parameters + ---------- + data : xr.Dataset + Dataset with the data that the user wishes to mask. + mask_path : str | pathobject + Path to the shapefiles. They can be zipped. + mask_url : str + url to download the shapefiles. Requires pooch. + If both mask_url and mask_path are provided, this will be + ignored. + **kwargs + Arguments to pass to regionmask.from_geopandas + + Returns + ------- + xr.Dataset + mask for the input data + """ + + if mask_url is not None and mask_path is not None: + warnings.warn("mask_url and mask_path provided. Only one can be used. Selecting mask_path") + + if mask_path is not None: + file = mask_path + elif mask_url is not None: + if pooch is None: + raise ImportError("pooch is not installed, cannot download URL.") + file = pooch.retrieve(mask_url, None) + else: + raise ValueError("Either mask_path or mask_url have to be provided") + if file[-4:] == ".zip": + file = "zip://" + file + regions = regionmask.from_geopandas(gp.read_file(file), **kwargs) + # region_mask = regionmask.mask_geopandas(regions, data["lon"].values, data["lat"].values) + region_mask = regions.mask(data) + return region_mask From 7c53f29e90ff01da764d431eaed5c921964b1178 Mon Sep 17 00:00:00 2001 From: blychs Date: Fri, 22 Nov 2024 16:23:32 -0700 Subject: [PATCH 03/33] Delete old unused code --- melodies_monet/util/region_select.py | 1 - 1 file changed, 1 deletion(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index bb2e4a32..061b8365 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -102,6 +102,5 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, **kwargs): file = "zip://" + file regions = regionmask.from_geopandas(gp.read_file(file), **kwargs) - # region_mask = regionmask.mask_geopandas(regions, data["lon"].values, data["lat"].values) region_mask = regions.mask(data) return region_mask From 0432ace3f06f753e3f41dc95c13118c335fe1889 Mon Sep 17 00:00:00 2001 From: blychs Date: Fri, 22 Nov 2024 17:02:13 -0700 Subject: [PATCH 04/33] Return masked_dataset directly --- melodies_monet/util/region_select.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 061b8365..bf8373bb 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -27,7 +27,7 @@ def create_custom_mask(data, mask_info): Returns ------- xr.Dataset - mask for input data + masked data """ if not isinstance(mask_info, (list, dict)): raise TypeError(f"mask_info type={type(mask_info)}, not valid for create_custom_mask.") @@ -41,10 +41,11 @@ def create_custom_mask(data, mask_info): all_regions.append(rpolygon) regions = regionmask.Regions(all_regions, abbrevs=abbrevs, name="custom_regions") region_mask = regions.mask(data) - return region_mask + masked_data = data.where(region_mask.notnull()) + return masked_data -def create_predefined_mask(data, name_regiontype): +def create_predefined_mask(data, name_regiontype, region_name): """Creates mask using regionmask. Parameters @@ -53,6 +54,9 @@ def create_predefined_mask(data, name_regiontype): Dataset with the data that the user wishes to mask. name_regiontype : str name of regionmask defined regions (e.g., "srex", "giorgi") + region_name : str + region to mask. If it can be parsed to an integer, that is used. + Otherwise, it is assumed to be an abbrev. Returns ------- @@ -61,10 +65,14 @@ def create_predefined_mask(data, name_regiontype): """ regions = literal_eval(f"regionmask.defined_regions.{name_regiontype}") region_mask = regions.mask(data) - return region_mask + try: + selected_region = data.where(region_mask == int(region_name)) + except ValueError: + selected_region = data.where(region_mask.cf == region_name) + return selected_region -def create_shapefile_mask(data, mask_path=None, mask_url=None, **kwargs): +def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, **kwargs): """Creates mask from shapefile using regionmask and geopandas. Parameters @@ -77,6 +85,9 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, **kwargs): url to download the shapefiles. Requires pooch. If both mask_url and mask_path are provided, this will be ignored. + region_name : str + region to mask. If it can be parsed to an integer, that is used. + Otherwise, it is assumed to be an abbrev. **kwargs Arguments to pass to regionmask.from_geopandas @@ -103,4 +114,8 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, **kwargs): regions = regionmask.from_geopandas(gp.read_file(file), **kwargs) region_mask = regions.mask(data) - return region_mask + try: + selected_region = data.where(region_mask == int(region_name)) + except ValueError: + selected_region = data.where(region_mask.cf == region_name) + return selected_region From 85f91224ad8bdcb66a4caa828dab0653a4a5ef0d Mon Sep 17 00:00:00 2001 From: blychs Date: Tue, 3 Dec 2024 09:55:30 -0700 Subject: [PATCH 05/33] test region support --- melodies_monet/plots/surfplots.py | 105 +++++++++++++++++++ melodies_monet/util/region_select.py | 148 ++++++++++++++++++++++++++- 2 files changed, 250 insertions(+), 3 deletions(-) mode change 100755 => 100644 melodies_monet/plots/surfplots.py diff --git a/melodies_monet/plots/surfplots.py b/melodies_monet/plots/surfplots.py old mode 100755 new mode 100644 index 8e541343..9bfe23e6 --- a/melodies_monet/plots/surfplots.py +++ b/melodies_monet/plots/surfplots.py @@ -497,6 +497,111 @@ def make_timeseries(df, df_reg=None, column=None, label=None, ax=None, avg_windo ax.set_title(domain_name,fontweight='bold',**text_kwargs) return ax +def make_diurnal_cycle(df, column=None, label=None, ax=None, avg_window=None, ylabel=None, + vmin = None, vmax = None, + domain_type=None, domain_name=None, + plot_dict=None, fig_dict=None, text_dict=None,debug=False, **kwargs): + """Creates timeseries plot. + + Parameters + ---------- + df : pandas.DataFrame + model/obs paired data to plot + column : str + Column label of variable to plot + label : str + Name of variable to use in plot legend + ax : ax + matplotlib ax from previous occurrence so can overlay obs and model + results on the same plot + avg_window : rule + Pandas resampling rule (e.g., 'h', 'D') + ylabel : str + Title of y-axis + vmin : real number + Min value to use on y-axis + vmax : real number + Max value to use on y-axis + domain_type : str + Domain type specified in input yaml file + domain_name : str + Domain name specified in input yaml file + plot_dict : dictionary + Dictionary containing information about plotting for each pair + (e.g., color, linestyle, markerstyle) + fig_dict : dictionary + Dictionary containing information about figure + text_dict : dictionary + Dictionary containing information about text + debug : boolean + Whether to plot interactively (True) or not (False). Flag for + submitting jobs to supercomputer turn off interactive mode. + + Returns + ------- + ax + matplotlib ax such that driver.py can iterate to overlay multiple models on the + same plot + + """ + if debug == False: + plt.ioff() + #First define items for all plots + #set default text size + def_text = dict(fontsize=14) + if text_dict is not None: + text_kwargs = {**def_text, **text_dict} + else: + text_kwargs = def_text + # set ylabel to column if not specified. + if ylabel is None: + ylabel = column + if label is not None: + plot_dict['label'] = label + if vmin is not None and vmax is not None: + plot_dict['ylim'] = [vmin,vmax] + #scale the fontsize for the x and y labels by the text_kwargs + plot_dict['fontsize'] = text_kwargs['fontsize']*0.8 + #Then, if no plot has been created yet, create a plot and plot the obs. + if ax is None: + #First define the colors for the observations. + obs_dict = dict(color='k', linestyle='-',marker='*', linewidth=1.2, markersize=6.) + if plot_dict is not None: + #Whatever is not defined in the yaml file is filled in with the obs_dict here. + plot_kwargs = {**obs_dict, **plot_dict} + else: + plot_kwargs = obs_dict + # create the figure + if fig_dict is not None: + f,ax = plt.subplots(**fig_dict) + else: + f,ax = plt.subplots(figsize=(10,6)) + # plot the line + time = pd.DatetimeIndex(df["time"]) + else: + plot_kwargs = { **dict(linestyle='-', marker='*', linewidth=1.2, markersize=6.), **plot_dict} + ax = df[column].groupby(time.dt.hour).mean().plot(ax=ax, legend=True, **plot_kwargs) + max_vals = df[column].groupby(time.dt.hour).max() + min_vals = df[column].groupby(time.dt.hour).min() + ax.fill_between(max_vals, min_vals, color='grey', alpha=0.2) + + + #Set parameters for all plots + ax.set_ylabel(ylabel,fontweight='bold',**text_kwargs) + ax.set_xlabel("Hour",fontweight='bold',**text_kwargs) + ax.legend(frameon=False,fontsize=text_kwargs['fontsize']*0.8) + ax.tick_params(axis='both',length=10.0,direction='inout') + ax.tick_params(axis='both',which='minor',length=5.0,direction='out') + ax.legend(frameon=False,fontsize=text_kwargs['fontsize']*0.8, + bbox_to_anchor=(1.0, 0.9), loc='center left') + if domain_type is not None and domain_name is not None: + if domain_type == 'epa_region': + ax.set_title('EPA Region ' + domain_name,fontweight='bold',**text_kwargs) + else: + ax.set_title(domain_name,fontweight='bold',**text_kwargs) + return ax + + def make_taylor(df, df_reg=None, column_o=None, label_o='Obs', column_m=None, label_m='Model', dia=None, ylabel=None, ty_scale=1.5, domain_type=None, domain_name=None, diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index bf8373bb..468c2b7f 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -5,8 +5,18 @@ import warnings from ast import literal_eval -import geopandas as gp -import regionmask +import pandas as pd + +from melodies_monet.util.tools import get_epa_region_bounds, get_giorgi_region_bounds + +try: + import geopandas as gp +except ImportError: + gp = None +try: + import regionmask +except ImportError: + regionmask = None try: import pooch @@ -67,7 +77,7 @@ def create_predefined_mask(data, name_regiontype, region_name): region_mask = regions.mask(data) try: selected_region = data.where(region_mask == int(region_name)) - except ValueError: + except TypeError: selected_region = data.where(region_mask.cf == region_name) return selected_region @@ -119,3 +129,135 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, except ValueError: selected_region = data.where(region_mask.cf == region_name) return selected_region + + +def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwargs): + """Parses region information to return the right type of data. + + Parameters + ---------- + data : xr.Dataset + data to be masked + domain_type : str + type of data. Used to decide which function to apply. Should begin with + "custom" + domain_name : str + It is used as the region name, or to read the info. + **kwargs: + Extra kwargs to pass to regionmask + + Returns + ------- + xr.Dataset + masked Dataset + """ + if "custom" not in domain_type: + raise ValueError("If regionmask is used, the domain_type should be starting with 'custom'") + if "auto_polygon" in domain_type: + masked_data = create_custom_mask(data, domain_info[domain_name]) + elif "defined_region" in domain_type: + name_regiontype = domain_info[domain_name]["name_regiontype"] + region_name = domain_info[domain_name]["region_name"] + masked_data = create_predefined_mask(data, name_regiontype, region_name) + elif "custom_file" in domain_type: + mask_path = domain_info[domain_name].get("mask_path", None) + mask_url = domain_info[domain_name].get("mask_url", None) + region_name = domain_info[domain_name].get("region_name", None) + masked_data = create_shapefile_mask(data, mask_path, mask_url, region_name, **kwargs) + else: + raise ValueError( + "Could not identify the type of domain. Should be 'auto-polygon'," + + " 'defined_region' or 'custom_file'" + ) + return masked_data + + +def create_autoregion(data, domain_type, domain_name, **kwargs): + """Selects a region using predefined boundaries. + + Parameters + ---------- + data : xr.Dataset | pd.DataFrame + data to be masked + domain_type : str + type of data. Used to decide which function to apply. + domain_name : str + This is used as the region name, or to read the info. + kwargs + if domain_type is auto-region:custom-name_of_domain, + this is used to read in the information on the custom + box + + Returns + ------- + xr.Dataset | pd.DataFrame + Data as it was provided to the function + """ + auto_region_id = domain_type.split(":")[1] + if auto_region_id == "epa": + bounds = get_epa_region_bounds(acronym=domain_name) + elif auto_region_id == "giorgi": + bounds = get_giorgi_region_bounds(acronym=domain_name) + elif auto_region_id == "custom": + bounds = kwargs["domain_box"][auto_region_id.split("-")[1]] + else: + raise ValueError( + "Currently, region selections whithout a domain query have only " + "been implemented for Giorgi and EPA regions. You asked for " + f"{domain_type!r}. Soon, arbitrary rectangular boxes, US states and " + "others will be included." + ) + if isinstance(data, pd.DataFrame): + data_all = data.loc[ + (data["latitude"] >= bounds[0]) + & (data["longitude"] >= bounds[1]) + & (data["latitude"] <= bounds[2]) + & (data["longitude"] <= bounds[3]) + ] + else: + data_all = data.where( + (data["latitude"] >= bounds[0]) + & (data["longitude"] >= bounds[1]) + & (data["latitude"] <= bounds[2]) + & (data["longitude"] <= bounds[3]), + drop=True, + ) + return data_all + + +def select_region(data, domain_type, domain_name, **kwargs): + """Selects a region in whichever format it was provided + + Parameters + ---------- + data : xr.Dataset | pd.DataFrame + data to be masked + domain_type : str + type of data. Used to decide which function to apply. + domain_name : str + This is used as the region name, or to read the info. + **kwargs: + extra kwargs to pass to the selector, depending on the type of + data or region. + + Returns + ------- + xr.Dataset | pd.DataFrame + Region selected. The type will be the same as the input data. + """ + + if domain_type != "all": + if domain_type.startswith("auto-region"): + data_masked = create_autoregion(data, domain_type, domain_name, **kwargs) + elif domain_type.startswith("custom"): + if regionmask is None: + raise ImportError( + "regionmask is not installed, cannot create 'custom:' type domain." + + " If your domain is a simple box, try using auto-region:custom-domain_name." + ) + data_masked = control_custom_mask(data, domain_name, domain_type, **kwargs) + + else: + data_masked = data.query(domain_type + " == " + '"' + domain_name + '"', inplace=True) + return data_masked + return data From 629c70054ca74bfcee1a91ef2f3602ef0eb0ab86 Mon Sep 17 00:00:00 2001 From: blychs Date: Tue, 3 Dec 2024 20:49:57 -0700 Subject: [PATCH 06/33] Add explicit parameter domain_info --- melodies_monet/util/region_select.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 468c2b7f..5d189088 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -172,7 +172,7 @@ def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwar return masked_data -def create_autoregion(data, domain_type, domain_name, **kwargs): +def create_autoregion(data, domain_type, domain_name, domain_info=None): """Selects a region using predefined boundaries. Parameters @@ -181,13 +181,13 @@ def create_autoregion(data, domain_type, domain_name, **kwargs): data to be masked domain_type : str type of data. Used to decide which function to apply. + If domain_type == auto-region:custom, domain_info is required. domain_name : str This is used as the region name, or to read the info. - kwargs - if domain_type is auto-region:custom-name_of_domain, - this is used to read in the information on the custom - box - + domain_info: None | dict[str, tuple[float, float, float, float]] + if not None, dict containing the domain name and a tuple with + latmin, lonmin, latmax, lonmax. Only required if domain_type + starts with auto-region:custom Returns ------- xr.Dataset | pd.DataFrame @@ -199,7 +199,7 @@ def create_autoregion(data, domain_type, domain_name, **kwargs): elif auto_region_id == "giorgi": bounds = get_giorgi_region_bounds(acronym=domain_name) elif auto_region_id == "custom": - bounds = kwargs["domain_box"][auto_region_id.split("-")[1]] + bounds = domain_info[domain_name] else: raise ValueError( "Currently, region selections whithout a domain query have only " @@ -225,7 +225,7 @@ def create_autoregion(data, domain_type, domain_name, **kwargs): return data_all -def select_region(data, domain_type, domain_name, **kwargs): +def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): """Selects a region in whichever format it was provided Parameters @@ -255,6 +255,8 @@ def select_region(data, domain_type, domain_name, **kwargs): "regionmask is not installed, cannot create 'custom:' type domain." + " If your domain is a simple box, try using auto-region:custom-domain_name." ) + if domain_info is None: + raise KeyError("If regionmask is used, domain_info is needed.") data_masked = control_custom_mask(data, domain_name, domain_type, **kwargs) else: From 9a3a07cd60fbd9acaffe6a849b43e9822d478b21 Mon Sep 17 00:00:00 2001 From: blychs Date: Tue, 3 Dec 2024 21:17:31 -0700 Subject: [PATCH 07/33] Use domain_info for extra data --- melodies_monet/util/region_select.py | 42 +++++++++++++++------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 5d189088..9ffcb176 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -143,6 +143,8 @@ def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwar "custom" domain_name : str It is used as the region name, or to read the info. + domain_info : dict + Dictionary containing relevant information on domain, like url, name, etc. **kwargs: Extra kwargs to pass to regionmask @@ -181,13 +183,13 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): data to be masked domain_type : str type of data. Used to decide which function to apply. - If domain_type == auto-region:custom, domain_info is required. + If domain_type == 'auto-region:custom_box', domain_info is required. domain_name : str This is used as the region name, or to read the info. domain_info: None | dict[str, tuple[float, float, float, float]] if not None, dict containing the domain name and a tuple with latmin, lonmin, latmax, lonmax. Only required if domain_type - starts with auto-region:custom + is auto-region:custom_box Returns ------- xr.Dataset | pd.DataFrame @@ -236,6 +238,9 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): type of data. Used to decide which function to apply. domain_name : str This is used as the region name, or to read the info. + domain_info : dict + Dict containing the domain_name and other relevant information, e. g., + lonlat box, mask_url, mask_file, etc. **kwargs: extra kwargs to pass to the selector, depending on the type of data or region. @@ -246,20 +251,19 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): Region selected. The type will be the same as the input data. """ - if domain_type != "all": - if domain_type.startswith("auto-region"): - data_masked = create_autoregion(data, domain_type, domain_name, **kwargs) - elif domain_type.startswith("custom"): - if regionmask is None: - raise ImportError( - "regionmask is not installed, cannot create 'custom:' type domain." - + " If your domain is a simple box, try using auto-region:custom-domain_name." - ) - if domain_info is None: - raise KeyError("If regionmask is used, domain_info is needed.") - data_masked = control_custom_mask(data, domain_name, domain_type, **kwargs) - - else: - data_masked = data.query(domain_type + " == " + '"' + domain_name + '"', inplace=True) - return data_masked - return data + if domain_type == "all": + return data + if domain_type.startswith("auto-region"): + data_masked = create_autoregion(data, domain_type, domain_name, domain_info) + elif domain_type == "custom": + if regionmask is None: + raise ImportError( + "regionmask is not installed, cannot create 'custom' type domain." + + " If your domain is a simple box, try using auto-region:custom_box." + ) + if domain_info is None: + raise KeyError("If regionmask is used, domain_info must exist.") + data_masked = control_custom_mask(data, domain_type, domain_name, domain_info, **kwargs) + else: + data_masked = data.query(domain_type + " == " + '"' + domain_name + '"', inplace=True) + return data_masked From 99acbb30152868b2ce6e0f675525c32c038a70a0 Mon Sep 17 00:00:00 2001 From: blychs Date: Thu, 5 Dec 2024 15:56:27 -0700 Subject: [PATCH 08/33] deal with pandas, stil in testing --- melodies_monet/driver.py | 33 +++++++--------------------- melodies_monet/util/region_select.py | 29 ++++++++++++++---------- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 1b3eec94..fdeda91d 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -1444,7 +1444,8 @@ def plotting(self): None """ - from .util.tools import resample_stratify, get_epa_region_bounds, get_giorgi_region_bounds + from .util.tools import resample_stratify + from .util.region_select import select_region import matplotlib.pyplot as plt pair_keys = list(self.paired.keys()) if self.paired[pair_keys[0]].type.lower() in ['sat_grid_clm','sat_swath_clm']: @@ -1512,9 +1513,11 @@ def plotting(self): # Loop also over the domain types. So can easily create several overview and zoomed in plots. domain_types = grp_dict['domain_type'] domain_names = grp_dict['domain_name'] + domain_infos = grp_dict.get('domain_info', {}) for domain in range(len(domain_types)): domain_type = domain_types[domain] domain_name = domain_names[domain] + domain_info = domain_infos.get(domain_name, None) # Then loop through each of the pairs to add to the plot. for p_index, p_label in enumerate(pair_labels): @@ -1533,9 +1536,8 @@ def plotting(self): modvar = modvar + 'trpcol' # for pt_sfc data, convert to pandas dataframe, format, and trim - if obs_type in ["sat_swath_sfc", "sat_swath_clm", - "sat_grid_sfc", "sat_grid_clm", - "sat_swath_prof"]: + if obs_type in ["sat_swath_sfc", "sat_swath_clm", "sat_grid_sfc", + "sat_grid_clm", "sat_swath_prof"]: # convert index to time; setup for sat_swath_clm if 'time' not in p.obj.dims and obs_type == 'sat_swath_clm': @@ -1618,27 +1620,8 @@ def plotting(self): # Query selected points if applicable if domain_type != 'all': - if domain_type.startswith("auto-region"): - _, auto_region_id = domain_type.split(":") - if auto_region_id == 'epa': - bounds = get_epa_region_bounds(acronym=domain_name) - elif auto_region_id == 'giorgi': - bounds = get_giorgi_region_bounds(acronym=domain_name) - else: - raise ValueError( - "Currently, region selections whithout a domain query have only " - "been implemented for Giorgi and EPA regions. You asked for " - f"{domain_type!r}. Soon, arbitrary rectangular boxes, US states and " - "others will be included." - ) - pairdf_all = pairdf_all.loc[ - (pairdf_all["latitude"] > bounds[0]) - & (pairdf_all["longitude"] > bounds[1]) - & (pairdf_all["latitude"] < bounds[2]) - & (pairdf_all["longitude"] < bounds[3]) - ] - else: - pairdf_all.query(domain_type + ' == ' + '"' + domain_name + '"', inplace=True) + pairdf_all = select_region(pairdf_all, domain_type, domain_name, domain_info) + # Query with filter options if 'filter_dict' in grp_dict['data_proc'] and 'filter_string' in grp_dict['data_proc']: diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 9ffcb176..88b949d8 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -3,7 +3,6 @@ """ import warnings -from ast import literal_eval import pandas as pd @@ -73,8 +72,16 @@ def create_predefined_mask(data, name_regiontype, region_name): xr.Dataset mask for input data """ - regions = literal_eval(f"regionmask.defined_regions.{name_regiontype}") - region_mask = regions.mask(data) + #region = literal_eval(f"regionmask.defined_regions.{name_regiontype}") + name_regiontype_split = name_regiontype.split('.') + regions = regionmask.defined_regions + for r in name_regiontype_split: + regions = getattr(regions, r) + import pdb; pdb.set_trace() + if isinstance(data, pd.DataFrame): + region_mask = regions.mask(data.to_xarray().rename({'latitude': 'lat', 'longitude': 'lon'}))# .to_dataframe() + else: + region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) try: selected_region = data.where(region_mask == int(region_name)) except TypeError: @@ -156,15 +163,15 @@ def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwar if "custom" not in domain_type: raise ValueError("If regionmask is used, the domain_type should be starting with 'custom'") if "auto_polygon" in domain_type: - masked_data = create_custom_mask(data, domain_info[domain_name]) + masked_data = create_custom_mask(data, domain_info) elif "defined_region" in domain_type: - name_regiontype = domain_info[domain_name]["name_regiontype"] - region_name = domain_info[domain_name]["region_name"] + name_regiontype = domain_info["name_regiontype"] + region_name = domain_info["region_name"] masked_data = create_predefined_mask(data, name_regiontype, region_name) elif "custom_file" in domain_type: - mask_path = domain_info[domain_name].get("mask_path", None) - mask_url = domain_info[domain_name].get("mask_url", None) - region_name = domain_info[domain_name].get("region_name", None) + mask_path = domain_info.get("mask_path", None) + mask_url = domain_info.get("mask_url", None) + region_name = domain_info.get("region_name", None) masked_data = create_shapefile_mask(data, mask_path, mask_url, region_name, **kwargs) else: raise ValueError( @@ -255,7 +262,7 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): return data if domain_type.startswith("auto-region"): data_masked = create_autoregion(data, domain_type, domain_name, domain_info) - elif domain_type == "custom": + elif domain_type.startswith("custom"): if regionmask is None: raise ImportError( "regionmask is not installed, cannot create 'custom' type domain." @@ -265,5 +272,5 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): raise KeyError("If regionmask is used, domain_info must exist.") data_masked = control_custom_mask(data, domain_type, domain_name, domain_info, **kwargs) else: - data_masked = data.query(domain_type + " == " + '"' + domain_name + '"', inplace=True) + data_masked = data.query(domain_type + " == " + '"' + domain_name + '"') return data_masked From 330bc9d05152092ef8b77584ca7c33e1b317c3f3 Mon Sep 17 00:00:00 2001 From: blychs Date: Fri, 6 Dec 2024 17:36:48 -0700 Subject: [PATCH 09/33] Control for region selection --- melodies_monet/driver.py | 41 +++++++++++++++++----------- melodies_monet/util/region_select.py | 26 ++++++++++++------ 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index fdeda91d..1824848e 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -1536,6 +1536,13 @@ def plotting(self): modvar = modvar + 'trpcol' # for pt_sfc data, convert to pandas dataframe, format, and trim + # Query selected points if applicable + if domain_type != 'all': + p_region = select_region(p.obj, domain_type, domain_name, domain_info) + else: + p_region = p.obj + + if obs_type in ["sat_swath_sfc", "sat_swath_clm", "sat_grid_sfc", "sat_grid_clm", "sat_swath_prof"]: # convert index to time; setup for sat_swath_clm @@ -1549,12 +1556,12 @@ def plotting(self): # pairdf_all = p.obj.stack(ll=['x','y']) # pairdf_all = pairdf_all.rename_dims({'ll':'y'}) else: - pairdf_all = p.obj + pairdf_all = p_region # Select only the analysis time window. pairdf_all = pairdf_all.sel(time=slice(self.start_time,self.end_time)) else: # convert to dataframe - pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"]) + pairdf_all = p_region.to_dataframe(dim_order=["time", "x"]) # Select only the analysis time window. pairdf_all = pairdf_all.loc[self.start_time : self.end_time] @@ -1618,11 +1625,6 @@ def plotting(self): # Determine outname outname = "{}.{}.{}.{}.{}.{}.{}".format(grp, plot_type, obsvar, startdatename, enddatename, domain_type, domain_name) - # Query selected points if applicable - if domain_type != 'all': - pairdf_all = select_region(pairdf_all, domain_type, domain_name, domain_info) - - # Query with filter options if 'filter_dict' in grp_dict['data_proc'] and 'filter_string' in grp_dict['data_proc']: raise Exception("""For plot group: {}, only one of filter_dict and filter_string can be specified.""".format(grp)) @@ -2545,6 +2547,8 @@ def plotting(self): vmodel = self.models[p.model].obj.loc[dict(time=slice(self.start_time, self.end_time))] except KeyError as e: raise Exception("MONET requires an altitude dimension named 'z'") from e + if grp_dict.get('data_proc', {}).get('crop_model', False) and domain_name != all: + vmodel = select_region(vmodel, domain_type, domain_name, domain_info) # Determine proj to use for spatial plots proj = splots.map_projection(self.models[p.model]) @@ -2596,6 +2600,7 @@ def stats(self): """ from .stats import proc_stats as proc_stats from .plots import surfplots as splots + from .util.region_selct import select_region # first get the stats dictionary from the yaml file stat_dict = self.control_dict['stats'] @@ -2635,9 +2640,11 @@ def stats(self): # Loop also over the domain types. domain_types = stat_dict['domain_type'] domain_names = stat_dict['domain_name'] + domain_infos = stat_dict.get('domain_info', {}) for domain in range(len(domain_types)): domain_type = domain_types[domain] domain_name = domain_names[domain] + domain_info = domain_infos.get(domain_name, None) # The tables and text files will be output at this step in loop. # Create an empty pandas dataarray. @@ -2690,22 +2697,24 @@ def stats(self): if obsvar == 'nitrogendioxide_tropospheric_column': modvar = modvar + 'trpcol' + # Query selected points if applicable + if domain_type != 'all': + p_region = select_region(p.obj, domain_type, domain_name, domain_info) + else: + p_region = p.obj + # convert to dataframe # handle different dimensios, M.Li - if ('y' in p.obj.dims) and ('x' in p.obj.dims): - pairdf_all = p.obj.to_dataframe(dim_order=["x", "y"]) - elif ('y' in p.obj.dims) and ('time' in p.obj.dims): - pairdf_all = p.obj.to_dataframe(dim_order=["time", "y"]) + if ('y' in p_region.dims) and ('x' in p_region.dims): + pairdf_all = p_region.to_dataframe(dim_order=["x", "y"]) + elif ('y' in p_region.dims) and ('time' in p_region.dims): + pairdf_all = p_region.to_dataframe(dim_order=["time", "y"]) else: - pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"]) + pairdf_all = p_region.to_dataframe(dim_order=["time", "x"]) # Select only the analysis time window. pairdf_all = pairdf_all.loc[self.start_time : self.end_time] - # Query selected points if applicable - if domain_type != 'all': - pairdf_all.query(domain_type + ' == ' + '"' + domain_name + '"', inplace=True) - # Query with filter options if 'data_proc' in stat_dict: if 'filter_dict' in stat_dict['data_proc'] and 'filter_string' in stat_dict['data_proc']: diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 88b949d8..d94dc2cb 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -49,7 +49,11 @@ def create_custom_mask(data, mask_info): abbrevs.append(rname) all_regions.append(rpolygon) regions = regionmask.Regions(all_regions, abbrevs=abbrevs, name="custom_regions") - region_mask = regions.mask(data) + # Regionmask requires "lat" and "lon" + region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + # But MM requires "latitude" and "longitude" + if "lat" in region_mask.coords: + region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) masked_data = data.where(region_mask.notnull()) return masked_data @@ -72,19 +76,18 @@ def create_predefined_mask(data, name_regiontype, region_name): xr.Dataset mask for input data """ - #region = literal_eval(f"regionmask.defined_regions.{name_regiontype}") name_regiontype_split = name_regiontype.split('.') regions = regionmask.defined_regions for r in name_regiontype_split: regions = getattr(regions, r) - import pdb; pdb.set_trace() - if isinstance(data, pd.DataFrame): - region_mask = regions.mask(data.to_xarray().rename({'latitude': 'lat', 'longitude': 'lon'}))# .to_dataframe() - else: - region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + # Regionmask requires "lat" and "lon" + region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + # But MM requires "latitude" and "longitude" + if "lat" in region_mask.coords: + region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) try: selected_region = data.where(region_mask == int(region_name)) - except TypeError: + except ValueError: selected_region = data.where(region_mask.cf == region_name) return selected_region @@ -130,7 +133,12 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, file = "zip://" + file regions = regionmask.from_geopandas(gp.read_file(file), **kwargs) - region_mask = regions.mask(data) + # Regionmask requires "lat" and "lon" + region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + # But MM requires "latitude" and "longitude" + if "lat" in region_mask.coords: + region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) + try: selected_region = data.where(region_mask == int(region_name)) except ValueError: From 3733a175017ed5dbc40e5cba9c4cd27422886011 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 16 Dec 2024 16:20:06 -0700 Subject: [PATCH 10/33] Start adding test for regionsupport --- melodies_monet/tests/test_regionsupport.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 melodies_monet/tests/test_regionsupport.py diff --git a/melodies_monet/tests/test_regionsupport.py b/melodies_monet/tests/test_regionsupport.py new file mode 100644 index 00000000..7ed8ccbc --- /dev/null +++ b/melodies_monet/tests/test_regionsupport.py @@ -0,0 +1,15 @@ +from melodies_monet import region_select + +import xarray as xr +import numpy as np + +def test_create_custom_mask(): + + np.random.seed(0) + fake_model = 10 + 2 * np.random.randn(30,20,4) + np.random.seed(1) + fake_obs = 10 + 2 * np.random.randn(30, 20, 4) + longitude = np.linspace() + mock_data = xr.Dataset() + + From 72f7b9c9c1cfd0396f27983efda2721df2630683 Mon Sep 17 00:00:00 2001 From: blychs Date: Fri, 27 Dec 2024 11:53:54 -0700 Subject: [PATCH 11/33] Change download scheme avoiding pooch. This commit has advantages and disadvantages: Advantages ---------- - It does not require pooch, and only uses the standard library. - It can deal with URL's not ending with the filename. Disadvantages ------------- - It downloads the files locally instead of into the cache (may be actually good). - It does not add any checksum to the name, risking overwriting files. --- melodies_monet/util/region_select.py | 44 ++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index d94dc2cb..ac80023f 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -2,9 +2,11 @@ Masking arbitrary regions with regionmask """ +import re import warnings import pandas as pd +import requests from melodies_monet.util.tools import get_epa_region_bounds, get_giorgi_region_bounds @@ -17,10 +19,27 @@ except ImportError: regionmask = None -try: - import pooch -except ImportError: - pooch = None + +def _download_with_name(url): + """Automates download while reading content disposition if needed. + + Parameters: + ----------- + url : str + URL of file to download + + Returns + ------- + str + Path to downloaded file + """ + r = requests.get(url, allow_redirects=True, timeout=10) + fname = re.findall("filename=(.+)", r.headers.get("content-disposition")) + if len(fname) == 0: + fname = url.rsplit("/", 1)[1] + with open(fname, "wb") as f: + f.write(r.content) + return fname def create_custom_mask(data, mask_info): @@ -50,7 +69,7 @@ def create_custom_mask(data, mask_info): all_regions.append(rpolygon) regions = regionmask.Regions(all_regions, abbrevs=abbrevs, name="custom_regions") # Regionmask requires "lat" and "lon" - region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + region_mask = regions.mask(data.rename({"latitude": "lat", "longitude": "lon"})) # But MM requires "latitude" and "longitude" if "lat" in region_mask.coords: region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) @@ -76,12 +95,12 @@ def create_predefined_mask(data, name_regiontype, region_name): xr.Dataset mask for input data """ - name_regiontype_split = name_regiontype.split('.') + name_regiontype_split = name_regiontype.split(".") regions = regionmask.defined_regions for r in name_regiontype_split: regions = getattr(regions, r) # Regionmask requires "lat" and "lon" - region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + region_mask = regions.mask(data.rename({"latitude": "lat", "longitude": "lon"})) # But MM requires "latitude" and "longitude" if "lat" in region_mask.coords: region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) @@ -118,14 +137,15 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, """ if mask_url is not None and mask_path is not None: - warnings.warn("mask_url and mask_path provided. Only one can be used. Selecting mask_path") + warnings.warn( + "mask_url and mask_path provided. Only one can be used." + + "Selecting mask_path and discarding URL." + ) if mask_path is not None: file = mask_path elif mask_url is not None: - if pooch is None: - raise ImportError("pooch is not installed, cannot download URL.") - file = pooch.retrieve(mask_url, None) + file = _download_with_name(mask_url) else: raise ValueError("Either mask_path or mask_url have to be provided") @@ -134,7 +154,7 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, regions = regionmask.from_geopandas(gp.read_file(file), **kwargs) # Regionmask requires "lat" and "lon" - region_mask = regions.mask(data.rename({'latitude':'lat', 'longitude':'lon'})) + region_mask = regions.mask(data.rename({"latitude": "lat", "longitude": "lon"})) # But MM requires "latitude" and "longitude" if "lat" in region_mask.coords: region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) From 725f967b022cc129be56dfa2b6d10617c8c63e3f Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 14:52:24 -0700 Subject: [PATCH 12/33] Eliminate drop=True in auto-region It hangs if latitude and longitude are not coordinates --- melodies_monet/util/region_select.py | 80 +++++++++++++++++----------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index ac80023f..1d43211e 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -12,6 +12,7 @@ try: import geopandas as gp + from shapely.geometry import MultiPolygon, Polygon except ImportError: gp = None try: @@ -20,25 +21,30 @@ regionmask = None -def _download_with_name(url): +def _download_with_name(url, verbose=False): """Automates download while reading content disposition if needed. Parameters: ----------- url : str URL of file to download - + verbose : boolean + Whether to add verbosity Returns ------- str Path to downloaded file """ r = requests.get(url, allow_redirects=True, timeout=10) - fname = re.findall("filename=(.+)", r.headers.get("content-disposition")) + fname = re.findall("filename=(.+)", r.headers.get("content-disposition"))[0] if len(fname) == 0: fname = url.rsplit("/", 1)[1] + if fname[0] == '"': + fname = fname[1:-1] with open(fname, "wb") as f: f.write(r.content) + if verbose: + print(f"Downloaded {url} as {fname}.") return fname @@ -60,7 +66,14 @@ def create_custom_mask(data, mask_info): if not isinstance(mask_info, (list, dict)): raise TypeError(f"mask_info type={type(mask_info)}, not valid for create_custom_mask.") if isinstance(mask_info, list): - regions = regionmask.Regions([mask_info]) + if isinstance(mask_info[0][0], (float, int)): + poly = Polygon(mask_info) + else: + polys = [] + for p in mask_info: + polys.append(Polygon(p)) + poly = MultiPolygon(polys) + regions = regionmask.Regions([poly]) else: all_regions = [] abbrevs = [] @@ -77,7 +90,7 @@ def create_custom_mask(data, mask_info): return masked_data -def create_predefined_mask(data, name_regiontype, region_name): +def create_predefined_mask(data, name_regiontype, region=None): """Creates mask using regionmask. Parameters @@ -86,7 +99,7 @@ def create_predefined_mask(data, name_regiontype, region_name): Dataset with the data that the user wishes to mask. name_regiontype : str name of regionmask defined regions (e.g., "srex", "giorgi") - region_name : str + region : str region to mask. If it can be parsed to an integer, that is used. Otherwise, it is assumed to be an abbrev. @@ -96,18 +109,18 @@ def create_predefined_mask(data, name_regiontype, region_name): mask for input data """ name_regiontype_split = name_regiontype.split(".") - regions = regionmask.defined_regions + all_regions = regionmask.defined_regions for r in name_regiontype_split: - regions = getattr(regions, r) + all_regions = getattr(all_regions, r) # Regionmask requires "lat" and "lon" - region_mask = regions.mask(data.rename({"latitude": "lat", "longitude": "lon"})) + region_mask = all_regions.mask(data.rename({"latitude": "lat", "longitude": "lon"})) # But MM requires "latitude" and "longitude" if "lat" in region_mask.coords: region_mask = region_mask.rename({"lat": "latitude", "lon": "longitude"}) try: - selected_region = data.where(region_mask == int(region_name)) + selected_region = data.where(region_mask == int(region)) except ValueError: - selected_region = data.where(region_mask.cf == region_name) + selected_region = data.where(region_mask.cf == region) return selected_region @@ -145,7 +158,7 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, if mask_path is not None: file = mask_path elif mask_url is not None: - file = _download_with_name(mask_url) + file = _download_with_name(mask_url, verbose=True) else: raise ValueError("Either mask_path or mask_url have to be provided") @@ -166,7 +179,7 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, return selected_region -def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwargs): +def control_custom_mask(data, domain_type, domain_info=None, **kwargs): """Parses region information to return the right type of data. Parameters @@ -175,9 +188,7 @@ def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwar data to be masked domain_type : str type of data. Used to decide which function to apply. Should begin with - "custom" - domain_name : str - It is used as the region name, or to read the info. + "custom". domain_info : dict Dictionary containing relevant information on domain, like url, name, etc. **kwargs: @@ -191,19 +202,21 @@ def control_custom_mask(data, domain_type, domain_name, domain_info=None, **kwar if "custom" not in domain_type: raise ValueError("If regionmask is used, the domain_type should be starting with 'custom'") if "auto_polygon" in domain_type: - masked_data = create_custom_mask(data, domain_info) + masked_data = create_custom_mask(data, domain_info["mask_info"]) elif "defined_region" in domain_type: name_regiontype = domain_info["name_regiontype"] - region_name = domain_info["region_name"] - masked_data = create_predefined_mask(data, name_regiontype, region_name) + region = domain_info["region"] + masked_data = create_predefined_mask(data, name_regiontype, region) elif "custom_file" in domain_type: - mask_path = domain_info.get("mask_path", None) - mask_url = domain_info.get("mask_url", None) - region_name = domain_info.get("region_name", None) - masked_data = create_shapefile_mask(data, mask_path, mask_url, region_name, **kwargs) + params = domain_info + params["mask_path"] = domain_info.get("mask_path", None) + params["mask_url"] = domain_info.get("mask_url", None) + params["region_name"] = domain_info.get("region_name", None) + params["abbrevs"] = domain_info.get("abbrevs", "_from_name") + masked_data = create_shapefile_mask(data, **params, **kwargs) else: raise ValueError( - "Could not identify the type of domain. Should be 'auto-polygon'," + "Could not identify the type of domain. Should be 'auto_polygon'," + " 'defined_region' or 'custom_file'" ) return masked_data @@ -230,19 +243,19 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): xr.Dataset | pd.DataFrame Data as it was provided to the function """ - auto_region_id = domain_type.split(":")[1] + auto_region_id = domain_type.split(":")[1].lower() if auto_region_id == "epa": bounds = get_epa_region_bounds(acronym=domain_name) elif auto_region_id == "giorgi": bounds = get_giorgi_region_bounds(acronym=domain_name) elif auto_region_id == "custom": - bounds = domain_info[domain_name] + bounds = domain_info["bounds"] else: raise ValueError( - "Currently, region selections whithout a domain query have only " + "Currently, auto-region selections whithout a domain query have only " "been implemented for Giorgi and EPA regions. You asked for " - f"{domain_type!r}. Soon, arbitrary rectangular boxes, US states and " - "others will be included." + f"{domain_type!r}. If you need more capabilities, checkout the custom:" + "regions capabilities. Be aware that they require regionmask." ) if isinstance(data, pd.DataFrame): data_all = data.loc[ @@ -257,7 +270,7 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): & (data["longitude"] >= bounds[1]) & (data["latitude"] <= bounds[2]) & (data["longitude"] <= bounds[3]), - drop=True, + # drop=True, ) return data_all @@ -298,7 +311,10 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): ) if domain_info is None: raise KeyError("If regionmask is used, domain_info must exist.") - data_masked = control_custom_mask(data, domain_type, domain_name, domain_info, **kwargs) + data_masked = control_custom_mask(data, domain_type, domain_info, **kwargs) else: - data_masked = data.query(domain_type + " == " + '"' + domain_name + '"') + if isinstance(data, pd.DataFrame): + data_masked = data.query(domain_type + " == " + '"' + domain_name + '"') + else: + data_masked = data.where(data[domain_type].cf == domain_name) return data_masked From 895c468eb3ceb9cc86c4854b8626bea0b6299ae1 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 21:04:33 -0700 Subject: [PATCH 13/33] Fix typo in import region_select for stats --- melodies_monet/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 1824848e..a424acf7 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -2600,7 +2600,7 @@ def stats(self): """ from .stats import proc_stats as proc_stats from .plots import surfplots as splots - from .util.region_selct import select_region + from .util.region_select import select_region # first get the stats dictionary from the yaml file stat_dict = self.control_dict['stats'] From eb2f820cfbdf923d81ed7aeda64c7a7bbe757a40 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 21:10:38 -0700 Subject: [PATCH 14/33] Fix bug in query-like search and typo in auto-region:custom_box -The code was originally looking for data["domain_type"].cf == "domain_name" instead of data["domain_type"] == "domain_name". -auto-region:custom_box was wirtten as auto-region:custom. --- melodies_monet/util/region_select.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 1d43211e..e23e69a3 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -248,7 +248,7 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): bounds = get_epa_region_bounds(acronym=domain_name) elif auto_region_id == "giorgi": bounds = get_giorgi_region_bounds(acronym=domain_name) - elif auto_region_id == "custom": + elif auto_region_id == "custom_box": bounds = domain_info["bounds"] else: raise ValueError( @@ -316,5 +316,5 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): if isinstance(data, pd.DataFrame): data_masked = data.query(domain_type + " == " + '"' + domain_name + '"') else: - data_masked = data.where(data[domain_type].cf == domain_name) + data_masked = data.where(data[domain_type] == domain_name) return data_masked From 15a680a903e52e4692e2433a37457abc451157c9 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 21:23:38 -0700 Subject: [PATCH 15/33] Update the docs to new region support --- docs/appendix/yaml.rst | 16 ++++++++++++++++ docs/getting_started/installation.rst | 1 + docs/index.rst | 1 + 3 files changed, 18 insertions(+) diff --git a/docs/appendix/yaml.rst b/docs/appendix/yaml.rst index 5f1f3030..65744a19 100644 --- a/docs/appendix/yaml.rst +++ b/docs/appendix/yaml.rst @@ -321,6 +321,22 @@ For automatic EPA or Giorgi region boxes (if they are not included with the columns in the observation file), choose ``auto-region:epa`` or ``auto-region:giorgi``. Take into account that ``auto-region:epa`` is only a rough approximation, since it assumes perfect, rectangular lonlat boxes. +If you only need a rectangular, lonlat box which does not cross the antimeridian, you can use +``auto-region:custom_box``, which needs to be combined with the ``domain_info`` parameter and +a box of ``bounds: [minlat, minlon, maxlat, maxlon]``. See :doc:`/users_guide/region_selection` for examples. + +If you have ``regionmask`` installed, you can also use it for advanced region support. +These regions can be arbitrary, and its use require providing ``domain_type`` parameters starting +with ``custom:``. +There are three ways to use ``regionmask``. ``custom:auto_polygon`` lets the user define their own +polygon in the section ``domain_info``, using the keyword ``mask_info``. +``custom:defined_region`` lets the user utilize any region predefined by +`regionmask `__, defined in ``domain_info`` using +the keywords ``name_regiontype`` and ``region``. +The third option is using the keyword `custom:custom_file`, which is defined in ``domain_info`` with +either ``mask_path:path_shapefile_or_geojson`` or ``mask_url:url_of_shapefile_or_geojson``, +``abbrevs``, ``name`` and ``region_name``. See :doc:`/users_guide/region_selection` for examples and a more +detailed explanation. **domain_name:** List of domain names to be plotted. If domain_type = all, all data will be used and the domain_name is used only in the plot title. If diff --git a/docs/getting_started/installation.rst b/docs/getting_started/installation.rst index a3d72a8b..40e173c0 100644 --- a/docs/getting_started/installation.rst +++ b/docs/getting_started/installation.rst @@ -18,6 +18,7 @@ Optional dependencies - ``typer`` (to use the :doc:`/cli`; add ``rich`` `for `__ fancy tracebacks and ``--help``) - ``pooch`` (to enable automatic downloading of :doc:`tutorial datasets `) +- ``regionmask`` (`for complex region masking support `__; can read shapefiles, geojson, arbitrary polygons and predefined regions.) Incompatibilities ----------------- diff --git a/docs/index.rst b/docs/index.rst index 76b522f0..eab33e99 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -49,6 +49,7 @@ MONETIO please refer to: users_guide/supported_stats users_guide/time_chunking users_guide/gridded_datasets + users_guide/region_selection .. toctree:: :hidden: From 76d86dfb971621e7db43f9d239bb5f46c3a7f56d Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 22:49:15 -0700 Subject: [PATCH 16/33] remove changes from surfplots.py --- melodies_monet/plots/surfplots.py | 105 ------------------------------ 1 file changed, 105 deletions(-) mode change 100644 => 100755 melodies_monet/plots/surfplots.py diff --git a/melodies_monet/plots/surfplots.py b/melodies_monet/plots/surfplots.py old mode 100644 new mode 100755 index 9bfe23e6..8e541343 --- a/melodies_monet/plots/surfplots.py +++ b/melodies_monet/plots/surfplots.py @@ -497,111 +497,6 @@ def make_timeseries(df, df_reg=None, column=None, label=None, ax=None, avg_windo ax.set_title(domain_name,fontweight='bold',**text_kwargs) return ax -def make_diurnal_cycle(df, column=None, label=None, ax=None, avg_window=None, ylabel=None, - vmin = None, vmax = None, - domain_type=None, domain_name=None, - plot_dict=None, fig_dict=None, text_dict=None,debug=False, **kwargs): - """Creates timeseries plot. - - Parameters - ---------- - df : pandas.DataFrame - model/obs paired data to plot - column : str - Column label of variable to plot - label : str - Name of variable to use in plot legend - ax : ax - matplotlib ax from previous occurrence so can overlay obs and model - results on the same plot - avg_window : rule - Pandas resampling rule (e.g., 'h', 'D') - ylabel : str - Title of y-axis - vmin : real number - Min value to use on y-axis - vmax : real number - Max value to use on y-axis - domain_type : str - Domain type specified in input yaml file - domain_name : str - Domain name specified in input yaml file - plot_dict : dictionary - Dictionary containing information about plotting for each pair - (e.g., color, linestyle, markerstyle) - fig_dict : dictionary - Dictionary containing information about figure - text_dict : dictionary - Dictionary containing information about text - debug : boolean - Whether to plot interactively (True) or not (False). Flag for - submitting jobs to supercomputer turn off interactive mode. - - Returns - ------- - ax - matplotlib ax such that driver.py can iterate to overlay multiple models on the - same plot - - """ - if debug == False: - plt.ioff() - #First define items for all plots - #set default text size - def_text = dict(fontsize=14) - if text_dict is not None: - text_kwargs = {**def_text, **text_dict} - else: - text_kwargs = def_text - # set ylabel to column if not specified. - if ylabel is None: - ylabel = column - if label is not None: - plot_dict['label'] = label - if vmin is not None and vmax is not None: - plot_dict['ylim'] = [vmin,vmax] - #scale the fontsize for the x and y labels by the text_kwargs - plot_dict['fontsize'] = text_kwargs['fontsize']*0.8 - #Then, if no plot has been created yet, create a plot and plot the obs. - if ax is None: - #First define the colors for the observations. - obs_dict = dict(color='k', linestyle='-',marker='*', linewidth=1.2, markersize=6.) - if plot_dict is not None: - #Whatever is not defined in the yaml file is filled in with the obs_dict here. - plot_kwargs = {**obs_dict, **plot_dict} - else: - plot_kwargs = obs_dict - # create the figure - if fig_dict is not None: - f,ax = plt.subplots(**fig_dict) - else: - f,ax = plt.subplots(figsize=(10,6)) - # plot the line - time = pd.DatetimeIndex(df["time"]) - else: - plot_kwargs = { **dict(linestyle='-', marker='*', linewidth=1.2, markersize=6.), **plot_dict} - ax = df[column].groupby(time.dt.hour).mean().plot(ax=ax, legend=True, **plot_kwargs) - max_vals = df[column].groupby(time.dt.hour).max() - min_vals = df[column].groupby(time.dt.hour).min() - ax.fill_between(max_vals, min_vals, color='grey', alpha=0.2) - - - #Set parameters for all plots - ax.set_ylabel(ylabel,fontweight='bold',**text_kwargs) - ax.set_xlabel("Hour",fontweight='bold',**text_kwargs) - ax.legend(frameon=False,fontsize=text_kwargs['fontsize']*0.8) - ax.tick_params(axis='both',length=10.0,direction='inout') - ax.tick_params(axis='both',which='minor',length=5.0,direction='out') - ax.legend(frameon=False,fontsize=text_kwargs['fontsize']*0.8, - bbox_to_anchor=(1.0, 0.9), loc='center left') - if domain_type is not None and domain_name is not None: - if domain_type == 'epa_region': - ax.set_title('EPA Region ' + domain_name,fontweight='bold',**text_kwargs) - else: - ax.set_title(domain_name,fontweight='bold',**text_kwargs) - return ax - - def make_taylor(df, df_reg=None, column_o=None, label_o='Obs', column_m=None, label_m='Model', dia=None, ylabel=None, ty_scale=1.5, domain_type=None, domain_name=None, From f467367bd622b78e0b6de25d7ef88dc08a0269a2 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 22:56:57 -0700 Subject: [PATCH 17/33] Remove test_regionmask.py, which was incomplete. Better tests are provided in the PR. --- melodies_monet/tests/test_regionsupport.py | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 melodies_monet/tests/test_regionsupport.py diff --git a/melodies_monet/tests/test_regionsupport.py b/melodies_monet/tests/test_regionsupport.py deleted file mode 100644 index 7ed8ccbc..00000000 --- a/melodies_monet/tests/test_regionsupport.py +++ /dev/null @@ -1,15 +0,0 @@ -from melodies_monet import region_select - -import xarray as xr -import numpy as np - -def test_create_custom_mask(): - - np.random.seed(0) - fake_model = 10 + 2 * np.random.randn(30,20,4) - np.random.seed(1) - fake_obs = 10 + 2 * np.random.randn(30, 20, 4) - longitude = np.linspace() - mock_data = xr.Dataset() - - From 0a7bd17acd13c1c4191e844db04cd2a2d0588f41 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 23:03:10 -0700 Subject: [PATCH 18/33] Add region_selection.rst to docs --- docs/users_guide/region_selection.rst | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 docs/users_guide/region_selection.rst diff --git a/docs/users_guide/region_selection.rst b/docs/users_guide/region_selection.rst new file mode 100644 index 00000000..672030ed --- /dev/null +++ b/docs/users_guide/region_selection.rst @@ -0,0 +1,53 @@ +Selecting regions +================= + +MELODIES-MONET has different ways to mask/select a specific region. + +The easiest, and often most precise, is to have that region defined in the observations/paired data. +In this case, the variable/column that defines the region of interest should be selected as +`domain_type` in the YAML file, and the name of the region to select should be provided as `domain_name`. + +If that data are not available in the observations, another option is to provide a lonlat box, which is +defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or `custom_box`. +`Giorgi` regions and a rough, rectangular approximation to `EPA` regions have already been hardcoded into +MELODIES-MONET. +In the case of `EPA` regions, be aware that the approximation is quite rough to force it into a rectangular lonlat box, and although it is probably sufficient for plotting maps, it can lead to errors if used for anything else. +If `auto-region:custom_box` is selected a lonlat box in the form of `bounds: [minlat, minlon, maxlat, maxlon]` needs to be provided in `domain_info` (see example below). +`auto-region:custom_box` has, however, some limitations: `minlon` and `maxlon` need to be in the range of `[-180, 180]`, and the box cannot cross the antimeridian. + +A third, and more sofisticated option, consists in utilizing the optional dependency `regionmask `__. +This is selected by defining `domain_type: custom:xxxxx`, where `xxxxx` can be `auto_polygon`, `defined_region` or `custom_file`. +All of these options require extra data provided in a `domain_info` keyword in the YAML file. +This option includes a multiplicity of capabilities: +* If `auto_polygon` is selected, the vertices of one or more arbitrary polygons need to be provided (anti clockwise). +Currently no holes inside the polygon are supported. +* If `defined_region` is selected, any defined region supported by `regionmask` can be provided in `domain_info`. +* If `custom_file` is provided, the path to a shapefile/geojson file has to be provided. There is no need to decompress `.zip` shapefiles. Alternatively, the download URL can be provided, and the code will download the file automatically. Be aware that if a file with the same name is already in the working directory, it will be silently overwritten. + + +An example of the plotting part of an arbitrary plot for eact type of region is shown below: + +.. code-block:: yaml + + domain_type: ["all", "all", "state_name", "epa_region", "auto-region:giorgi", "auto-region:custom_box", "custom:auto_polygon", "custom:auto_polygon", "custom:defined_region", "custom:custom_file", "custom:custom_file"] + domain_name: ["CONUS", "model", "CO", "R8", "CNA", "R8box", "onepoly", "twopolys", "colorado", "denverfile", "denverurl"] + domain_info: + R8box: + bounds: [39.8, -105.30, 40.2, -105.1] + onepoly: + mask_info: [[-104.968, 39.47], [-104.618, 39.75], [-104.968, 40.06], [-105.32, 39.75]] + twopolys: + mask_info: [[[-104.968, 39.47], [-104.618, 39.75], [-104.968, 40.06], [-105.32, 39.75]], [[-107.474, 37.693], [-108.037, 37.659], [-108.423, 36.97], [-106.444, 36.97], [-106.497, 37.473], [-107.4597, 37.4693]]] + colorado: + name_regiontype: natural_earth_v5_1_2.us_states_10 + region: CO + denverfile: + mask_path: "Colorado_County_Boundaries.zip" + abbrevs: COUNTY + name: Counties + region_name: DENVER + denverurl: + mask_url: "https://hub.arcgis.com/api/v3/datasets/66c2642209684b90af84afcc559a5a02_5/downloads/data?format=shp&spatialRefId=4269&where=1%3D1" + abbrevs: "COUNTY" + name: "Counties" + region_name: "DENVER" From 3de40f2ca7cfa23ff9a98afeaf589f01d04f48fd Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 30 Dec 2024 23:15:44 -0700 Subject: [PATCH 19/33] Add Copyright notice --- melodies_monet/util/region_select.py | 1 + 1 file changed, 1 insertion(+) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index e23e69a3..d33ac926 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: Apache-2.0 """ Masking arbitrary regions with regionmask """ From c99929cb09d7a539f076c34862d9b8b3975cd963 Mon Sep 17 00:00:00 2001 From: blychs Date: Mon, 13 Jan 2025 16:02:24 -0700 Subject: [PATCH 20/33] Change names of region types --- docs/appendix/yaml.rst | 8 ++++---- docs/users_guide/region_selection.rst | 16 ++++++++-------- melodies_monet/util/region_select.py | 18 +++++++++--------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/appendix/yaml.rst b/docs/appendix/yaml.rst index 65744a19..e0b31931 100644 --- a/docs/appendix/yaml.rst +++ b/docs/appendix/yaml.rst @@ -322,18 +322,18 @@ with the columns in the observation file), choose ``auto-region:epa`` or ``auto-region:giorgi``. Take into account that ``auto-region:epa`` is only a rough approximation, since it assumes perfect, rectangular lonlat boxes. If you only need a rectangular, lonlat box which does not cross the antimeridian, you can use -``auto-region:custom_box``, which needs to be combined with the ``domain_info`` parameter and +``auto-region:box``, which needs to be combined with the ``domain_info`` parameter and a box of ``bounds: [minlat, minlon, maxlat, maxlon]``. See :doc:`/users_guide/region_selection` for examples. If you have ``regionmask`` installed, you can also use it for advanced region support. These regions can be arbitrary, and its use require providing ``domain_type`` parameters starting with ``custom:``. -There are three ways to use ``regionmask``. ``custom:auto_polygon`` lets the user define their own +There are three ways to use ``regionmask``. ``custom:polygon`` lets the user define their own polygon in the section ``domain_info``, using the keyword ``mask_info``. -``custom:defined_region`` lets the user utilize any region predefined by +``custom:defined-region`` lets the user utilize any region predefined by `regionmask `__, defined in ``domain_info`` using the keywords ``name_regiontype`` and ``region``. -The third option is using the keyword `custom:custom_file`, which is defined in ``domain_info`` with +The third option is using the keyword `custom:file`, which is defined in ``domain_info`` with either ``mask_path:path_shapefile_or_geojson`` or ``mask_url:url_of_shapefile_or_geojson``, ``abbrevs``, ``name`` and ``region_name``. See :doc:`/users_guide/region_selection` for examples and a more detailed explanation. diff --git a/docs/users_guide/region_selection.rst b/docs/users_guide/region_selection.rst index 672030ed..fa2dae0a 100644 --- a/docs/users_guide/region_selection.rst +++ b/docs/users_guide/region_selection.rst @@ -8,28 +8,28 @@ In this case, the variable/column that defines the region of interest should be `domain_type` in the YAML file, and the name of the region to select should be provided as `domain_name`. If that data are not available in the observations, another option is to provide a lonlat box, which is -defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or `custom_box`. +defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or `box`. `Giorgi` regions and a rough, rectangular approximation to `EPA` regions have already been hardcoded into MELODIES-MONET. In the case of `EPA` regions, be aware that the approximation is quite rough to force it into a rectangular lonlat box, and although it is probably sufficient for plotting maps, it can lead to errors if used for anything else. -If `auto-region:custom_box` is selected a lonlat box in the form of `bounds: [minlat, minlon, maxlat, maxlon]` needs to be provided in `domain_info` (see example below). -`auto-region:custom_box` has, however, some limitations: `minlon` and `maxlon` need to be in the range of `[-180, 180]`, and the box cannot cross the antimeridian. +If `auto-region:box` is selected a lonlat box in the form of `bounds: [minlat, minlon, maxlat, maxlon]` needs to be provided in `domain_info` (see example below). +`auto-region:box` has, however, some limitations: `minlon` and `maxlon` need to be in the range of `[-180, 180]`, and the box cannot cross the antimeridian. A third, and more sofisticated option, consists in utilizing the optional dependency `regionmask `__. -This is selected by defining `domain_type: custom:xxxxx`, where `xxxxx` can be `auto_polygon`, `defined_region` or `custom_file`. +This is selected by defining `domain_type: custom:xxxxx`, where `xxxxx` can be `polygon`, `region` or `file`. All of these options require extra data provided in a `domain_info` keyword in the YAML file. This option includes a multiplicity of capabilities: -* If `auto_polygon` is selected, the vertices of one or more arbitrary polygons need to be provided (anti clockwise). +* If `polygon` is selected, the vertices of one or more arbitrary polygons need to be provided (anti clockwise). Currently no holes inside the polygon are supported. -* If `defined_region` is selected, any defined region supported by `regionmask` can be provided in `domain_info`. -* If `custom_file` is provided, the path to a shapefile/geojson file has to be provided. There is no need to decompress `.zip` shapefiles. Alternatively, the download URL can be provided, and the code will download the file automatically. Be aware that if a file with the same name is already in the working directory, it will be silently overwritten. +* If `region` is selected, any defined region supported by `regionmask` can be provided in `domain_info`. +* If `file` is provided, the path to a shapefile/geojson file has to be provided. There is no need to decompress `.zip` shapefiles. Alternatively, the download URL can be provided, and the code will download the file automatically. Be aware that if a file with the same name is already in the working directory, it will be silently overwritten. An example of the plotting part of an arbitrary plot for eact type of region is shown below: .. code-block:: yaml - domain_type: ["all", "all", "state_name", "epa_region", "auto-region:giorgi", "auto-region:custom_box", "custom:auto_polygon", "custom:auto_polygon", "custom:defined_region", "custom:custom_file", "custom:custom_file"] + domain_type: ["all", "all", "state_name", "epa_region", "auto-region:giorgi", "auto-region:box", "custom:polygon", "custom:polygon", "custom:region", "custom:file", "custom:file"] domain_name: ["CONUS", "model", "CO", "R8", "CNA", "R8box", "onepoly", "twopolys", "colorado", "denverfile", "denverurl"] domain_info: R8box: diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index d33ac926..2ef3bcf7 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -202,13 +202,13 @@ def control_custom_mask(data, domain_type, domain_info=None, **kwargs): """ if "custom" not in domain_type: raise ValueError("If regionmask is used, the domain_type should be starting with 'custom'") - if "auto_polygon" in domain_type: + if "polygon" in domain_type: masked_data = create_custom_mask(data, domain_info["mask_info"]) - elif "defined_region" in domain_type: + elif "defined-region" in domain_type: name_regiontype = domain_info["name_regiontype"] region = domain_info["region"] masked_data = create_predefined_mask(data, name_regiontype, region) - elif "custom_file" in domain_type: + elif "file" in domain_type: params = domain_info params["mask_path"] = domain_info.get("mask_path", None) params["mask_url"] = domain_info.get("mask_url", None) @@ -217,8 +217,8 @@ def control_custom_mask(data, domain_type, domain_info=None, **kwargs): masked_data = create_shapefile_mask(data, **params, **kwargs) else: raise ValueError( - "Could not identify the type of domain. Should be 'auto_polygon'," - + " 'defined_region' or 'custom_file'" + "Could not identify the type of domain. Should be 'polygon'," + + " 'defined-region' or 'file'" ) return masked_data @@ -232,13 +232,13 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): data to be masked domain_type : str type of data. Used to decide which function to apply. - If domain_type == 'auto-region:custom_box', domain_info is required. + If domain_type == 'auto-region:', domain_info is required. domain_name : str This is used as the region name, or to read the info. domain_info: None | dict[str, tuple[float, float, float, float]] if not None, dict containing the domain name and a tuple with latmin, lonmin, latmax, lonmax. Only required if domain_type - is auto-region:custom_box + is auto-region: Returns ------- xr.Dataset | pd.DataFrame @@ -249,7 +249,7 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): bounds = get_epa_region_bounds(acronym=domain_name) elif auto_region_id == "giorgi": bounds = get_giorgi_region_bounds(acronym=domain_name) - elif auto_region_id == "custom_box": + elif auto_region_id == "box": bounds = domain_info["bounds"] else: raise ValueError( @@ -308,7 +308,7 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): if regionmask is None: raise ImportError( "regionmask is not installed, cannot create 'custom' type domain." - + " If your domain is a simple box, try using auto-region:custom_box." + + " If your domain is a simple box, try using auto-region:box." ) if domain_info is None: raise KeyError("If regionmask is used, domain_info must exist.") From ae78b1b84331422c1514f417e6fcc59c4e39d022 Mon Sep 17 00:00:00 2001 From: blychs Date: Wed, 5 Feb 2025 11:42:22 -0700 Subject: [PATCH 21/33] Fix bug using p.obj instead of p_region --- melodies_monet/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index a424acf7..a3045398 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -1547,9 +1547,9 @@ def plotting(self): "sat_grid_clm", "sat_swath_prof"]: # convert index to time; setup for sat_swath_clm - if 'time' not in p.obj.dims and obs_type == 'sat_swath_clm': + if 'time' not in p_region.dims and obs_type == 'sat_swath_clm': - pairdf_all = p.obj.swap_dims({'x':'time'}) + pairdf_all = p_region.swap_dims({'x':'time'}) # squash lat/lon dimensions into single dimension ## 2024-03 MEB rechecking necessity of this. #elif obs_type == 'sat_grid_clm': From 36ad24264faa9178c36a572fe184307c6eb089b7 Mon Sep 17 00:00:00 2001 From: blychs Date: Wed, 5 Feb 2025 15:31:30 -0700 Subject: [PATCH 22/33] Rename auto-region:box as custom:box --- docs/appendix/yaml.rst | 2 +- docs/users_guide/region_selection.rst | 13 +++++++------ melodies_monet/util/region_select.py | 5 ++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/appendix/yaml.rst b/docs/appendix/yaml.rst index e0b31931..9d83c054 100644 --- a/docs/appendix/yaml.rst +++ b/docs/appendix/yaml.rst @@ -322,7 +322,7 @@ with the columns in the observation file), choose ``auto-region:epa`` or ``auto-region:giorgi``. Take into account that ``auto-region:epa`` is only a rough approximation, since it assumes perfect, rectangular lonlat boxes. If you only need a rectangular, lonlat box which does not cross the antimeridian, you can use -``auto-region:box``, which needs to be combined with the ``domain_info`` parameter and +``custom:box``, which needs to be combined with the ``domain_info`` parameter and a box of ``bounds: [minlat, minlon, maxlat, maxlon]``. See :doc:`/users_guide/region_selection` for examples. If you have ``regionmask`` installed, you can also use it for advanced region support. diff --git a/docs/users_guide/region_selection.rst b/docs/users_guide/region_selection.rst index fa2dae0a..a741dfb2 100644 --- a/docs/users_guide/region_selection.rst +++ b/docs/users_guide/region_selection.rst @@ -7,13 +7,14 @@ The easiest, and often most precise, is to have that region defined in the obser In this case, the variable/column that defines the region of interest should be selected as `domain_type` in the YAML file, and the name of the region to select should be provided as `domain_name`. -If that data are not available in the observations, another option is to provide a lonlat box, which is -defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or `box`. +If that data are not available in the observations, another option is to provide a lonlat box, which +can be defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or +as `custom:box`. `Giorgi` regions and a rough, rectangular approximation to `EPA` regions have already been hardcoded into MELODIES-MONET. In the case of `EPA` regions, be aware that the approximation is quite rough to force it into a rectangular lonlat box, and although it is probably sufficient for plotting maps, it can lead to errors if used for anything else. -If `auto-region:box` is selected a lonlat box in the form of `bounds: [minlat, minlon, maxlat, maxlon]` needs to be provided in `domain_info` (see example below). -`auto-region:box` has, however, some limitations: `minlon` and `maxlon` need to be in the range of `[-180, 180]`, and the box cannot cross the antimeridian. +If `custom:box` is selected a lonlat box in the form of `bounds: [minlat, minlon, maxlat, maxlon]` needs to be provided in `domain_info` (see example below). +`custom:box` has, however, some limitations: `minlon` and `maxlon` need to be in the range of `[-180, 180]`, and the box cannot cross the antimeridian. A third, and more sofisticated option, consists in utilizing the optional dependency `regionmask `__. This is selected by defining `domain_type: custom:xxxxx`, where `xxxxx` can be `polygon`, `region` or `file`. @@ -29,8 +30,8 @@ An example of the plotting part of an arbitrary plot for eact type of region is .. code-block:: yaml - domain_type: ["all", "all", "state_name", "epa_region", "auto-region:giorgi", "auto-region:box", "custom:polygon", "custom:polygon", "custom:region", "custom:file", "custom:file"] - domain_name: ["CONUS", "model", "CO", "R8", "CNA", "R8box", "onepoly", "twopolys", "colorado", "denverfile", "denverurl"] + domain_type: ["all", "all", "state_name", "epa_region", "auto-region:giorgi", "custom:box", "custom:polygon", "custom:polygon", "custom:region", "custom:file", "custom:file"] + domain_name: ["CONUS", "model", "CO", "R8", "CNA", "R8box", "onepoly", "twopolys", "colorado", "denverfile", "denverurl"] domain_info: R8box: bounds: [39.8, -105.30, 40.2, -105.1] diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 2ef3bcf7..18fdca70 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -302,13 +302,12 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): if domain_type == "all": return data - if domain_type.startswith("auto-region"): + if domain_type.startswith("auto-region") or (domain_type == "custom-box"): data_masked = create_autoregion(data, domain_type, domain_name, domain_info) elif domain_type.startswith("custom"): if regionmask is None: raise ImportError( - "regionmask is not installed, cannot create 'custom' type domain." - + " If your domain is a simple box, try using auto-region:box." + "regionmask is not installed, try either auto-region: options or custom:box." ) if domain_info is None: raise KeyError("If regionmask is used, domain_info must exist.") From df584e94ef1cc1826f7ab16573b20b741dc83f30 Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Thu, 6 Feb 2025 11:18:47 -0700 Subject: [PATCH 23/33] Update docs/users_guide/region_selection.rst Co-authored-by: Becky Schwantes <38259446+rschwant@users.noreply.github.com> --- docs/users_guide/region_selection.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/users_guide/region_selection.rst b/docs/users_guide/region_selection.rst index a741dfb2..34c3fc25 100644 --- a/docs/users_guide/region_selection.rst +++ b/docs/users_guide/region_selection.rst @@ -7,7 +7,7 @@ The easiest, and often most precise, is to have that region defined in the obser In this case, the variable/column that defines the region of interest should be selected as `domain_type` in the YAML file, and the name of the region to select should be provided as `domain_name`. -If that data are not available in the observations, another option is to provide a lonlat box, which +If the regional metadata are not available in the observations, another option is to provide a lonlat box, which can be defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or as `custom:box`. `Giorgi` regions and a rough, rectangular approximation to `EPA` regions have already been hardcoded into From e9fb351825f348bfb5dc521ec3b527495a7fe3c2 Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Thu, 6 Feb 2025 11:19:00 -0700 Subject: [PATCH 24/33] Update docs/users_guide/region_selection.rst Co-authored-by: Becky Schwantes <38259446+rschwant@users.noreply.github.com> --- docs/users_guide/region_selection.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/users_guide/region_selection.rst b/docs/users_guide/region_selection.rst index 34c3fc25..1099e917 100644 --- a/docs/users_guide/region_selection.rst +++ b/docs/users_guide/region_selection.rst @@ -8,7 +8,7 @@ In this case, the variable/column that defines the region of interest should be `domain_type` in the YAML file, and the name of the region to select should be provided as `domain_name`. If the regional metadata are not available in the observations, another option is to provide a lonlat box, which -can be defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa`, `giorgi` or +can be defined by setting `domain_type: auto-region:xxxxx`, where `xxxxx` can be `epa` or `giorgi`, or setting as `custom:box`. `Giorgi` regions and a rough, rectangular approximation to `EPA` regions have already been hardcoded into MELODIES-MONET. From 215f5e5914c348f4620346fe3fe6cb844250fac5 Mon Sep 17 00:00:00 2001 From: blychs Date: Thu, 6 Feb 2025 11:20:52 -0700 Subject: [PATCH 25/33] Fix typo en custom:box --- melodies_monet/util/region_select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 18fdca70..5f90adea 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -302,7 +302,7 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): if domain_type == "all": return data - if domain_type.startswith("auto-region") or (domain_type == "custom-box"): + if domain_type.startswith("auto-region") or (domain_type == "custom:box"): data_masked = create_autoregion(data, domain_type, domain_name, domain_info) elif domain_type.startswith("custom"): if regionmask is None: From 0327987b51229b9ff67b4be205ed4a3c979a6ba0 Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Mon, 10 Feb 2025 09:42:13 -0700 Subject: [PATCH 26/33] Update melodies_monet/util/region_select.py correct spelling and typos Co-authored-by: Zachary Moon --- melodies_monet/util/region_select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 5f90adea..f2ed9ef5 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -255,7 +255,7 @@ def create_autoregion(data, domain_type, domain_name, domain_info=None): raise ValueError( "Currently, auto-region selections whithout a domain query have only " "been implemented for Giorgi and EPA regions. You asked for " - f"{domain_type!r}. If you need more capabilities, checkout the custom:" + f"{domain_type!r}. If you need more capabilities, check out the custom: " "regions capabilities. Be aware that they require regionmask." ) if isinstance(data, pd.DataFrame): From 863bea7f733e3cb3371bea6785c0ebcad82bda9a Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Mon, 10 Feb 2025 09:43:15 -0700 Subject: [PATCH 27/33] Update melodies_monet/util/region_select.py Add raise_for_status() Co-authored-by: Zachary Moon --- melodies_monet/util/region_select.py | 1 + 1 file changed, 1 insertion(+) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index f2ed9ef5..77d71758 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -37,6 +37,7 @@ def _download_with_name(url, verbose=False): Path to downloaded file """ r = requests.get(url, allow_redirects=True, timeout=10) + r.raise_for_status() fname = re.findall("filename=(.+)", r.headers.get("content-disposition"))[0] if len(fname) == 0: fname = url.rsplit("/", 1)[1] From 8900536b864854672f3e1ae3d6b1a55972bffaa4 Mon Sep 17 00:00:00 2001 From: blychs Date: Thu, 13 Feb 2025 10:39:36 -0700 Subject: [PATCH 28/33] Turn methods private --- melodies_monet/util/region_select.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 77d71758..bf0e2c47 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -14,9 +14,6 @@ try: import geopandas as gp from shapely.geometry import MultiPolygon, Polygon -except ImportError: - gp = None -try: import regionmask except ImportError: regionmask = None @@ -50,7 +47,7 @@ def _download_with_name(url, verbose=False): return fname -def create_custom_mask(data, mask_info): +def _create_custom_mask(data, mask_info): """Creates mask using regionmask. Parameters @@ -92,7 +89,7 @@ def create_custom_mask(data, mask_info): return masked_data -def create_predefined_mask(data, name_regiontype, region=None): +def _create_predefined_mask(data, name_regiontype, region=None): """Creates mask using regionmask. Parameters @@ -126,7 +123,7 @@ def create_predefined_mask(data, name_regiontype, region=None): return selected_region -def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, **kwargs): +def _create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, **kwargs): """Creates mask from shapefile using regionmask and geopandas. Parameters @@ -181,7 +178,7 @@ def create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None, return selected_region -def control_custom_mask(data, domain_type, domain_info=None, **kwargs): +def _control_custom_mask(data, domain_type, domain_info=None, **kwargs): """Parses region information to return the right type of data. Parameters @@ -204,18 +201,18 @@ def control_custom_mask(data, domain_type, domain_info=None, **kwargs): if "custom" not in domain_type: raise ValueError("If regionmask is used, the domain_type should be starting with 'custom'") if "polygon" in domain_type: - masked_data = create_custom_mask(data, domain_info["mask_info"]) + masked_data = _create_custom_mask(data, domain_info["mask_info"]) elif "defined-region" in domain_type: name_regiontype = domain_info["name_regiontype"] region = domain_info["region"] - masked_data = create_predefined_mask(data, name_regiontype, region) + masked_data = _create_predefined_mask(data, name_regiontype, region) elif "file" in domain_type: params = domain_info params["mask_path"] = domain_info.get("mask_path", None) params["mask_url"] = domain_info.get("mask_url", None) params["region_name"] = domain_info.get("region_name", None) params["abbrevs"] = domain_info.get("abbrevs", "_from_name") - masked_data = create_shapefile_mask(data, **params, **kwargs) + masked_data = _create_shapefile_mask(data, **params, **kwargs) else: raise ValueError( "Could not identify the type of domain. Should be 'polygon'," @@ -312,7 +309,7 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): ) if domain_info is None: raise KeyError("If regionmask is used, domain_info must exist.") - data_masked = control_custom_mask(data, domain_type, domain_info, **kwargs) + data_masked = _control_custom_mask(data, domain_type, domain_info, **kwargs) else: if isinstance(data, pd.DataFrame): data_masked = data.query(domain_type + " == " + '"' + domain_name + '"') From eeb7f37447059cab3c15195170e55c28d873f51f Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Thu, 13 Feb 2025 10:44:19 -0700 Subject: [PATCH 29/33] Update melodies_monet/util/region_select.py Make the content_disposition more robust if missing Co-authored-by: Zachary Moon --- melodies_monet/util/region_select.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index bf0e2c47..5f27deae 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -35,11 +35,14 @@ def _download_with_name(url, verbose=False): """ r = requests.get(url, allow_redirects=True, timeout=10) r.raise_for_status() - fname = re.findall("filename=(.+)", r.headers.get("content-disposition"))[0] - if len(fname) == 0: - fname = url.rsplit("/", 1)[1] - if fname[0] == '"': - fname = fname[1:-1] + fname = None + content_disposition = r.headers.get("Content-Disposition") + if content_disposition: + m = re.search(r"filename=(.+?)(?:;|$)", content_disposition) + if m is not None: + fname = m.group(1).strip('"') + if fname is None: + fname = url.rsplit("/", 1)[-1] with open(fname, "wb") as f: f.write(r.content) if verbose: From 8c9a90a46735f6dfd6d96fa92a249209310d90fe Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Thu, 13 Feb 2025 10:45:40 -0700 Subject: [PATCH 30/33] Update melodies_monet/util/region_select.py boolean type hint to bool Co-authored-by: Zachary Moon --- melodies_monet/util/region_select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 5f27deae..00b06930 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -26,7 +26,7 @@ def _download_with_name(url, verbose=False): ----------- url : str URL of file to download - verbose : boolean + verbose : bool Whether to add verbosity Returns ------- From c8d69bd6b160a4ab660a911785fc1c14923c3240 Mon Sep 17 00:00:00 2001 From: Pablo Lichtig Date: Thu, 13 Feb 2025 10:45:56 -0700 Subject: [PATCH 31/33] Update melodies_monet/util/region_select.py Add space in docs Co-authored-by: Zachary Moon --- melodies_monet/util/region_select.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 00b06930..5d4b2bf7 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -153,8 +153,8 @@ def _create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None if mask_url is not None and mask_path is not None: warnings.warn( - "mask_url and mask_path provided. Only one can be used." - + "Selecting mask_path and discarding URL." + "mask_url and mask_path provided. Only one can be used. " + "Selecting mask_path and discarding URL." ) if mask_path is not None: From 2062485314820f18276a6ee4d913e7ecc0be6ebe Mon Sep 17 00:00:00 2001 From: blychs Date: Thu, 13 Feb 2025 11:17:27 -0700 Subject: [PATCH 32/33] Change location of error message and make function public --- melodies_monet/util/region_select.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index 5d4b2bf7..d945d6f5 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -181,7 +181,7 @@ def _create_shapefile_mask(data, mask_path=None, mask_url=None, region_name=None return selected_region -def _control_custom_mask(data, domain_type, domain_info=None, **kwargs): +def control_custom_mask(data, domain_type, domain_info=None, **kwargs): """Parses region information to return the right type of data. Parameters @@ -201,6 +201,13 @@ def _control_custom_mask(data, domain_type, domain_info=None, **kwargs): xr.Dataset masked Dataset """ + if regionmask is None: + raise ImportError( + "regionmask is not installed, try alternative functions." + + " create_autoregion can probably do the trick." + ) + if domain_info is None: + raise KeyError("If regionmask is used, domain_info must exist.") if "custom" not in domain_type: raise ValueError("If regionmask is used, the domain_type should be starting with 'custom'") if "polygon" in domain_type: @@ -306,13 +313,7 @@ def select_region(data, domain_type, domain_name, domain_info=None, **kwargs): if domain_type.startswith("auto-region") or (domain_type == "custom:box"): data_masked = create_autoregion(data, domain_type, domain_name, domain_info) elif domain_type.startswith("custom"): - if regionmask is None: - raise ImportError( - "regionmask is not installed, try either auto-region: options or custom:box." - ) - if domain_info is None: - raise KeyError("If regionmask is used, domain_info must exist.") - data_masked = _control_custom_mask(data, domain_type, domain_info, **kwargs) + data_masked = control_custom_mask(data, domain_type, domain_info, **kwargs) else: if isinstance(data, pd.DataFrame): data_masked = data.query(domain_type + " == " + '"' + domain_name + '"') From 8a2c91c75f5fc4743c5aee3a331a1db64d7c24e6 Mon Sep 17 00:00:00 2001 From: blychs Date: Thu, 13 Feb 2025 11:34:48 -0700 Subject: [PATCH 33/33] Update license header --- melodies_monet/util/region_select.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/melodies_monet/util/region_select.py b/melodies_monet/util/region_select.py index d945d6f5..a32afe25 100644 --- a/melodies_monet/util/region_select.py +++ b/melodies_monet/util/region_select.py @@ -1,4 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +# + """ Masking arbitrary regions with regionmask """