From ff6ad97c5c404dac271150e01dd054b5276ad522 Mon Sep 17 00:00:00 2001 From: Brian Blaylock Date: Fri, 29 Dec 2023 07:04:12 -0800 Subject: [PATCH 1/4] fix some docstrings formatting --- herbie/core.py | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/herbie/core.py b/herbie/core.py index fa12117d..ac2c6aba 100644 --- a/herbie/core.py +++ b/herbie/core.py @@ -210,9 +210,7 @@ def __init__( verbose=config["default"].get("verbose", True), **kwargs, ): - """ - Specify model output and find GRIB2 file at one of the sources. - """ + """Specify model output and find GRIB2 file at one of the sources.""" self.fxx = fxx if isinstance(self.fxx, (str, pd.Timedelta)): @@ -291,7 +289,7 @@ def __init__( # ANSI colors added for style points if any([self.grib is not None, self.idx is not None]): print( - f"✅ Found", + "✅ Found", f"┊ model={self.model}", f"┊ {ANSI.italic}product={self.product}{ANSI.reset}", f"┊ {ANSI.green}{self.date:%Y-%b-%d %H:%M UTC}{ANSI.bright_green} F{self.fxx:02d}{ANSI.reset}", @@ -300,14 +298,14 @@ def __init__( ) else: print( - f"💔 Did not find", + "💔 Did not find", f"┊ model={self.model}", f"┊ {ANSI.italic}product={self.product}{ANSI.reset}", f"┊ {ANSI.green}{self.date:%Y-%b-%d %H:%M UTC}{ANSI.bright_green} F{self.fxx:02d}{ANSI.reset}", ) def __repr__(self): - """Representation in Notebook""" + """Representation in Notebook.""" msg = ( f"{ANSI.herbie} {self.model.upper()} model", f"{ANSI.italic}{self.product}{ANSI.reset} product initialized", @@ -323,7 +321,7 @@ def __str__(self): return " ".join(msg) def tell_me_everything(self): - """Print all the attributes of the Herbie object""" + """Print all the attributes of the Herbie object.""" msg = [] for i in dir(self): if isinstance(getattr(self, i), (int, str, dict)): @@ -333,12 +331,11 @@ def tell_me_everything(self): print(msg) def __logo__(self): - """For Fun, show the Herbie Logo""" + """For Fun, show the Herbie Logo.""" print(ANSI.ascii) def 
_validate(self): - """Validate the Herbie class input arguments""" - + """Validate the Herbie class input arguments.""" # Accept model alias if self.model.lower() == "alaska": self.model = "hrrrak" @@ -423,20 +420,19 @@ def _check_idx(self, url, verbose=False): if verbose: print( - f"⚠ Herbie didn't find any inventory files that", + "⚠ Herbie didn't find any inventory files that", f"exists from {self.IDX_SUFFIX}", ) return False, None def find_grib(self): - """Find a GRIB file from the archive sources + """Find a GRIB file from the archive sources. Returns ------- 1) The URL or pathlib.Path to the GRIB2 files that exists 2) The source of the GRIB2 file """ - # But first, check if the GRIB2 file exists locally. local_grib = self.get_localFilePath() if local_grib.exists() and not self.overwrite: @@ -476,7 +472,7 @@ def find_grib(self): return [None, None] def find_idx(self): - """Find an index file for the GRIB file""" + """Find an index file for the GRIB file.""" # If priority list is set, we want to search SOURCES in that # priority order. If priority is None, then search all SOURCES # in the order given by the model template file. @@ -525,7 +521,7 @@ def get_localFileName(self): return self.LOCALFILE def get_localFilePath(self, searchString=None): - """Get full path to the local file""" + """Get full path to the local file.""" # Predict the localFileName from the first model template SOURCE. 
localFilePath = ( @@ -590,7 +586,7 @@ def get_localFilePath(self, searchString=None): @functools.cached_property def index_as_dataframe(self): - """Read and cache the full index file""" + """Read and cache the full index file.""" if self.grib_source == "local" and wgrib2: # Generate IDX inventory with wgrib2 @@ -867,9 +863,7 @@ def _reporthook(a, b, c): ) def subset(searchString, outFile): - """ - Download a subset specified by the regex searchString - """ + """Download a subset specified by the regex searchString.""" # TODO: Maybe optimize downloading multiple subsets with MultiThreading # TODO An alternative to downloadling subset with curl is @@ -920,7 +914,7 @@ def subset(searchString, outFile): else: # ...all other messages are appended to the subset file. curl = f'''curl -s --range {range} "{grib_source}" >> "{outFile}"''' - + if verbose: print(curl) os.system(curl) @@ -1024,7 +1018,7 @@ def xarray( **download_kwargs, ): """ - Open GRIB2 data as xarray DataSet + Open GRIB2 data as xarray DataSet. Parameters ---------- @@ -1034,7 +1028,6 @@ def xarray( If True, grib file will be removed ONLY IF it didn't exist before we downloaded it. 
""" - download_kwargs = {**dict(overwrite=False), **download_kwargs} local_file = self.get_localFilePath(searchString=searchString) @@ -1151,7 +1144,7 @@ def xarray( # Shortcut Methods below def terrain(self, water_masked=True): - """Return model terrain as an xarray.Dataset""" + """Return model terrain as an xarray.Dataset.""" ds = self.xarray(":(?:HGT|LAND):surface") if water_masked: ds["orog"] = ds.orog.where(ds.lsm > 0) From ea919f6b21f2be3b0bd5284612b92cef094f81b5 Mon Sep 17 00:00:00 2001 From: Brian Blaylock Date: Fri, 29 Dec 2023 08:01:58 -0800 Subject: [PATCH 2/4] some cleanup --- environment-dev.yml | 6 +++--- herbie/help.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 176fc194..065d3966 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -14,7 +14,7 @@ dependencies: # =========== # Handy Tools # =========== - # - cartopy>=0.22 + - cartopy>=0.22 - cfgrib>=0.9.10.4 - eccodes>=2.31 - geopandas @@ -64,8 +64,8 @@ dependencies: - myst-parser - linkify-it-py - - pip: - - cartopy>=0.22 + #- pip: + #- cartopy>=0.22 # Herbie: Development version from GitHub #- git+https://github.com/blaylockbk/Herbie.git diff --git a/herbie/help.py b/herbie/help.py index b6e99419..9575d148 100644 --- a/herbie/help.py +++ b/herbie/help.py @@ -23,7 +23,7 @@ def _searchString_help(kind="wgrib2"): """ if kind == "wgrib2": - msg = """ + msg = r""" Use regular expression to search for lines in the index file. 
Here are some examples you can use for the wgrib2-style `searchString` From 06c3febf9fa4b03d79663bf2215011328475ed53 Mon Sep 17 00:00:00 2001 From: Brian Blaylock Date: Fri, 29 Dec 2023 08:02:10 -0800 Subject: [PATCH 3/4] fixed bug in curl range for submessages --- herbie/core.py | 9 +++++++++ tests/test_rap.py | 25 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/herbie/core.py b/herbie/core.py index ac2c6aba..3b3e518b 100644 --- a/herbie/core.py +++ b/herbie/core.py @@ -904,10 +904,19 @@ def subset(searchString, outFile): print( f" {row.grib_message:<3g} {ANSI.orange}{row.search_this}{ANSI.reset}" ) + range = f"{curl_group.start_byte.min():.0f}-{curl_group.end_byte.max():.0f}".replace( "nan", "" ) + if curl_group.end_byte.max() - curl_group.start_byte.min() < 0: + # The byte range for GRIB submessages (like in the + # RAP model's UGRD/VGRD) needs to be handled differently. + # See https://github.com/blaylockbk/Herbie/issues/259 + if verbose: + print(f" ERROR: Invalid cURL range {range}; Skip message.") + continue + if i == 1: # If we are working on the first item, overwrite the existing file... curl = f'''curl -s --range {range} "{grib_source}" > "{outFile}"''' diff --git a/tests/test_rap.py b/tests/test_rap.py index 88065b2e..2f7b0cf9 100644 --- a/tests/test_rap.py +++ b/tests/test_rap.py @@ -10,7 +10,7 @@ from herbie import Herbie from tests.util import is_time_between -today = pd.to_datetime("today").floor("1D") +today = pd.to_datetime("today").floor("1D")-pd.to_timedelta('1D') save_dir = "$TMPDIR/Herbie-Tests/" @@ -69,3 +69,26 @@ def test_rap_ncei(): save_dir=save_dir, ) assert H.grib is not None + + +# =========================== +# Check Downloaded File Sizes +# =========================== + + +def test_rap_file_size_subset1(): + """Test that the U/V wind components are downloaded correctly.
+ + This test is important for the RAP model which uses + GRIB submessages for storing UGRD and VGRD + + See https://github.com/blaylockbk/Herbie/issues/259 + """ + var = ":.GRD:10 m" + H = Herbie('2023-12-01', model="rap", save_dir=save_dir, overwrite=True) + H.download(var) + + idx = H.inventory(var) + stated_size = ((idx.end_byte + 1) - idx.start_byte).sum() + + assert stated_size == H.get_localFilePath(var).stat().st_size From 8442e77178611d65bc359121def99399ad584c9b Mon Sep 17 00:00:00 2001 From: Brian Blaylock Date: Fri, 29 Dec 2023 08:08:30 -0800 Subject: [PATCH 4/4] apply ruff format --- tests/test_rap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_rap.py b/tests/test_rap.py index 2f7b0cf9..65a43802 100644 --- a/tests/test_rap.py +++ b/tests/test_rap.py @@ -10,7 +10,7 @@ from herbie import Herbie from tests.util import is_time_between -today = pd.to_datetime("today").floor("1D")-pd.to_timedelta('1D') +today = pd.to_datetime("today").floor("1D") - pd.to_timedelta("1D") save_dir = "$TMPDIR/Herbie-Tests/" @@ -85,7 +85,7 @@ def test_rap_file_size_subset1(): See https://github.com/blaylockbk/Herbie/issues/259 """ var = ":.GRD:10 m" - H = Herbie('2023-12-01', model="rap", save_dir=save_dir, overwrite=True) + H = Herbie("2023-12-01", model="rap", save_dir=save_dir, overwrite=True) H.download(var) idx = H.inventory(var)