Skip to content

Commit

Permalink
Merge pull request #260 from blaylockbk/259-2023123-unable-to-subset-…
Browse files Browse the repository at this point in the history
…by-multiple-items-in-searchstring-with-herbiexarray-but-works-for-herbieinventory

Bug Fix: Skip downloading invalid cURL range, such as in the RAP model wind messages.
  • Loading branch information
blaylockbk authored Dec 29, 2023
2 parents 06506b6 + 8442e77 commit 2260d3e
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 28 deletions.
6 changes: 3 additions & 3 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
# ===========
# Handy Tools
# ===========
# - cartopy>=0.22
- cartopy>=0.22
- cfgrib>=0.9.10.4
- eccodes>=2.31
- geopandas
Expand Down Expand Up @@ -64,8 +64,8 @@ dependencies:
- myst-parser
- linkify-it-py

- pip:
- cartopy>=0.22
#- pip:
#- cartopy>=0.22
# Herbie: Development version from GitHub
#- git+https://github.com/blaylockbk/Herbie.git

Expand Down
48 changes: 25 additions & 23 deletions herbie/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,7 @@ def __init__(
verbose=config["default"].get("verbose", True),
**kwargs,
):
"""
Specify model output and find GRIB2 file at one of the sources.
"""
"""Specify model output and find GRIB2 file at one of the sources."""
self.fxx = fxx

if isinstance(self.fxx, (str, pd.Timedelta)):
Expand Down Expand Up @@ -291,7 +289,7 @@ def __init__(
# ANSI colors added for style points
if any([self.grib is not None, self.idx is not None]):
print(
f"✅ Found",
"✅ Found",
f"┊ model={self.model}",
f"┊ {ANSI.italic}product={self.product}{ANSI.reset}",
f"┊ {ANSI.green}{self.date:%Y-%b-%d %H:%M UTC}{ANSI.bright_green} F{self.fxx:02d}{ANSI.reset}",
Expand All @@ -300,14 +298,14 @@ def __init__(
)
else:
print(
f"💔 Did not find",
"💔 Did not find",
f"┊ model={self.model}",
f"┊ {ANSI.italic}product={self.product}{ANSI.reset}",
f"┊ {ANSI.green}{self.date:%Y-%b-%d %H:%M UTC}{ANSI.bright_green} F{self.fxx:02d}{ANSI.reset}",
)

def __repr__(self):
"""Representation in Notebook"""
"""Representation in Notebook."""
msg = (
f"{ANSI.herbie} {self.model.upper()} model",
f"{ANSI.italic}{self.product}{ANSI.reset} product initialized",
Expand All @@ -323,7 +321,7 @@ def __str__(self):
return " ".join(msg)

def tell_me_everything(self):
"""Print all the attributes of the Herbie object"""
"""Print all the attributes of the Herbie object."""
msg = []
for i in dir(self):
if isinstance(getattr(self, i), (int, str, dict)):
Expand All @@ -333,12 +331,11 @@ def tell_me_everything(self):
print(msg)

def __logo__(self):
"""For Fun, show the Herbie Logo"""
"""For Fun, show the Herbie Logo."""
print(ANSI.ascii)

def _validate(self):
"""Validate the Herbie class input arguments"""

"""Validate the Herbie class input arguments."""
# Accept model alias
if self.model.lower() == "alaska":
self.model = "hrrrak"
Expand Down Expand Up @@ -423,20 +420,19 @@ def _check_idx(self, url, verbose=False):

if verbose:
print(
f"⚠ Herbie didn't find any inventory files that",
"⚠ Herbie didn't find any inventory files that",
f"exists from {self.IDX_SUFFIX}",
)
return False, None

def find_grib(self):
"""Find a GRIB file from the archive sources
"""Find a GRIB file from the archive sources.
Returns
-------
1) The URL or pathlib.Path to the GRIB2 file that exists
2) The source of the GRIB2 file
"""

# But first, check if the GRIB2 file exists locally.
local_grib = self.get_localFilePath()
if local_grib.exists() and not self.overwrite:
Expand Down Expand Up @@ -476,7 +472,7 @@ def find_grib(self):
return [None, None]

def find_idx(self):
"""Find an index file for the GRIB file"""
"""Find an index file for the GRIB file."""
# If priority list is set, we want to search SOURCES in that
# priority order. If priority is None, then search all SOURCES
# in the order given by the model template file.
Expand Down Expand Up @@ -525,7 +521,7 @@ def get_localFileName(self):
return self.LOCALFILE

def get_localFilePath(self, searchString=None):
"""Get full path to the local file"""
"""Get full path to the local file."""

# Predict the localFileName from the first model template SOURCE.
localFilePath = (
Expand Down Expand Up @@ -590,7 +586,7 @@ def get_localFilePath(self, searchString=None):

@functools.cached_property
def index_as_dataframe(self):
"""Read and cache the full index file"""
"""Read and cache the full index file."""

if self.grib_source == "local" and wgrib2:
# Generate IDX inventory with wgrib2
Expand Down Expand Up @@ -867,9 +863,7 @@ def _reporthook(a, b, c):
)

def subset(searchString, outFile):
"""
Download a subset specified by the regex searchString
"""
"""Download a subset specified by the regex searchString."""
# TODO: Maybe optimize downloading multiple subsets with MultiThreading

# TODO An alternative to downloading subset with curl is
Expand Down Expand Up @@ -910,17 +904,26 @@ def subset(searchString, outFile):
print(
f" {row.grib_message:<3g} {ANSI.orange}{row.search_this}{ANSI.reset}"
)

range = f"{curl_group.start_byte.min():.0f}-{curl_group.end_byte.max():.0f}".replace(
"nan", ""
)

if curl_group.end_byte.max() - curl_group.start_byte.min() < 0:
# The byte range for GRIB submessages (like in the
# RAP model's UGRD/VGRD) needs to be handled differently.
# See https://github.com/blaylockbk/Herbie/issues/259
if verbose:
print(f" ERROR: Invalid cURL range {range}; Skip message.")
continue

if i == 1:
# If we are working on the first item, overwrite the existing file...
curl = f'''curl -s --range {range} "{grib_source}" > "{outFile}"'''
else:
# ...all other messages are appended to the subset file.
curl = f'''curl -s --range {range} "{grib_source}" >> "{outFile}"'''

if verbose:
print(curl)
os.system(curl)
Expand Down Expand Up @@ -1024,7 +1027,7 @@ def xarray(
**download_kwargs,
):
"""
Open GRIB2 data as xarray DataSet
Open GRIB2 data as xarray DataSet.
Parameters
----------
Expand All @@ -1034,7 +1037,6 @@ def xarray(
If True, grib file will be removed ONLY IF it didn't exist
before we downloaded it.
"""

download_kwargs = {**dict(overwrite=False), **download_kwargs}

local_file = self.get_localFilePath(searchString=searchString)
Expand Down Expand Up @@ -1151,7 +1153,7 @@ def xarray(

# Shortcut Methods below
def terrain(self, water_masked=True):
"""Return model terrain as an xarray.Dataset"""
"""Return model terrain as an xarray.Dataset."""
ds = self.xarray(":(?:HGT|LAND):surface")
if water_masked:
ds["orog"] = ds.orog.where(ds.lsm > 0)
Expand Down
2 changes: 1 addition & 1 deletion herbie/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _searchString_help(kind="wgrib2"):
"""

if kind == "wgrib2":
msg = """
msg = r"""
Use regular expression to search for lines in the index file.
Here are some examples you can use for the wgrib2-style `searchString`
Expand Down
25 changes: 24 additions & 1 deletion tests/test_rap.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from herbie import Herbie
from tests.util import is_time_between

today = pd.to_datetime("today").floor("1D")
today = pd.to_datetime("today").floor("1D") - pd.to_timedelta("1D")
save_dir = "$TMPDIR/Herbie-Tests/"


Expand Down Expand Up @@ -69,3 +69,26 @@ def test_rap_ncei():
save_dir=save_dir,
)
assert H.grib is not None


# ===========================
# Check Downloaded File Sizes
# ===========================


def test_rap_file_size_subset1():
    """Check that the subset of 10 m U/V winds has the size its inventory states.

    The RAP model stores UGRD and VGRD as GRIB submessages, which is why
    this case gets its own test.
    See https://github.com/blaylockbk/Herbie/issues/259
    """
    search = ":.GRD:10 m"
    herbie = Herbie("2023-12-01", model="rap", save_dir=save_dir, overwrite=True)
    herbie.download(search)

    # Expected size: for every matching inventory row, take
    # (end_byte + 1) - start_byte, then add the rows together.
    inventory = herbie.inventory(search)
    bytes_per_message = (inventory.end_byte + 1) - inventory.start_byte
    expected_size = bytes_per_message.sum()

    # Size of the subset file that was actually written to disk.
    subset_path = herbie.get_localFilePath(search)
    assert expected_size == subset_path.stat().st_size

0 comments on commit 2260d3e

Please sign in to comment.