Skip to content

Commit 01f8c03

Browse files
committed
Simplify/reduce arguments needed to load IBTrACS
1 parent 72a6d7f commit 01f8c03

File tree

3 files changed

+56
-77
lines changed

3 files changed

+56
-77
lines changed

huracanpy/_data/_load.py

+8-50
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ def load(
2525
variable_names=None,
2626
rename=dict(),
2727
baselon=None,
28-
ibtracs_online=False,
2928
ibtracs_subset="wmo",
30-
ibtracs_clean=True,
3129
tempest_extremes_unstructured=False,
3230
tempest_extremes_header_str="start",
3331
track_calendar=None,
@@ -37,13 +35,15 @@ def load(
3735
3836
The optional parameters for different sources of tracks (currently **IBTrACS**,
3937
**TRACK** and **TempestExtremes**) are named {source}_{parameter} (in lower case),
40-
e.g. "ibtracs_online".
38+
e.g. "ibtracs_subset".
4139
4240
Parameters
4341
----------
4442
filename : str, optional
4543
The file to be loaded. If `source="ibtracs"`, this is not needed as the data is
46-
either included in huracanpy or downloaded when called
44+
either included in huracanpy or downloaded when called. If the filename is
45+
provided for an online IBTrACS subset, then the raw downloaded data will be
46+
saved there.
4747
source : str, optional
4848
If the file is not a CSV or NetCDF (identified by the file extension) then the
4949
source needs to be specified to decide how to load the data
@@ -76,15 +76,12 @@ def load(
7676
baselon : scalar, optional
7777
Force the loaded longitudes into the range (baselon, baselon + 360). e.g.
7878
(0, 360) or (-180, 180)
79-
ibtracs_online : bool, default=False
80-
* **False**: Use a small subset of the IBTrACS data included in this package
81-
* **True**: Download the IBTrACS data
82-
ibtracs_subset : str, default="ALL"
79+
ibtracs_subset : str, default="wmo"
8380
IBTrACS subset. When loading offline data it is one of
8481
85-
* **WMO**: Data with the wmo_* variables. The data as reported by the WMO agency
82+
* **wmo**: Data with the wmo_* variables. The data as reported by the WMO agency
8683
responsible for each basin, so methods are not consistent across basins
87-
* **USA** or **JTWC**: Data with the usa_* variables. The data as recorded by
84+
* **usa** or **jtwc**: Data with the usa_* variables. The data as recorded by
8885
the USA/Joint Typhoon Warning Centre. Methods are consistent across basins,
8986
but may not be complete.
9087
@@ -98,10 +95,6 @@ def load(
9895
* **since1980**: Entire IBTrACS database since 1980 (advent of satellite era,
9996
considered reliable from then on)
10097
101-
ibtracs_clean : bool, default=True
102-
If downloading IBTrACS data, this parameter says whether to delete the
103-
downloaded file after loading it into memory.
104-
10598
tempest_extremes_unstructured : bool, default=False,
10699
By default the first two columns in TempestExtremes files are the i, j indices
107100
of the closest gridpoint, but for unstructured grids it is a single lookup index
@@ -169,42 +162,7 @@ def load(
169162
tempest_extremes_header_str,
170163
)
171164
elif source.lower() == "ibtracs":
172-
if ibtracs_online:
173-
if filename is None:
174-
filename = "ibtracs.csv"
175-
176-
with ibtracs.online(ibtracs_subset, filename, ibtracs_clean) as f:
177-
# Put IBTrACS specific arguments to read_csv second, so it
178-
# overwrites any arguments passed
179-
kwargs = {
180-
**kwargs,
181-
**dict(
182-
header=0,
183-
skiprows=[1],
184-
converters={
185-
"SID": str,
186-
"SEASON": int,
187-
"BASIN": str,
188-
"SUBBASIN": str,
189-
"LON": float,
190-
"LAT": float,
191-
},
192-
),
193-
}
194-
return load(
195-
filename=f,
196-
source="csv",
197-
rename=rename,
198-
baselon=baselon,
199-
**kwargs,
200-
)
201-
else:
202-
return load(
203-
filename=ibtracs.offline(ibtracs_subset),
204-
rename=rename,
205-
baselon=baselon,
206-
**kwargs,
207-
)
165+
data = ibtracs.load(ibtracs_subset, filename, **kwargs)
208166
else:
209167
raise ValueError(f"Source {source} unsupported or misspelled")
210168

huracanpy/_data/ibtracs.py

+47-26
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,44 @@
11
"""Module for functions related to the ibtracs database"""
22

33
from urllib.request import urlretrieve
4-
import os
54
import warnings
65
import pathlib
7-
from contextlib import contextmanager
6+
7+
from . import _csv
88

99
here = pathlib.Path(__file__).parent
1010
ibdata_dir = here / "_ibtracs_files/"
1111

1212
wmo_file = str(ibdata_dir / "wmo.csv")
1313
usa_file = str(ibdata_dir / "usa.csv")
1414

15-
16-
@contextmanager
17-
def online(subset, filename="ibtracs.csv", clean=True):
15+
# Keyword arguments forced onto the CSV reader whenever a downloaded IBTrACS file is
# loaded (they are merged over any user-supplied kwargs in `online`).
# This must be a plain dict: wrapping it as `(dict(...),)` creates a 1-tuple, which
# cannot be unpacked with `**` and raises a TypeError.
online_default_kwargs = dict(
    # The first row holds the column names
    header=0,
    # The second row is skipped (presumably the units row of the raw IBTrACS CSV
    # - TODO confirm against the file format)
    skiprows=[1],
    # Force the types of the identifying/coordinate columns
    converters={
        "SID": str,
        "SEASON": int,
        "BASIN": str,
        "SUBBASIN": str,
        "LON": float,
        "LAT": float,
    },
)
29+
30+
31+
def load(subset, filename, **kwargs):
    """
    Load an IBTrACS subset, from the packaged files or from the online archive.

    Parameters
    ----------
    subset : str
        Name of the IBTrACS subset (case-insensitive). "wmo", "usa" and "jtwc" are
        loaded with `offline`; any other value is passed to `online`.
    filename : str or None
        Passed to `online` as the file to download the raw data to. Ignored for the
        offline subsets.
    **kwargs
        Extra keyword arguments passed to `online`. Ignored for the offline subsets.

    Returns
    -------
    The result of `offline` or `online` for the requested subset
    """
    # Keep this list in sync with the names handled by `offline`. "jtwc" is an alias
    # of "usa" there; without it here the alias would be sent to `online`, which
    # would try to download a subset file named after it.
    if subset.lower() in ["wmo", "usa", "jtwc"]:
        return offline(subset)
    else:
        return online(subset, filename=filename, **kwargs)
36+
37+
38+
def online(subset, filename=None, **kwargs):
1839
"""
19-
Downloads an load into the current workspace the specified ibtracs subset from the IBTrACS archive online.
40+
Downloads and loads into the current workspace the specified ibtracs subset from the
41+
IBTrACS archive online.
2042
2143
Parameters
2244
----------
@@ -26,35 +48,34 @@ def online(subset, filename="ibtracs.csv", clean=True):
2648
* ALL: Entire IBTrACS database
2749
* Specific basins: EP, NA, NI, SA, SI, SP, WP
2850
* last3years: self-explanatory
29-
* since1980: Entire IBTrACS database since 1980 (advent of satellite era, considered reliable from then on)
30-
31-
filename : str
32-
(temporary) file to which to save the data
33-
The default is "tmp/ibtracs_ACTIVE.csv".
51+
* since1980: Entire IBTrACS database since 1980 (advent of satellite era,
52+
considered reliable from then on)
3453
35-
clean : bool
36-
If True (default), remove the temporary file after loading the data.
54+
filename : str, optional
55+
file to which to save the raw data. None to use a temporary file. Default is
56+
None
3757
3858
Returns
3959
-------
40-
ib : the IBTrACS subset requested
41-
60+
xarray.DataArray
61+
the IBTrACS subset requested
4262
"""
43-
# TODO: Make it so that the user does not need to specify the filename
63+
# Put IBTrACS specific arguments to read_csv second, so it
64+
# overwrites any arguments passed
65+
kwargs = {**kwargs, **online_default_kwargs}
66+
4467
url = (
45-
"https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/ibtracs."
46-
+ subset
47-
+ ".list.v04r01.csv"
68+
"https://www.ncei.noaa.gov/data/"
69+
"international-best-track-archive-for-climate-stewardship-ibtracs/"
70+
f"v04r01/access/csv/ibtracs.{subset}.list.v04r01.csv"
4871
)
4972

73+
# filename=None downloads the data to a temporary file
5074
# Ruff (Flake8 bandit) complains that this url isn't checked, but it explicitly has
5175
# "https://" at the start anyway
52-
urlretrieve(url, filename) # noqa: S310
53-
54-
yield filename
76+
filename, _ = urlretrieve(url, filename) # noqa: S310
5577

56-
if clean:
57-
os.remove(filename) # Somehow, this is slower than the rest ofthe function (??)
78+
return _csv.load(filename, **kwargs)
5879

5980

6081
def offline(subset="wmo"):
@@ -103,9 +124,9 @@ def offline(subset="wmo"):
103124
which means in particular that wind speeds are in knots and averaged over different time periods.\n\
104125
For more information, see the IBTrACS column documentation at https://www.ncei.noaa.gov/sites/default/files/2021-07/IBTrACS_v04_column_documentation.pdf"
105126
)
106-
return wmo_file
127+
return _csv.load(wmo_file)
107128
if subset.lower() in ["usa", "jtwc"]:
108-
return usa_file
129+
return _csv.load(usa_file)
109130

110131

111132
# TODOS:

scripts/prepare_ibtracs_offline.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
def prepare_offline(wmo=True, usa=True):
11-
ib = ibtracs.online("since1980", "tmp/ibtracs.csv")
11+
ib = huracanpy.load(source="ibtracs", ibtracs_subset="since1980")
1212

1313
# Remove season with tracks that are still provisional
1414
first_season_provi = ib.where(

0 commit comments

Comments
 (0)