Skip to content

Commit

Permalink
Merge pull request #66 from claytharrison/fix_swath_indicator_nans
Browse files Browse the repository at this point in the history
Fix dtype nan irregularities in EPS/BUFR readers
  • Loading branch information
sebhahn authored Jul 18, 2024
2 parents e5c02d4 + 37f912c commit 50489c3
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/ascat/read_native/bufr.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def conv_bufrl1b_generic(data, metadata):
data.pop(f[2]))).T.astype(new_dtype)
if nan_val is not None:
valid = data[new_name] != nan_val
data[new_name][~valid] = float32_nan
data[new_name][~valid] = nan_val_dict[new_dtype]
data[new_name][valid] *= s

if data['node_num'].max() == 82:
Expand Down
37 changes: 24 additions & 13 deletions src/ascat/read_native/eps_native.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

from ascat.utils import get_toi_subset, get_roi_subset
from ascat.utils import get_bit, set_bit
from ascat.utils import dtype_to_nan

short_cds_time = np.dtype([("day", ">u2"), ("time", ">u4")])
long_cds_time = np.dtype([("day", ">u2"), ("ms", ">u4"), ("mms", ">u2")])
Expand Down Expand Up @@ -793,9 +794,11 @@ def conv_epsl1bszf_generic(data, metadata, gen_fields_lut, skip_fields):
(data[new_name] > valid_range[1]))
data[new_name].set_fill_value(nan_val)
else:
invalid = data[var_name] == dtype_to_nan[np.dtype(data[var_name].dtype)]
data[new_name] = np.ma.array(data.pop(var_name).astype(new_dtype))
data[new_name].mask = ((data[new_name] < valid_range[0]) |
(data[new_name] > valid_range[1]))
(data[new_name] > valid_range[1]) |
invalid)
data[new_name].set_fill_value(nan_val)

return data
Expand All @@ -817,12 +820,15 @@ def conv_epsl1bszx_generic(data, metadata):
data : dict of numpy.ndarray
Converted dataset.
"""
# template - "old_var_name": ("new_name", new dtype )
gen_fields_lut = {
"inc_angle_trip": ("inc", np.float32, uint_nan),
"azi_angle_trip": ("azi", np.float32, int_nan),
"sigma0_trip": ("sig", np.float32, long_nan),
"kp": ("kp", np.float32, uint_nan),
"f_kp": ("kp_quality", np.uint8, uint8_nan)
"f_kp": ("kp_quality", np.uint8, None), # "f_kp": ("kp_quality", np.int8, uint8_nan),
"f_usable": ("f_usable", np.int8, uint8_nan),
"swath_indicator": ("swath_indicator", np.int8, uint8_nan),
}

skip_fields = ["flagfield_rf1", "f_f", "f_v", "f_oa", "f_sa", "f_tel"]
Expand All @@ -832,9 +838,11 @@ def conv_epsl1bszx_generic(data, metadata):
data.pop(var_name)

for var_name, (new_name, new_dtype, nan_val) in gen_fields_lut.items():
invalid = data[var_name] == nan_val
data[new_name] = data.pop(var_name).astype(new_dtype)
if nan_val is not None:
data[new_name][data[new_name] == nan_val] = float32_nan
new_nan_val = dtype_to_nan[np.dtype(new_dtype)]
data[new_name][invalid] = new_nan_val

data["sat_id"] = np.repeat(metadata["sat_id"], data["time"].size)

Expand Down Expand Up @@ -879,7 +887,8 @@ def conv_epsl2szx_generic(data, metadata):
"frozen_soil_probability": ("frozen_prob", np.uint8, None),
"innudation_or_wetland": ("wetland", np.uint8, None),
"topographical_complexity": ("topo", np.uint8, None),
"kp": ("kp", np.float32, uint_nan)
"kp": ("kp", np.float32, uint_nan),
"swath_indicator": ("swath_indicator", np.int8, uint8_nan)
}

skip_fields = ["flagfield_rf1", "f_f", "f_v", "f_oa", "f_sa", "f_tel"]
Expand All @@ -889,9 +898,11 @@ def conv_epsl2szx_generic(data, metadata):
data.pop(var_name)

for var_name, (new_name, new_dtype, nan_val) in gen_fields_lut.items():
invalid = data[var_name] == nan_val
data[new_name] = data.pop(var_name).astype(new_dtype)
if nan_val is not None:
data[new_name][data[new_name] == nan_val] = float32_nan
new_nan_val = dtype_to_nan[np.dtype(new_dtype)]
data[new_name][invalid] = new_nan_val

data["sat_id"] = np.repeat(metadata["sat_id"], data["time"].size)

Expand Down Expand Up @@ -1321,7 +1332,7 @@ def read_szx_fmv_11(eps_file):
data[f] = raw_data[f.upper()].flatten()[idx_nodes]

fields = [("longitude", long_nan), ("latitude", long_nan),
("swath_indicator", int8_nan)]
("swath_indicator", uint8_nan)]

for f, nan_val in fields:
data[f] = raw_data[f.upper()].flatten()
Expand All @@ -1330,7 +1341,7 @@ def read_szx_fmv_11(eps_file):

fields = [("sigma0_trip", long_nan), ("inc_angle_trip", uint_nan),
("azi_angle_trip", int_nan), ("kp", uint_nan),
("f_kp", int8_nan), ("f_usable", int8_nan), ("f_f", uint_nan),
("f_kp", uint8_nan), ("f_usable", uint8_nan), ("f_f", uint_nan),
("f_v", uint_nan), ("f_oa", uint_nan), ("f_sa", uint_nan),
("f_tel", uint_nan), ("f_land", uint_nan)]

Expand Down Expand Up @@ -1406,7 +1417,7 @@ def read_szx_fmv_12(eps_file):
data[f] = raw_data[f.upper()].flatten()[idx_nodes]

fields = [("longitude", long_nan), ("latitude", long_nan),
("swath indicator", int8_nan)]
("swath indicator", uint8_nan)]

for f, nan_val in fields:
data[f] = raw_data[f.upper()].flatten()
Expand All @@ -1415,8 +1426,8 @@ def read_szx_fmv_12(eps_file):

fields = [("sigma0_trip", long_nan), ("inc_angle_trip", uint_nan),
("azi_angle_trip", int_nan), ("kp", uint_nan),
("num_val_trip", ulong_nan), ("f_kp", int8_nan),
("f_usable", int8_nan), ("f_f", uint_nan), ("f_v", uint_nan),
("num_val_trip", ulong_nan), ("f_kp", uint8_nan),
("f_usable", uint8_nan), ("f_f", uint_nan), ("f_v", uint_nan),
("f_oa", uint_nan), ("f_sa", uint_nan), ("f_tel", uint_nan),
("f_ref", uint_nan), ("f_land", uint_nan)]

Expand Down Expand Up @@ -1546,7 +1557,7 @@ def read_szf_fmv_12(eps_file, ignore_noise_ool=False):
fields = [("longitude_full", long_nan), ("latitude_full", long_nan),
("sigma0_full", long_nan), ("inc_angle_full", uint_nan),
("azi_angle_full", int_nan), ("land_frac", uint_nan),
("flagfield_gen2", int8_nan)]
("flagfield_gen2", uint8_nan)]

for f, nan_val in fields:
data[f] = eps_file.mdr[f.upper()].flatten()
Expand Down Expand Up @@ -1622,7 +1633,7 @@ def read_smx_fmv_12(eps_file):

fields = [("longitude", long_nan, long_nan),
("latitude", long_nan, long_nan),
("swath_indicator", int8_nan, int8_nan),
("swath_indicator", uint8_nan, uint8_nan),
("soil_moisture", uint_nan, uint_nan),
("soil_moisture_error", uint_nan, uint_nan),
("sigma40", long_nan, long_nan),
Expand Down Expand Up @@ -1861,7 +1872,7 @@ def read_szx_fmv_13(eps_file):

fields = [("sigma0_trip", long_nan), ("inc_angle_trip", uint_nan),
("azi_angle_trip", int_nan), ("kp", uint_nan),
("num_val_trip", ulong_nan), ("f_kp", int8_nan),
("num_val_trip", ulong_nan), ("f_kp", uint8_nan),
("f_usable", int8_nan), ("land_frac", uint_nan)]

for f, nan_val in fields:
Expand Down
2 changes: 1 addition & 1 deletion src/ascat/read_native/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def read(self, generic=False, to_xarray=False):
"FORMAT_MAJOR_VERSION", "FORMAT_MINOR_VERSION"]

for f in fields:
pos = np.core.defchararray.startswith(
pos = np.char.startswith(
mdr_metadata["MPHR/MPHR_TABLE"]["EntryName"], f.encode())
var = mdr_metadata["MPHR/MPHR_TABLE"]["EntryValue"][
pos][0].decode()
Expand Down
25 changes: 25 additions & 0 deletions src/ascat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,31 @@
import numpy as np
import xarray as xr

int8_nan = np.iinfo(np.int8).min
uint8_nan = np.iinfo(np.uint8).max
int16_nan = np.iinfo(np.int16).min
uint16_nan = np.iinfo(np.uint16).max
int32_nan = np.iinfo(np.int32).min
uint32_nan = np.iinfo(np.uint32).max
int64_nan = np.iinfo(np.int64).min
uint64_nan = np.iinfo(np.uint64).max
float32_nan = -999999.
float64_nan = -999999.

dtype_to_nan = {
np.dtype('int8'): int8_nan,
np.dtype('uint8'): uint8_nan,
np.dtype('int16'): int16_nan,
np.dtype('uint16'): uint16_nan,
np.dtype('int32'): int32_nan,
np.dtype('uint32'): uint32_nan,
np.dtype('int64'): int64_nan,
np.dtype('uint64'): uint64_nan,
np.dtype('float32'): float32_nan,
np.dtype('float64'): float64_nan,
np.dtype('<U1'): None,
np.dtype('O'): None,
}

def get_bit(a, bit_pos):
"""
Expand Down

0 comments on commit 50489c3

Please sign in to comment.