Skip to content

Commit

Permalink
FIX: improve CF and gliderdac compliance; add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jklymak committed Jul 14, 2024
1 parent fc59e43 commit 0a823d7
Show file tree
Hide file tree
Showing 65 changed files with 225 additions and 175 deletions.
44 changes: 33 additions & 11 deletions pyglider/ncprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
_log = logging.getLogger(__name__)


def extract_timeseries_profiles(inname, outdir, deploymentyaml):
def extract_timeseries_profiles(inname, outdir, deploymentyaml, force=False):
"""
Extract and save each profile from a timeseries netCDF.
Expand All @@ -29,6 +29,9 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):
deploymentyaml : str or Path
location of deployment yaml file for the netCDF file. This should
be the same yaml file that was used to make the timeseries file.
force : bool, default False
Force an overwrite even if profile netcdf already exists
"""
try:
os.mkdir(outdir)
Expand All @@ -48,7 +51,7 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):
dss = ds.isel(time=ind)
outname = outdir + '/' + utils.get_file_id(dss) + '.nc'
_log.info('Checking %s', outname)
if not os.path.exists(outname):
if force or (not os.path.exists(outname)):
# this is the id for the whole file, not just this profile..
dss['trajectory'] = utils.get_file_id(ds).encode()
trajlen = len(utils.get_file_id(ds).encode())
Expand All @@ -73,8 +76,13 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):
dss['v'] = profile_meta['v'].get('_FillValue', np.NaN)
dss['v'].attrs = profile_meta['v']

dss['profile_id'] = np.array(p*1.0)
dss['profile_id'] = np.int32(p)
dss['profile_id'].attrs = profile_meta['profile_id']
if '_FillValue' not in dss['profile_id'].attrs:
dss['profile_id'].attrs['_FillValue'] = -1
dss['profile_id'].attrs['valid_min'] = np.int32(dss['profile_id'].attrs['valid_min'])
dss['profile_id'].attrs['valid_max'] = np.int32(dss['profile_id'].attrs['valid_max'])

dss['profile_time'] = dss.time.mean()
dss['profile_time'].attrs = profile_meta['profile_time']
# remove units so they can be encoded later:
Expand All @@ -90,9 +98,9 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):

dss['lat'] = dss['latitude']
dss['lon'] = dss['longitude']
dss['platform'] = np.NaN
dss['platform'] = np.int32(1)
comment = (meta['glider_model'] + ' operated by ' +
meta['institution'])
meta['institution'])
dss['platform'].attrs['comment'] = comment
dss['platform'].attrs['id'] = (
meta['glider_name'] + meta['glider_serial'])
Expand All @@ -101,6 +109,9 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):
meta['glider_model'] + dss['platform'].attrs['id'])
dss['platform'].attrs['type'] = 'platform'
dss['platform'].attrs['wmo_id'] = meta['wmo_id']
if '_FillValue' not in dss['platform'].attrs:
dss['platform'].attrs['_FillValue'] = -1


dss['lat_uv'] = np.NaN
dss['lat_uv'].attrs = profile_meta['lat_uv']
Expand All @@ -109,16 +120,25 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):
dss['time_uv'] = np.NaN
dss['time_uv'].attrs = profile_meta['time_uv']

dss['instrument_ctd'] = np.NaN
dss['instrument_ctd'] = np.int32(1.0)
dss['instrument_ctd'].attrs = profile_meta['instrument_ctd']
if '_FillValue' not in dss['instrument_ctd'].attrs:
dss['instrument_ctd'].attrs['_FillValue'] = -1

dss.attrs['date_modified'] = str(np.datetime64('now')) + 'Z'

# ancillary variables::
to_fill = ['temperature', 'pressure', 'conductivity',
'salinity', 'density', 'lon', 'lat', 'depth']
for name in to_fill:
dss[name].attrs['ancillary_variables'] = name + '_qc'
if True:
to_fill = ['temperature', 'pressure', 'conductivity',
'salinity', 'density', 'lon', 'lat', 'depth']
for name in to_fill:
qcname = name + '_qc'
dss[name].attrs['ancillary_variables'] = qcname
if qcname not in dss.keys():

dss[qcname] = ('time', 2 * np.ones(len(dss[name]), np.int8))
dss[qcname].attrs = utils.fill_required_qcattrs({}, name)
# 2 is "not eval"
# outname = outdir + '/' + utils.get_file_id(dss) + '.nc'
_log.info('Writing %s', outname)
timeunits = 'nanoseconds since 1970-01-01T00:00:00Z'
Expand All @@ -134,7 +154,9 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):
'profile_time':
{'units': timeunits,
'_FillValue': -99999.0,
'dtype': 'float64'}}
'dtype': 'float64'},
}

)

# add traj_strlen using bare netcdf to make IOOS happy
Expand Down
14 changes: 5 additions & 9 deletions pyglider/slocum.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,6 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, *,
if atts != 'coordinates':
attr[atts] = ncvar[name][atts]
ds[name] = (('time'), ebd[name].values, attr)

for name in thenames:
_log.info('working on %s', name)
if 'method' in ncvar[name].keys():
Expand Down Expand Up @@ -868,8 +867,8 @@ def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
# get the time:
time = data.pop(0)
ds['time'] = (('time'), time, attr)
ds['latitude'] = 0 * ds.time
ds['longitude'] = 0 * ds.time
ds['latitude'] = (('time'), np.zeros(len(time)))
ds['longitude'] = (('time'), np.zeros(len(time)))
# get the time_base data:
basedata = data.pop(0)
# slot the time_base variable into the right place in the
Expand Down Expand Up @@ -908,7 +907,7 @@ def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
ValueError(f'{sensorname} not in science or eng parameter names')

# make the attributes:
ncvar[name].pop('coordinates', None)
ncvar[name]['coordinates'] = 'time'
attrs = ncvar[name]
attrs = utils.fill_required_attrs(attrs)
ds[name] = (('time'), val, attrs)
Expand All @@ -921,9 +920,6 @@ def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
ds = utils.get_distance_over_ground(ds)

ds = utils.get_derived_eos_raw(ds)
ds = ds.assign_coords(longitude=ds.longitude)
ds = ds.assign_coords(latitude=ds.latitude)
ds = ds.assign_coords(depth=ds.depth)

# screen out-of-range times; these won't convert:
ds['time'] = ds.time.where((ds.time>0) & (ds.time<6.4e9), np.NaN)
Expand Down Expand Up @@ -953,8 +949,8 @@ def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
_log.info('writing %s', outname)
ds.to_netcdf(outname, 'w',
encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z',
'_FillValue': -999999,
'dtype': 'int64'}})
'_FillValue': np.NaN,
'dtype': 'float64'}})

return outname

Expand Down
21 changes: 19 additions & 2 deletions pyglider/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,22 @@ def fill_required_attrs(attrs):
return attrs


def fill_required_qcattrs(attrs, varname):
    """
    Fill in missing required attributes for a quality-control variable.

    Parameters
    ----------
    attrs : dict
        Attributes already chosen for the QC variable.  Modified in place;
        any key that is already present is left untouched.
    varname : str
        Name of the variable the QC flag belongs to.  Used to build the
        default ``long_name``.

    Returns
    -------
    attrs : dict
        The same dictionary that was passed in, with defaults added for
        every required key that was missing.
    """
    required = {
        "units": "1",
        "flag_values": np.array([1, 2, 3, 4, 9], dtype=np.int8),
        "valid_min": np.int8(1),
        "valid_max": np.int8(9),
        "flag_meanings": "PASS NOT_EVALUATED SUSPECT FAIL MISSING",
        "standard_name": "quality_flag",
        # f-string so the variable name is actually interpolated; the plain
        # string previously left a literal "{varname}" in the attribute.
        "long_name": f"Initial flag for {varname}",
    }
    for key, default in required.items():
        if key not in attrs:
            attrs[key] = default
    return attrs


def get_file_id(ds):
"""
Make a file id for a Dataset
Expand Down Expand Up @@ -458,9 +474,10 @@ def fill_metadata(ds, metadata, sensor_data):
ds.attrs['history'] = 'CPROOF glider toolbox version: pre-tag'
for k, v in metadata.items():
ds.attrs[k] = v
ds.attrs['featureType'] = 'timeseries'
ds.attrs['featureType'] = 'trajectory'
ds.attrs['cdm_data_type'] = 'Trajectory'
ds.attrs['Conventions'] = 'CF-1.6'
ds.attrs['Conventions'] = 'CF-1.8'
ds.attrs['standard_name_vocabulary'] = 'CF STandard Name Table v72'
ds.attrs['date_created'] = str(np.datetime64('now')) + 'Z'
ds.attrs['date_issued'] = str(np.datetime64('now')) + 'Z'
ds.attrs['date_modified'] = " "
Expand Down
25 changes: 5 additions & 20 deletions tests/example-data/example-slocum/deploymentRealtime.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ metadata:
license: "This data may be redistributed and used without restriction or
warranty"
metadata_link: "https://cproof.uvic.ca"
Metadata_Conventions: CF-1.6, Unidata Dataset Discovery v1.0
Metadata_Conventions: CF-1.8, Unidata Dataset Discovery v1.0
naming_authority: "ca.uvic.cproof"
platform_type: "Slocum Glider"
processing_level: "Data provided as is with no expressed or implied
Expand All @@ -44,9 +44,9 @@ metadata:
publisher_url: http://cproof.uvic.ca
references: cproof toolbox URL
# https://www.nodc.noaa.gov/General/NODC-Archive/seanamelist.txt
sea_name: Coastal Waters of British Columbia
sea_name: Coastal Waters of Southeast Alaska and British Columbia
source: Observational data from a profiling glider.
standard_name_vocabulary: CF STandard Name Table v49
standard_name_vocabulary: CF STandard Name Table v72
summary: Manufacturer test in Saanich Inlet.
transmission_system: IRRIDIUM
wmo_id: "999999"
Expand Down Expand Up @@ -88,15 +88,13 @@ netcdf_variables:
units: seconds since 1970-01-01T00:00:00Z
axis: T
observation_type: "measured"
coordinates: time depth latitude longitude

latitude:
source: m_lat
long_name: latitude
standard_name: latitude
units: degrees_north
axis: Y
coordinates: time depth latitude longitude
comment: "Estimated between surface fixes"
observation_type: measured
platform: platform
Expand All @@ -111,7 +109,6 @@ netcdf_variables:
standard_name: longitude
units: degrees_east
axis: X
coordinates: time depth latitude longitude
comment: "Estimated between surface fixes"
observation_type: measured
platform: platform
Expand All @@ -125,44 +122,38 @@ netcdf_variables:
long_name: glider heading angle
standard_name: platform_orientation
units: rad
coordinates: time depth latitude longitude

pitch:
source: m_pitch
long_name: glider pitch angle
standard_name: platform_pitch_angle
units: rad
coordinates: time depth latitude longitude

roll:
source: m_roll
long_name: glider roll angle
standard_name: platform_roll_angle
units: rad
coordinates: time depth latitude longitude

# profile info:
waypoint_latitude:
source: c_wpt_lat
long_name: waypoint latitude
standard_name: latitude
units: degree_north
coordinates: time depth latitude longitude

waypoint_longitude:
source: c_wpt_lon
long_name: waypoint longitude
standard_name: longitude
units: degree_east
coordinates: time depth latitude longitude

# data parameters
conductivity:
source: sci_water_cond
long_name: water conductivity
standard_name: sea_water_electrical_conductivity
units: S m-1
coordinates: time depth latitude longitude
instrument: instrument_ctd
valid_min: 0.
valid_max: 10.
Expand All @@ -176,7 +167,6 @@ netcdf_variables:
long_name: water temperature
standard_name: sea_water_temperature
units: Celsius
coordinates: time depth latitude longitude
instrument: instrument_ctd
valid_min: -5.0
valid_max: 50.0
Expand All @@ -190,7 +180,6 @@ netcdf_variables:
long_name: water pressure
standard_name: sea_water_pressure
units: dbar
coordinates: time depth latitude longitude
conversion: bar2dbar
valid_min: 0.0
valid_max: 2000.0
Expand All @@ -210,27 +199,23 @@ netcdf_variables:
long_name: chlorophyll
standard_name: concentration_of_chlorophyll_in_sea_water
units: mg m-3
coordinates: time depth latitude longitude

cdom:
source: sci_flbbcd_cdom_units
long_name: CDOM
units: ppb
coordinates: time depth latitude longitude

backscatter_700:
source: sci_flbbcd_bb_units
long_name: 700 nm wavelength backscatter
units: "1"
coordinates: time depth latitude longitude

# Oxygen
oxygen_concentration:
source: sci_oxy4_oxygen
long_name: oxygen concentration
standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
units: umol l-1
coordinates: time depth latitude longitude

# derived water speed:
# water_velocity_eastward:
Expand All @@ -252,8 +237,8 @@ profile_variables:
profile_id:
comment: Sequential profile number within the trajectory. This value is unique in each file that is part of a single trajectory/deployment.
long_name: 'Profile ID'
valid_max: 2147483647.0
valid_min: 1.0
valid_max: 2147483646
valid_min: 1

profile_time:
comment: Timestamp corresponding to the mid-point of the profile
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 0a823d7

Please sign in to comment.