Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUFR2IODA python API converters for marine monthly in situ BUFR #844

Merged
merged 12 commits into from
Jan 18, 2024
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_bathythermal_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "bathy",
"subsets" : "BATHY",
"source" : "NCEP data tank",
"data_type" : "bathy",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ Bathythermal profiles",
"data_provider" : "U.S. NOAA"
}
2 changes: 1 addition & 1 deletion parm/ioda/bufr2ioda/bufr2ioda_subpfl_argo_profiles.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hourly in situ ARGO profiles",
"data_description" : "6-hrly in situ ARGO profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_subpfl_glider_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "subpfl",
"subsets" : "SUBPFL",
"source" : "NCEP data tank",
"data_type" : "glider",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ GLIDER profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_tesac_mammals_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "tesac",
"subsets" : "TESAC",
"source" : "NCEP data tank",
"data_type" : "mammals",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ marine mammals from TESAC profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_tesac_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "tesac",
"subsets" : "TESAC",
"source" : "NCEP data tank",
"data_type" : "tesac",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ TESAC profiles",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_trackob_surface.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "trkob",
"subsets" : "TRACKOB",
"source" : "NCEP data tank",
"data_type" : "trackob",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ TRACKOB surface",
"data_provider" : "U.S. NOAA"
}
12 changes: 12 additions & 0 deletions parm/ioda/bufr2ioda/bufr2ioda_xbtctd_profiles.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_format" : "xbtctd",
"subsets" : "XBTCTD",
"source" : "NCEP data tank",
"data_type" : "xbtctd",
"cycle_type" : "{{ RUN }}",
"cycle_datetime" : "{{ current_cycle | to_YMDH }}",
"dump_directory" : "{{ DMPDIR }}",
"ioda_directory" : "{{ COM_OBS }}",
"data_description" : "6-hrly in situ XBT/XCTD profiles",
"data_provider" : "U.S. NOAA"
}
295 changes: 295 additions & 0 deletions ush/ioda/bufr2ioda/bufr2ioda_altkob_surface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
#!/usr/bin/env python3
# (C) Copyright 2023 NOAA/NWS/NCEP/EMC
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.

import sys
sys.path.append('/work2/noaa/da/spaturi/sandbox/CDA_testbed/global-workflow/sorc/gdas.cd/build/lib/python3.7/')
sys.path.append('/work2/noaa/da/spaturi/sandbox/CDA_testbed/global-workflow/sorc/gdas.cd/build/lib/python3.7/pyiodaconv/')
sys.path.append('/work2/noaa/da/spaturi/sandbox/CDA_testbed/global-workflow/sorc/gdas.cd/build/lib/python3.7/pyioda/')

import numpy as np
import numpy.ma as ma
import os
import argparse
import math
import calendar
import time
import copy
from datetime import datetime
import json
from pyiodaconv import bufr
from collections import namedtuple
from pyioda import ioda_obs_space as ioda_ospace
from wxflow import Logger

def Compute_sequenceNumber(lon, logger=None):
    """Assign a sequence number to each observation, grouping by longitude.

    Observations sharing the same longitude value receive the same
    sequence number (the index of that longitude in the sorted set of
    unique longitudes).

    Args:
        lon: 1-D array-like of longitudes, one per observation.
        logger: optional Logger for a debug message. BUG FIX: the
            original referenced a module-global ``logger`` that only
            exists when the script runs as ``__main__``, so importing
            and calling this function raised NameError. Passing the
            logger explicitly (default None) is backward compatible.

    Returns:
        np.ndarray of int64 sequence numbers, same length as ``lon``.
    """
    _, seqNum = np.unique(lon, return_inverse=True)
    seqNum = seqNum.astype(np.int64)
    if logger is not None:
        logger.debug(f"Len of Sequence Number: {len(seqNum)}")

    return seqNum

def bufr_to_ioda(config, logger):
    """Convert a marine surface BUFR dump to an IODA netCDF file.

    Builds a BUFR QuerySet for datetime, position, sea-surface
    temperature and salinity, executes it against the dump file named
    by *config*, applies a gross range check, and writes the retained
    observations (with constant instrument ObsError and zero PreQC)
    to an IODA ObsSpace.

    Args:
        config: dict loaded from the converter JSON; must provide
            data_format, subsets, source, data_type, data_description,
            data_provider, cycle_type, cycle_datetime, dump_directory
            and ioda_directory.
        logger: wxflow Logger used for progress/diagnostic messages.
    """
    subsets = config["subsets"]
    logger.debug(f"Checking subsets = {subsets}")

    # Get parameters from configuration
    data_format = config["data_format"]
    source = config["source"]
    data_type = config["data_type"]
    data_description = config["data_description"]
    data_provider = config["data_provider"]
    cycle_type = config["cycle_type"]
    dump_dir = config["dump_directory"]
    ioda_dir = config["ioda_directory"]
    cycle = config["cycle_datetime"]  # YYYYMMDDHH

    yyyymmdd = cycle[0:8]
    hh = cycle[8:10]

    # General Information
    converter = 'BUFR to IODA Converter'
    platform_description = 'Surface obs from ALTKOB: temperature and salinity'

    bufrfile = f"{cycle_type}.t{hh}z.{data_format}.tm{hh}.bufr_d"
    DATA_PATH = os.path.join(dump_dir, f"{cycle_type}.{yyyymmdd}", str(hh),
                             "atmos", bufrfile)
    logger.debug(f"{bufrfile}, {DATA_PATH}")

    # ===========================================
    # Make the QuerySet for all the data we want
    # ===========================================
    start_time = time.time()

    logger.debug("Making QuerySet ...")
    q = bufr.QuerySet()

    # MetaData: observation time, receipt time, and position
    q.add('year', '*/YEAR')
    q.add('month', '*/MNTH')
    q.add('day', '*/DAYS')
    q.add('hour', '*/HOUR')
    q.add('minute', '*/MINU')
    q.add('ryear', '*/RCYR')
    q.add('rmonth', '*/RCMO')
    q.add('rday', '*/RCDY')
    q.add('rhour', '*/RCHR')
    q.add('rminute', '*/RCMI')
    q.add('latitude', '*/CLATH')
    q.add('longitude', '*/CLONH')

    # ObsValue: sea-surface temperature and salinity
    q.add('temp', '*/SST0')
    q.add('saln', '*/SSS0')

    end_time = time.time()
    running_time = end_time - start_time
    logger.debug(f"Running time for making QuerySet: {running_time} seconds")

    # ==============================================================
    # Open the BUFR file and execute the QuerySet
    # Use the ResultSet returned to get numpy arrays of the data
    # ==============================================================
    logger.debug("Executing QuerySet to get ResultSet ...")
    with bufr.File(DATA_PATH) as f:
        r = f.execute(q)

    # MetaData
    # BUG FIX: the original passed group_by='depth' to every get/get_datetime
    # call, but 'depth' was never added to the QuerySet, so execution failed.
    # This is surface data (one value per subset), so no grouping is needed.
    logger.debug(" ... Executing QuerySet: get MetaData ...")
    dateTime = r.get_datetime('year', 'month', 'day', 'hour', 'minute')
    dateTime = dateTime.astype(np.int64)
    rcptdateTime = r.get_datetime('ryear', 'rmonth', 'rday', 'rhour', 'rminute')
    rcptdateTime = rcptdateTime.astype(np.int64)
    lat = r.get('latitude')
    lon = r.get('longitude')

    # ObsValue
    logger.debug(" ... Executing QuerySet: get ObsValue ...")
    temp = r.get('temp')
    temp -= 273.15  # SST0 is reported in Kelvin; IODA output is degC
    saln = r.get('saln')

    # Gross range check: keep obs with plausible SST and salinity only.
    mask = ((temp > -10.0) & (temp <= 50.0)) & ((saln >= 0.0) & (saln <= 45.0))
    lat = lat[mask]
    lon = lon[mask]
    dateTime = dateTime[mask]
    rcptdateTime = rcptdateTime[mask]
    temp = temp[mask]
    saln = saln[mask]

    # Number of retained observations: the Location dimension.
    n_obs = len(temp)

    # ObsError: constant instrument error for every retained ob.
    # BUG FIX: the original sized these arrays with len(mask), which is the
    # PRE-filter observation count; they must match the filtered data.
    logger.debug("Generating ObsError array with constant value (instrument error)...")
    ObsError_temp = np.ma.masked_array(np.full(n_obs, 0.30, dtype=np.float32))
    ObsError_saln = np.ma.masked_array(np.full(n_obs, 1.00, dtype=np.float32))

    # PreQC: all retained obs start unflagged (0).
    logger.debug("Generating PreQC array with 0...")
    PreQC = (np.ma.masked_array(np.full(n_obs, 0))).astype(np.int32)

    logger.debug(" ... Executing QuerySet: Done!")

    logger.debug(" ... Executing QuerySet: Check BUFR variable generic \
dimension and type ...")
    # ================================================
    # Check values of BUFR variables, dimension and type
    # BUG FIX: the original also logged 'depth' and 'stationID', neither of
    # which is retrieved by this converter, raising NameError.
    # ================================================
    logger.debug(f" temp min, max, length, dtype = {temp.min()}, {temp.max()}, {len(temp)}, {temp.dtype}")
    logger.debug(f" saln min, max, length, dtype = {saln.min()}, {saln.max()}, {len(saln)}, {saln.dtype}")
    logger.debug(f" lon min, max, length, dtype = {lon.min()}, {lon.max()}, {len(lon)}, {lon.dtype}")
    logger.debug(f" lat min, max, length, dtype = {lat.min()}, {lat.max()}, {len(lat)}, {lat.dtype}")
    logger.debug(f" PreQC min, max, length, dtype = {PreQC.min()}, {PreQC.max()}, {len(PreQC)}, {PreQC.dtype}")
    logger.debug(f" ObsError_temp min, max, length, dtype = {ObsError_temp.min()}, {ObsError_temp.max()}, {len(ObsError_temp)}, {ObsError_temp.dtype}")
    logger.debug(f" ObsError_saln min, max, length, dtype = {ObsError_saln.min()}, {ObsError_saln.max()}, {len(ObsError_saln)}, {ObsError_saln.dtype}")
    logger.debug(f" dateTime shape, dtype = {dateTime.shape}, {dateTime.dtype}")
    logger.debug(f" rcptdateTime shape, dtype = {rcptdateTime.shape}, {rcptdateTime.dtype}")

    # =====================================
    # Create IODA ObsSpace
    # Write IODA output
    # =====================================

    # Create the dimensions
    dims = {'Location': np.arange(0, lat.shape[0])}

    # NOTE(review): this is surface data but the output name says
    # "_profiles" -- confirm the intended output filename convention.
    iodafile = f"{cycle_type}.t{hh}z.{data_type}_profiles.{data_format}.nc"
    OUTPUT_PATH = os.path.join(ioda_dir, iodafile)
    logger.debug(f" ... ... Create OUTPUT file: {OUTPUT_PATH}")

    path, fname = os.path.split(OUTPUT_PATH)
    if path and not os.path.exists(path):
        os.makedirs(path)

    obsspace = ioda_ospace.ObsSpace(OUTPUT_PATH, mode='w', dim_dict=dims)

    # Create Global attributes
    logger.debug(" ... ... Create global attributes")
    obsspace.write_attr('Converter', converter)
    obsspace.write_attr('source', source)
    obsspace.write_attr('sourceFiles', bufrfile)
    obsspace.write_attr('dataProviderOrigin', data_provider)
    obsspace.write_attr('description', data_description)
    obsspace.write_attr('datetimeRange', [str(dateTime.min()),
                                          str(dateTime.max())])
    obsspace.write_attr('platformLongDescription', platform_description)

    # Create IODA variables
    logger.debug(" ... ... Create variables: name, type, units, and attributes")

    # Datetime
    obsspace.create_var('MetaData/dateTime', dtype=dateTime.dtype, fillval=dateTime.fill_value) \
        .write_attr('units', 'seconds since 1970-01-01T00:00:00Z') \
        .write_attr('long_name', 'Datetime') \
        .write_data(dateTime)

    # rcptDatetime
    obsspace.create_var('MetaData/rcptdateTime', dtype=dateTime.dtype, fillval=dateTime.fill_value) \
        .write_attr('units', 'seconds since 1970-01-01T00:00:00Z') \
        .write_attr('long_name', 'receipt Datetime') \
        .write_data(rcptdateTime)

    # Longitude
    obsspace.create_var('MetaData/longitude', dtype=lon.dtype, fillval=lon.fill_value) \
        .write_attr('units', 'degrees_east') \
        .write_attr('valid_range', np.array([-180, 180], dtype=np.float32)) \
        .write_attr('long_name', 'Longitude') \
        .write_data(lon)

    # Latitude
    obsspace.create_var('MetaData/latitude', dtype=lat.dtype, fillval=lat.fill_value) \
        .write_attr('units', 'degrees_north') \
        .write_attr('valid_range', np.array([-90, 90], dtype=np.float32)) \
        .write_attr('long_name', 'Latitude') \
        .write_data(lat)

    # BUG FIX: the original wrote a MetaData/stationID variable from an
    # undefined name (stationID is never retrieved for ALTKOB); removed.

    # PreQC
    obsspace.create_var('PreQC/seaSurfaceTemperature', dtype=PreQC.dtype, fillval=PreQC.fill_value) \
        .write_attr('long_name', 'PreQC') \
        .write_data(PreQC)

    obsspace.create_var('PreQC/seaSurfaceSalinity', dtype=PreQC.dtype, fillval=PreQC.fill_value) \
        .write_attr('long_name', 'PreQC') \
        .write_data(PreQC)

    # ObsError
    obsspace.create_var('ObsError/seaSurfaceTemperature', dtype=ObsError_temp.dtype, fillval=ObsError_temp.fill_value) \
        .write_attr('units', 'degC') \
        .write_attr('long_name', 'ObsError') \
        .write_data(ObsError_temp)

    obsspace.create_var('ObsError/seaSurfaceSalinity', dtype=ObsError_saln.dtype, fillval=ObsError_saln.fill_value) \
        .write_attr('units', 'psu') \
        .write_attr('long_name', 'ObsError') \
        .write_data(ObsError_saln)

    # ObsValue
    obsspace.create_var('ObsValue/seaSurfaceTemperature', dtype=temp.dtype, fillval=temp.fill_value) \
        .write_attr('units', 'degC') \
        .write_attr('valid_range', np.array([-10.0, 50.0], dtype=np.float32)) \
        .write_attr('long_name', 'Sea Surface Temperature') \
        .write_data(temp)

    obsspace.create_var('ObsValue/seaSurfaceSalinity', dtype=saln.dtype, fillval=saln.fill_value) \
        .write_attr('units', 'psu') \
        .write_attr('valid_range', np.array([0.0, 45.0], dtype=np.float32)) \
        .write_attr('long_name', 'Sea Surface Salinity') \
        .write_data(saln)

    end_time = time.time()
    running_time = end_time - start_time
    logger.debug(f"Running time for splitting and output IODA: {running_time} \
seconds")

    logger.debug("All Done!")

if __name__ == '__main__':

    start_time = time.time()

    # Command-line interface. The defaults reproduce the original
    # hard-coded behavior, so existing invocations keep working.
    # (The original left a dangling "#if args.verbose" comment where
    # argparse was clearly intended.)
    parser = argparse.ArgumentParser(
        description='Convert a marine surface BUFR dump to IODA netCDF')
    parser.add_argument('-c', '--config', type=str,
                        default='bufr2ioda_trackob_surface.json',
                        help='JSON configuration file for the converter')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='DEBUG-level logging (default INFO)')
    args = parser.parse_args()

    # NOTE(review): this script converts ALTKOB, yet the default config
    # and the logger name say "trackob" -- confirm which dataset is
    # intended and align the names.
    log_level = 'DEBUG' if args.verbose else 'INFO'
    logger = Logger('bufr2ioda_trackob_surface.py', level=log_level,
                    colored_log=True)

    with open(args.config, "r") as json_file:
        config = json.load(json_file)

    bufr_to_ioda(config, logger)

    end_time = time.time()
    running_time = end_time - start_time
    logger.debug(f"Total running time: {running_time} seconds")

Loading
Loading