#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Reader for MPLNET Level 1.5 aerosol NetCDF files.

Provides:
  * MPL_L15            - container class; ``MPL_L15.read_file`` reads the data
                         and attributes of selected variables into an instance.
  * get_path           - machine-dependent base paths for input files/figures.
  * read_mpl_site_list - parse a comma-separated list of MPL site locations.

Created on Fri Jan 17 10:51:10 2025

@author: sgasso + chat.gsfc
"""
import platform
import sys


class MPL_L15:
    """Holds MPLNET L1.5 variables (arrays) and their NetCDF attributes."""

    def __init__(self):
        """Initialize the class with empty attributes."""
        # Each variable gets a data slot (None until read_file populates it)
        # and a companion dict holding its NetCDF attributes.
        self.time = None
        self.time_attributes = {}

        self.altitude = None
        self.altitude_attributes = {}

        self.extinction = None
        self.extinction_attributes = {}

        self.extinction_err = None
        self.extinction_err_attributes = {}

        self.qa_extinction = None
        self.qa_extinction_attributes = {}

    @classmethod
    def read_file(cls, filename, variables=None, verb=0):
        """
        Read selected variables from a MPLNET L1.5 NetCDF file.

        Parameters:
            filename (str)  : Path to the NetCDF file.
            variables (list): Variables to read; defaults to
                              ['time', 'altitude', 'extinction', 'extinction_err'].
            verb (int)      : 0 = silent, 1 = print name/type/dims/shape,
                              2 = level 1 plus all attributes.

        Returns:
            MPL_L15 instance with the requested data, or None on read error.
        """
        # Imported lazily so the rest of the module (get_path,
        # read_mpl_site_list) is usable on systems without netCDF4.
        from netCDF4 import Dataset

        default_variables = ['time', 'altitude', 'extinction', 'extinction_err']
        variables_to_read = variables if variables is not None else default_variables

        instance = cls()
        try:
            with Dataset(filename, 'r') as nc:
                for var_name in variables_to_read:
                    if var_name not in nc.variables:
                        print(f"Warning: Variable {var_name} not found in file")
                        continue

                    var = nc.variables[var_name]
                    var_data = var[:]
                    var_attributes = {attr: var.getncattr(attr)
                                      for attr in var.ncattrs()}

                    if verb >= 1:
                        print("\nVariable Information:")
                        print(f"Name: {var_name}")
                        print(f"Type: {var.dtype}")
                        print(f"Dimensions: {var.dimensions}")
                        print(f"Shape: {var.shape}")
                    if verb >= 2:
                        print("\nAttributes:")
                        for attr, value in var_attributes.items():
                            print(f"  {attr}: {value}")
                        print("=" * 50)

                    # Store data and its attribute dict on the instance.
                    setattr(instance, var_name, var_data)
                    setattr(instance, f"{var_name}_attributes", var_attributes)
        except Exception as e:
            print(f"Error reading NetCDF file: {str(e)}")
            return None

        return instance

    def get_variable_names(self):
        """Return names of data attributes (excludes *_attributes dicts)."""
        return [attr for attr in self.__dict__.keys()
                if not attr.endswith('_attributes')]

    def get_variable_info(self, variable_name):
        """Print shape, dtype and attributes of one stored variable."""
        if hasattr(self, variable_name):
            data = getattr(self, variable_name)
            attrs = getattr(self, f"{variable_name}_attributes", {})
            print(f"\nVariable: {variable_name}")
            print(f"Shape: {data.shape}")
            print(f"Type: {data.dtype}")
            print("Attributes:", attrs)
        else:
            print(f"Variable {variable_name} not found")


def get_path(now_os, now_computer):
    """
    Return (base_path, pth_fig_out) for the current machine.

    BUG FIXES:
      * the linux branches never assigned pth_fig_out, so the final return
        raised UnboundLocalError; it now defaults to ''.
      * __main__ passes platform.system().lower() ('windows'), while this
        function only tested 'win32' (the sys.platform spelling); both are
        now accepted.
    Exits the interpreter if the OS is not recognized.
    """
    pth_fig_out = ''  # no figure path is defined on the linux clusters
    if now_os in ('win32', 'windows'):
        base_path = 'C:/Users/sgasso/Downloads/'
        pth_fig_out = 'C:/Users/sgasso/OneDrive - NASA/ToShare/2025/GEOS/Pyfigures/'
    elif now_os == 'darwin':
        base_path = '/Users/sgasso/Downloads/'
        pth_fig_out = '/Users/sgasso/Library/CloudStorage/OneDrive-NASA/ToShare/2025/AOCH/PyFigures/'
    elif now_os == 'linux' and "calculon" in now_computer:
        base_path = '/nobackup/CALIPSO/Level1.5/Santiago/'
    elif now_os == 'linux' and "discover" in now_computer:
        base_path = '/nobackup/CALIPSO/Level1.5/Santiago/'
    else:
        print('Current operating system not recognized.')
        print('Cannot set path to MPL files. Terminate Run')
        sys.exit()

    return base_path, pth_fig_out


#### Read list of MPL sites
def read_mpl_site_list(filename):
    """
    Read a comma-separated list of MPL sites.

    Expected format:
        line 1 : Name_MPL_sites, name, lat, lon, altitude(km)   (header)
        line 2+: GSFC, 38.9930, -76.8400, 0.050

    Returns:
        list of (name, lat, lon, altitude_km) tuples.
    """
    sites = []
    with open(filename, 'r') as file:
        file.readline()  # skip header line
        for line in file:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            data = [item.strip() for item in line.split(',')]
            sites.append((data[0],
                          float(data[1]),
                          float(data[2]),
                          float(data[3])))
    return sites

######----------------------------------------------


if __name__ == "__main__":
    # Direct call to the class method
    current_os = platform.system()
    computer_name = platform.node()
    current_pth, pth_fig = get_path(current_os.lower(), computer_name)
    filename = "MPLNET_V3_L15_AER_20190603_MPL44258_GSFC.nc4"
    full_pathname = current_pth + filename

    # Define variables to read
    variables_to_read = ['time', 'altitude', 'extinction']

    # Read the data
    data = MPL_L15.read_file(full_pathname, variables=variables_to_read, verb=2)

    # read_file returns None on failure; guard before dereferencing.
    if data is not None:
        print(data.time.shape)
        print(data.altitude.shape)
        print(data.extinction.shape)

        # Access attributes
        print(data.time_attributes)
        print(data.extinction_attributes.get('units'))
# -*- coding: utf-8 -*-
"""
Generic reader for aerosol data from the TROPOMI TROPOAER_L2 product.

Sept/2024 - by Santiago Gasso, sgasso@umd.edu

@author: sgassoumd
"""


import os, sys, time
import numpy as np
from datetime import date, datetime, timedelta

import h5py
import netCDF4 as nc

### plotting modules (kept for downstream plotting / the commented test area)
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm  # custom colormaps
import cartopy.crs as ccrs        # cartographic projections
import cartopy.feature as cfeature  # map features


# Default SDSs read when the user does not supply a selection.
SDS = dict(
    GEODATA=('latitude', 'longitude', 'latitude_bounds',
             'longitude_bounds', 'delta_time'),
    SCIDATA=('UVAerosolIndex', 'FinalAlgorithmFlags', 'FinalAerosolLayerHeight',
             'SurfaceType', 'Reflectivity'),
)

# Short attribute names the loaded object exposes for each SDS variable.
ALIAS = dict(latitude='lat',
             longitude='lon',
             latitude_bounds='lat4',
             longitude_bounds='lon4',
             delta_time='dtime',   # seconds since DATE_START = datetime(2010,1,1,0,0,0)
             UVAerosolIndex='uvai',
             FinalAerosolLayerHeight='ztro',
             FinalAlgorithmFlags='algQA',
             SurfaceType='surftype',
             Reflectivity='ref')

CHANNELS = (354.0, 388.0, 500.0)
DATE_START = datetime(2010, 1, 1, 0, 0, 0)
# ...........................................................................


class TROPOAER_L2(object):
    """
    Loads TROPOMI TROPOAER L2 aerosol data from single files, lists of
    files, or directories (searched recursively).
    Based on existing codes in GEOSPYobs aura.py and mxd04.py.
    """

    def __init__(self, Path, SDS=SDS, Verbose=0, GEO=False, alias=None):
        """
        Parameters:
            Path (str|list): granule file(s) and/or directory(ies) to read.
            SDS (dict)     : group name -> tuple of variable names to read.
            Verbose (int)  : verbosity level (0 = silent).
            GEO (bool)     : if True, read only the GEODATA group.
            alias (dict)   : extra/override short names for variables.
        """
        self.verb = Verbose
        self.SDS = SDS
        # GEO=True restricts reading to the geolocation group only.
        if GEO:
            self.SELGROUP = ['GEODATA']
            if self.verb:
                print('User selected GEODATA only')
        else:
            self.SELGROUP = ['GEODATA', 'SCIDATA']

        # Flat list of all variable names across all groups.
        self.Names = []
        for group in list(self.SDS.keys()):
            for name in self.SDS[group]:
                self.Names.append(name)
        self.Names += ['nymd', 'nhms']  # inherited from old code; may be useful

        # Merge user-supplied aliases over the defaults.
        self.ALIAS = ALIAS.copy()
        if alias is not None:
            for a in alias:
                self.ALIAS[a] = alias[a]

        # Create empty lists for the SDSs to be read; each list element
        # holds the data of one orbit/granule.
        for name in self.Names:
            self.__dict__[name] = []
        self.time = []  # datetime objects, one per along-track observation

        # Normalize Path to a list and read every orbit found.
        if type(Path) is list:
            if len(Path) == 0:
                self.nobs = 0
                print("WARNING: Empty TROPO object created")
                return
        else:
            Path = [Path, ]
        self._readList(Path)

        # Make each read attribute a single numpy array.
        for igroup in self.SELGROUP:
            key_content_names = tuple(self.SDS.get(igroup))
            for sds_name in key_content_names:
                try:
                    self.__dict__[sds_name] = np.concatenate(self.__dict__[sds_name])
                except Exception:  # narrowed from a bare except
                    print("Failed concatenating,name= " + sds_name)
                    print("Failed concatenating,group= " + igroup)

        # Rename variables to their short ALIAS names; variables without
        # an alias are dropped from the object.
        Alias = list(self.ALIAS.keys())
        for group in list(self.SDS.keys()):
            for name in self.SDS[group]:
                if name in Alias:
                    self.__dict__[self.ALIAS[name]] = self.__dict__[name]
                    delattr(self, name)  # remove the long-name attribute
                else:
                    delattr(self, name)

        # Temporary attribute; not part of the public object.
        delattr(self, 'SELGROUP')

    def _readList(self, List):
        """
        Recursively look for files in list; list items can be files or
        directories.  Exits the run on the first invalid item.
        """
        for item in List:
            if os.path.isdir(item):
                self._readDir(item)
            elif os.path.isfile(item):
                self._readOrbit(item)
            else:
                print("%s is not a valid file or directory, ignoring it" % item)
                sys.exit('Exit _readList')

    # -------------- from Aura.py, called by _readList(self, List)
    def _readDir(self, dir):
        """Recursively look for files in directory."""
        for item in os.listdir(dir):
            path = dir + os.sep + item
            if os.path.isdir(path):
                self._readDir(path)
            elif os.path.isfile(path):
                self._readOrbit(path)
            else:
                print("%s is not a valid file or directory, ignoring it" % item)
                sys.exit('Exit _readDir')

    # -------------- from Aura.py, called by _readList(self, List)
    def _readOrbit(self, filename):
        """
        Read one TROPOAER granule with Level 2 aerosol data and append its
        arrays to the per-orbit lists.

        BUG FIX: the HDF5 file handle was never closed.  Datasets are now
        read eagerly into numpy arrays so the file can be closed safely by
        the context manager.
        """
        if self.verb:
            print("\n ... working with file <%s>" % filename)
        with h5py.File(filename, mode='r') as f:
            for igroup in self.SELGROUP:
                key_content_names = self.SDS[igroup]
                if self.verb:
                    print(f"The name of the selected group is {igroup} and it contains the following names: {key_content_names}")

                # Open the corresponding HDF5 group.
                g = f.get(igroup)

                # Loop over the SDS names in this group.
                for ig in key_content_names:
                    if ig == 'delta_time':
                        # NOTE(review): the product attribute describes
                        # delta_time as "milliseconds since beginning of day",
                        # but the values are used as SECONDS below (as in the
                        # original code) — confirm units against the spec.
                        delta_time = g[ig][:].astype('float')
                        Time = g['time'][()]

                        nobs = len(delta_time)
                        nymd = np.ones(nobs).astype('int')
                        nhms = np.ones(nobs).astype('int')
                        for i in range(nobs):
                            t = DATE_START + timedelta(seconds=delta_time[i])
                            nymd[i] = 10000 * t.year + 100 * t.month + t.day
                            nhms[i] = 10000 * t.hour + 100 * t.minute + t.second
                            self.time.append(t)

                        self.__dict__[ig].append(delta_time + Time)  # time as on file
                        self.nymd.append(nymd)
                        self.nhms.append(nhms)
                    else:
                        # Eager read so the data outlives the closed file.
                        data = g[ig][...]
                        self.__dict__[ig].append(data)
                        if self.verb:
                            print('Read .... ', self.__dict__[ig])


##### Now the modules
# ............................................................................
+###### -------- test area +# if __name__ == "__main__": + + # current_os=sys.platform + # if current_os=='win32': #in PC office + # pthin="D:\Satellite\Tropomi\Level2\/226\/" + # elif current_os=='darwin': #in Laptop + # # pthin='/Users/sgasso/' + # pthin='/Volumes/ExtData1/SatData/Tropomi/Level2/' + # elif current_os == 'linux': # in Calculon + # pthin='/nobackup/TROPOMAER/2019/226/' + # else: + # print('Current operating system no recognized.') + # print('Cannot set path to Level1 files. Terminate Run') + # sys.exit() + + + # filename_tro='TROPOMI-Sentinel-5P_L2-TROPOMAER_2019m0814t131912-o09508_v01-2021m1015t022508.nc' + # syn_time = datetime(2019,8,14,13,19,12) + # Files = pthin+filename_tro + # # print(Files) + # # Files = granules('/discover/nobackup/dao_ops/intermediate/flk/modis','MOD04',syn_time,coll='006') + # # select group of data from dictionary defined begining of code + # keys=[1,2,3] # select GEODATA amd SCIDATA, using index start in 1 + + + # TROP = TROPOAER_L2(Files,Verbose=1,only_good=True)