-
Notifications
You must be signed in to change notification settings - Fork 13
/
LMR_convertNPZtoNETCDF_fullfield.py
281 lines (227 loc) · 12 KB
/
LMR_convertNPZtoNETCDF_fullfield.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
"""
Module: LMR_convertNPZtoNETCDF_fullfield.py
Purpose: Converts LMR output from .npz files to netcdf files for every
Monte-Carlo realization included in the experiment. Monte-Carlo
realizations are saved as seperate files to avoid memory errors.
Originator: Robert Tardif | Univ. of Washington, Dept. of Atmospheric Sciences
Michael Erb | University of Southern California
| June 2017
Revisions: None
"""
import glob
import numpy as np
from netCDF4 import Dataset
import os
# LMR-specific import
from LMR_utils import natural_sort
# --- Begin section of user-defined parameters ---
# name of directory where the output of LMR experiments are located
#datadir = '/home/scec-00/lmr/erbm/LMR/archive_output'
#datadir = '/home/disk/ekman4/rtardif/LMR/output'
datadir ='/Users/hakim/data/LMR/archive'
# name of the experiment
nexp = 'test'
# Dictionary containing definitions of variables that can be handled by this code
var_desc = \
{
'd18O_sfc_Amon' : ('d18O', 'delta18 oxygen isotope','permil SMOW'),
'tas_sfc_Amon' : ('Tsfc', 'Near surface air temperature anomaly', 'K'),
'psl_sfc_Amon' : ('MSLP', 'Mean sea level pressure anomaly', 'Pa'),
'pr_sfc_Amon' : ('PRCP', 'Precipitation rate anomaly', 'kg/m2/s1'),
'scpdsi_sfc_Amon' : ('scpdsi','self-calibrated Palmer Drought Severity Index', ''),
'scpdsipm_sfc_Amon' : ('scpdsi','self-calibrated Palmer Drought Severity Index (Penman-Monteith)', ''),
'uas_sfc_Amon' : ('Usfc', 'Near surface zonal wind anomaly', 'm/s'),
'vas_sfc_Amon' : ('Vsfc', 'Near surface meridional wind anomaly', 'm/s'),
'zg_500hPa_Amon' : ('H500', '500hPa geopotential height anomaly', 'm'),
'wap_500hPa_Amon' : ('W500', '500hPa vertical motion anomaly', 'Ps/s'),
'wap_700hPa_Amon' : ('W700', '700hPa vertical motion anomaly', 'Ps/s'),
'ua_500hPa_Amon' : ('U500', '500hPa zonal wind anomaly', 'm/s'),
'va_500hPa_Amon' : ('V500', '500hPa meridional wind anomaly', 'm/s'),
'tos_sfc_Omon' : ('tos', 'Sea surface temperature', 'K'),
'ohcArctic_0-700m_Omon' : ('ohcArctic_0to700m','Basin-averaged Ocean Heat Content of Arctic Ocean in 0-700m layer','J'),
'ohcAtlanticNH_0-700m_Omon' : ('ohcAtlanticNH_0to700m','Basin-averaged Ocean Heat Content of N. Atlantic Ocean in 0-700m layer','J'),
'ohcAtlanticSH_0-700m_Omon' : ('ohcAtlanticNH_0to700m','Basin-averaged Ocean Heat Content of S. Atlantic Ocean in 0-700m layer','J'),
'ohcPacificNH_0-700m_Omon' : ('ohcPacificNH_0to700m','Basin-averaged Ocean Heat Content of N. Pacific Ocean in 0-700m layer','J'),
'ohcPacificSH_0-700m_Omon' : ('ohcPacificSH_0to700m','Basin-averaged Ocean Heat Content of S. Pacific Ocean in 0-700m layer','J'),
'ohcSouthern_0-700m_Omon' : ('ohcSouthern_0to700m','Basin-averaged Ocean Heat Content of Southern Ocean in 0-700m layer','J'),
'ohcIndian_0-700m_Omon' : ('ohcIndian_0to700m','Basin-averaged Ocean Heat Content of Indian Ocean in 0-700m layer','J'),
'AMOCindex_Omon' : ('AMOCindex','AMOC index (max. overturning in region between 30N-70N and 500m-2km depth in N Atl.)','kg s-1'),
'AMOC45N1000m_Omon' : ('AMOC45N1000m','Meridional overturning streamfunction at 45N and 1000m depth','kg s-1'),
'AMOC26N1000m_Omon' : ('AMOC26N1000m','Meridional overturning streamfunction at 26N and 1000m depth','kg s-1'),
'AMOC26Nmax_Omon' : ('AMOC26Nmax','Maximum meridional overturning streamfunction in ocean column at 26N','kg s-1'),
'tas_sfc_Adec' : ('Tsfc', 'Near surface air temperature anomaly', 'K')
}
# --- End section of user-defined parameters ---
expdir = datadir + '/'+nexp
# where the netcdf files are created
outdir = expdir
print(('\n Getting information on Monte-Carlo realizations for ' + expdir + '...\n'))
dirs = glob.glob(expdir+"/r*")
# keep names of MC directories (i.r. "r...") only
mcdirs = [item.split('/')[-1] for item in dirs]
# Make sure list is properly sorted
mcdirs = natural_sort(mcdirs)
# number of MC realizations found
niters = len(mcdirs)
print(('mcdirs:' + str(mcdirs)))
print(('niters = ' + str(niters)))
print('\n Getting information on reconstructed variables...\n')
# look in first "mcdirs" only. It should be the same for all.
workdir = expdir+'/'+mcdirs[0]
# look for "ensemble_full" files
listdirfiles = glob.glob(workdir+"/ensemble_full_*")
# strip directory name, keep file name only
listfiles = [item.split('/')[-1] for item in listdirfiles]
# strip everything but variable name
listvars = [(item.replace('ensemble_full_','')).replace('.npz','') for item in listfiles]
print(('Variables:', listvars, '\n'))
# Loop over variables
for var in listvars:
print(('\n Variable:', var))
if var not in list(var_desc.keys()):
print(' ***WARNING: Variable %s does not have a corresponding entry in variable definitions'
' (var_desc) at the top of this program. Please make necessary edits to have this variable'
' included in the format conversion output' %var)
continue
# Loop over realizations
for dir in mcdirs:
fname = expdir+'/'+dir+'/ensemble_full_'+var+'.npz'
npzfile = np.load(fname)
# Get the reconstructed field
field_values = npzfile['xa_ens']
npzcontent = npzfile.files
print((' file contents:', npzcontent))
# get the years in the reconstruction ... for some reason stored in an array of strings ...
years_str = npzfile['years']
# convert to array of floats
years = np.asarray([float(item) for item in years_str])
# Determine type of variation, get spatial coordinates if present
if 'lat' in npzcontent and 'lon' in npzcontent:
field_type = '2D:horizontal'
print(' field type:', field_type)
# get lat/lon data
lat2d = npzfile['lat']
lon2d = npzfile['lon']
#print ' ', lat2d.shape, lon2d.shape
lat1d = lat2d[:,0]
lon1d = lon2d[0,:]
print(' nlat/nlon=', lat1d.shape, lon1d.shape)
elif 'lat' in npzcontent and 'lev' in npzcontent:
field_type = '2D:meridional_vertical'
print(' field type:', field_type)
# get lat/lev data
lat2d = npzfile['lat']
lev2d = npzfile['lev']
#print ' ', lat2d.shape, lev2d.shape
lat1d = lat2d[:,0]
lev1d = lev2d[0,:]
print(' nlat/nlev=', lat1d.shape, lev1d.shape)
elif 'lat' not in npzcontent and 'lon' not in npzcontent and 'lev' not in npzcontent:
# no spatial coordinate, must be a scalar (time series)
field_type='1D:time_series'
print(' field type:', field_type)
else:
print('Cannot handle this variable yet! Variable of unrecognized dimensions... Exiting!')
raise SystemExit(1)
# declare master array that will contain data from all the M-C realizations
# (i.e. the "Monte-Carlo ensemble")
dims = field_values.shape
print(' xam field dimensions', dims)
mc_ens = field_values
# Roll array to get dims as [nens, time, nlat, nlon]
mc_ens_outarr = np.rollaxis(mc_ens,-1,0)
# Create the netcdf file for the current variable
outfile_nc = outdir+'/'+var+'_MCiters_ensemble_full_'+dir+'.nc'
outfile = Dataset(outfile_nc, 'w', format='NETCDF4')
outfile.description = 'LMR climate field reconstruction for variable: %s' % var
outfile.experiment = nexp
outfile.comment = 'File contains values for each ensemble member for Monte-Carlo realization '+dir
# define dimensions
ntime = years.shape[0]
nensembles = mc_ens_outarr.shape[0]
outfile.createDimension('time', ntime)
outfile.createDimension('ensemble_member', nensembles)
if field_type == '2D:horizontal':
nlat = lat1d.shape[0]
nlon = lon1d.shape[0]
outfile.createDimension('lat', nlat)
outfile.createDimension('lon', nlon)
elif field_type == '2D:meridional_vertical':
nlat = lat1d.shape[0]
nlev = lev1d.shape[0]
outfile.createDimension('lat', nlat)
outfile.createDimension('lev', nlev)
else:
pass
# define variables & upload the data to file
# reconstructed field
varinfolst = var.split('_')
varname = varinfolst[0]
varlevel = None
if len(varinfolst) > 2:
if varinfolst[2] == 'Amon' or varinfolst[2] == 'Omon' or varinfolst[2] == 'OImon':
varlevel = varinfolst[1]
# time
time = outfile.createVariable('time', 'i', ('time',))
time.description = 'time'
time.long_name = 'year CE'
if field_type == '2D:horizontal':
# lat
lat = outfile.createVariable('lat', 'f', ('lat',))
lat.description = 'latitude'
lat.units = 'degrees_north'
lat.long_name = 'latitude'
# lon
lon = outfile.createVariable('lon', 'f', ('lon',))
lon.description = 'longitude'
lon.units = 'degrees_east'
lon.long_name = 'longitude'
varout = outfile.createVariable(varname, 'f', ('ensemble_member','time', 'lat','lon'),zlib=True,complevel=4,fletcher32=True)
varout.description = var_desc[var][0]
varout.long_name = var_desc[var][1]
varout.units = var_desc[var][2]
if varlevel: varout.level = varlevel
# upload the data to file
lat[:] = lat1d
lon[:] = lon1d
time[:] = years
varout[:] = mc_ens_outarr
elif field_type == '2D:meridional_vertical':
# lat
lat = outfile.createVariable('lat', 'f', ('lat',))
lat.description = 'latitude'
lat.units = 'degrees_north'
lat.long_name = 'latitude'
# lev
lev = outfile.createVariable('lev', 'f', ('lev',))
lev.description = 'depth'
lev.units = 'm'
lev.long_name = 'depth'
varout = outfile.createVariable(varname, 'f', ('ensemble_member','time', 'lat','lev'),zlib=True,complevel=4,fletcher32=True)
varout.description = var_desc[var][0]
varout.long_name = var_desc[var][1]
varout.units = var_desc[var][2]
# upload the data to file
lat[:] = lat1d
lev[:] = lev1d
time[:] = years
varout[:] = mc_ens_outarr
elif field_type == '1D:time_series':
varout = outfile.createVariable(varname, 'f', ('ensemble_member','time'),zlib=True,complevel=4,fletcher32=True)
varout.description = var_desc[var][0]
varout.long_name = var_desc[var][1]
varout.units = var_desc[var][2]
if varlevel: varout.level = varlevel
# upload the data to file
time[:] = years
varout[:] = mc_ens_outarr
else:
print('Variable of unrecognized dimensions... Exiting!')
raise SystemExit(1)
# Closing the file
outfile.close()
# The initial netcdf contains some compression (zlib=True,complevel=4) and error detection (fletcher32=True).
# Use ncpdq to create a smaller version of the newly-created netcdf file using short (16-bit) integers.
# This is a lossy compression which saves about 5 significant digits, but takes up less storage space.
cmd = 'ncpdq -O -P all_new ' + outfile_nc + ' ' + outfile_nc
os.system(cmd)