-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDeepDA_config.yml
495 lines (405 loc) · 17.3 KB
/
DeepDA_config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
# Configuration parameters for running a data assimilation reconstruction using cGENIE priors.
#
# Mingsong Li, Peking University
#
# Initial: Jan 15, 2020
# Updated April 27, 2020; add CaCO3 proxy
# Updated Oct. 12, 2020; add multi_seed for Monte Carlo simulations
# Updated Oct. 31, 2020; support macOS:
#   /Applications/MATLAB/MATLAB_Runtime/v96/sys/os/maci64/libgfortran.3.dylib.dylib was renamed to
#   libgfortran.3.dylib.acycle.dylib; otherwise, scipy.stats won't work
# Updated Nov. 3, 2020; log_level
# Updated July 15, 2021; Rscale_style: tuple or use dict
# Updated Mar 30, 2022. Add comments
#
# ==============================================================================
#
# Logging verbosity:
# 0 = none; 1 = most important messages; 2 = many; 3 = a lot; >= 4 = all.
log_level: 1
# Monte Carlo settings.
# NOTE(review): the indentation of this section was missing; `number` and
# `multi_seed` are nested under `MonteCarlo` here based on the comments and
# key names. Confirm against the code that reads this file.
MonteCarlo:
  # Number of runs (integer): 1 = single run; > 1 = Monte Carlo simulations.
  number: 100
  # Seeds for the Monte Carlo simulations.
  # NOTE(review): the list holds 101 values (0-100) while `number` is 100;
  # presumably only the first `number` seeds are used. TODO confirm.
  multi_seed:
    [
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
      13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
      26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
      39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
      65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
      78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
      91, 92, 93, 94, 95, 96, 97, 98, 99, 100
    ]
# ==============================================================================
# Core settings of the data assimilation (DA) experiment.
# NOTE(review): the indentation of this section was missing; all keys below
# are nested under `core` based on the comments and key names. Confirm against
# the code that reads this file.
# The `!!python/tuple` tags need a YAML loader that supports Python-specific
# tags; a safe loader will reject them.
core:
  # DA experiment name.
  # NOTE(review): the name encodes run settings (e.g. "frac0.99", "v0.1.4"),
  # while this file sets proxy_frac to 0.98 and dbversion to v0.1.5. Confirm
  # the name is up to date.
  nexp: _petm35_v0.1.4_20240623_All.whiatus._bays_MC100_pHcor_frac0.99_Ca75_biogem_SatuBen_setLim
  proj_dir: /Users/mingsongli/Dropbox/git/deepDA/mlwrk  # for MacBook Pro
  # Working directory for saving outputs
  wrkdir: /Users/mingsongli/Dropbox/PETM/posterior  # MacBook Pro
  # Directory for proxy data
  proxy_dir: /Users/mingsongli/Dropbox/git/deepDA/mlwrk/proxy  # for MacOS
  # Prior directory for ensemble members
  prior_dir: /Users/mingsongli/Dropbox/PETM/ML.petm/ML.petm035  # for MacOS pre-PETM; state-dependent ECS; rain ratio [0.05-0.25]
  # ===============================
  # Reconstruction period.
  # For the PETM DA, [0, 2] with a timescale interval of 1 means time steps of 0, 1, 2,
  # where 0 = prePETM, 1 = peakPETM, and 2 = postPETM.
  # See the settings below -- proxies: petm3slices: data_period_id:[] & data_period_idstd:[]
  #recon_period: !!python/tuple [0, 1]  # both pre and peak PETM
  recon_period: !!python/tuple [0, 0]  # pre-PETM
  # Reconstruction time scale step
  recon_timescale_interval: 1
  # Geologic age for Mg/Ca calibration in the baymag PSM only
  geologic_age: 56.0
  # Number of ensemble members. 100 means this DA has 100 prior members (cGENIE simulations)
  nens: 100
  # Covariance localization radius; null means none.
  # The cGENIE PETM DA doesn't need localization because the database is too small.
  #local_rad: null
  local_rad_list: !!python/tuple [null]
  #inflation_fact: 1.0  # unused
  # R scale style:
  #   1 = tuple
  #   2 = use dictionary; use different values for different proxies
  Rscale_style: 1
  # Scaling of the global estimate of the proxy variance Rg
  Rscale: !!python/tuple [2.0]
  # Ensemble saving option: save the full ensemble (true or false)
  save_ens_full: true
  # Save nc and hdf5 for each MC?
  save_mc_full: false
# ==============================================================================
# ==============================================================================
# Proxy database selection and per-database settings.
# NOTE(review): the indentation of this section was missing. The nesting below
# (general options under `proxies`; database options, `proxy_psm_type`,
# `proxy_assim2` and `proxy_assim3` under `petm3slices`) is reconstructed from
# the comments and key names. Confirm against the code that reads this file.
# The `!!python/tuple` tags need a YAML loader that supports Python-specific
# tags; a safe loader will reject them.
proxies:
  # Name of the database section (below) to use.
  #use_from: [NCDCdadt]
  use_from: [petm3slices]
  # Fraction of proxies used. For example, 0.98 means 98% of the total number
  # of datasets are randomly assimilated.
  proxy_frac: !!python/tuple [0.98]
  # ... feature not included ...
  proxy_timeseries_kind: asis
  # ... feature not included ...
  proxy_availability_filter: false
  # ... feature not included ...
  proxy_availability_fraction: 1.0
  # Quality control: null = no control; all proxy values are assimilated.
  proxy_qc: null
  # Quality control: if the innovation is larger than n * std of the prior, ignore this observation
  #proxy_qc: 3.0
  # Only assimilate d18O of the glassy foraminifera
  proxy_d18o_glassy: true
  # Empty section (no settings are defined for this database in this file).
  NCDCdadt: null
  petm3slices:
    dbversion: 'petmproxy3slices_v0.1.5.csv'  # 2024-07-28
    # ... feature not included ...
    dataformat_proxy: 'DF'
    # ... feature not included ...
    regions: []
    # ... feature not included ...
    proxy_resolution: [!!python/tuple [0., 5000.]]
    # ... feature not included ...
    database_filter: []
    # Labels of longitude and latitude in the database; deep-time DA data may
    # have multiple sets of coordinates.
    lon_label: 'lonbci'
    lat_label: 'latbci'
    #lon_label: 'lonbcishift'
    #lat_label: 'latbcishift'
    # Proxy error evaluation method:
    #   proxy_err_psm        error derived from PSM only using real observation value
    #   proxy_err_psm_fixed  error derived from Bayesian PSM only using default observation value
    #   proxy_err_psm_mp     Bayesian PSM error + multiple points variance included
    #   proxy_err_eval       user-defined; much faster? see below
    proxy_err_eval: 'proxy_err_psm_fixed'
    #proxy_err_eval: 'proxy_err_psm_mp'
    #proxy_err_eval: 'user-defined'
    # deepmip
    data_period_id:
      - 'deepMIPprePETM'
      - 'deepMIPPETM'
    data_period_idstd:
      - 'deepMIPprePETMstd'
      - 'deepMIPPETMstd'
    # ==============================
    # Select the proxy types by blacklisting the ones to exclude.
    # Keep exactly one `proxy_blacklist` uncommented.
    #
    ## All
    proxy_blacklist: []
    ## exclude TEX86
    #proxy_blacklist: ['Marine sediments_tex86']
    ## exclude caco3_13c
    #proxy_blacklist: ['Marine sediments_caco3_13c']
    ## exclude d18O
    #proxy_blacklist: ['Marine sediments_d18o_pooled']
    ## exclude Mg/Ca
    #proxy_blacklist: ['Marine sediments_mgca_pooled_bcp',
    #                  'Marine sediments_mgca_pooled_red']
    ## exclude CaCO3 = TOM
    #proxy_blacklist: ['Marine sediments_caco3']
    ## d18O + Mg/Ca
    #proxy_blacklist: ['Marine sediments_caco3',
    #                  'Marine sediments_tex86']
    ## d18O + CaCO3
    #proxy_blacklist: ['Marine sediments_tex86',
    #                  'Marine sediments_mgca_pooled_bcp',
    #                  'Marine sediments_mgca_pooled_red']
    ## TEX86 + CaCO3 (this option blacklists d18O and Mg/Ca)
    #proxy_blacklist: [
    #                  'Marine sediments_d18o_pooled',
    #                  'Marine sediments_mgca_pooled_bcp',
    #                  'Marine sediments_mgca_pooled_red']
    ## d18O + TEX
    #proxy_blacklist: ['Marine sediments_caco3','Marine sediments_mgca_pooled_bcp','Marine sediments_mgca_pooled_red']
    ## CaCO3 only
    #proxy_blacklist: ['Marine sediments_tex86','Marine sediments_d18o_pooled','Marine sediments_mgca_pooled_bcp','Marine sediments_mgca_pooled_red']
    ## TEX only
    #proxy_blacklist: ['Marine sediments_d18o_pooled',
    #                  'Marine sediments_caco3',
    #                  'Marine sediments_mgca_pooled_bcp',
    #                  'Marine sediments_mgca_pooled_red']
    ## d18O only
    #proxy_blacklist: ['Marine sediments_tex86',
    #                  'Marine sediments_caco3',
    #                  'Marine sediments_mgca_pooled_bcp',
    #                  'Marine sediments_mgca_pooled_red']
    ## Mg/Ca only
    #proxy_blacklist: ['Marine sediments_tex86',
    #                  'Marine sediments_d18o_pooled',
    #                  'Marine sediments_caco3']
    ## caco3_13c only
    #proxy_blacklist: ['Marine sediments_tex86',
    #                  'Marine sediments_d18o_pooled',
    #                  'Marine sediments_caco3',
    #                  'Marine sediments_mgca_pooled_bcp',
    #                  'Marine sediments_mgca_pooled_red']
    #
    # ==============================
    # ==============================
    # Proxy order
    proxy_order_type: random
    #proxy_order_type: use_list
    proxy_order:
      - 'Marine sediments_uk37'
      - 'Marine sediments_d18o_pooled'
      - 'Marine sediments_tex86'
      - 'Marine sediments_mgca_pooled_bcp'
      - 'Marine sediments_mgca_pooled_red'
      - 'Marine sediments_caco3'
      - 'Marine sediments_caco3_13c'
    # ==============================
    # Proxy system model (PSM) used for each proxy type; the values name
    # entries of the `psm` section.
    proxy_psm_type:
      Marine sediments_uk37: bayesreg_uk37  # Bayesian model
      Marine sediments_tex86: bayesreg_tex86  # DeepMIP
      #Marine sediments_tex86: tex86h_forward  # TEX86h model
      Marine sediments_d18o_pooled: bayesreg_d18o_pooled  # Bayesian model
      #Marine sediments_d18o_pooled: deepmip_d18o  # DeepMIP
      Marine sediments_mgca_pooled_red: bayesreg_mgca_pooled_red  # Bayesian model
      Marine sediments_mgca_pooled_bcp: bayesreg_mgca_pooled_bcp  # Bayesian model
      #Marine sediments_mgca_pooled_red: deepmip_mgca  # DeepMIP
      #Marine sediments_mgca_pooled_bcp: deepmip_mgca  # DeepMIP
      Marine sediments_caco3: cgenie_caco3  # direct assimilation
      Marine sediments_caco3_13c: cgenie_caco3_13c  # direct assimilation
    # proxy_assim2: proxy labels assimilated for each proxy type; labels should
    # be consistent with those used in the database.
    # NOTE(review): an earlier comment here referred to a first, unused
    # proxy_assim2 dictionary; only this one is present in the file.
    proxy_assim2:
      Marine sediments_uk37: ['uk37', 'UK37']
      Marine sediments_tex86: ['tex86', 'TEX86']
      Marine sediments_d18o_pooled:
        - 'd18o_morozovella'
        - 'd18O_morozovella'
        - 'd18o_m.subb'
        - 'd18o_M.subb'
        - 'd18o_m.velascoensis'
      Marine sediments_mgca_pooled_red:
        - 'mgca_ruber:reductive'
        - 'mgca_ruber_lato:reductive'
        - 'mgca_ruber_stricto:reductive'
        - 'mgca_sacculifer:reductive'
        - 'mgca_bulloides:reductive'
        - 'mgca_pachyderma:reductive'
        - 'mgca_morozovella:reductive'
      Marine sediments_mgca_pooled_bcp:
        - 'mgca_ruber:barker'
        - 'mgca_ruber_lato:barker'
        - 'mgca_ruber_stricto:barker'
        - 'mgca_sacculifer:barker'
        - 'mgca_bulloides:barker'
        - 'mgca_pachyderma:barker'
        - 'mgca_m.subb:barker'
        - 'mgca_m.subbotinae:barker'
        - 'mgca_m.velascoensis:barker'
        - 'mgca_morozovella:barker'
      #Marine sediments_caco3: ['caco3','CaCO3','caco3shallow']
      Marine sediments_caco3: ['caco3', 'CaCO3']
      Marine sediments_caco3_13c: ['caco3_13c', 'CaCO3_13C']
    # proxy_assim3: blacklist of foraminiferal preservation labels.
    # See proxies: proxy_d18o_glassy. If true, the following blacklist is used.
    proxy_assim3:
      Marine sediments_d18o_pooled_glassy: ['unknown', 'frosty']
# ==============================================================================
# settings for proxy system models (psm)
# ==============================================================================
# Settings for each proxy system model (PSM), keyed by model name.
# NOTE(review): the indentation of this section was missing, which made keys
# such as `psm_required_nc` and `psm_error` repeat at the top level. The
# per-model nesting below is reconstructed from the comments and key names.
# Confirm against the code that reads this file.
psm:
  # TEX86 Bayspar model. See https://baysparpy.readthedocs.io/
  bayesreg_tex86:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    temptype: 'sst'
    search_tol: 10  # search tolerance
    nens: 5000  # number of ensemble for the Bayesian model
    # below is only useful for the verification
    search_tol_file: 'PETMTEX_baysparSettings_v0.2.csv'
  # TEX86h model
  tex86h_forward:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    temptype: 'sst'
    psm_error: 0.01  # user-defined, fixed variance
  # d18O BAYFOX model. See https://pypi.org/project/bayfox/
  bayesreg_d18o:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    psm_d18osw_variable:
      sos_sfc_Oequ: 'full'
    psm_d18osw_from_salinity: false
    seasonal_seatemp: false
  # d18O BAYFOX pooled model. See https://pypi.org/project/bayfox/
  bayesreg_d18o_pooled:
    # d18o_phcor_water: comment out or not
    #d18o_phcor_water: 'modern'
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    psm_d18osw_adjust: -0.96
    #d18osw_local_choice: 'zachos94'  # use Zachos et al 1994 model
    d18osw_local_choice: 'zhu19'  # use sea water d18O in Zhu et al., 2019 Sci. Adv., which can be found in the database
    d18osw_icesm_pco2: 3.0  # prePETM # which pCO2 condition is used
    #d18osw_icesm_pco2: 6.0  # PETM
  # d18O non-bayesian model
  deepmip_d18o:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    psm_d18osw_adjust: -1
    #d18osw_local_choice: 'zachos94'  # use sea water d18O in Zachos et al 1994 model
    d18osw_local_choice: 'zhu19'  # use sea water d18O in Zhu et al., 2019 Sci. Adv., which can be found in the database
    #d18osw_icesm_pco2: 3.0  # prePETM
    d18osw_icesm_pco2: 6.0  # PETM
    psm_error: 0.4
    # variance fixed bayfox
  # Mg/Ca DeepMIP model
  deepmip_mgca:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    mgcasw: 2.5  # for PETM; from Evans et al., 2018
    psm_error: 0.5  # fixed variance, determined using baymagpy
  # Mg/Ca Baymag Bayesian model. See https://github.com/mingsongli/baymagpy
  bayesreg_mgca:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    seasonal_seatemp: false
  # Mg/Ca Baymag Bayesian pooled model. See https://github.com/mingsongli/baymagpy
  bayesreg_mgca_pooled_red:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'  # PSM required netCDF file
    psm_required_variables:
      ocn_sur_temp: 'full'
    seasonal_seatemp: false
    # bottom water calcite saturation grid definition
    psm_required_nc_ohm: '/sedgem/fields_sedgem_2d.nc'  # netCDF file for omega
    water_saturation: 'bottom'  # use bottom water
    water_saturation_field: 'carb_ohm_cal'  # field
    #water_saturation: 'fixed'
    #water_saturation_value: 5.0
    psm_required_nc_mg: '/biogem/fields_biogem_2d.nc'
    psm_baymag_ln: 'yes'  # yes = ln (default); no = normal
    #water_saturation: 'modern'  # deep time applications may find nan value for a given location
  # Mg/Ca Baymag Bayesian model. See https://github.com/mingsongli/baymagpy
  bayesreg_mgca_pooled_bcp:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      ocn_sur_temp: 'full'
    seasonal_seatemp: false
  # CaCO3 model. Direct assimilation. Users need to set psm_error.
  cgenie_caco3:
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    #psm_required_nc: '/sedgem/fields_sedgem_2d.nc'  # netCDF file for CaCO3 data in the prior
    psm_required_variables:
      sed_CaCO3: 'full'
    #psm_error: 100.0
    psm_error: 75.0  # user-defined prior error, 75 is used because this parameter leads to the least RMSE and the highest CE
    #psm_error: 25.0
    #psm_error: 37.5  # least RMSE and highest CE 37.5*2 = 75% This is not preferred because surf ohm is too small between pre- and peak- PETM
  # not tested
  cgenie_caco3_13c:
    #psm_required_nc: '/sedgem/fields_sedgem_2d.nc'
    psm_required_nc: '/biogem/fields_biogem_2d.nc'
    psm_required_variables:
      sed_CaCO3_13C: 'full'
    psm_error: 0.5  # needs test
# ==============================================================================
# Parameters for the prior
# ==============================================================================
# Prior (model ensemble) settings.
# NOTE(review): the indentation of this section was missing, which made
# `lim_min`, `lim_max`, `ncname` and `biogem` repeat at the top level. The
# nesting below is reconstructed from the comments and key names; in
# particular, placing `fields_biogem_2d` / `fields_biogem_3d` as siblings of
# `ncname` is an assumption. Confirm against the code that reads this file.
prior:
  # Name of the prior-source section (below) to use.
  prior_source: cgenie_36x36
  cgenie_36x36:
    # Longitude offset in degrees. Default -180.0, works for the cGENIE model.
    dum_lon_offset: -180.0
    # Hard-coded limits on variable values; null means no limit is given.
    limit_hard:
      sed_CaCO3:
        lim_min: 0.0
        lim_max: 100.0
      sed_CaCO3_13C:
        lim_min: -100.0
        lim_max: null
      atm_pCO2:
        lim_min: 0.0
        lim_max: null
      fburial_CaCO3:
        lim_min: 0.0
        lim_max: null
      carb_sur_ohm_cal:
        lim_min: 0.0
        lim_max: null
      carb_sur_ohm_arg:
        lim_min: 0.0
        lim_max: null
      carb_ben_ohm_cal:
        lim_min: 0.0
        lim_max: null
      carb_ben_ohm_arg:
        lim_min: 0.0
        lim_max: null
    # State variables to be analyzed.
    # The first one must be 'ocn_sur_temp'.
    state_variable:
      # use 2d netCDF file (lon-lat, surface/benthic)
      2d:
        ncname:
          biogem: 'fields_biogem_2d'
        fields_biogem_2d:
          - 'ocn_sur_temp'
          - 'atm_temp'
          - 'atm_pCO2'
          - 'ocn_sur_sal'
          - 'misc_pH'
          - 'carb_sur_ohm_cal'
          - 'carb_sur_ohm_arg'
          - 'ocn_ben_temp'
          - 'ocn_sur_ALK'
          - 'sed_CaCO3'
          - 'carb_ben_ohm_cal'
          - 'carb_ben_ohm_arg'
      # use 3D netCDF file DA (lon-lat-depth)
      # not ready
      3d:
        ncname:
          biogem: 'fields_biogem_3d'
        fields_biogem_3d: []  # empty