-
Notifications
You must be signed in to change notification settings - Fork 11
/
mbmod_daily_oneflowline.py
executable file
·4580 lines (4043 loc) · 215 KB
/
mbmod_daily_oneflowline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 24 12:28:37 2020
@author: lilianschuster
different temperature index mass balance types added that are working with the Huss flowlines
"""
import numpy as np
from scipy.interpolate import interp1d
import pandas as pd
import xarray as xr
import os
import netCDF4
import datetime
import warnings
import scipy.stats as stats
import logging
import copy
from scipy import optimize as optimization
# imports from oggm
from oggm import entity_task
from oggm import cfg, utils
from oggm.cfg import SEC_IN_YEAR, SEC_IN_MONTH, SEC_IN_DAY
from oggm.utils import (floatyear_to_date, date_to_floatyear, ncDataset,
lazy_property, monthly_timeseries,
clip_min, clip_array)
from oggm.utils._funcs import haversine
from oggm.utils._workflow import global_task
from oggm.exceptions import InvalidParamsError, InvalidWorkflowError
from oggm.shop.ecmwf import get_ecmwf_file, BASENAMES
from oggm.core.massbalance import MassBalanceModel
import MBsandbox
# Module logger
log = logging.getLogger(__name__)

# Default climate-file server used by the OGGM download utilities.
ECMWF_SERVER = 'https://cluster.klima.uni-bremen.de/~oggm/climate/'

# Register additional flattened daily climate datasets in the (imported)
# OGGM BASENAMES mapping: dataset name -> variable key -> relative file path.
# only relevant when using
# server='https://cluster.klima.uni-bremen.de/~lschuster/'
# only glacier-relevant gridpoints included!
BASENAMES['ERA5_daily'] = {
    'inv': 'era5/daily/v1.0/era5_glacier_invariant_flat.nc',
    'tmp': 'era5/daily/v1.0/era5_daily_t2m_1979-2018_flat.nc'
}
BASENAMES['WFDE5_CRU_daily'] = {
    'inv': 'wfde5_cru/daily/v1.1/wfde5_cru_glacier_invariant_flat.nc',
    'tmp': 'wfde5_cru/daily/v1.1/wfde5_cru_tmp_1979-2018_flat.nc',
    'prcp': 'wfde5_cru/daily/v1.1/wfde5_cru_prcp_1979-2018_flat.nc',
}
BASENAMES['W5E5_daily'] = {
    'inv': 'w5e5v2.0/flattened/2023.2/daily/w5e5v2.0_glacier_invariant_flat.nc',
    'tmp': 'w5e5v2.0/flattened/2023.2/daily/w5e5v2.0_tas_global_daily_flat_glaciers_1979_2019.nc',
    'prcp': 'w5e5v2.0/flattened/2023.2/daily/w5e5v2.0_pr_global_daily_flat_glaciers_1979_2019.nc',
}
BASENAMES['MSWEP_daily'] = {
    'prcp': 'mswepv2.8/flattened/daily/mswep_pr_global_daily_flat_glaciers_1979_2019.nc'
    # there is no orography file for MSWEP!!! (and also no temperature file)
}
# this is for Sarah Hanus workflow
# this only works if
# ECMWF_SERVER = 'https://cluster.klima.uni-bremen.de/~oggm/shanus/'
BASENAMES['W5E5_daily_dw'] = {
    'inv': 'ISIMIP3a/flattened/daily/gswp3-w5e5_obsclim_glacier_invariant_flat.nc',
    'tmp': 'ISIMIP3a/flattened/daily/gswp3-w5e5_obsclim_tas_global_daily_flat_glaciers_1979_2019.nc',
    'prcp': 'ISIMIP3a/flattened/daily/gswp3-w5e5_obsclim_pr_global_daily_flat_glaciers_1979_2019.nc',
}
def get_w5e5_file(dataset='W5E5_daily', var=None,
                  server='https://cluster.klima.uni-bremen.de/~lschuster/'):
    """Return a path to the desired WFDE5_CRU, W5E5 or MSWEP
    baseline climate file.

    If the file is not present locally, it is downloaded first.

    ... copy of get_ecmwf_file (the only difference is that this function is more variable with a
    server keyword-argument instead of using only ECMWF_SERVER),
    if wished could be easily adapted to work in OGGM proper

    Parameters
    ----------
    dataset : str
        e.g. 'W5E5_daily', 'WFDE5_CRU_daily', 'MSWEP_daily', 'W5E5_daily_dw',
        could define more BASENAMES
    var : str
        'inv' for invariant
        'tmp' for temperature
        'prcp' for precipitation
    server : str
        path to climate files on the cluster

    Raises
    ------
    InvalidParamsError
        if `dataset` or `var` is not registered in BASENAMES
    """
    if dataset in ('WFDE5_CRU_daily', 'ERA5_daily'):
        # NOTE: message previously lacked separator spaces between the
        # concatenated string parts ("someglacier", "usethem") — fixed.
        print('Attention: these datasets are basically deprecated. There '
              'is also an issue with the flattened files, i.e., some '
              'glacier gridpoints are missing. If you really want to use '
              'them, you need to do the flattening yourself. ')
    # check if input makes sense
    if dataset not in BASENAMES:
        raise InvalidParamsError('ECMWF dataset {} not '
                                 'in {}'.format(dataset, BASENAMES.keys()))
    if var not in BASENAMES[dataset]:
        raise InvalidParamsError('ECMWF variable {} not '
                                 'in {}'.format(var,
                                                BASENAMES[dataset].keys()))
    # File to look for: download (or reuse the cached copy) and
    # return the local path
    return utils.file_downloader(server + BASENAMES[dataset][var])
def write_climate_file(gdir, time, prcp, temp,
                       ref_pix_hgt, ref_pix_lon, ref_pix_lat,
                       ref_pix_lon_pr=None, ref_pix_lat_pr=None,
                       gradient=None, temp_std=None,
                       time_unit=None, calendar=None,
                       source=None, long_source=None,
                       file_name='climate_historical',
                       filesuffix='',
                       temporal_resol='monthly'):
    """Creates a netCDF4 file with climate data timeseries.

    this could be used in general also in OGGM proper

    Parameters
    ----------
    gdir:
        glacier directory
    time : ndarray
        the time array, in a format understood by netCDF4
    prcp : ndarray
        the precipitation array (unit: 'kg m-2')
    temp : ndarray
        the temperature array (unit: 'degC')
    ref_pix_hgt : float
        the elevation of the dataset's reference altitude
        (for correction). In practice it is the same altitude as the
        baseline climate (if MSWEP prcp used, only of temp. climate file).
    ref_pix_lon : float
        the location of the gridded data's grid point
        (if MSWEP prcp used, only of temp. climate file)
    ref_pix_lat : float
        the location of the gridded data's grid point
        (if MSWEP prcp used, only of temp. climate file)
    ref_pix_lon_pr : float
        default is None, only if MSWEP prcp used, it is the
        location of the gridded prcp data grid point
    ref_pix_lat_pr : float
        default is None, only if MSWEP prcp used, it is the
        location of the gridded prcp data grid point
    gradient : ndarray, optional
        whether to use a time varying gradient
    temp_std : ndarray, optional
        the daily standard deviation of temperature (useful for PyGEM)
    time_unit : str
        the reference time unit for your time array. This should be chosen
        depending on the length of your data. The default is to choose
        it ourselves based on the starting year.
    calendar : str
        If you use an exotic calendar (e.g. 'noleap')
    source : str
        the climate data source (required)
    long_source : str
        the climate data source describing origin of
        temp, prpc and lapse rate in detail
    file_name : str
        How to name the file
    filesuffix : str
        Apply a suffix to the file
    temporal_resol : str
        temporal resolution of climate file, either monthly (default) or
        daily
    """
    # guard: daily sources must not silently overwrite the default
    # (monthly) climate_historical file
    if source == 'ERA5_daily' and filesuffix == '':
        raise InvalidParamsError("filesuffix should be '_daily' for ERA5_daily"
                                 "file_name climate_historical is normally"
                                 "monthly data")
    elif (source == 'WFDE5_CRU_daily' and filesuffix == ''
          and temporal_resol == 'daily'):
        raise InvalidParamsError("filesuffix should be '_daily' for WFDE5_CRU_daily"
                                 "if daily chosen as temporal_resol"
                                 "file_name climate_historical is normally"
                                 "monthly data")
    elif (source == 'W5E5_daily' and filesuffix == ''
          and temporal_resol == 'daily'):
        raise InvalidParamsError("filesuffix should be '_daily' for W5E5_daily"
                                 "if daily chosen as temporal_resol"
                                 "file_name climate_historical is normally"
                                 "monthly data")
    if long_source is None:
        long_source = source
    # MSWEP prcp comes from a different (finer) grid than temperature,
    # so several reference attributes below are only valid for temperature
    if 'MSWEP' in long_source:
        prcp_from_mswep = True
    else:
        prcp_from_mswep = False
    # overwrite is default
    fpath = gdir.get_filepath(file_name, filesuffix=filesuffix)
    if os.path.exists(fpath):
        os.remove(fpath)
    if source is None:
        raise InvalidParamsError('`source` kwarg is required')
    zlib = cfg.PARAMS['compress_climate_netcdf']
    try:
        y0 = time[0].year
        y1 = time[-1].year
    except AttributeError:
        # `time` was not datetime-like yet -> convert first
        time = pd.DatetimeIndex(time)
        y0 = time[0].year
        y1 = time[-1].year
    if time_unit is None:
        # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
        # #timestamp-limitations
        if y0 > 1800:
            time_unit = 'days since 1801-01-01 00:00:00'
        elif y0 >= 0:
            time_unit = ('days since {:04d}-01-01 '
                         '00:00:00'.format(time[0].year))
        else:
            raise InvalidParamsError('Time format not supported')
    with ncDataset(fpath, 'w', format='NETCDF4') as nc:
        # these are only valid for temperature if MSWEP prcp is used!!!
        nc.ref_hgt = ref_pix_hgt
        nc.ref_pix_lon = ref_pix_lon
        nc.ref_pix_lat = ref_pix_lat
        nc.ref_pix_dis = haversine(gdir.cenlon, gdir.cenlat,
                                   ref_pix_lon, ref_pix_lat)
        if prcp_from_mswep:
            # there is no reference height given!!!
            if ref_pix_lon_pr is None or ref_pix_lat_pr is None:
                raise InvalidParamsError('if MSWEP is used for prcp, need to add'
                                         'precipitation lon/lat gridpoints')
            nc.ref_pix_lon_pr = np.round(ref_pix_lon_pr,3)
            nc.ref_pix_lat_pr = np.round(ref_pix_lat_pr,3)
        nc.climate_source = long_source
        # hydrological year bookkeeping: a series starting in January
        # belongs to hydro year y0, otherwise to y0 + 1
        if time[0].month == 1:
            nc.hydro_yr_0 = y0
        else:
            nc.hydro_yr_0 = y0 + 1
        nc.hydro_yr_1 = y1
        nc.createDimension('time', None)
        nc.author = 'OGGM'
        nc.author_info = 'Open Global Glacier Model'
        timev = nc.createVariable('time', 'i4', ('time',))
        tatts = {'units': time_unit}
        if calendar is None:
            calendar = 'standard'
        tatts['calendar'] = calendar
        try:
            numdate = netCDF4.date2num([t for t in time], time_unit,
                                       calendar=calendar)
        except TypeError:
            # numpy's broken datetime only works for us precision
            time = time.astype('M8[us]').astype(datetime.datetime)
            numdate = netCDF4.date2num(time, time_unit, calendar=calendar)
        timev.setncatts(tatts)
        timev[:] = numdate
        v = nc.createVariable('prcp', 'f4', ('time',), zlib=zlib)
        v.units = 'kg m-2'
        # this could be made more beautiful
        # just rough estimate
        # (len > years * 28 * 12 can only hold for daily resolution)
        if (len(prcp) > (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 28 * 12 and
                temporal_resol == 'daily'):
            if source == 'ERA5_daily':
                v.long_name = ("total daily precipitation amount, "
                               "assumed same for each day of month")
            elif source == 'WFDE5_daily_cru':
                v.long_name = ("total daily precipitation amount"
                               "sum of snowfall and rainfall")
            elif source == 'W5E5_daily' and not prcp_from_mswep:
                v.long_name = ("total daily precipitation amount")
            elif source == 'W5E5_daily' and prcp_from_mswep:
                v.long_name = ("total daily precipitation amount, "
                               "1979-01-01 prcp assumed to be equal to 1979-01-02"
                               "due to missing data, MSWEP prcp with 0.1deg resolution"
                               "(finer than temp. data), so refpixhgt, "
                               "refpixlon and refpixlat not valid for prcp data!!!")
        elif (len(prcp) == (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 12
              and temporal_resol == 'monthly' and not prcp_from_mswep):
            v.long_name = 'total monthly precipitation amount'
        elif (len(prcp) == (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 12
              and temporal_resol == 'monthly' and prcp_from_mswep):
            v.long_name = ("total monthly precipitation amount, MSWEP prcp with 0.1deg resolution"
                           "(finer than temp. data), so refpixhgt, "
                           "refpixlon and refpixlat not valid for prcp data!!!")
        else:
            raise InvalidParamsError('there is a conflict in the'
                                     'prcp timeseries, '
                                     'please check temporal_resol')
        # just to check that it is in kg m-2 per day or per month and not in per second
        assert prcp.max() > 1
        v[:] = prcp
        # check that there are no filling values inside
        assert np.all(v[:].data < 1e5)
        v = nc.createVariable('temp', 'f4', ('time',), zlib=zlib)
        v.units = 'degC'
        if ((source == 'ERA5_daily' or source == 'WFDE5_daily_cru' or source =='W5E5_daily') and
                len(temp) > (y1 - y0) * 28 * 12 and temporal_resol == 'daily'):
            v.long_name = '2m daily temperature at height ref_hgt'
        elif source == 'ERA5_daily' and len(temp) <= (y1 - y0) * 30 * 12:
            raise InvalidParamsError('if the climate dataset (here source)'
                                     'is ERA5_daily, temperatures should be in'
                                     'daily resolution, please check or set'
                                     'set source to another climate dataset')
        elif (source == 'WFDE5_daily_cru' and temporal_resol == 'monthly' and
              len(temp) > (y1 - y0) * 28 * 12):
            raise InvalidParamsError('something wrong in the implementation')
        else:
            v.long_name = '2m monthly temperature at height ref_hgt'
        v[:] = temp
        # no filling values!
        assert np.all(v[:].data < 1e5)
        if gradient is not None:
            v = nc.createVariable('gradient', 'f4', ('time',), zlib=zlib)
            v.units = 'degC m-1'
            v.long_name = ('temperature gradient from local regression or'
                           'lapserates')
            v[:] = gradient
            # no filling values
            assert np.all(v[:].data < 1e5)
        if temp_std is not None:
            v = nc.createVariable('temp_std', 'f4', ('time',), zlib=zlib)
            v.units = 'degC'
            v.long_name = 'standard deviation of daily temperatures'
            v[:] = temp_std
            # no filling values
            assert np.all(v[:].data < 1e5)
@entity_task(log, writes=['climate_historical_daily'])
def process_w5e5_data(gdir, y0=None, y1=None, temporal_resol='daily',
                      climate_type=None,
                      output_filesuffix=None,
                      cluster=False):
    """
    Processes and writes the WFDE5_CRU & W5E5 daily baseline climate data for a glacier.

    Either on daily or on monthly basis. Can also use W5E5_MSWEP, where precipitation is taken
    from MSWEP (with higher resolution, 0.1°) and temperature comes from W5E5 (so actually from ERA5).
    In this case, the mean gridpoint altitude is only valid for the temperature gridpoint
    (there is no orography for MSWEP, and it is different because of a finer resolution in MSWEP)

    Extracts the nearest timeseries and writes everything to a NetCDF file.
    This uses only the WFDE5_CRU / W5E5 daily temperatures. The temperature lapse
    rate are used from ERA5dr.

    comment: This is similar to process_era5_daily, maybe a general function would be better

    TODO: see _verified_download_helper no known hash for ...

    Parameters
    ----------
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    temporal_resol : str
        uses either daily (default) or monthly data
    climate_type: str
        either WFDE5_CRU (default, v1.1 only till end of 2018) or W5E5 (end of 2019)
        or W5E5_MSWEP (precipitation from MSWEP, temp. from W5E5)
    output_filesuffix : optional
        None by default, as the output_filesuffix is automatically chosen
        from the temporal_resol and climate_type. But you can change the filesuffix here,
        just make sure that you use then later the right climate file
    cluster : bool
        default is False, if this is run on the cluster, set it to True,
        because we do not need to download the files
        todo: this logic does not make anymore sense as there are other ways to prevent the cluster from downloading
        stuff -> probably I can remove cluster = True entirely ?!
    """
    if cfg.PARAMS['hydro_month_nh'] != 1 and climate_type != 'WFDE5_CRU':
        raise InvalidParamsError('Hydro months different than 1 are not tested, there is some'
                                 'issue with the lapse rates, as they only go until 2019-05'
                                 'if you want other hydro months, need to adapt the code!!!')
    # choose the default filesuffix and the dataset basenames from climate_type
    if climate_type == 'WFDE5_CRU':
        if temporal_resol == 'monthly':
            output_filesuffix_def = '_monthly_WFDE5_CRU'
        elif temporal_resol == 'daily':
            output_filesuffix_def = '_daily_WFDE5_CRU'
        # basename of climate
        # (we use for both the daily dataset and resample to monthly)
        dataset = 'WFDE5_CRU_daily'
        dataset_prcp = dataset
    elif climate_type == 'W5E5':
        if temporal_resol == 'monthly':
            output_filesuffix_def = '_monthly_W5E5'
        elif temporal_resol == 'daily':
            output_filesuffix_def = '_daily_W5E5'
        # basename of climate
        # (for both the daily dataset and resample to monthly)
        dataset = 'W5E5_daily'
        dataset_prcp = dataset
    elif climate_type == 'W5E5_dw':
        output_filesuffix_def = '_daily_W5E5_dw'
        dataset = 'W5E5_daily_dw'
        dataset_prcp = dataset
    elif climate_type =='W5E5_MSWEP':
        if temporal_resol == 'monthly':
            output_filesuffix_def = '_monthly_W5E5_MSWEP'
        elif temporal_resol == 'daily':
            output_filesuffix_def = '_daily_W5E5_MSWEP'
        # basename of climate
        # (for both the daily dataset and resample to monthly)
        dataset = 'W5E5_daily'
        # precipitation from MSWEP!!!
        dataset_prcp = 'MSWEP_daily'
    else:
        raise NotImplementedError('climate_type can either be WFDE5_CRU or W5E5 and '
                                  'temporal_resol either monthly or daily!')
    if output_filesuffix is None:
        # set the default output_filesuffix
        output_filesuffix = output_filesuffix_def
    else:
        # use the user-given output-filesufix
        pass
    # wfde5_daily for temperature and precipitation
    # but need temperature lapse rates from ERA5
    dataset_othervars = 'ERA5dr'
    # get the central longitude/latitudes of the glacier
    # (longitudes shifted into [0, 360) to match the flattened files)
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat
    # todo: this logic should be removed
    if cluster:
        cluster_path = '/home/www/lschuster/'
        path_tmp = cluster_path + BASENAMES[dataset]['tmp']
        path_prcp = cluster_path + BASENAMES[dataset_prcp]['prcp']
        path_inv = cluster_path + BASENAMES[dataset]['inv']
    else:
        if climate_type != 'W5E5_dw':
            path_tmp = get_w5e5_file(dataset, 'tmp')
            path_prcp = get_w5e5_file(dataset_prcp, 'prcp')
            path_inv = get_w5e5_file(dataset, 'inv')
        elif climate_type == 'W5E5_dw':
            path_tmp = get_w5e5_file(dataset, 'tmp', server='https://cluster.klima.uni-bremen.de/~shanus/')
            path_prcp = get_w5e5_file(dataset_prcp, 'prcp', server='https://cluster.klima.uni-bremen.de/~shanus/')
            path_inv = get_w5e5_file(dataset, 'inv', server='https://cluster.klima.uni-bremen.de/~shanus/')
    # Use xarray to read the data
    # todo: would go faster with only netCDF -.-, but easier with xarray
    # first temperature dataset
    with xr.open_dataset(path_tmp) as ds:
        assert ds.longitude.min() >= 0
        # set temporal subset for the ts data (hydro years)
        if gdir.hemisphere == 'nh':
            sm = cfg.PARAMS['hydro_month_nh']
        elif gdir.hemisphere == 'sh':
            sm = cfg.PARAMS['hydro_month_sh']
        em = sm - 1 if (sm > 1) else 12
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        if climate_type == 'WFDE5_CRU':
            # old version of WFDE5_CRU that only goes till 2018
            if y1 > 2018 or y0 < 1979:
                text = 'The climate files only go from 1979--2018,\
                    choose another y0 and y1'
                raise InvalidParamsError(text)
        elif climate_type[:4] == 'W5E5':
            if y1 > 2019 or y0 < 1979:
                text = 'The climate files only go from 1979 --2019, something is wrong'
                raise InvalidParamsError(text)
        # if default settings: this is the last day in March or September
        time_f = '{}-{:02d}'.format(y1, em)
        end_day = int(ds.sel(time=time_f).time.dt.daysinmonth[-1].values)
        # this was tested also for hydro_month = 1
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-{}'.format(y1, em, end_day)))
        # keep the per-month day counts for the lapse-rate repetition below
        if sm == 1 and y1 == 2019 and (climate_type[:4] == 'W5E5'):
            days_in_month = ds['time.daysinmonth'].copy()
        try:
            # computing all the distances and choose the nearest gridpoint
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
            # I turned this around
        except ValueError:
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
            # normally if I do the flattening, this here should not occur
        # if we want to use monthly mean temperatures and
        # standard deviation of daily temperature:
        # variable names differ between WFDE5_CRU and W5E5 files
        Tvar = 'Tair'
        Pvar = 'tp'
        if climate_type[:4] == 'W5E5':
            Tvar = 'tas'
            Pvar = 'pr'
        if temporal_resol == 'monthly':
            Tair_std = ds.resample(time='MS').std()[Tvar]
            temp_std = Tair_std.data
            ds = ds.resample(time='MS').mean()
            ds['longitude'] = ds.longitude.isel(time=0)
            ds['latitude'] = ds.latitude.isel(time=0)
        elif temporal_resol == 'daily':
            temp_std = None
        else:
            raise InvalidParamsError('temporal_resol can only be monthly'
                                     'or daily!')
        # temperature should be in degree Celsius for the glacier climate files
        temp = ds[Tvar].data - 273.15
        time = ds.time.data
        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])
        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon
    # precipitation: similar as temperature
    with xr.open_dataset(path_prcp) as ds:
        assert ds.longitude.min() >= 0
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018, or end of 2019)
        # attention if daily data, need endday!!!
        if climate_type == 'W5E5_MSWEP' and y0 == 1979 and sm == 1:
            # first day of 1979 is missing!!! (will assume later that it is equal to the
            # median of January daily prcp ...)
            ds = ds.sel(time=slice('{}-{:02d}-02'.format(y0, sm),
                                   '{}-{:02d}-{}'.format(y1, em, end_day)))
        else:
            ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                                   '{}-{:02d}-{}'.format(y1, em, end_day)))
        try:
            # ... prcp is also flattened
            # in case of W5E5_MSWEP this will be another gridpoint than for temperature
            # but normally it should work
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        # compute the filler value for the missing 1979-01-01 BEFORE any
        # monthly resampling (needs the daily values)
        if y0 == 1979 and sm == 1 and climate_type == 'W5E5_MSWEP':
            median_jan_1979 = np.median(ds.sel(time='1979-01').pr.values) * SEC_IN_DAY
        # if we want to use monthly summed up precipitation:
        if temporal_resol == 'monthly':
            ds = ds.resample(time='MS').sum()
            ds['longitude'] = ds.longitude.isel(time=0)
            ds['latitude'] = ds.latitude.isel(time=0)
        elif temporal_resol == 'daily':
            pass
        if climate_type == 'WFDE5_CRU':
            # the prcp data of wfde5_CRU has been converted already into
            # kg m-2 day-1 ~ mm/day or into kg m-2 month-1 ~ mm/month
            prcp = ds[Pvar].data  # * 1000
        elif climate_type[:4] == 'W5E5':
            # if daily: convert kg m-2 s-1 into kg m-2 day-1
            # if monthly: convert monthly sum of kg m-2 s-1 into kg m-2 month-1
            prcp = ds[Pvar].data * SEC_IN_DAY
            if climate_type == 'W5E5_MSWEP':
                # 1st day of January 1979 is missing for MSWEP prcp
                # assume that precipitation is equal to
                # median of January daily prcp
                if y0 == 1979 and sm == 1:
                    if temporal_resol == 'daily':
                        prcp = np.append(np.array(median_jan_1979), prcp)
                    elif temporal_resol == 'monthly':
                        # in the monthly sum, 1st of January 1979 is missing
                        # add this value by assuming mean over the other days
                        prcp[0] = prcp[0] + median_jan_1979
            if y1 == 2019:
                if temporal_resol == 'daily':
                    assert len(prcp) == 14975  # 41 years * amount of days
                elif temporal_resol == 'monthly':
                    assert len(prcp) == 492  # 41 years * amount of months
        ref_lon_pr = float(ds['longitude'])
        ref_lat_pr = float(ds['latitude'])
        ref_lon_pr = ref_lon_pr - 360 if ref_lon_pr > 180 else ref_lon_pr
    # wfde5/w5e5 invariant file
    # (gridpoint altitude only valid for temperature
    # in case of 'W5E5_MSWEP')
    with xr.open_dataset(path_inv) as ds:
        assert ds.longitude.min() >= 0
        ds = ds.isel(time=0)
        try:
            # Flattened wfde5_inv (only possibility at the moment)
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        # wfde5 inv ASurf/hgt is already in hgt coordinates
        # G = cfg.G  # 9.80665
        hgt = ds['ASurf'].data  # / G
    # here we need to use the ERA5dr data ...
    # there are no lapse rates from wfde5/W5E5 !!!
    # TODO: use updated ERA5dr files that go until end of 2019 and update the code accordingly !!!
    path_lapserates = get_ecmwf_file(dataset_othervars, 'lapserates')
    with xr.open_dataset(path_lapserates) as ds:
        assert ds.longitude.min() >= 0
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018/2019)
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        # no flattening done for the ERA5dr gradient dataset
        ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        if sm == 1 and y1 == 2019 and (climate_type[:4] == 'W5E5'):
            # missing some months of ERA5dr (which only goes till middle of 2019)
            # otherwise it will fill it with large numbers ...
            ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm), '2018-{:02d}-01'.format(em)))
            mean_grad = ds.groupby('time.month').mean().lapserate
            # fill the last year with mean gradients
            gradient = np.concatenate((ds['lapserate'].data, mean_grad.values), axis=None)
        else:
            # get the monthly gradient values
            gradient = ds['lapserate'].data
        if temporal_resol == 'monthly':
            pass
        elif temporal_resol == 'daily':
            # gradient needs to be restructured to have values for each day
            # when daily resolution is applied
            # assume same gradient for each day
            if sm == 1 and y1 == 2019 and (climate_type[:4] == 'W5E5'):
                gradient = np.repeat(gradient, days_in_month.resample(time='MS').mean())
                assert len(gradient) == len(days_in_month)
            else:
                gradient = np.repeat(gradient, ds['time.daysinmonth'])
    long_source = 'temp: {}, prcp: {}, lapse rate: {}'.format(dataset,
                                                              dataset_prcp,
                                                              dataset_othervars)
    if climate_type == 'W5E5_MSWEP':
        ref_pix_lon_pr = ref_lon_pr
        ref_pix_lat_pr = ref_lat_pr
    else:
        # if not MSWEP prcp and temp gridpoints should be the same ones!!!
        np.testing.assert_allclose(ref_lon_pr, ref_lon)
        np.testing.assert_allclose(ref_lat_pr, ref_lat)
        ref_pix_lon_pr = None
        ref_pix_lat_pr = None
    # OK, ready to write
    write_climate_file(gdir, time, prcp, temp, hgt, ref_lon, ref_lat,
                       ref_pix_lon_pr=ref_pix_lon_pr, ref_pix_lat_pr=ref_pix_lat_pr,
                       filesuffix=output_filesuffix,
                       temporal_resol=temporal_resol,
                       gradient=gradient,
                       temp_std=temp_std,
                       source=dataset,
                       long_source=long_source,
                       file_name='climate_historical')
@entity_task(log, writes=['climate_historical_daily'])
def process_era5_daily_data(gdir, y0=None, y1=None, output_filesuffix='_daily_ERA5',
                            cluster=False):
    """Processes and writes the era5 daily baseline climate data for a glacier.
    into climate_historical_daily.nc

    Extracts the nearest timeseries and writes everything to a NetCDF file.
    This uses only the ERA5 daily temperatures. The precipitation, lapse
    rate and standard deviations are used from ERA5dr.

    comment: this is now a new function, which could also be adapted for other climates, but
    it seemed to be easier to make a new function for a new climate dataset (such as process_w5e5_data)

    TODO: see _verified_download_helper no known hash for
    era5_daily_t2m_1979-2018_flat.nc and era5_glacier_invariant_flat

    Parameters
    ----------
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this add a suffix to the output file (useful to avoid overwriting
        previous experiments)
    cluster : bool
        default is False, if this is run on the cluster, set it to True,
        because we do not need to download the files
        todo: this logic does not make anymore sense as there are other
        ways to prevent the cluster from downloading stuff
        -> probably I can remove cluster = True entirely ?!
    """
    # era5daily only for temperature
    dataset = 'ERA5_daily'
    # for the other variables use the data of ERA5dr
    dataset_othervars = 'ERA5dr'
    # get the central longitude/latidudes of the glacier
    # (longitudes shifted into [0, 360) to match the flattened files)
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat
    cluster_path = '/home/www/oggm/climate/'
    if cluster:
        path = cluster_path + BASENAMES[dataset]['tmp']
    else:
        path = get_ecmwf_file(dataset, 'tmp')
    # Use xarray to read the data
    # would go faster with netCDF -.-
    with xr.open_dataset(path) as ds:
        assert ds.longitude.min() >= 0
        # set temporal subset for the ts data (hydro years)
        if gdir.hemisphere == 'nh':
            sm = cfg.PARAMS['hydro_month_nh']
        elif gdir.hemisphere == 'sh':
            sm = cfg.PARAMS['hydro_month_sh']
        em = sm - 1 if (sm > 1) else 12
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        if y1 > 2018 or y0 < 1979:
            text = 'The climate files only go from 1979--2018,\
                choose another y0 and y1'
            raise InvalidParamsError(text)
        # if default settings: this is the last day in March or September
        time_f = '{}-{:02d}'.format(y1, em)
        end_day = int(ds.sel(time=time_f).time.dt.daysinmonth[-1].values)
        # this was tested also for hydro_month = 1
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-{}'.format(y1, em, end_day)))
        try:
            # computing all the distances and choose the nearest gridpoint
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
            # I turned this around
        except ValueError:
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
            # normally if I do the flattening, this here should not occur
        # temperature should be in degree Celsius for the glacier climate files
        temp = ds['t2m'].data - 273.15
        time = ds.time.data
        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])
        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon
    # pre should be done as in ERA5dr datasets
    with xr.open_dataset(get_ecmwf_file(dataset_othervars, 'pre')) as ds:
        assert ds.longitude.min() >= 0
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018)
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        try:
            # prcp is not flattened, so this here should work normally
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        except ValueError:
            # if Flattened ERA5_precipitation?
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        # the prcp dataset needs to be restructured to have values for each day
        prcp = ds['tp'].data * 1000
        # just assume that precipitation is every day the same:
        prcp = np.repeat(prcp, ds['time.daysinmonth'])
        # Attention the unit is now prcp per day
        # (not per month as in OGGM default:
        # prcp = ds['tp'].data * 1000 * ds['time.daysinmonth']
    if cluster:
        path_inv = cluster_path + BASENAMES[dataset]['inv']
    else:
        path_inv = get_ecmwf_file(dataset, 'inv')
    with xr.open_dataset(path_inv) as ds:
        assert ds.longitude.min() >= 0
        ds = ds.isel(time=0)
        try:
            # Flattened ERA5_invariant (only possibility at the moment)
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        # geopotential -> surface elevation in m
        G = cfg.G  # 9.80665
        hgt = ds['z'].data / G
    temp_std = None
    path_lapserates = get_ecmwf_file(dataset_othervars, 'lapserates')
    with xr.open_dataset(path_lapserates) as ds:
        assert ds.longitude.min() >= 0
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        # no flattening done for the ERA5dr gradient dataset
        ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        # get the monthly gradient values
        gradient = ds['lapserate'].data
        # gradient needs to be restructured to have values for each day
        gradient = np.repeat(gradient, ds['time.daysinmonth'])
        # assume same gradient for each day
    # OK, ready to write
    write_climate_file(gdir, time, prcp, temp, hgt, ref_lon, ref_lat,
                       filesuffix=output_filesuffix,
                       temporal_resol='daily',
                       gradient=gradient,
                       temp_std=temp_std,
                       source=dataset,
                       file_name='climate_historical')
class TIModel_Parent(MassBalanceModel):
""" Parent class that works for different temperature-index models, this is only instanciated
via the child classes TIModel or TIModel_Sfc_Type. It is just a container with shared code
to get annual, monthly and daily climate. The actual mass balance can only be computed in child classes as
there the methods between using surface type and not using sfc type differ.
Different mass balance modules compatible to OGGM with one flowline,
so far this is only tested for the elevation-band flowlines
"""
def __init__(self, gdir, melt_f, prcp_fac=2.5, residual=0,
mb_type='mb_pseudo_daily', N=100, loop=False,
temp_std_const_from_hist = False,
grad_type='cte', filename='climate_historical',
repeat=False, ys=None, ye=None,
t_solid=0, t_liq=2, t_melt=0,
default_grad=-0.0065,
temp_local_gradient_bounds=[-0.009, -0.003],
SEC_IN_YEAR=SEC_IN_YEAR,
SEC_IN_MONTH=SEC_IN_MONTH,
SEC_IN_DAY=SEC_IN_DAY,
baseline_climate=None,
input_filesuffix='default',
input_filesuffix_h_for_temp_std = '_monthly_W5E5'
):
""" Initialize.
Parameters
----------
gdir : GlacierDirectory
the glacier directory
melt_f : float
melt temperature sensitivity factor per month (kg /m² /mth /K),
need to be prescribed, e.g. such that
|mean(MODEL_MB)-mean(REF_MB)|--> 0
prcp_fac : float
multiplicative precipitation factor, has to be calibrated for each option and
each climate dataset, default is 2.5,
residual : float, optional
default is to use a residual of zero [mm we yr-1 ~ kg/m2/mth]
Note that this residual is *subtracted* from the computed MB.
do not change this unless you know what you do!
Indeed: residual = MODEL_MB - REFERENCE_MB.
ToDO: maybe change the sign?,
opposite to OGGM "MB terms + residual"
mb_type: str
three types: 'mb_pseudo_daily' (default: use temp_std and N percentiles),
'mb_monthly' (same as default OGGM PastMassBalance),
'mb_real_daily' (use daily temperature values).
the mb_type only works if the baseline_climate of gdir is right
N : int
number of percentiles used to generate gaussian-like daily
temperatures from daily std and mean monthly temp (default is N=100)
loop : bool
the way how the matrix multiplication is done,
using np.matmul or a loop(default: False)
only applied if mb_type is 'mb_pseudo_daily'
todo: can actually remove loop=True, because this is not anymore used!
(loop=True is deprecated)
grad_type : str
three types of applying the temperature gradient:
'cte' (default, constant lapse rate, set to default_grad,
same as in default OGGM)
'var_an_cycle' (varies spatially and over annual cycle,
but constant over the years)
'var' (varies spatially & temporally as in the climate files, deprecated!)
temp_std_const_from_hist : boolean, optional
whether a variable temp_std is used or one that is constant but changes
for each month of the year (avg. over calib period 2000-2019). It is only used
in combination with mb_type='mb_pseudo_daily'. If set to True, mb_pseudo_daily
is more similar to the approach inside of GloGEM, GloGEMFlow ...
filename : str, optional
set it to a different BASENAME if you want to use alternative climate
data, default is 'climate_historical'
repeat : bool
Whether the climate period given by [ys, ye] should be repeated
indefinitely in a circular way (default is False)
todo: add when this is used
ys : int
The start of the climate period where the MB model is valid
(default: the period with available data)
ye : int
The end of the climate period where the MB model is valid
(default: the period with available data)
t_solid : float
temperature threshold for solid precipitation
(degree Celsius, default 0)
t_liq: float
temperature threshold for liquid precipitation
(degree Celsius, default 2)
t_melt : float
temperature threshold where snow/ice melts
(degree Celsius, default 0)
default_grad : float,
constant lapse rate (temperature gradient, default: -0.0065 K/m)
if grad_type != cte, then this value is not used
but instead the changing lapse rate from the climate datasets
temp_local_gradient_bounds : [float, float],
if grad_type != cte and the estimated lapse rate (from ERA5)
does not lie in this range,
set gradient instead to these minimum, maximum gradients
(default: [-0.009, -0.003] K/m)
SEC_IN_YEAR: float
seconds in a year (default: 31536000s),
comment: maybe this could be changed
SEC_IN_MONTH: float
seconds in a month (default: 2628000s),
comment: maybe this could be changed as not each
month has the same amount of seconds,
in February can be a difference of 8%
baseline_climate: str
climate that should be applied, e.g. ERA5dr, WFDE5_CRU, W5E5
input_filesuffix: str
if set to 'default', it is set depending on mb_type and
baseline_climate, but can change this here,
e.g. change it to '' to work without filesuffix as
default in oggm PastMassBalance
input_filesuffix_h_for_temp_std : str
historical climate_historical filesuffix which was used for calibration,
only used if filename is not
climate_historical and if mb_type = 'mb_pseudo_daily' and if
temp_std_const_from_hist is True. It is then necessary to get the 12 monthly
daily std from the historical climate file.
Attributes
----------
temp_bias : float,
Add a temperature bias to the time series (default is 0)
todo: maybe also add it as keyword argument?
prcp_fac : float, >0
multiplicative precipitation correction factor (default 2.5)
melt_f : float, >0