Skip to content
This repository was archived by the owner on Jun 2, 2023. It is now read-only.

Commit 23cf6a3

Browse files
Merge pull request #204 from janetrbarclay/composite_model
adds a function for making composite models
2 parents d97290d + e490a47 commit 23cf6a3

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed

river_dl/postproc_utils.py

+51
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pandas as pd
33
import xarray as xr
44
from matplotlib import pyplot as plt
5+
from copy import deepcopy
56

67

78
def get_var_names(variable):
@@ -162,3 +163,53 @@ def prepped_array_to_df(data_array, dates, ids, col_names, spatial_idx_name='seg
162163
df_ids = pd.DataFrame(ids, columns=[spatial_idx_name])
163164
df = pd.concat([df_dates, df_ids, df_preds], axis=1)
164165
return df
166+
167+
168+
def combine_preds(fileList,weights=None,pred_vars=None, outFile = "composite.feather", spatial_idx_name="seg_id_nat", time_idx_name="date"):
169+
"""
170+
combine multiple model outputs into 1 composite file
171+
:param fileList: [str] list of model prediction files
172+
:param weights: [list] list model weights corresponding to the list of model prediction files. This could be a list of
173+
dataframes with spatial_idx_name and / or time_idx_name columns and a modelWeight column or it could be a single value for
174+
each model (range of 0 - 1). If None, the models are weighted equally
175+
:param pred_vars: [str] list of predicted variables
176+
:param outFile: [str] feather file where the composite predictions should be written
177+
"""
178+
idx_cols = [spatial_idx_name, time_idx_name]
179+
180+
for i in range(len(fileList)):
181+
thisFile = fileList[i]
182+
tempDF = pd.read_feather(thisFile)
183+
if not pred_vars:
184+
pred_vars = [x for x in tempDF.columns if x not in idx_cols]
185+
if weights:
186+
thisWeight = weights[i]
187+
if type(thisWeight)==pd.DataFrame:
188+
tempDF=tempDF.merge(thisWeight)
189+
else:
190+
tempDF['modelWeight']=float(thisWeight)
191+
else:
192+
tempDF['modelWeight']=1.0/len(fileList)
193+
194+
#make the composite dataframe
195+
if thisFile==fileList[0]:
196+
compositeDF = tempDF.iloc[:,:-1]
197+
for thisVar in pred_vars:
198+
compositeDF[thisVar]=compositeDF[thisVar].values*tempDF.modelWeight.values
199+
#save the weights for this model to ensure they are 1 across all models
200+
weightCheckDF = deepcopy(tempDF[[spatial_idx_name, time_idx_name,'modelWeight']])
201+
else:
202+
for thisVar in pred_vars:
203+
compositeDF[thisVar]=compositeDF[thisVar].values+tempDF[thisVar]*tempDF.modelWeight.values
204+
weightCheckDF['modelWeight']=weightCheckDF['modelWeight']+tempDF['modelWeight']
205+
206+
207+
#check that all cummulative weights are less than 1.01
208+
np.testing.assert_allclose(weightCheckDF.modelWeight, 1, rtol=1e-02, atol=1e-02, equal_nan=True, err_msg='Model weights did not sum to 1', verbose=True)
209+
210+
#drop predicted variables that weren't merged
211+
colsToDrop = [x for x in compositeDF.columns if x not in pred_vars and x not in idx_cols]
212+
if len(colsToDrop)>0:
213+
compositeDF.drop(columns=colsToDrop,inplace=True)
214+
#save the output
215+
compositeDF.to_feather(outFile)

0 commit comments

Comments
 (0)