|
2 | 2 | import pandas as pd
|
3 | 3 | import xarray as xr
|
4 | 4 | from matplotlib import pyplot as plt
|
| 5 | +from copy import deepcopy |
5 | 6 |
|
6 | 7 |
|
7 | 8 | def get_var_names(variable):
|
@@ -162,3 +163,53 @@ def prepped_array_to_df(data_array, dates, ids, col_names, spatial_idx_name='seg
|
162 | 163 | df_ids = pd.DataFrame(ids, columns=[spatial_idx_name])
|
163 | 164 | df = pd.concat([df_dates, df_ids, df_preds], axis=1)
|
164 | 165 | return df
|
| 166 | + |
| 167 | + |
def combine_preds(fileList, weights=None, pred_vars=None, outFile="composite.feather", spatial_idx_name="seg_id_nat", time_idx_name="date"):
    """
    combine multiple model outputs into 1 composite (weighted-sum) file

    Rows are combined by position: every prediction file (after any weight
    merge) must contain the same rows in the same order as the first file.

    :param fileList: [list of str] list of model prediction files (feather)
    :param weights: [list] list of model weights corresponding to the list
    of model prediction files. Each entry is either a dataframe with
    spatial_idx_name and / or time_idx_name columns and a modelWeight column
    (it is merged onto that model's predictions), or a single value for that
    model (range of 0 - 1). If None (or empty), the models are weighted
    equally
    :param pred_vars: [list of str] list of predicted variables to combine.
    A single variable name is also accepted. If None, every column of the
    first file that is not an index column is used
    :param outFile: [str] feather file where the composite predictions
    should be written
    :param spatial_idx_name: [str] name of the spatial index column
    :param time_idx_name: [str] name of the time index column
    :raises ValueError: if fileList is empty, if weights and fileList differ
    in length, or if a prediction file does not have the same number of rows
    as the first one
    :raises AssertionError: if the model weights do not sum to 1 (within a
    tolerance of 0.01) for every row
    """
    if len(fileList) == 0:
        raise ValueError("fileList must contain at least one prediction file")

    # explicit None/length test so array-like weights are not evaluated for
    # truthiness; an empty list still means "weight the models equally"
    use_weights = weights is not None and len(weights) > 0
    if use_weights and len(weights) != len(fileList):
        raise ValueError(
            "weights must have one entry per prediction file "
            "(got {} weights for {} files)".format(len(weights), len(fileList))
        )

    # a bare string would otherwise be iterated character by character
    if isinstance(pred_vars, str):
        pred_vars = [pred_vars]

    idx_cols = [spatial_idx_name, time_idx_name]
    compositeDF = None
    weightCheckDF = None

    for i, thisFile in enumerate(fileList):
        tempDF = pd.read_feather(thisFile)
        if not pred_vars:
            pred_vars = [x for x in tempDF.columns if x not in idx_cols]

        if use_weights:
            thisWeight = weights[i]
            if isinstance(thisWeight, pd.DataFrame):
                # joins on the columns the two dataframes have in common
                tempDF = tempDF.merge(thisWeight)
            else:
                tempDF['modelWeight'] = float(thisWeight)
        else:
            tempDF['modelWeight'] = 1.0 / len(fileList)

        modelWeight = tempDF['modelWeight'].values

        # make the composite dataframe
        # (keyed on position in the list, not the file name, so that a file
        # listed more than once does not reset the composite)
        if i == 0:
            # drop the weight column by name (it is not guaranteed to be the
            # last column after a merge); drop() returns a new dataframe, so
            # the assignments below do not write into a view of tempDF
            compositeDF = tempDF.drop(columns='modelWeight')
            for thisVar in pred_vars:
                compositeDF[thisVar] = compositeDF[thisVar].values * modelWeight
            # save the weights for this model to ensure they are 1 across all models
            weightCheckDF = tempDF[[spatial_idx_name, time_idx_name, 'modelWeight']].copy()
        else:
            if len(tempDF) != len(compositeDF):
                raise ValueError(
                    "{} has {} rows but the composite has {}; all prediction "
                    "files must contain the same rows".format(thisFile, len(tempDF), len(compositeDF))
                )
            for thisVar in pred_vars:
                compositeDF[thisVar] = compositeDF[thisVar].values + tempDF[thisVar].values * modelWeight
            weightCheckDF['modelWeight'] = weightCheckDF['modelWeight'].values + modelWeight

    # check that the cumulative weights sum to 1 (within tolerance) for every row
    np.testing.assert_allclose(weightCheckDF.modelWeight, 1, rtol=1e-02, atol=1e-02, equal_nan=True, err_msg='Model weights did not sum to 1', verbose=True)

    # drop predicted variables that weren't merged
    colsToDrop = [x for x in compositeDF.columns if x not in pred_vars and x not in idx_cols]
    if colsToDrop:
        compositeDF = compositeDF.drop(columns=colsToDrop)
    # save the output
    compositeDF.to_feather(outFile)
0 commit comments