-
Notifications
You must be signed in to change notification settings - Fork 4
/
main.py
132 lines (111 loc) · 5.17 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""Run the models."""
import os
import argparse
import logging
import models
import iss
def parse_args():
    """Parse the command line options given to this script.

    Returns:
    --------
    argparse.Namespace : holds a single attribute, `logging_level`, which
        is one of 'DEBUG', 'INFO', 'WARNING', 'ERROR' or 'CRITICAL' and
        falls back to 'INFO' when the flag is not supplied.
    """
    verbosity_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--logging_level',
        type=str,
        required=False,
        default='INFO',
        choices=verbosity_levels,
        help='Python logging module verbosity level',
    )
    return cli.parse_args()
def conduct_model_run(model_name_in_paper, ts_data, ts_subsampling,
                      subsample_blocks, num_days_subsample, num_days_high):
    """Solve one model, optionally on a subsample, and save its outputs.

    Parameters:
    -----------
    model_name_in_paper (str) : which model to run. Either 'LP' or 'MILP',
        corresponding to the models in the paper. The only difference
        is that, in the 'MILP' model, baseload can only be installed
        in units of 3GW and has a ramping constraint of 20%/hr. Here,
        'MILP' switches on both the `baseload_integer` and
        `baseload_ramping` options; any other value switches both off.
    ts_data (pandas DataFrame) : the time series to run the model across
        (before any subsampling is applied).
    ts_subsampling (str or None) : how to subsample the time series data.
        Either None (no subsampling, run across full time series),
        'random' (random sampling of days or hours), 'clustering'
        (k-medoids clustering into days) or 'importance' (importance
        subsampling).
    subsample_blocks (str) : the subsample blocks. If 'days', subsampling
        is used to create a set of contiguous days. If 'hours', subsamples
        are hours. 'hours' is not allowed if ts_subsampling='clustering',
        and the argument has no effect if ts_subsampling=None.
    num_days_subsample (int) : number of days in the subsample.
        If subsample_blocks='hours', subsample length (in hours)
        is 24*num_days_subsample. Not used if ts_subsampling=None.
    num_days_high (int) : number of "extreme" days with high cost to
        subsample if ts_subsampling='importance'. Not used otherwise.

    Returns:
    --------
    Nothing, but saves model outputs to CSV: 'summary_outputs.csv' and
        many more in a directory called 'full_outputs'.

    Raises:
    -------
    ValueError : if ts_subsampling is not one of the options above, or if
        ts_subsampling='clustering' is paired with subsample_blocks='hours'.
    """
    # Both MILP-specific constraints are switched on or off together
    is_milp = model_name_in_paper == 'MILP'
    model_kwargs = {'ts_data': ts_data,
                    'baseload_integer': is_milp,
                    'baseload_ramping': is_milp}

    # Choose the runner and add the keyword arguments specific to it.
    # Invalid settings are rejected here, before any model is built.
    if ts_subsampling is None:
        run_fn = iss.run_model
    elif ts_subsampling == 'random':
        run_fn = iss.run_model_with_random_subsample
        model_kwargs['num_days_sample'] = num_days_subsample
        model_kwargs['subsample_blocks'] = subsample_blocks
    elif ts_subsampling == 'clustering':
        if subsample_blocks == 'hours':
            raise ValueError('Cluster subsample blocks must be days.')
        run_fn = iss.run_model_with_clustered_subsample
        model_kwargs['num_days_sample'] = num_days_subsample
    elif ts_subsampling == 'importance':
        run_fn = iss.run_model_with_importance_subsample
        model_kwargs['num_days_sample'] = num_days_subsample
        model_kwargs['num_days_high'] = num_days_high
        model_kwargs['subsample_blocks'] = subsample_blocks
    else:
        raise ValueError('Invalid subsampling scheme')

    solved_model = run_fn(**model_kwargs)

    # Save summary outputs and a directory of the full range of outputs
    summary = solved_model.get_summary_outputs()
    summary.to_csv('summary_outputs.csv')
    solved_model.to_csv('full_outputs')
def run_example():
    """Run a worked example of importance subsampling.

    With the settings below, importance subsampling is applied to the
    'LP' model to estimate the optimal system design across 2017 from
    a 48-day subsample, of which 16 are "extreme" days (n_d_e in the
    paper). To try a different subsample length, a different scheme
    (e.g. random subsampling or regular k-medoids representative days)
    or the 'MILP' model, edit the arguments passed to conduct_model_run;
    its docstring describes each of them.

    Raises:
    -------
    ValueError : if 'summary_outputs.csv' or 'full_outputs' already
        exists in the working directory.
    """
    # Read in command line arguments and set up logging accordingly
    cli_args = parse_args()
    log_level = getattr(logging, cli_args.logging_level)
    logging.basicConfig(
        format='[%(asctime)s] %(levelname)s: %(message)s',
        level=log_level,
        datefmt='%Y-%m-%d,%H:%M:%S'
    )

    # Stop before doing any work if an output location is already taken
    output_clashes = (
        ('summary_outputs.csv',
         'Example script creates file `summary_outputs.csv`, but this '
         'already exists. Delete or rename this file before continuing'),
        ('full_outputs',
         'Example script creates directory `full_outputs`, but this '
         'already exists. Delete or rename it before continuing'),
    )
    for output_path, complaint in output_clashes:
        if os.path.exists(output_path):
            raise ValueError(complaint)

    # Load the full time series and keep only the year we sample from
    full_ts = models.load_time_series_data(model_name='6_region')
    ts_2017 = full_ts.loc['2017']

    conduct_model_run(
        model_name_in_paper='LP',
        ts_data=ts_2017,
        ts_subsampling='importance',
        subsample_blocks='days',
        num_days_subsample=48,
        num_days_high=16,
    )
if __name__ == '__main__':
    # Only run the example when this file is executed as a script,
    # not when it is imported as a module.
    run_example()