-
Notifications
You must be signed in to change notification settings - Fork 26
/
trainer_test.py
109 lines (97 loc) · 4.31 KB
/
trainer_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# trainer_test.py
import threading
import pandas as pd
import sklearn
from kepler_model.train import load_class
from kepler_model.util.loader import default_train_output_pipeline
from kepler_model.util.train_types import PowerSourceMap, default_trainer_names
from tests.extractor_test import (
get_expected_power_columns,
get_extract_results,
node_info_column,
test_energy_source,
test_extractors,
)
from tests.isolator_test import get_isolate_results, test_isolators
test_trainer_names = default_trainer_names
pipeline_lock = threading.Lock()
def assert_train(trainer, data, energy_components):
    """Assert that *trainer* can predict every energy component for *data*.

    For each distinct node type found in the node-info column, the rows for
    that node type are selected and the trainer's feature columns are fed to
    ``trainer.predict``; the prediction must have one value per input row.
    Trainers that were never fitted for a (node type, component) pair raise
    ``sklearn.exceptions.NotFittedError`` and are deliberately skipped.
    """
    trainer.print_log("assert train")
    for node_type in pd.unique(data[node_info_column]):
        # predict() keys models by the integer node type
        node_type_key = int(node_type)
        subset = data[data[node_info_column] == node_type]
        feature_values = subset[trainer.features].values
        for component in energy_components:
            try:
                predicted = trainer.predict(node_type_key, component, feature_values)
            except sklearn.exceptions.NotFittedError:
                # no model trained for this pair — nothing to check
                continue
            assert len(predicted) == len(feature_values), f"length of predicted values != features ({len(predicted)}!={len(feature_values)})"
def process(
    node_level,
    feature_group,
    result,
    trainer_names=test_trainer_names,
    energy_source=test_energy_source,
    power_columns=None,
    pipeline_name=default_train_output_pipeline,
):
    """Train each named trainer on *result* and return the combined metadata.

    Args:
        node_level: True for node-level (absolute) training, False for
            container-level (dynamic) training.
        feature_group: feature group the trainers are built for.
        result: extracted/isolated DataFrame to train on.
        trainer_names: trainer class names to load via ``load_class``.
        energy_source: key into ``PowerSourceMap`` giving the energy components.
        power_columns: power label columns; defaults to
            ``get_expected_power_columns()``, resolved lazily at call time
            rather than once at import time (avoids B008 call-in-default).
        pipeline_name: pipeline the trained models are saved under.

    Returns:
        pd.DataFrame: concatenation of each trainer's metadata.
    """
    if power_columns is None:
        power_columns = get_expected_power_columns()
    energy_components = PowerSourceMap[energy_source]
    train_items = []
    for trainer_name in trainer_names:
        trainer_class = load_class("trainer", trainer_name)
        trainer = trainer_class(energy_components, feature_group, energy_source, node_level=node_level, pipeline_name=pipeline_name)
        # pipeline_lock serializes writes to the shared pipeline output
        trainer.process(result, power_columns, pipeline_lock=pipeline_lock)
        assert_train(trainer, result, energy_components)
        train_items.append(trainer.get_metadata())
    return pd.concat(train_items)
def process_all(
    extractors=test_extractors,
    isolators=test_isolators,
    trainer_names=test_trainer_names,
    energy_source=test_energy_source,
    power_columns=None,
    pipeline_name=default_train_output_pipeline,
):
    """Run training over every extractor (node level) and isolator (container level).

    For each extractor, trains on its node-level extract results; then, for
    each isolator applied to that extractor's output, trains on the isolated
    container-level results. Metadata rows are tagged with the extractor,
    isolator, and feature group that produced them.

    Args:
        extractors: extractor instances whose saved results are loaded.
        isolators: isolator instances whose saved results are loaded.
        trainer_names / energy_source / pipeline_name: forwarded to ``process``.
        power_columns: power label columns; defaults to
            ``get_expected_power_columns()``, resolved lazily at call time
            rather than once at import time (avoids B008 call-in-default).

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: (node-level metadata,
        container-level metadata).
    """
    if power_columns is None:
        power_columns = get_expected_power_columns()
    abs_train_list = []
    dyn_train_list = []
    for extractor in extractors:
        extractor_name = extractor.__class__.__name__
        extractor_results = get_extract_results(extractor_name, node_level=True)
        for feature_group, result in extractor_results.items():
            print("Extractor ", extractor_name)
            metadata_df = process(
                True, feature_group, result, trainer_names=trainer_names, energy_source=energy_source, power_columns=power_columns, pipeline_name=pipeline_name
            )
            metadata_df["extractor"] = extractor_name
            metadata_df["feature_group"] = feature_group
            abs_train_list.append(metadata_df)
        for isolator in isolators:
            isolator_name = isolator.__class__.__name__
            # isolator results are derived from this extractor's output
            isolator_results = get_isolate_results(isolator_name, extractor_name)
            for feature_group, result in isolator_results.items():
                print("Isolator ", isolator_name)
                metadata_df = process(
                    False,
                    feature_group,
                    result,
                    trainer_names=trainer_names,
                    energy_source=energy_source,
                    power_columns=power_columns,
                    pipeline_name=pipeline_name,
                )
                metadata_df["extractor"] = extractor_name
                metadata_df["isolator"] = isolator_name
                metadata_df["feature_group"] = feature_group
                dyn_train_list.append(metadata_df)
    abs_train_df = pd.concat(abs_train_list)
    dyn_train_df = pd.concat(dyn_train_list)
    return abs_train_df, dyn_train_df
def test_trainer_process():
    """End-to-end training test: run all trainers and print MAE summaries."""
    node_df, container_df = process_all()
    focus = ["model_name", "mae"]
    reports = [
        ("Node-level train results:", node_df, ["extractor", "feature_group"]),
        ("Container-level train results:", container_df, ["extractor", "isolator", "feature_group"]),
    ]
    for title, frame, index_cols in reports:
        print(title)
        # best (lowest-MAE) models first
        print(frame.set_index(index_cols)[focus].sort_values(by=["mae"], ascending=True))