-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathselection.py
128 lines (99 loc) · 4.31 KB
/
selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
import numpy as np
import queue
from reg import RegModel
def get_graph_indegrees(graph):
# compute the in-degree: indegrees[i] = np.sum(graph[:, i])
return np.count_nonzero(graph, axis=0) # equal to indegrees = np.sum(graph, axis=0)
def build_regression_model(pop, config):
"""
Args:
pop: population
pop_y: the evaluation of `pop`, the smallest the better
config: configuration
Todo:
pop_y is unused
"""
x, y = config["samples"]
config["reg_model"] = RegModel()
config["reg_model"].set_pop(pop)
config["reg_model"].fit(x, y)
def select(pop, config):
"""
Args:
pop: population
config: configuration
Return
The top n solutions in the `pop` predicted by model_class and model_reg
"""
graph = config["cmp_model"].predict_to_graph(pop)
build_regression_model(pop, config)
y_reg = config["reg_model"].predict(pop).reshape(-1)
# fix the grap
for i in range(len(pop)):
for j in range(i + 1, len(pop)):
if graph[i][j] == graph[j][i]:
tmp = (y_reg[i] < y_reg[j])
graph[i][j] = tmp
graph[j][i] = not tmp
return topological_sort(graph, y_reg, config)
def topological_sort(graph, y_reg, config):
"""topological sort variant
To get the rank of the solutions by pairwise relation. The basic idea is topological sort and greedy algorithm
Args:
graph: a adjacency matrix, graph[i, j] = 1 means as follow
1. solution i is better than solution j
2. there is a edge from i to j
y_reg: regression model prediction
Return:
The solution id of top `top_n` solutions
Note:
This method will modify the value of graph
"""
# compute the in-degree
indegrees = get_graph_indegrees(graph)
# REMOVED is a tag.
# If the in-degree of a vertex is REMOVED, it means that this vertex has been removed
# Setting REMOVED to an infinite number is to make it easier to find the vertex with a smaller in-degree when topological sorting encounter conflict in the later steps.
REMOVED = np.iinfo(graph.dtype).max
# init the queue
q = queue.Queue()
# topological sort
ret = []
while(len(ret) < config["pop_size"]):
if q.empty():
# When to use y_reg:
# 1. When the minimum in-degree of remaining vertices is relatively large, it indicates that there is a large error in the classification model
# 2. There are multiple minimum in-degree vertices
min_indegree = np.min(indegrees)
# n_remain_vertices is the number of remaining vertices
n_remain_vertices = np.sum(indegrees != REMOVED)
# threshold is hyperparameter, it is used to indicate when to use the second indicator
threshold = int(n_remain_vertices * 0.3)
candidate_vertices = np.argwhere(indegrees == min_indegree).reshape(-1)
if min_indegree >= threshold or candidate_vertices.size > 1:
# Select the vertex with the smallest y_reg value among the remaining vertices.
# If there are more than two vertices, the vertex with smallest id will be selected
y_mask = np.ma.array(y_reg, mask=(indegrees == REMOVED))
v = np.argmin(y_mask)
else:
v = candidate_vertices[0]
# remove all edge to v
graph[:, v] = 0
# update the in-degree of v
indegrees[v] = REMOVED
q.put(v)
v = q.get()
ret.append(v)
# v_neighbors is a vector that contains id of all neighboring vertices of v
v_neigbors = np.argwhere(graph[v] != 0).reshape(-1)
# remove all edge from v
graph[v] = 0
# update the in-degree of the neighbors of v
indegrees[v_neigbors] -= 1
for u in v_neigbors[indegrees[v_neigbors] == 0]:
# u is a neighbor of v, and its indegree is 0 (there is no vertices are better than it among the remaining vertices)
q.put(u)
# update the in-degree
indegrees[u] = REMOVED
return np.array(ret)