-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexecute_dataset_analysis.py
133 lines (118 loc) · 4.91 KB
/
execute_dataset_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from enum import Enum
from package.evaluation_functions import CLUSTER_CRIT_ALLOWED_FITNESSES
from package.main import run, e_scenarios
class e_datasets(Enum):
    """Datasets the experiment batches iterate over.

    Each member's value is the relative path of that dataset's
    ``subtotals_dataset2.xlsx`` spreadsheet; the value is passed as the first
    argument of every experiment run.
    """

    CAMPUS_BASIN = './datasets/CampusBasin/subtotals_dataset2.xlsx'
    EQUATORIAL_MARGIN = './datasets/MargemEquatorial/subtotals_dataset2.xlsx'
    TALARA_BASIN = './datasets/TalaraBasin/subtotals_dataset2.xlsx'
    CARMOPOLIS_GROUPED = './datasets/CarmopolisGrouped/subtotals_dataset2.xlsx'
    JEQUITINHONHA = './datasets/Jequitinhonha/subtotals_dataset2.xlsx'
    MUCURI = './datasets/Mucuri/subtotals_dataset2.xlsx'
class e_meta_clustering_algorithms(Enum):
    """String identifiers of the meta-clustering strategies.

    NOTE(review): the script body in this file passes the literals ``'none'``
    and ``'ga'`` to ``--strategy`` directly rather than going through this
    enum -- presumably these values are the accepted choices; confirm against
    the argument parser.
    """

    GA = 'ga'
    PSO = 'pso'
    WARD_P = 'ward_p'
    RANDOM_GA = 'random_ga'
    NONE = 'none'
class e_clustering_algorithms(Enum):
    """Base clustering algorithms; each value is handed to ``--cluster-algorithm``."""

    AGGLOMERATIVE = 'agglomerative'
    KMEANS = 'kmeans'
    AFFINITY_PROPAGATION = 'affinity-propagation'
# Value sent as ``--preference`` for a given (scenario, dataset) pair, looked
# up as PREFERENCES[scenario][dataset].
# NOTE(review): presumably the affinity-propagation preference -- confirm.
# Only the two scenarios below have entries.
PREFERENCES = {
    e_scenarios.RAW: {
        e_datasets.CAMPUS_BASIN: -650,
        e_datasets.EQUATORIAL_MARGIN: -400,
        e_datasets.TALARA_BASIN: -400,
        e_datasets.CARMOPOLIS_GROUPED: -540,
        e_datasets.JEQUITINHONHA: -470,
        e_datasets.MUCURI: -5300,
    },
    e_scenarios.COMPOSITIONAL_LOCALIZATIONAL: {
        e_datasets.CAMPUS_BASIN: -1550,
        e_datasets.EQUATORIAL_MARGIN: -900,
        e_datasets.TALARA_BASIN: -750,
        e_datasets.CARMOPOLIS_GROUPED: -1590,
        e_datasets.JEQUITINHONHA: -900,
        e_datasets.MUCURI: -7700,
    },
}
def run_experiment(args):
    """Launch one experiment by forwarding ``args`` to ``run``.

    Args:
        args: Command-line style argument list, passed unchanged as the
            ``args`` keyword of ``package.main.run``.

    Returns:
        None; whatever ``run`` returns is discarded.
    """
    run(args=args)
if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Batch 1 -> section_5_1.db
    # 200 repetitions over every dataset and scenario, with no search
    # strategy ('--strategy none'). Only the KMEANS member of
    # e_clustering_algorithms is run; the remaining members are skipped.
    # Flags left disabled for this batch: --num-gen, --pop-size, --perfect,
    # --p_ward.
    # ------------------------------------------------------------------
    db_file = 'section_5_1.db'
    for algorithm in e_clustering_algorithms:
        if algorithm is not e_clustering_algorithms.KMEANS:
            continue
        for _repetition in range(200):
            for dataset in e_datasets:
                for scenario in e_scenarios:
                    # NOTE(review): PREFERENCES only has entries for RAW and
                    # COMPOSITIONAL_LOCALIZATIONAL; any other e_scenarios
                    # member would raise KeyError here -- confirm the enum
                    # has no further members.
                    preference = str(PREFERENCES[scenario][dataset])
                    cli_args = [
                        dataset.value,
                        '1',
                        '--level', 'features_groups',
                        '--eval-rate', '1',
                        '--min-features', '50',
                        '--fitness-metric', 'silhouette_sklearn',
                        '--cluster-algorithm', algorithm.value,
                        '--db-file', db_file,
                        '--strategy', 'none',
                        '--preference', preference,
                        '--scenario', scenario.name,
                    ]
                    run_experiment(cli_args)

    # ------------------------------------------------------------------
    # Batch 2 -> section_5_2.db
    # GA strategy with agglomerative clustering, 10 repetitions for each
    # (affinity, linkage) combination, every scenario and dataset.
    # Flags left disabled for this batch: --p_ward, --preference.
    # ------------------------------------------------------------------
    db_file = 'section_5_2.db'
    affinity_linkage_pairs = (
        ('euclidean', 'complete'),
        ('euclidean', 'single'),
        ('manhattan', 'complete'),
        ('manhattan', 'single'),
        ('euclidean', 'ward'),
    )
    for affinity, linkage in affinity_linkage_pairs:
        for _repetition in range(10):
            for scenario in e_scenarios:
                for dataset in e_datasets:
                    cli_args = [
                        dataset.value,
                        '1',
                        '--level', 'features_groups',
                        '--num-gen', '1000',
                        '--pop-size', '50',
                        '--perfect',
                        '--eval-rate', '0',
                        '--min-features', '2',
                        '--fitness-metric', 'silhouette_sklearn',
                        '--cluster-algorithm', 'agglomerative',
                        '--db-file', db_file,
                        '--strategy', 'ga',
                        '--scenario', scenario.name,
                        '--max-gens-without-improvement', '200',
                        '--affinity', affinity,
                        '--linkage', linkage,
                    ]
                    run_experiment(cli_args)

    # ------------------------------------------------------------------
    # Batch 3 -> results_5_3.db
    # GA strategy with ward-linkage agglomerative clustering, one run per
    # fitness metric listed in CLUSTER_CRIT_ALLOWED_FITNESSES, dataset and
    # scenario. The second positional argument is '2' here (it is '1' in
    # the two batches above).
    # Flags left disabled for this batch: --perfect, --p_ward, --preference.
    # ------------------------------------------------------------------
    db_file = 'results_5_3.db'
    # Each entry is unpacked as a pair; only its first element is used.
    for metric, _unused in CLUSTER_CRIT_ALLOWED_FITNESSES:
        for dataset in e_datasets:
            for scenario in e_scenarios:
                cli_args = [
                    dataset.value,
                    '2',
                    '--level', 'features_groups',
                    '--num-gen', '1000',
                    '--pop-size', '50',
                    '--eval-rate', '0',
                    '--min-features', '2',
                    '--fitness-metric', f'{metric}',
                    '--cluster-algorithm', 'agglomerative',
                    '--db-file', db_file,
                    '--strategy', 'ga',
                    '--scenario', scenario.name,
                    '--max-gens-without-improvement', '200',
                    '--linkage', 'ward',
                ]
                run_experiment(cli_args)