-
Notifications
You must be signed in to change notification settings - Fork 5
/
partition.cpp
144 lines (130 loc) · 6 KB
/
partition.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include <fstream>
#include <iostream>
#include <random>
#include "kmeans.h"
#include "metis_io.h"
#include "overlapping_partitioning.h"
#include "partitioning.h"
#include "points_io.h"
#include <parlay/primitives.h>
std::vector<int> BalancedKMeansCall(PointSet& points, int k, double eps) {
PointSet centroids = RandomSample(points, k, 555);
size_t max_cluster_size = points.n * (1.0 + eps) / k;
Timer timer;
timer.Start();
auto result = BalancedKMeans(points, centroids, max_cluster_size);
std::cout << "Balanced Kmeans took " << timer.Stop() << " seconds" << std::endl;
return result;
}
std::vector<int> FlatKMeansCall(PointSet& points, int k, double eps) {
PointSet centroids = RandomSample(points, k, 555);
return KMeans(points, centroids);
}
void PrintImbalance(std::vector<int>& partition, int k) {
auto histo = parlay::histogram_by_index(partition, k);
auto max_part_size = *parlay::max_element(histo);
std::cout << " max part size " << max_part_size << " " << partition.size() << " " << k << std::endl;
double imbalance = double(max_part_size) / (partition.size() / k);
std::cout << "imbalance " << imbalance << " max part size " << max_part_size << " perf balanced " << partition.size() / k << std::endl;
}
int main(int argc, const char* argv[]) {
if (argc != 6 && argc != 7) {
std::cerr << "Usage ./Partition input-points output-path num-clusters partitioning-method (default|strong) [overlap]" << std::endl;
std::abort();
}
std::string input_file = argv[1];
std::string output_file = argv[2];
std::string k_str = argv[3];
int k = std::stoi(k_str);
std::string part_method = argv[4];
std::string part_file = output_file + ".k=" + k_str + "." + part_method;
std::string config = argv[5];
bool strong = false;
if (config == "strong") {
strong = true;
} else if (config != "default") {
throw std::runtime_error("Unknown config: " + config);
}
double overlap = 0.0;
if (argc == 7) {
std::string overlap_str = argv[6];
overlap = std::stod(overlap_str);
part_file += ".o=" + overlap_str;
}
if (part_method == "Random") {
uint32_t n;
{
std::ifstream in(input_file, std::ios::binary);
in.read(reinterpret_cast<char*>(&n), sizeof(uint32_t));
}
std::vector<int> partition;
partition.reserve(n);
for (int b = 0; b < k; ++b) {
partition.insert(partition.end(), n / k, b);
}
std::mt19937 prng(555);
std::shuffle(partition.begin(), partition.end(), prng);
WriteMetisPartition(partition, part_file);
return 0;
}
PointSet points = ReadPoints(input_file);
std::cout << "Finished reading points" << std::endl;
if (part_method == "GP" && overlap != 0.0) {
part_method = "OGP";
}
const double eps = 0.05;
std::vector<int> partition;
Clusters clusters;
if (part_method == "GP") {
partition = GraphPartitioning(points, k, eps, strong);
} else if (part_method == "Pyramid") {
partition = PyramidPartitioning(points, k, eps, part_file + ".pyramid_routing_index");
} else if (part_method == "KMeans") {
partition = KMeansPartitioning(points, k, eps);
} else if (part_method == "BalancedKMeans") {
partition = BalancedKMeansCall(points, k, eps);
} else if (part_method == "FlatKMeans") {
partition = FlatKMeansCall(points, k, eps);
} else if (part_method == "RKM") {
const size_t max_cluster_size = (1.0 + eps) * points.n / k;
partition = RebalancingKMeansPartitioning(points, max_cluster_size, k);
} else if (part_method == "ORKM") {
const size_t max_cluster_size = (1.0 + eps) * points.n / k;
int adjusted_num_clusters = std::ceil(k * (1.0 + overlap));
auto rkm = RebalancingKMeansPartitioning(points, max_cluster_size, adjusted_num_clusters);
clusters = OverlappingKMeansPartitioningSPANN(points, rkm, k, eps, overlap);
} else if (part_method == "OurPyramid") {
partition = OurPyramidPartitioning(points, k, eps, part_file + ".our_pyramid_routing_index", 0.02);
} else if (part_method == "OGP") {
clusters = OverlappingGraphPartitioning(points, k, eps, overlap, strong);
} else if (part_method == "OGPS") {
const size_t max_cluster_size = (1.0 + eps) * points.n / k;
const size_t num_extra_assignments = overlap * points.n;
const size_t num_total_assignments = points.n + num_extra_assignments;
int adjusted_num_clusters = std::ceil(static_cast<double>(num_total_assignments) / max_cluster_size);
auto kmp = GraphPartitioning(points, adjusted_num_clusters, eps, false);
clusters = OverlappingKMeansPartitioningSPANN(points, kmp, k, eps, overlap);
} else if (part_method == "OKM") {
// leave the same num clusters, since k-means will use more than requested anyways
Timer timer;
timer.Start();
auto kmp = KMeansPartitioning(points, k, eps);
std::cout << "KM took " << timer.Stop() << " seconds" << std::endl;
clusters = OverlappingKMeansPartitioningSPANN(points, kmp, k, eps, overlap);
} else if (part_method == "OBKM") {
int adjusted_num_clusters = std::ceil(k * (1.0 + overlap));
// use adjusted num clusters for BKM call
auto bkm = BalancedKMeansCall(points, adjusted_num_clusters, eps);
// but use the original number for the overlap call, so that it chooses the correct max cluster size. The code can handle the case
// that NumPartsInPartition(bkm) != k
clusters = OverlappingKMeansPartitioningSPANN(points, bkm, k, eps, overlap);
} else {
std::cout << "Unsupported partitioning method " << part_method << " . The supported options are [GP, Pyramid, KMeans]" << std::endl;
std::abort();
}
std::cout << "Finished partitioning" << std::endl;
if (clusters.empty()) {
clusters = ConvertPartitionToClusters(partition);
}
WriteClusters(clusters, part_file);
}