Skip to content

Commit 1bb6759

Browse files
author
Ubuntu
committedFeb 27, 2024·
fixing errors for ms compiler
1 parent ed00a4e commit 1bb6759

File tree

3 files changed

+59
-22
lines changed

3 files changed

+59
-22
lines changed
 

‎examples/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ set(EXAMPLES "primes" "BFS" "word_counts" "tokens" "filter" "linefit"
1212
"knuth_morris_pratt" "huffman_tree" "decision_tree_c45" "karatsuba" "suffix_tree"
1313
"2d_linear_program" "box_kdtree" "radix_tree" "ray_trace" "hash_map" "oct_tree"
1414
"3d_range" "rectangle_intersection" "star_connectivity" "ldd_connectivity" "boruvka"
15-
"counting_sort")
15+
"counting_sort" "integer_sort")
1616

1717
function(add_example NAME)
1818
add_executable(${NAME} ${NAME}.cpp)

‎examples/counting_sort.h

+55-19
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,53 @@
55
#include <parlay/sequence.h>
66
#include <parlay/primitives.h>
77

8-
template <typename InIt, typename OutIt, typename KeyIt>
9-
parlay::sequence<int>
10-
counting_sort(const InIt& begin, const InIt& end,
11-
OutIt out, const KeyIt& keys,
8+
// **************************************************************
9+
// Counting sort
10+
// A parallel version of counting sort. It breaks the input into
11+
// partitions and for each partition, in parallel, it counts how many
12+
// of each key there are. It then uses a scan to calculate the offsets
13+
// for each bucket in each partition, and does a final pass placing
14+
// all keys in their correct position.
15+
// **************************************************************
16+
17+
using counter_type = unsigned long;
18+
19+
// **************************************************************
20+
// Input:
21+
// begin and end iterators for the values to be rearranged
22+
// begin iterator for the output (value_type must be the same as the input's)
23+
// begin iterator for the keys (range must be same length as values)
24+
// num_buckets : number of buckets (should be smallish, e.g. 256)
25+
// Output:
26+
// Offsets within output of each key. Will be of length
27+
// num_buckets+1 since last entry will contain total size
28+
// (i.e. end-begin).
29+
// **************************************************************
30+
template <typename InIt, typename OutIt, typename KeyIt>
31+
parlay::sequence<counter_type>
32+
counting_sort(const InIt& begin, const InIt& end,
33+
OutIt out, const KeyIt& keys,
1234
long num_buckets) {
1335
long n = end - begin;
14-
long num_parts = n / (num_buckets * 64) + 1;
36+
if (n == 0) return parlay::sequence<counter_type>(1, 0);
37+
long num_parts = std::min(1000l, n / (num_buckets * 64) + 1);
1538
long part_size = (n - 1)/num_parts + 1;
1639

1740
// first count buckets within each partition
18-
auto counts = parlay::sequence<int>::uninitialized(num_buckets * num_parts);
41+
auto counts = parlay::sequence<counter_type>::uninitialized(num_buckets * num_parts);
1942
parlay::parallel_for(0, num_parts, [&] (long i) {
2043
long start = i * part_size;
2144
long end = std::min<long>(start + part_size, n);
22-
for (int j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0;
23-
for (size_t j = start; j < end; j++) counts[i*num_buckets + keys[j]]++;
45+
for (long j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0;
46+
for (long j = start; j < end; j++) counts[i*num_buckets + keys[j]]++;
2447
}, 1);
2548

2649
// transpose the counts if more than one part
27-
parlay::sequence<int> trans_counts;
28-
if (num_parts > 1) {
29-
trans_counts = parlay::sequence<int>::uninitialized(num_buckets * num_parts);
30-
parlay::parallel_for(0, num_buckets, [&] (long i) {
31-
for (size_t j = 0; j < num_parts; j++)
50+
parlay::sequence<counter_type> trans_counts;
51+
if (num_parts > 1) {
52+
trans_counts = parlay::sequence<counter_type>::uninitialized(num_buckets * num_parts);
53+
parlay::parallel_for(0, num_buckets, [&] (long i) {
54+
for (size_t j = 0; j < num_parts; j++)
3255
trans_counts[i* num_parts + j] = counts[j * num_buckets + i];}, 1);
3356
} else trans_counts = std::move(counts);
3457

@@ -39,19 +62,32 @@ counting_sort(const InIt& begin, const InIt& end,
3962
parlay::parallel_for(0, num_parts, [&] (long i) {
4063
long start = i * part_size;
4164
long end = std::min<long>(start + part_size, n);
42-
int local_offsets[num_buckets];
65+
parlay::sequence<counter_type> local_offsets(num_buckets);
4366

4467
// transpose back
45-
for (int j = 0; j < num_buckets; j++)
68+
for (long j = 0; j < num_buckets; j++)
4669
local_offsets[j] = trans_counts[num_parts * j + i];
4770

4871
// copy to output
49-
for (size_t j = start; j < end; j++) {
50-
int k = local_offsets[keys[j]]++;
72+
for (long j = start; j < end; j++) {
73+
counter_type k = local_offsets[keys[j]]++;
74+
// prefetching speeds up the code
75+
#if defined(__GNUC__) || defined(__clang__)
5176
__builtin_prefetch (((char*) &out[k]) + 64);
77+
#endif
5278
out[k] = begin[j];
5379
}}, 1);
5480

55-
return parlay::tabulate(num_buckets, [&] (long i) {
56-
return trans_counts[i * num_parts];});
81+
return parlay::tabulate(num_buckets+1, [&] (long i) {
82+
return (i == num_buckets) ? (counter_type) n : trans_counts[i * num_parts];});
83+
}
84+
85+
// A version that uses ranges as inputs and generates its own output sequence
86+
template <typename InRange, typename KeysRange>
87+
auto counting_sort(const InRange& in, const KeysRange& keys,
88+
long num_buckets) {
89+
auto out = parlay::sequence<typename InRange::value_type>::uninitialized(in.size());
90+
auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(),
91+
num_buckets);
92+
return std::pair(std::move(out), std::move(offsets));
5793
}

‎examples/samplesort.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <parlay/utilities.h>
1010

1111
#include "helper/heap_tree.h"
12+
#include "counting_sort.h"
1213

1314
// **************************************************************
1415
// Sample sort
@@ -29,7 +30,7 @@ void sample_sort_(Range in, Range out, Less less, int level=1) {
2930
long cutoff = 256;
3031
if (n <= cutoff || level > 2) {
3132
parlay::copy(in, out);
32-
std::stable_sort(out.begin(), out.end());
33+
std::sort(out.begin(), out.end());
3334
return;
3435
}
3536

@@ -58,7 +59,7 @@ void sample_sort_(Range in, Range out, Less less, int level=1) {
5859
return ss.find(in[i], less);});
5960

6061
// sort into the buckets
61-
auto [keys,offsets] = parlay::internal::count_sort(in, bucket_ids, num_buckets);
62+
auto [keys,offsets] = counting_sort(in, bucket_ids, num_buckets);
6263

6364
// now recursively sort each bucket
6465
parlay::parallel_for(0, num_buckets, [&, &keys = keys, &offsets = offsets] (long i) {

0 commit comments

Comments
 (0)
Please sign in to comment.