-
Notifications
You must be signed in to change notification settings - Fork 86
/
main.cpp
115 lines (97 loc) · 3.07 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#include <iostream>
#include <vector>

#include <occa.hpp>
#include <occa/experimental.hpp>
//---[ Internal Tools ]-----------------
// Note: These headers are not officially supported
// Please don't rely on it outside of the occa examples
#include <occa/internal/utils/cli.hpp>
#include <occa/internal/utils/testing.hpp>
//======================================
occa::json parseArgs(int argc, const char **argv);
// Example entry point: adds two host arrays element-wise on an OCCA device
// using an inline occa::forLoop kernel, copies the result back to the host,
// prints it, and checks it against the host-side sum a[i] + b[i].
//
// Returns 0 on success. Throws the int 1 (after writing a diagnostic to
// std::cerr) if any computed entry does not match the host-side sum.
int main(int argc, const char **argv) {
  occa::json args = parseArgs(argc, argv);

  // OCCA keeps 1 device in the background at a time
  // Rather than keeping the device object, we can rely on the background device
  occa::setDevice(static_cast<std::string>(args["options/device"]));

  int entries = 10;

  // Host storage is owned by std::vector, so no manual delete[] is required.
  // std::vector<float>(entries) value-initializes every element to 0, which
  // covers the initial zeroing of the output buffer.
  std::vector<float> hostA(entries);
  std::vector<float> hostB(entries);
  std::vector<float> hostAB(entries);

  // Raw pointer views are kept on purpose: the lambda below captures with [=]
  // and writes through ab[i], which needs plain float* variables named
  // a, b, and ab (a by-value capture of a std::vector would be a const copy).
  float *a  = hostA.data();
  float *b  = hostB.data();
  float *ab = hostAB.data();

  for (int i = 0; i < entries; ++i) {
    a[i] = static_cast<float>(i);
    b[i] = static_cast<float>(1 - i);
  }

  // Uses the background device
  occa::memory o_a  = occa::malloc<float>(entries, a);
  occa::memory o_b  = occa::malloc<float>(entries, b);
  occa::memory o_ab = occa::malloc<float>(entries);

  occa::scope scope({
    // Capture variables
    {"a", o_a},
    {"b", o_b},
    {"ab", o_ab}
  }, {
    // Props for the kernel are passed here
    // For example, if we wanted to define a variable at compile-time:
    //   {"defines/MY_VALUE", value}
  });

  // JIT-compile a kernel given the for-loop definitions:
  //
  //     for (int index = 0; index < entries; ++index; @tile(16, @outer, @inner)) {
  //       <lambda>
  //     }
  //
  // We support 1D, 2D, and 3D loops which depend on how many arguments
  // .tile(), .outer(), and .inner() take.
  // Based on the loop types, outerIndex and/or innerIndex will be of types:
  //   1 argument  -> int
  //   2 arguments -> int2 (outerIndex.x, outerIndex.y)
  //   3 arguments -> int3 (outerIndex.x, outerIndex.y, outerIndex.z)
  //
  // For finer granularity, this could have been called using:
  //
  //     .outer(entries / 2)
  //     .inner(2)
  //
  // which is useful when outer and inner dimensions don't match
  occa::forLoop()
    .tile({entries, 16})
    .run(OCCA_FUNCTION(scope, [=](const int i) -> void {
      ab[i] = a[i] + b[i];
    }));

  // Bring the device result back into the host output buffer
  o_ab.copyTo(ab);

  // Print every value first so the full output is visible even when the
  // verification below fails
  for (int i = 0; i < entries; ++i) {
    std::cout << i << ": " << ab[i] << '\n';
  }

  // Assert values
  for (int i = 0; i < entries; ++i) {
    if (!occa::areBitwiseEqual(ab[i], a[i] + b[i])) {
      std::cerr << "Mismatch at entry " << i
                << ": got " << ab[i]
                << ", expected " << (a[i] + b[i]) << '\n';
      throw 1;
    }
  }

  return 0;
}
// Declares the command-line interface for this example, parses argc/argv with
// it, and copies the parsed "options/verbose" value into the OCCA
// "kernel/verbose" setting.
//
// Returns the parsed arguments; option values are read back by the caller
// through keys such as "options/device".
occa::json parseArgs(int argc, const char **argv) {
  // -d / --device: takes an argument and has a default value
  const auto deviceOption =
    occa::cli::option('d', "device",
                      "Device properties (default: \"{mode: 'Serial'}\")")
      .withArg()
      .withDefaultValue("{mode: 'Serial'}");

  // -v / --verbose: declared without an argument
  const auto verboseOption =
    occa::cli::option('v', "verbose",
                      "Compile kernels in verbose mode");

  occa::cli::parser cliParser;
  cliParser.withDescription(
    "Example using occa::forLoop for inline kernels"
  );
  cliParser.addOption(deviceOption);
  cliParser.addOption(verboseOption);

  occa::json parsed = cliParser.parseArgs(argc, argv);

  // Forward the verbose option to the kernel settings before returning
  occa::settings()["kernel/verbose"] = parsed["options/verbose"];

  return parsed;
}