This repository has been archived by the owner on Apr 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 211
/
tensordot.cc
109 lines (97 loc) · 3.96 KB
/
tensordot.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "common.h"
#include "tc/aten/aten.h"
#include "tc/aten/aten_autotuner.h"
#include "tc/aten/aten_compiler.h"
#include "tc/autotuner/genetic_search.h"
#include "tc/core/check.h"
#include "tc/core/cpu/cpu_mapping_options.h"
#include "tc/core/cpu/cpu_tc_executor.h"
#include "tc/core/cuda/cuda_mapping_options.h"
#include "tc/core/cuda/cuda_tc_executor.h"
#include "tc/core/flags.h"
template <typename Backend>
void testOnBackend() {
// 1. Define and setup the TC compilation unit with CUDA memory
// management backed by ATen tensors.
std::string tc = R"TC(
def tensordot(float(N, C1, C2, H, W) I0,
float(N, C2, C3, H, W) I1) -> (O)
{
O(n, c1, c3, h, w) +=! I0(n, c1, r_c2, h, w) * I1(n, r_c2, c3, h, w)
}
)TC";
// 2. Allocate tensors with random data.
at::Tensor I0 = makeATenTensor<Backend>({16, 8, 16, 17, 25});
at::Tensor I1 = makeATenTensor<Backend>({16, 16, 2, 17, 25});
// 3. Run autotuning with evolutionary search starting from a naive option.
auto naiveOptions = Backend::MappingOptionsType::makeNaiveMappingOptions();
tc::aten::ATenAutotuner<Backend, tc::autotune::GeneticSearch>
geneticAutotuneATen(tc);
auto bestOption =
geneticAutotuneATen.tune("tensordot", {I0, I1}, {naiveOptions});
TC_CHECK_GT(bestOption.size(), 0u);
// 4. Compile and run the TC with the best option.
// Outputs get allocated; could also be pre-allocated and passed.
auto pExecutor =
tc::aten::compile<Backend>(tc, "tensordot", {I0, I1}, bestOption[0]);
auto outputs = tc::aten::prepareOutputs(tc, "tensordot", {I0, I1});
auto timings = tc::aten::profile(*pExecutor, {I0, I1}, outputs);
std::cout << "tensordot size I0: " << I0.sizes() << ", "
<< "size I1: " << I1.sizes()
<< " ran in: " << timings.kernelRuntime.toMicroSeconds() << "us\n";
// 5. Optionally, perform precision checks against a ref. implementation.
// TODO.
// 6. Reuse bestOptions from autotuning on another kernel
for (auto sizes : std::vector<std::pair<at::IntList, at::IntList>>{
{{4, 9, 7, 16, 14}, {4, 7, 3, 16, 14}},
{{8, 5, 11, 10, 10}, {8, 11, 16, 10, 10}},
}) {
at::Tensor I0 = makeATenTensor<Backend>(sizes.first);
at::Tensor I1 = makeATenTensor<Backend>(sizes.second);
auto pExecutor =
tc::aten::compile<Backend>(tc, "tensordot", {I0, I1}, bestOption[0]);
auto outputs = tc::aten::prepareOutputs(tc, "tensordot", {I0, I1});
auto timings = tc::aten::profile(*pExecutor, {I0, I1}, outputs);
std::cout << "tensordot size I0: " << I0.sizes() << ", "
<< "size I1: " << I1.sizes()
<< " ran in: " << timings.kernelRuntime.toMicroSeconds()
<< "us\n";
}
}
// Runs the tensordot autotune / compile / profile flow with tc::CpuBackend.
TEST(TensorDotCPU, SimpleAutotune) {
testOnBackend<tc::CpuBackend>();
}
// Runs the tensordot autotune / compile / profile flow with tc::CudaBackend.
TEST(TensorDotGPU, SimpleAutotune) {
testOnBackend<tc::CudaBackend>();
}
// Test driver entry point. Example invocation from the repository root:
//   ./build/examples/tensordot --tuner_threads=10 --tuner_gen_pop_size=10
//     --tuner_gen_generations=3 --tuner_gen_number_elites=4
int main(int argc, char** argv) {
  // Initialization order is kept as-is: gtest first, then gflags, then glog.
  ::testing::InitGoogleTest(&argc, argv);
  ::gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  ::google::InitGoogleLogging(argv[0]);

  // Seed ATen's CUDA generator before any test allocates random tensors.
  // NOTE(review): only the CUDA backend is seeded here although a CPU test
  // is registered as well -- confirm whether that is intentional.
  const auto seed = tc::initRandomSeed();
  tc::aten::setAtenSeed(seed, at::Backend::CUDA);

  const int status = RUN_ALL_TESTS();
  return status;
}