From 6de4a376e1be46837d1533f2bf785a7e1de7e7f9 Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Thu, 2 Nov 2023 06:48:29 +0000 Subject: [PATCH] fix msvc build --- .../test_decoder_multihead_attention.cu | 17 +++++++++-------- .../decoder_multihead_attention/test_utils.h | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/turbomind/kernels/decoder_multihead_attention/test_decoder_multihead_attention.cu b/src/turbomind/kernels/decoder_multihead_attention/test_decoder_multihead_attention.cu index e4636bcea0..f93f8ce466 100644 --- a/src/turbomind/kernels/decoder_multihead_attention/test_decoder_multihead_attention.cu +++ b/src/turbomind/kernels/decoder_multihead_attention/test_decoder_multihead_attention.cu @@ -10,6 +10,7 @@ #include #include +#include using namespace turbomind; @@ -106,15 +107,15 @@ int main(int argc, char* argv[]) DecoderMultiHeadAttentionParams params{}; - constexpr int kHeadNum = 32; - constexpr int kHeadDim = 128; - constexpr int KvHeadNum = 32; - constexpr int kBatchSize = 32; - constexpr int kContextLen = 7306; - // constexpr int kContextLen = 1024; + constexpr int kHeadNum = 32; + constexpr int kHeadDim = 128; + constexpr int KvHeadNum = 32; + constexpr int kBatchSize = 1; + // constexpr int kContextLen = 7306; + constexpr int kContextLen = 1024; constexpr int kSequenceLen = kContextLen + 1; constexpr int kBlockSz = 128; - constexpr int kTestIter = 1; + constexpr int kTestIter = 10; constexpr int kMaxSplitK = 1; RNG rng{}; @@ -256,7 +257,7 @@ int main(int argc, char* argv[]) std::vector> outputs; - for (int i = 0; i < std::max(kTestIter, 10); ++i) { + for (int i = 0; i < std::max(kTestIter, 1); ++i) { DispatchDecoderMultiheadAttention(params); if (auto err = cudaGetLastError(); err != cudaSuccess) { std::cout << cudaGetErrorString(err) << "\n"; diff --git a/src/turbomind/kernels/decoder_multihead_attention/test_utils.h b/src/turbomind/kernels/decoder_multihead_attention/test_utils.h index ecfedcb53f..35caf5f036 100644 --- a/src/turbomind/kernels/decoder_multihead_attention/test_utils.h +++ b/src/turbomind/kernels/decoder_multihead_attention/test_utils.h @@ -3,6 +3,7 @@ #pragma once #include "decoder_multihead_attention.h" +#include "src/turbomind/macro.h" #include #include