Skip to content

Commit

Permalink
Adjust line formatting to fix lint errors
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Jun 18, 2024
1 parent 63c7d7e commit 3de25e9
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions csrc/custom/custom_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,8 @@ __device__ __forceinline__ T loadnt(T* addr) {
#define DTYPE half

__global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B,
const DTYPE* __restrict__ A, DTYPE* C,
const int CuCount) {
const DTYPE* __restrict__ A, DTYPE* C,
const int CuCount) {
union bigType {
DTYPE h[A_CHUNK];
float f[A_CHUNK / 2];
Expand Down Expand Up @@ -395,9 +395,8 @@ __global__ void wvSpltK_hf_m1_sml_(const int K, const int N, const DTYPE* B,
if (k_ >= K) break;
#pragma unroll
for (uint32_t m = 0; m < M; m++) {

// Do the matrix multiplication of activation and weight matrix
// - Remember the accumulation is happening for K-split of 64!
// Do the matrix multiplication of activation and weight matrix
// - Remember the accumulation is happening for K-split of 64!
#pragma unroll
for (uint32_t b = 0; b < A_CHUNK / 2; b++) {
asm("v_dot2c_f32_f16 %0, %2, %3"
Expand Down Expand Up @@ -680,7 +679,6 @@ __global__ void wvSpltK_hf_m1_(const int K, const int N, const DTYPE* B,
if (k_ >= K) break;
#pragma unroll
for (uint32_t m = 0; m < M; m++) {

// Do the matrix multiplication of activation and weight matrix
// - Remember the accumulation is happening for K-split of 64!
#pragma unroll
Expand Down Expand Up @@ -1033,7 +1031,6 @@ __global__ void wvSpltK_hf_m2_(const int K, const int N, const DTYPE* B,
if (k_ >= K) break;
#pragma unroll
for (uint32_t m = 0; m < M; m++) {

// Do the matrix multiplication of activation and weight matrix
// - Remember the accumulation is happening for K-split of 64!
#pragma unroll
Expand Down Expand Up @@ -1386,7 +1383,6 @@ __global__ void wvSpltK_hf_m3_(const int K, const int N, const DTYPE* B,
if (k_ >= K) break;
#pragma unroll
for (uint32_t m = 0; m < M; m++) {

// Do the matrix multiplication of activation and weight matrix
// - Remember the accumulation is happening for K-split of 64!
#pragma unroll
Expand Down Expand Up @@ -1739,7 +1735,6 @@ __global__ void wvSpltK_hf_m4_(const int K, const int N, const DTYPE* B,
if (k_ >= K) break;
#pragma unroll
for (uint32_t m = 0; m < M; m++) {

// Do the matrix multiplication of activation and weight matrix
// - Remember the accumulation is happening for K-split of 64!
#pragma unroll
Expand Down

0 comments on commit 3de25e9

Please sign in to comment.