Skip to content

Commit

Permalink
Fix tests - disable marlin_fused_moe; fix rocm_paged attention
Browse files Browse the repository at this point in the history
Minor post merge fixes
  • Loading branch information
gshtras committed Sep 16, 2024
1 parent 30a9875 commit 533f64b
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 1,692 deletions.
18 changes: 16 additions & 2 deletions csrc/rocm/attention.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
// TODO: add license terms
/*
* Copyright (c) 2024, The vLLM team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <torch/all.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
Expand Down Expand Up @@ -594,7 +609,6 @@ __global__ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_QKV_kernel(
}
}
} else { // warp in context

// iterate across heads
#pragma unroll
for (int qh = 0; qh < QHLOOP; qh++) {
Expand Down
78 changes: 0 additions & 78 deletions csrc/rocm/custom copy.cu

This file was deleted.

Loading

0 comments on commit 533f64b

Please sign in to comment.