Fix tests - disable marlin_fused_moe; fix rocm_paged attention

Minor post merge fixes
ROCm · Sep 16, 2024 · ee78500
1 parent 30a9875
commit ee78500
Show file tree

Hide file tree

Showing 10 changed files with 47 additions and 1,686 deletions.
diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
@@ -1,4 +1,19 @@
-// TODO: add license terms
+/*
+ * Copyright (c) 2024, The vLLM team.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include <torch/all.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <c10/cuda/CUDAGuard.h>
@@ -594,7 +609,6 @@ __global__ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_QKV_kernel(
       }
     }
   } else {  // warp in context
-
   // iterate across heads
   #pragma unroll
     for (int qh = 0; qh < QHLOOP; qh++) {

diff --git a/csrc/rocm/custom copy.cu b/csrc/rocm/custom copy.cu