unconst

InternLM · Dec 9, 2024 · a65007b · a65007b
1 parent 240f352
commit a65007b
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/lmdeploy/pytorch/kernels/cuda/flatten_kv_cache.py b/lmdeploy/pytorch/kernels/cuda/flatten_kv_cache.py
@@ -31,7 +31,7 @@ def _flatten_kv_cache(
     stride_vos: tl.constexpr,
     stride_vod: tl.constexpr,
     stride_boff,
-    OUT_SIZE: tl.constexpr,
+    OUT_SIZE,
     HEAD_DIM_K: tl.constexpr,
     HEAD_DIM_V: tl.constexpr,
     BLOCK_BS: tl.constexpr,
@@ -124,7 +124,7 @@ def _flatten_kv_cache_quant(
     stride_vod: tl.constexpr,
     stride_boff,
     quant_policy: tl.constexpr,
-    OUT_SIZE: tl.constexpr,
+    OUT_SIZE,
     HEAD_DIM_K: tl.constexpr,
     HEAD_DIM_V: tl.constexpr,
     BLOCK_BS: tl.constexpr,