Skip to content

Commit

Permalink
[Bugfix] Fix value unpack error of simple connector for KVCache transfer. (vllm-project#11058)
Browse files Browse the repository at this point in the history

Signed-off-by: ShangmingCai <[email protected]>
  • Loading branch information
ShangmingCai authored Dec 12, 2024
1 parent 9f3974a commit db6c264
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions vllm/distributed/kv_transfer/kv_connector/simple_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ def send_kv_caches_and_hidden_states(
start_layer = model_executable.model.start_layer
end_layer = model_executable.model.end_layer

model_config = model_executable.model.config
num_heads = model_config.num_key_value_heads
hidden_size = model_config.hidden_size
num_attention_heads = model_config.num_attention_heads
head_size = int(hidden_size / num_attention_heads)

# query_lens contains new KV caches that are added to vLLM,
# so we will send them to the decode instance.
# FIXME(Kuntai): This assumes that all requests are prefill.
Expand All @@ -131,8 +137,6 @@ def send_kv_caches_and_hidden_states(
for layer_id in range(start_layer, end_layer):
kv_cache = kv_caches[layer_id - start_layer]

_, _, num_heads, head_size = kv_cache[0].shape

key_cache = kv_cache[0].reshape(-1, num_heads, head_size)
value_cache = kv_cache[1].reshape(-1, num_heads, head_size)

Expand Down

0 comments on commit db6c264

Please sign in to comment.