Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
irexyc committed Dec 18, 2024
1 parent 5e967c0 commit 5750194
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions lmdeploy/turbomind/deploy/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def __init__(self, model: BaseOutputModel):
self.attn_bias = model.model_config.attn_bias

def _reorder_and_merge(self, qkvo):
- q, k, v, o = map(transpose, qkvo)
+ q, k, v, o = qkvo
# reorder output dim for tm's rotary embedding layout
if self.model.permute_qk:
q = permute_v2(q, self.head_dim)
Expand All @@ -210,8 +210,9 @@ def _repeat_kv(self, qkvo, kind: str):

def _repeat(x):
dim = hidden_dim if kind != 'bias' else 1
- x = x.view(-1, head_dim, dim).repeat(1, self.model.repeat_kv, 1)
- x = x.reshape(-1, dim)
+ x = x.t().reshape(-1, head_dim, dim)
+ x = x.repeat(1, self.model.repeat_kv, 1)
+ x = x.reshape(-1, dim).t()
return x

k, v = map(_repeat, (k, v))
Expand All @@ -229,6 +230,7 @@ def _export(self, idx: int, qkvo, kind: str, pack_fn, **kwargs):
if is_lora_a:
qkv, o = map(transpose, qkvo)
else:
+ qkvo = tuple(map(transpose, qkvo))
if self.model.repeat_kv:
qkvo = self._repeat_kv(qkvo, kind)
qkv, o = self._reorder_and_merge(qkvo)
Expand Down

0 comments on commit 5750194

Please sign in to comment.