Commit

small tweak
lucidrains committed Oct 2, 2024
1 parent adaa1d3 commit a8f6461
Showing 2 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'x-transformers',
   packages = find_packages(exclude=['examples']),
-  version = '1.37.7',
+  version = '1.37.8',
   license='MIT',
   description = 'X-Transformers - Pytorch',
   author = 'Phil Wang',
8 changes: 4 additions & 4 deletions x_transformers/attend.py
@@ -211,12 +211,12 @@ def flash_attn(

         if self.l2_distance:
             k_norm_sq = k.norm(dim = -1, keepdim = True) ** 2
-            k = F.pad(k, (0, 1), value = 1.)
-            k = torch.cat((k, -k_norm_sq), dim = -1)
+            k = F.pad(k, (0, 1), value = -1.)
+            k = torch.cat((k, k_norm_sq), dim = -1)

             q_norm_sq = q.norm(dim = -1, keepdim = True) ** 2
-            q = torch.cat((2 * q, -q_norm_sq), dim = -1)
-            q = F.pad(q, (0, 1), value = 1.)
+            q = torch.cat((2 * q, q_norm_sq), dim = -1)
+            q = F.pad(q, (0, 1), value = -1.)

         # handle scale - by default they scale by dim_head ** -0.5, but need to take care if using cosine sim attention

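Both the old and the new sign conventions encode the same similarity: the dot product of the augmented q and k equals 2 q·k - ||q||^2 - ||k||^2 = -||q - k||^2, which is the quantity flash attention softmaxes over when l2_distance is enabled. The following is a minimal sketch (not part of this commit; shapes and variable names are illustrative) verifying that identity with the post-commit convention:

# Verify that the augmented q/k dot product reproduces the negative
# squared L2 distance: q_aug . k_aug = 2 q.k - ||q||^2 - ||k||^2 = -||q - k||^2.
# Shapes below are assumed for illustration, not taken from the repository.
import torch
import torch.nn.functional as F

q = torch.randn(2, 4, 16, 8)   # (batch, heads, seq, dim_head)
k = torch.randn(2, 4, 16, 8)

k_norm_sq = k.norm(dim = -1, keepdim = True) ** 2
k_aug = F.pad(k, (0, 1), value = -1.)              # [k, -1]
k_aug = torch.cat((k_aug, k_norm_sq), dim = -1)    # [k, -1, ||k||^2]

q_norm_sq = q.norm(dim = -1, keepdim = True) ** 2
q_aug = torch.cat((2 * q, q_norm_sq), dim = -1)    # [2q, ||q||^2]
q_aug = F.pad(q_aug, (0, 1), value = -1.)          # [2q, ||q||^2, -1]

sim = q_aug @ k_aug.transpose(-2, -1)              # 2 q.k - ||q||^2 - ||k||^2

# brute-force negative squared distance for comparison
neg_sq_dist = -((q.unsqueeze(-2) - k.unsqueeze(-3)) ** 2).sum(dim = -1)

assert torch.allclose(sim, neg_sq_dist, atol = 1e-4)

The pre-commit convention ([k, 1, -||k||^2] against [2q, -||q||^2, 1]) yields the same dot product, so the tweak changes only where the minus signs live, not the attention scores.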
