deviate from paper and use softclamping instead for laser
lucidrains committed Dec 3, 2024
1 parent 5b4ddef commit 544c699
Showing 2 changed files with 5 additions and 4 deletions.
setup.py: 2 changes (1 addition, 1 deletion)
@@ -3,7 +3,7 @@
 setup(
   name = 'x-transformers',
   packages = find_packages(exclude=['examples']),
-  version = '1.42.22',
+  version = '1.42.23',
   license='MIT',
   description = 'X-Transformers - Pytorch',
   author = 'Phil Wang',
x_transformers/x_transformers.py: 7 changes (4 additions, 3 deletions)
@@ -1079,6 +1079,7 @@ def __init__(
         neutreno_alpha = 0.4,
         learned_value_residual_mix = False,
         laser = False, # https://arxiv.org/abs/2411.03493v1
+        laser_softclamp_value = 15.,
         onnxable = False,
         attend_sdp_kwargs: dict = dict(
             enable_flash = True,
@@ -1121,6 +1122,7 @@ def __init__(
         # enhancing gradients to attention through exponentiated values

         self.laser = laser
+        self.laser_softclamp_value = laser_softclamp_value

         # relations projection from tp-attention

@@ -1448,8 +1450,7 @@ def forward(
                 attn_bias = pad_at_dim(attn_bias, (num_mem_kv, 0))

         if self.laser:
-            values_max = v.amax(dim = -2, keepdim = True).detach() # numerical stability
-            v = v - values_max
+            v = softclamp(v, self.laser_softclamp_value)
             v = v.exp()

         # attention is all we need
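
For context, softclamp is defined elsewhere in the repository and is not part of this diff. A minimal sketch of the changed value path, assuming the usual tanh-based softclamp definition (an assumption, not confirmed by this commit): the clamp bounds the values so the subsequent .exp() cannot overflow, which is what the removed amax-subtraction was guarding against.

    import torch

    def softclamp(t, value):
        # assumed tanh-based definition: smoothly bounds t to (-value, value)
        return (t / value).tanh() * value

    v = torch.randn(2, 8, 1024, 64) * 100.   # values with large magnitude
    v = softclamp(v, 15.)                    # bounded to roughly (-15, 15)
    v = v.exp()                              # safe: max is about e^15, fine in fp32
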
@@ -1464,7 +1465,7 @@ def forward(
         # laser

         if self.laser:
-            out = log(out) + values_max
+            out = log(out)

         # store the values for resformer or Neutreno

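Putting the two changed sites in forward together, the LASER path exponentiates the values before attention and takes the log of the output afterwards. A hedged end-to-end sketch, using torch's built-in scaled_dot_product_attention in place of the repo's Attend module and the assumed softclamp above:

    import torch
    import torch.nn.functional as F

    def softclamp(t, value):
        return (t / value).tanh() * value

    def laser_attention(q, k, v, softclamp_value = 15.):
        # LASER: attend over exponentiated values, then map back with a log.
        # Softclamping first keeps v.exp() finite, replacing the paper's
        # subtract-the-row-max trick that this commit removes.
        v = softclamp(v, softclamp_value)
        out = F.scaled_dot_product_attention(q, k, v.exp())
        # out is a convex combination of strictly positive values, so log is safe
        return out.log()

    q = k = v = torch.randn(1, 8, 128, 64)
    out = laser_attention(q, k, v)

The trade-off made here: clamping the values to (-15, 15) is not exactly equivalent to the paper's formulation, but it removes the need to carry values_max through to the output, keeping the numerics stateless.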
