Skip to content

Commit

Permalink
avoid potential issue with PAR and transformer-xl recurrence
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidrains committed Jan 3, 2021
1 parent af23656 commit 6dda487
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = 'x-transformers',
packages = find_packages(exclude=['examples']),
version = '0.6.5',
version = '0.6.6',
license='MIT',
description = 'X-Transformers - Pytorch',
author = 'Phil Wang',
Expand Down
12 changes: 9 additions & 3 deletions x_transformers/x_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,16 @@ def inner(*args, **kwargs):
return val
return inner

def not_equal(val):
def not_equals(val):
    """Build a predicate that returns True for any value other than `val`.

    Used e.g. to filter layer-type characters out of a block tuple.
    """
    return lambda x: x != val

def equals(val):
    """Build a predicate that returns True only for values equal to `val`.

    Mirror of `not_equals`; used e.g. to count occurrences of a layer type.
    """
    return lambda x: x == val

def max_neg_value(tensor):
    """Return the most negative finite value representable in `tensor`'s dtype.

    Equals -finfo(dtype).max; presumably used as a masking fill value for
    attention logits — confirm against callers outside this view.
    """
    finfo = torch.finfo(tensor.dtype)
    return -finfo.max

Expand Down Expand Up @@ -415,7 +420,7 @@ def __init__(
elif exists(par_ratio):
par_depth = depth * len(default_block)
assert 1 < par_ratio <= par_depth, 'par ratio out of range'
default_block = tuple(filter(not_equal('f'), default_block))
default_block = tuple(filter(not_equals('f'), default_block))
par_attn = par_depth // par_ratio
depth_cut = par_depth * 2 // 3 # 2 / 3 attention layer cutoff suggested by PAR paper
par_width = (depth_cut + depth_cut // par_attn) // par_attn
Expand All @@ -430,6 +435,7 @@ def __init__(
layer_types = default_block * depth

self.layer_types = layer_types
self.default_mems = ([None] * len(list(filter(equals('a'), layer_types))))

for layer_type in self.layer_types:
if layer_type == 'a':
Expand Down Expand Up @@ -464,7 +470,7 @@ def forward(
prev_attn = None
prev_cross_attn = None

mems = mems.copy() if exists(mems) else ([None] * self.depth)
mems = mems.copy() if exists(mems) else self.default_mems

for ind, (layer_type, (norm, block)) in enumerate(zip(self.layer_types, self.layers)):
is_last = ind == (len(self.layers) - 1)
Expand Down

0 comments on commit 6dda487

Please sign in to comment.