Skip to content

Commit

Permalink
make maxvit a bit more customizable, add some asserts and self explanatory error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidrains committed May 7, 2024
1 parent a521b02 commit a76f2a1
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
25 changes: 20 additions & 5 deletions q_transformer/q_robotic_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ def __init__(
heads = 8,
dim_head = 64,
dim_conv_stem = None,
conv_stem_downsample = True,
window_size = 7,
mbconv_expansion_rate = 4,
mbconv_shrinkage_rate = 0.25,
Expand All @@ -356,14 +357,22 @@ def __init__(
flash_attn = True
):
super().__init__()

self.depth = depth

# convolutional stem

dim_conv_stem = default(dim_conv_stem, dim)

self.conv_stem = nn.Sequential(
nn.Conv2d(channels, dim_conv_stem, 3, stride = 2, padding = 1),
nn.Conv2d(dim_conv_stem, dim_conv_stem, 3, padding = 1)
)
self.conv_stem_downsample = conv_stem_downsample

if conv_stem_downsample:
self.conv_stem = nn.Sequential(
nn.Conv2d(channels, dim_conv_stem, 3, stride = 2, padding = 1),
nn.Conv2d(dim_conv_stem, dim_conv_stem, 3, padding = 1)
)
else:
self.conv_stem = nn.Conv2d(channels, dim_conv_stem, 7, padding = 3)

# variables

Expand Down Expand Up @@ -433,6 +442,10 @@ def __init__(
nn.Linear(embed_dim, num_classes)
)

@property
def downsample_factor(self):
    """Return the total factor that input height and width must be divisible by.

    The factor is 2 when the convolutional stem downsamples (1 otherwise),
    multiplied by 2 for every entry in ``self.depth``.
    """
    # NOTE(review): presumably each entry of `self.depth` is one stage that
    # halves the spatial resolution -- confirm against the stage construction.
    stem_factor = 2 if self.conv_stem_downsample else 1
    num_stages = len(self.depth)
    return stem_factor * (2 ** num_stages)

@beartype
def forward(
self,
Expand All @@ -442,7 +455,9 @@ def forward(
cond_drop_prob = 0.,
return_embeddings = False
):
assert all([divisible_by(d, self.window_size) for d in img.shape[-2:]])
hw = img.shape[-2:]
assert all([divisible_by(d, self.window_size) for d in hw]), f'height and width of video frames {tuple(hw)} must be divisible by window size {self.window_size}'
assert all([divisible_by(d, self.downsample_factor) for d in hw]), f'height and width of video frames {tuple(hw)} must be divisible by total downsample factor {self.downsample_factor}'

x = self.conv_stem(img)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = 'q-transformer',
packages = find_packages(exclude=[]),
version = '0.1.14',
version = '0.1.15',
license='MIT',
description = 'Q-Transformer',
author = 'Phil Wang',
Expand Down

0 comments on commit a76f2a1

Please sign in to comment.