test.py
import torch
import numpy as np
from torch.nn import functional as F
from attn import (
    attn_chunk_kv,
    attn_chunk_kv_chunk_q,
    attn_chunk_kv_chunk_q_incremental,
    attn_chunk_q_chunk_kv,
    attn_lazy_softmax,
    attn_split_kv,
    single_query_vector_attn,
    single_query_vector_attn_lazy_softmax,
)
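

# Correctness tests: each test builds random inputs, runs an implementation
# from the attn module, and compares the result against
# torch.nn.functional.scaled_dot_product_attention. Values reuse the key
# tensor (v = k) throughout.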
def test_single_query_vector_attn(d, kv_seq_len):
    q = np.random.random((d,))
k = np.random.random((kv_seq_len, d))
v = k
out = single_query_vector_attn(q, k, v)
q_torch = torch.from_numpy(q)[None, :]
k_torch = torch.from_numpy(k)
v_torch = torch.from_numpy(v)
out_torch = F.scaled_dot_product_attention(q_torch, k_torch, v_torch)
assert np.allclose(out_torch.detach().numpy(), out)
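

# Same check for the lazy-softmax variant of the single-query implementation.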
def test_single_query_vector_attn_lazy_softmax(d, kv_seq_len):
    q = np.random.random((d,))
k = np.random.random((kv_seq_len, d))
v = k
out = single_query_vector_attn_lazy_softmax(q, k, v)
q_torch = torch.from_numpy(q)[None, :]
k_torch = torch.from_numpy(k)
v_torch = torch.from_numpy(v)
out_torch = F.scaled_dot_product_attention(q_torch, k_torch, v_torch)
assert np.allclose(out_torch.detach().numpy(), out)
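

# Generic harness: run attention implementation `f` on random batched
# (b, h, seq_len, d) inputs, forwarding chunk sizes when given, and compare
# against the torch reference.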
def test_attn(
b, h, q_seq_len, d, kv_seq_len, q_chunk_seq_len=None, kv_chunk_seq_len=None, f=None
):
q = np.random.random((b, h, q_seq_len, d))
k = np.random.random((b, h, kv_seq_len, d))
v = k
opt_args = {}
if q_chunk_seq_len is not None:
opt_args["q_chunk_seq_len"] = q_chunk_seq_len
if kv_chunk_seq_len is not None:
opt_args["kv_chunk_seq_len"] = kv_chunk_seq_len
out = f(q, k, v, **opt_args)
q_torch = torch.from_numpy(q)
k_torch = torch.from_numpy(k)
v_torch = torch.from_numpy(v)
out_torch = F.scaled_dot_product_attention(q_torch, k_torch, v_torch)
assert np.allclose(out_torch.detach().numpy(), out)
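

# GPU check: the chunked CUDA implementation is compared against the torch
# reference directly on CUDA tensors.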
def test_attn_chunk_q_chunk_kv_cuda(b, h, q_seq_len, d, kv_seq_len):
from attn_chunk_q_chunk_kv_cuda import attn_chunk_q_chunk_kv_cuda
q = torch.rand((b, h, q_seq_len, d), device="cuda")
k = torch.rand((b, h, kv_seq_len, d), device="cuda")
v = k
out = attn_chunk_q_chunk_kv_cuda(q, k, v)
out_ = F.scaled_dot_product_attention(q, k, v)
assert torch.allclose(out_, out)
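

# Sweep small shape grids over every implementation, printing each name
# before its sweep. The CUDA test only runs when a GPU is available.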
def test_all():
for test in [
test_single_query_vector_attn,
test_single_query_vector_attn_lazy_softmax,
]:
print(test.__name__)
for d in range(1, 6):
for kv_seq_len in range(1, 6):
test(d, kv_seq_len)
print(attn_lazy_softmax.__name__)
for b in range(1, 3):
for h in range(1, 3):
for q_seq_len in range(1, 6):
for d in range(1, 6):
for kv_seq_len in range(1, 6):
test_attn(b, h, q_seq_len, d, kv_seq_len, f=attn_lazy_softmax)
print(attn_split_kv.__name__)
for b in range(1, 3):
for h in range(1, 3):
for q_seq_len in range(1, 6):
for d in range(1, 6):
for kv_seq_len in [2, 4, 6]:
test_attn(b, h, q_seq_len, d, kv_seq_len, f=attn_split_kv)
print(attn_chunk_kv.__name__)
for b in range(1, 3):
for h in range(1, 3):
for q_seq_len in range(1, 3):
for d in range(1, 6):
for kv_chunk_seq_len in range(1, 3):
for kv_seq_len_multiple in range(1, 6):
kv_seq_len = kv_chunk_seq_len * kv_seq_len_multiple
test_attn(
b,
h,
q_seq_len,
d,
kv_seq_len,
kv_chunk_seq_len=kv_chunk_seq_len,
f=attn_chunk_kv,
)
for f in [
attn_chunk_q_chunk_kv,
attn_chunk_kv_chunk_q,
attn_chunk_kv_chunk_q_incremental,
]:
print(f.__name__)
for b in range(1, 3):
for h in range(1, 3):
for q_chunk_seq_len in range(1, 3):
for q_seq_len_multiple in range(1, 3):
for d in range(1, 6):
for kv_chunk_seq_len in range(1, 3):
for kv_seq_len_multiple in range(1, 3):
q_seq_len = q_chunk_seq_len * q_seq_len_multiple
kv_seq_len = kv_chunk_seq_len * kv_seq_len_multiple
test_attn(
b,
h,
q_seq_len,
d,
kv_seq_len,
q_chunk_seq_len,
kv_chunk_seq_len,
f=f,
)
if torch.cuda.is_available():
from attn_chunk_q_chunk_kv_cuda import attn_chunk_q_chunk_kv_cuda
print(attn_chunk_q_chunk_kv_cuda.__name__)
for b in range(1, 3):
for h in range(1, 3):
for q_seq_len in range(1, 6):
for d in range(1, 6):
for kv_seq_len in range(1, 6):
test_attn_chunk_q_chunk_kv_cuda(
b, h, q_seq_len, d, kv_seq_len
)
else:
print("cuda not available, skipping cuda tests")
if __name__ == "__main__":
test_all()