Skip to content

Commit f081a54

Browse files
committed
tmp
1 parent 14f8cdd commit f081a54

File tree

1 file changed

+40
-98
lines changed

1 file changed

+40
-98
lines changed

tests/model_loader/test_common_model.py

Lines changed: 40 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,8 @@
2323
sys.path.insert(0, project_root)
2424

2525
from tests.model_loader.utils import (
26-
check_tokens_id_and_text_close,
2726
form_model_get_output_topp0,
28-
form_model_get_output_topp1,
2927
get_paddle_model_path,
30-
get_torch_model_path,
3128
run_with_timeout,
3229
)
3330

@@ -36,59 +33,43 @@
3633

3734
prompts = ["解释下”温故而知新”", "Hello, how are you?"]
3835

36+
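# Maps a pytest case id ("{model}.{quant}.{backend}") to its expected baseline output.
# NOTE(review): the "test" value below looks like a placeholder — confirm before relying on it.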
37+
baseline = {
38+
"ernie-4_5-21b-a3b-bf16-paddle.wint8.default": "test",
39+
}
3940

4041
model_param_map = {
41-
"Qwen3-0.6B": {
42-
"max_num_seqs": 1,
43-
"quantizations": ["None", "wint8", "wint4"],
44-
},
42+
# "Qwen3-0.6B": {
43+
# "max_num_seqs": 1,
44+
# "quantizations": ["None", "wint8", "wint4"],
45+
# },
4546
"ernie-4_5-21b-a3b-bf16-paddle": {
4647
"max_num_seqs": 1,
4748
"tensor_parallel_size": 2,
4849
"quantizations": [
4950
"wint8",
5051
],
5152
},
52-
"Qwen2-7B-Instruct": {
53-
"max_num_seqs": 1,
54-
"quantizations": ["wint4"],
55-
},
56-
"Qwen2.5-VL-7B-Instruct": {
57-
"max_num_seqs": 1,
58-
"quantizations": ["wint4"],
59-
"is_mm": True,
60-
"torch_model_name_or_path": "Qwen2.5-VL-7B-Instruct-PT",
61-
},
62-
"Qwen3-30B-A3B": {
63-
"tensor_parallel_size": 2,
64-
"max_num_seqs": 1,
65-
"quantizations": [
66-
{
67-
"quant_type": "block_wise_fp8",
68-
"backend": "triton",
69-
"env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
70-
},
71-
{
72-
"quant_type": "block_wise_fp8",
73-
"backend": "deepgemm",
74-
"env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"},
75-
},
76-
],
77-
},
78-
"DeepSeek-V3-0324": {
79-
"tensor_parallel_size": 2,
80-
"quantizations": [
81-
{
82-
"quant_type": "wint4",
83-
"env": {
84-
"FD_ATTENTION_BACKEND": "MLA_ATTN",
85-
"FLAGS_mla_use_tensorcore": "1",
86-
"FLAGS_flash_attn_version": "3",
87-
"FD_USE_MACHETE": "1",
88-
},
89-
},
90-
],
91-
},
53+
# "Qwen2-7B-Instruct": {
54+
# "max_num_seqs": 1,
55+
# "quantizations": ["wint4"],
56+
# },
57+
# "Qwen3-30B-A3B": {
58+
# "tensor_parallel_size": 2,
59+
# "max_num_seqs": 1,
60+
# "quantizations": [
61+
# {
62+
# "quant_type": "block_wise_fp8",
63+
# "backend": "triton",
64+
# "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
65+
# },
66+
# {
67+
# "quant_type": "block_wise_fp8",
68+
# "backend": "deepgemm",
69+
# "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"},
70+
# },
71+
# ],
72+
# },
9273
}
9374

9475

@@ -109,15 +90,14 @@
10990
quant,
11091
cfg.get("max_tokens", 32),
11192
env,
112-
cfg.get("is_mm", False),
11393
marks=[pytest.mark.core_model],
11494
id=f"{model}.{quant}.{backend}",
11595
)
11696
)
11797

11898

11999
@pytest.mark.parametrize(
120-
"model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env,is_mm",
100+
"model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env",
121101
params,
122102
)
123103
def test_common_model(
@@ -130,31 +110,17 @@ def test_common_model(
130110
max_tokens: int,
131111
quantization: str,
132112
env,
133-
is_mm: bool,
113+
request,
134114
monkeypatch,
135115
) -> None:
116+
print("当前用例 id:", request.node.callspec.id)
136117
model_path = get_paddle_model_path(model_name_or_path)
137118
if env:
138119
for k, v in env.items():
139120
monkeypatch.setenv(k, v)
140121

141-
form_model_get_output = form_model_get_output_topp0 if not is_mm else form_model_get_output_topp1
142-
fd_outputs_v0 = run_with_timeout(
143-
target=form_model_get_output,
144-
args=(
145-
fd_runner,
146-
model_path,
147-
tensor_parallel_size,
148-
max_num_seqs,
149-
max_model_len,
150-
max_tokens,
151-
quantization,
152-
"default",
153-
FD_ENGINE_QUEUE_PORT,
154-
prompts,
155-
FD_CACHE_QUEUE_PORT,
156-
),
157-
)
122+
form_model_get_output = form_model_get_output_topp0
123+
158124
fd_outputs_v1 = run_with_timeout(
159125
target=form_model_get_output,
160126
args=(
@@ -171,35 +137,11 @@ def test_common_model(
171137
FD_CACHE_QUEUE_PORT,
172138
),
173139
)
140+
print(fd_outputs_v1)
174141

175-
check_tokens_id_and_text_close(
176-
outputs_0_lst=fd_outputs_v0,
177-
outputs_1_lst=fd_outputs_v1,
178-
name_0="default loader",
179-
name_1="default_v1 loader",
180-
)
181-
182-
if torch_model_name_or_path != "":
183-
torch_model_path = get_torch_model_path(torch_model_name_or_path)
184-
fd_outputs_v1_torch = run_with_timeout(
185-
target=form_model_get_output,
186-
args=(
187-
fd_runner,
188-
torch_model_path,
189-
tensor_parallel_size,
190-
max_num_seqs,
191-
max_model_len,
192-
max_tokens,
193-
quantization,
194-
"default_v1",
195-
FD_ENGINE_QUEUE_PORT,
196-
prompts,
197-
FD_CACHE_QUEUE_PORT,
198-
),
199-
)
200-
check_tokens_id_and_text_close(
201-
outputs_0_lst=fd_outputs_v1,
202-
outputs_1_lst=fd_outputs_v1_torch,
203-
name_0="default loader",
204-
name_1="default_v1 loader",
205-
)
142+
# check_tokens_id_and_text_close(
143+
# outputs_0_lst=fd_outputs_v0,
144+
# outputs_1_lst=fd_outputs_v1,
145+
# name_0="default loader",
146+
# name_1="default_v1 loader",
147+
# )

0 commit comments

Comments
 (0)