diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py index bc894bddc3c..f80c2006244 100644 --- a/tests/model_loader/test_common_model.py +++ b/tests/model_loader/test_common_model.py @@ -25,9 +25,7 @@ from tests.model_loader.utils import ( check_tokens_id_and_text_close, form_model_get_output_topp0, - form_model_get_output_topp1, get_paddle_model_path, - get_torch_model_path, run_with_timeout, ) @@ -36,6 +34,541 @@ prompts = ["解释下”温故而知新”", "Hello, how are you?"] +# {id,baseline} +baseline = { + "Qwen3-0.6B.None.default": [ + ( + [ + 99487, + 115040, + 105855, + 3837, + 101034, + 99652, + 18493, + 100384, + 101047, + 99892, + 3837, + 101034, + 100007, + 18493, + 100384, + 15946, + 104026, + 99487, + 115040, + 36407, + 100627, + 105683, + 105520, + 106579, + 26850, + 101140, + 3837, + 35946, + 85106, + 81167, + 99487, + 151645, + ], + "这个成语的意思,以及它在教学中的应用,以及如何在教学中运用这个成语来提高学生的语文素养?\n\n首先,我需要确认这个", + ), + ( + [ + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 151645, + ], + " I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry", + ), + ], + "Qwen3-0.6B.wint8.default": [ + ( + [ + 99487, + 115040, + 105855, + 3837, + 101034, + 99652, + 18493, + 100384, + 101047, + 99892, + 3837, + 101034, + 100007, + 18493, + 100384, + 15946, + 104026, + 99487, + 115040, + 36407, + 100627, + 99720, + 105595, + 101062, + 8997, + 2073, + 99416, + 99535, + 68536, + 52183, + 16628, + 151645, + ], + "这个成语的意思,以及它在教学中的应用,以及如何在教学中运用这个成语来提高学生的学习效果。\n“温故而知新", + ), + ( + [ + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 369, + 279, + 60009, + 13, + 358, + 2776, + 14589, + 151645, + ], + " I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry", + ), + ], + "Qwen3-0.6B.wint4.default": [ + ( + [ + 99487, + 115040, + 9370, + 109091, + 8997, + 102349, + 5122, + 99487, + 115040, + 9370, + 109091, + 20412, + 5122, + 99416, + 99535, + 100052, + 29826, + 3837, + 99794, + 100052, + 29826, + 3837, + 101982, + 102009, + 16628, + 100032, + 1773, + 104136, + 5122, + 99416, + 99535, + 151645, + ], + "这个成语的含义。\n答案:这个成语的含义是:温故旧事,了解旧事,从而掌握新知识。解释:温故", + ), + ( + [ + 358, + 2776, + 264, + 5458, + 518, + 264, + 12103, + 13, + 358, + 2776, + 264, + 5458, + 518, + 264, + 12103, + 13, + 358, + 2776, + 264, + 5458, + 518, + 264, + 12103, + 13, + 358, + 2776, + 264, + 5458, + 518, + 264, + 12103, + 151645, + ], + " I'm a student at a university. I'm a student at a university. I'm a student at a university. I'm a student at a university", + ), + ], + "ernie-4_5-21b-a3b-bf16-paddle.wint8.default": [ + ( + [ + 58544, + 23, + 5458, + 93956, + 1294, + 94705, + 94752, + 55817, + 94136, + 94041, + 93986, + 94227, + 80951, + 94226, + 1855, + 18982, + 78351, + 93956, + 94338, + 35829, + 5154, + 93977, + 24053, + 58544, + 706, + 8290, + 94022, + 94035, + 1594, + 26635, + 94029, + 2, + ], + "的含义\n不了, “温故而知新”是《论语》中的一句名言,由孔子提出。这句话的含义可以解释为:通过回顾和", + ), + ( + [ + 354, + 4932, + 536, + 93968, + 276, + 4447, + 1622, + 93937, + 25062, + 93938, + 354, + 1481, + 318, + 7427, + 441, + 536, + 274, + 4497, + 326, + 57142, + 38210, + 385, + 274, + 24742, + 18268, + 56335, + 93963, + 3717, + 82674, + 23050, + 45955, + 2, + ], + " I hope you're doing well. Today, I want to share with you a simple and delicious recipe for a classic Italian dish: Spaghetti Carbon", + ), + ], + "Qwen2-7B-Instruct.wint4.default": [ + ( + [ + 106599, + 105855, + 8997, + 2073, + 99416, + 99535, + 68536, + 52183, + 16628, + 854, + 110434, + 26940, + 67831, + 72881, + 25067, + 101047, + 26940, + 47764, + 68536, + 99824, + 87243, + 103283, + 17714, + 36987, + 99416, + 99535, + 68536, + 52183, + 16628, + 3837, + 73670, + 151645, + ], + "这句话的意思。\n“温故而知新”出自《论语》中的《学而篇》,原文为:“温故而知新,可以", + ), + ( + [ + 358, + 2776, + 1101, + 264, + 6366, + 2025, + 11, + 773, + 358, + 1513, + 944, + 614, + 15650, + 476, + 21261, + 13, + 358, + 2776, + 1588, + 311, + 1492, + 498, + 448, + 894, + 4755, + 498, + 2578, + 614, + 311, + 279, + 1850, + 151645, + ], + " I'm just a computer program, so I don't have feelings or emotions. I'm here to help you with any questions you might have to the best", + ), + ], + "Qwen3-30B-A3B.block_wise_fp8.triton": [ + ( + [ + 106599, + 9370, + 109091, + 90395, + 107485, + 46944, + 99912, + 111564, + 1773, + 1036, + 99416, + 99535, + 68536, + 52183, + 16628, + 854, + 99639, + 99700, + 110434, + 26940, + 67831, + 72881, + 25067, + 9370, + 115040, + 3837, + 111490, + 67338, + 107090, + 100052, + 107232, + 151645, + ], + "这句话的含义,并给出一个实际的例子。 “温故而知新”是一句出自《论语》的成语,意思是通过复习旧的知识", + ), + ( + [ + 358, + 2776, + 4460, + 311, + 1477, + 279, + 897, + 315, + 279, + 25098, + 315, + 279, + 729, + 282, + 2075, + 8, + 284, + 220, + 16, + 11884, + 87, + 61, + 17, + 488, + 220, + 16, + 8, + 504, + 856, + 284, + 481, + 151645, + ], + " I'm trying to find the value of the integral of the function f(x) = 1/(x^2 + 1) from x = -", + ), + ], + "Qwen3-30B-A3B.block_wise_fp8.deepgemm": [ + ( + [ + 106599, + 9370, + 109091, + 90395, + 107485, + 46944, + 99912, + 111564, + 1773, + 1036, + 99416, + 99535, + 68536, + 52183, + 16628, + 854, + 99639, + 99700, + 110434, + 26940, + 67831, + 72881, + 25067, + 9370, + 115040, + 3837, + 111490, + 67338, + 107090, + 100052, + 107232, + 151645, + ], + "这句话的含义,并给出一个实际的例子。 “温故而知新”是一句出自《论语》的成语,意思是通过复习旧的知识", + ), + ( + [ + 358, + 2776, + 4460, + 311, + 11625, + 419, + 3491, + 25, + 330, + 9885, + 279, + 897, + 315, + 279, + 7493, + 25, + 220, + 16, + 15, + 15, + 15, + 14, + 16, + 15, + 15, + 15, + 488, + 220, + 16, + 15, + 15, + 151645, + ], + " I'm trying to solve this problem: \"Find the value of the expression: 1000/1000 + 100", + ), + ], +} model_param_map = { "Qwen3-0.6B": { @@ -53,12 +586,6 @@ "max_num_seqs": 1, "quantizations": ["wint4"], }, - "Qwen2.5-VL-7B-Instruct": { - "max_num_seqs": 1, - "quantizations": ["wint4"], - "is_mm": True, - "torch_model_name_or_path": "Qwen2.5-VL-7B-Instruct-PT", - }, "Qwen3-30B-A3B": { "tensor_parallel_size": 2, "max_num_seqs": 1, @@ -75,20 +602,6 @@ }, ], }, - "DeepSeek-V3-0324": { - "tensor_parallel_size": 2, - "quantizations": [ - { - "quant_type": "wint4", - "env": { - "FD_ATTENTION_BACKEND": "MLA_ATTN", - "FLAGS_mla_use_tensorcore": "1", - "FLAGS_flash_attn_version": "3", - "FD_USE_MACHETE": "1", - }, - }, - ], - }, } @@ -109,7 +622,6 @@ quant, cfg.get("max_tokens", 32), env, - cfg.get("is_mm", False), marks=[pytest.mark.core_model], id=f"{model}.{quant}.{backend}", ) @@ -117,7 +629,7 @@ @pytest.mark.parametrize( - "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env,is_mm", + "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env", params, ) def test_common_model( @@ -130,7 +642,7 @@ def test_common_model( max_tokens: int, quantization: str, env, - is_mm: bool, + request, monkeypatch, ) -> None: model_path = get_paddle_model_path(model_name_or_path) @@ -138,23 +650,8 @@ def test_common_model( for k, v in env.items(): monkeypatch.setenv(k, v) - form_model_get_output = form_model_get_output_topp0 if not is_mm else form_model_get_output_topp1 - fd_outputs_v0 = run_with_timeout( - target=form_model_get_output, - args=( - fd_runner, - model_path, - tensor_parallel_size, - max_num_seqs, - max_model_len, - max_tokens, - quantization, - "default", - FD_ENGINE_QUEUE_PORT, - prompts, - FD_CACHE_QUEUE_PORT, - ), - ) + form_model_get_output = form_model_get_output_topp0 + fd_outputs_v1 = run_with_timeout( target=form_model_get_output, args=( @@ -173,33 +670,8 @@ def test_common_model( ) check_tokens_id_and_text_close( - outputs_0_lst=fd_outputs_v0, + outputs_0_lst=baseline[request.node.callspec.id], outputs_1_lst=fd_outputs_v1, name_0="default loader", name_1="default_v1 loader", ) - - if torch_model_name_or_path != "": - torch_model_path = get_torch_model_path(torch_model_name_or_path) - fd_outputs_v1_torch = run_with_timeout( - target=form_model_get_output, - args=( - fd_runner, - torch_model_path, - tensor_parallel_size, - max_num_seqs, - max_model_len, - max_tokens, - quantization, - "default_v1", - FD_ENGINE_QUEUE_PORT, - prompts, - FD_CACHE_QUEUE_PORT, - ), - ) - check_tokens_id_and_text_close( - outputs_0_lst=fd_outputs_v1, - outputs_1_lst=fd_outputs_v1_torch, - name_0="default loader", - name_1="default_v1 loader", - )