|
 from tensorrt_llm.sampling_params import SamplingParams

 # isort: off
-from .test_llm import (
-    get_model_path, global_kvcache_config, llama_model_path,
-    llm_get_stats_async_test_harness, llm_get_stats_test_harness, prompts,
-    run_llm_abort_request, run_llm_with_postprocess_parallel_and_result_handler,
-    tinyllama_logits_processor_test_harness, _test_llm_capture_request_error)
+from .test_llm import (get_model_path, global_kvcache_config, llama_model_path,
+                       llm_get_stats_async_test_harness,
+                       llm_get_stats_test_harness, prompts,
+                       run_llm_abort_request,
+                       run_llm_with_postprocess_parallel_and_result_handler,
+                       tinyllama_logits_processor_test_harness)
 from utils.util import force_ampere, similar, skip_gpu_memory_less_than_40gb, skip_gpu_memory_less_than_80gb, skip_gpu_memory_less_than_138gb
 from utils.llm_data import llm_models_root
 from tensorrt_llm.lora_manager import LoraConfig
@@ -63,10 +64,6 @@ def test_llm_get_stats_async(return_context_logits, use_overlap,
         enable_iter_req_stats=enable_iter_req_stats)


-def test_llm_capture_request_error():
-    _test_llm_capture_request_error(pytorch_backend=True, tp_size=1)
-
-
 @force_ampere
 @pytest.mark.parametrize(
     "sampling_params",
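For reference, the deleted test was a thin pytest wrapper that delegated to a harness shared from test_llm.py; this sketch reconstructs that pattern using only the names and keyword arguments visible in the removed hunk above:

from .test_llm import _test_llm_capture_request_error


def test_llm_capture_request_error():
    # Delegate to the shared harness: exercise request-error capture on the
    # PyTorch backend with tensor-parallel size 1 (single GPU).
    _test_llm_capture_request_error(pytorch_backend=True, tp_size=1)

Since the wrapper carried no logic of its own, removing it here (and dropping _test_llm_capture_request_error from the import block) leaves the harness as the single place the behavior is defined.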
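The trailing context lines show the next test being parametrized over sampling_params, which is why the SamplingParams import at the top of the hunk stays. As an illustration only (the test name and the field values below are invented, not the cases from the real file), a pytest parametrization over tensorrt_llm's SamplingParams follows this shape:

import pytest

from tensorrt_llm.sampling_params import SamplingParams


@pytest.mark.parametrize(
    "sampling_params",
    [
        # Illustrative assumptions, not the real test's cases: a short
        # greedy run and a short sampled run.
        SamplingParams(max_tokens=8),
        SamplingParams(max_tokens=8, temperature=0.8, top_p=0.95),
    ])
def test_generation_with_sampling_params(sampling_params):
    # The real test (decorated with @force_ampere above) would pass each
    # SamplingParams instance into an LLM generate call and check outputs.
    ...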