diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index 43014b2b10fec..b1c0e9b09bed0 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -384,22 +384,30 @@ test_inductor_cpp_wrapper() { # unit tests with cpp wrapper. python test/run_test.py --include inductor/test_torchinductor.py --verbose - python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \ + + # Run inductor benchmark tests with cpp wrapper. + # Skip benchmark tests if it's in rerun-disabled-mode. + if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]]; then + echo "skip dynamo benchmark tests for rerun-disabled-test" + else + echo "run dynamo benchmark tests with cpp wrapper" + python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \ --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \ --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" - python benchmarks/dynamo/check_accuracy.py \ - --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \ - --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv" - - python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ - --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" - python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ - --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" - python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ - --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" - python benchmarks/dynamo/check_accuracy.py \ - --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \ - --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv" + python benchmarks/dynamo/check_accuracy.py \ + --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \ + --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv" + + python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ + --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" + python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ + --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" + python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ + --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" + python benchmarks/dynamo/check_accuracy.py \ + --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \ + --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv" + fi } # "Global" flags for inductor benchmarking controlled by TEST_CONFIG