
Commit

Fix timeouts. Update sample commands.
kuhar committed Sep 5, 2024
1 parent a6f3c5a commit 7adce63
Showing 3 changed files with 25 additions and 26 deletions.
tuner/examples/punet/README.md (6 changes: 3 additions & 3 deletions)
@@ -31,16 +31,16 @@ cp ./dump-mmt/module_main_0_dispatch_0_rocm_hsaco_fb_benchmark.mlir test-benchmark.mlir
### Recommended Trial Run
For an initial trial to test the tuning loop, use:
```shell
-python punet_autotune.py test-benchmark.mlir --num-candidates=10
+python -m tuner.examples.punet.punet_autotune test-benchmark.mlir --num-candidates=10
```

### Dry Run Test
To perform a dry run (no GPU required), use:
```shell
-python punet_autotune.py test-benchmark.mlir --num-candidates=64 --num-model-candidates=10 --dry-run
+python -m tuner.examples.punet.punet_autotune test-benchmark.mlir --num-candidates=64 --num-model-candidates=10 --dry-run
```

### Basic Usage
```shell
-python punet_autotune.py test-benchmark.mlir
+python -m tuner.examples.punet.punet_autotune test-benchmark.mlir
```
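
The updated commands invoke the tuner as a package module, which presumably requires running from the repository root so that the `tuner` package is importable. A minimal programmatic equivalent, sketched under that assumption (the argument values are illustrative, and it assumes the module guards its entry point with `if __name__ == "__main__"`):

```python
# Rough equivalent of `python -m tuner.examples.punet.punet_autotune ...`,
# assuming the current working directory is the repository root so the
# `tuner` package resolves. Argument values are illustrative only.
import runpy
import sys

sys.argv = ["punet_autotune", "test-benchmark.mlir", "--num-candidates=10"]
runpy.run_module("tuner.examples.punet.punet_autotune", run_name="__main__")
```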
tuner/examples/punet/punet_autotune.py (21 changes: 7 additions & 14 deletions)
@@ -7,17 +7,17 @@
"""
Sample Usage:
-python punet_autotune.py 2.mlir --lhs-dims=bmk --rhs-dims=bkn --tile-dims=*mnk --devices=hip://0,hip://1 --num-candidates=64
+python -m tuner.examples.punet.punet_autotune benchmark.mlir --lhs-dims=bmk --rhs-dims=bkn --tile-dims=*mnk --devices=hip://0,hip://1 --num-candidates=64
Recommended Trial Run:
-python punet_autotune.py 2.mlir --num-candidates=1
+python -m tuner.examples.punet.punet_autotune benchmark.mlir --num-candidates=1
Dry Run Test (no GPU required):
-python punet_autotune.py 2.mlir --num-candidates=64 --num-model-candidates=10 --dry-run
+python -m tuner.examples.punet.punet_autotune benchmark.mlir --num-candidates=64 --num-model-candidates=10 --dry-run
"""

@@ -35,7 +35,7 @@ def get_dispatch_compile_command(
mlir_path = candidate_tracker.dispatch_mlir_path
assert mlir_path is not None
command = [
"./compile_candidate.sh",
"compile_candidate.sh",
mlir_path.as_posix(),
]
return command
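
With the leading `./` dropped, the helper script is no longer addressed relative to the current directory; a plausible reading (an assumption, not stated in the commit) is that it is resolved like any other bare command name, e.g. via PATH or by the caller. A quick way to check that resolution:

```python
# Hedged sketch: confirm that a bare "compile_candidate.sh" resolves the way
# a subprocess lookup would resolve it. PATH-based lookup is an assumption
# about the intended setup, not something the diff states.
import shutil

script = shutil.which("compile_candidate.sh")
if script is None:
    raise FileNotFoundError("compile_candidate.sh not found on PATH")
print(f"resolved to {script}")
```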
@@ -51,9 +51,7 @@ def get_dispatch_benchmark_command(
assert compiled_vmfb_path is not None

command = [
"timeout",
"16s",
"./iree-benchmark-module",
"iree-benchmark-module",
f"--device={libtuner.DEVICE_ID_PLACEHOLDER}",
f"--module={compiled_vmfb_path.resolve()}",
"--hip_use_streams=true",
@@ -74,14 +72,11 @@ def get_model_compile_command(
) -> list[str]:
mlir_spec_path = candidate_tracker.spec_path
assert mlir_spec_path is not None
-script_dir = Path(__file__).resolve().parent
target_dir = mlir_spec_path.resolve().parent.parent.parent
output_name = f"unet_candidate_{candidate_tracker.candidate_id}.vmfb"
command = [
"timeout",
"300s",
"./compile-punet-base.sh",
"./iree-compile",
"compile-punet-base.sh",
"iree-compile",
"gfx942",
f"{mlir_spec_path.resolve()}",
"./punet.mlir",
@@ -100,8 +95,6 @@ def get_model_benchmark_command(
assert unet_candidate_path is not None

command = [
"timeout",
"180s",
"iree-benchmark-module",
f"--device={libtuner.DEVICE_ID_PLACEHOLDER}",
"--hip_use_streams=true",
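
Taken together, the punet_autotune.py changes drop the external `timeout <N>s` wrapper from every generated command; the time budget now travels separately and is enforced in `tuner/libtuner.py` (see below). A hedged before/after sketch, with the paths and the 16-second budget taken from the removed lines and the variable names purely illustrative:

```python
# Before: the timeout was baked into the command itself.
old_command = ["timeout", "16s", "./iree-benchmark-module", "--device=hip://0"]

# After: the command is just the benchmark invocation; the budget is carried
# alongside it (assumption: via RunPack.timeout_seconds, per the libtuner diff).
new_command = ["iree-benchmark-module", "--device=hip://0"]
dispatch_benchmark_timeout_s = 16
```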
tuner/libtuner.py (24 changes: 15 additions & 9 deletions)
@@ -200,7 +200,7 @@ def get_model_benchmark_timeout_s(self) -> int:
class RunPack:
command: list[str]
check: bool = True
-timeout: Optional[int] = None
+timeout_seconds: Optional[int] = None


@dataclass
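
The `timeout` field on `RunPack` becomes `timeout_seconds`, making the unit explicit at every call site. A self-contained usage sketch (the dataclass mirrors the fields shown in the diff; the concrete command and the 16-second value are illustrative assumptions):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class RunPack:
    command: list[str]
    check: bool = True
    timeout_seconds: Optional[int] = None


# Illustrative values; in the tuner these presumably come from the
# TuningClient hooks (e.g. get_dispatch_benchmark_timeout_s()).
pack = RunPack(
    command=["iree-benchmark-module", "--device=hip://0"],
    check=False,
    timeout_seconds=16,
)
```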
@@ -523,7 +523,7 @@ def create_worker_context_queue(device_ids: list[int]) -> queue.Queue[tuple[int,
def run_command(run_pack: RunPack) -> TaskResult:
command = run_pack.command
check = run_pack.check
-timeout = run_pack.timeout
+timeout_seconds = run_pack.timeout_seconds

result = None
is_timeout = False
@@ -534,15 +534,21 @@ def run_command(run_pack: RunPack) -> TaskResult:

# Add timeout to subprocess.run call
result = subprocess.run(
-command, check=check, capture_output=True, text=True, timeout=timeout
+command,
+check=check,
+capture_output=True,
+text=True,
+timeout=timeout_seconds,
)

if result.stdout:
logging.debug(f"stdout: {result.stdout}")
if result.stderr:
logging.debug(f"stderr: {result.stderr}")
except subprocess.TimeoutExpired as e:
-logging.warning(f"Command '{command_str}' timed out after {timeout} seconds.")
+logging.warning(
+f"Command '{command_str}' timed out after {timeout_seconds} seconds."
+)
is_timeout = True
except subprocess.CalledProcessError as e:
print(e.output)
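
The warning is reflowed and the new `timeout_seconds` name is used throughout; the timeout itself is enforced by `subprocess.run`, which kills the child process and raises `TimeoutExpired` once the budget elapses. A minimal standalone illustration of that behaviour (the 2-second sleep against a 1-second budget is a hypothetical example, and it assumes a POSIX `sleep` binary):

```python
import subprocess

try:
    subprocess.run(
        ["sleep", "2"],
        check=False,
        capture_output=True,
        text=True,
        timeout=1,
    )
except subprocess.TimeoutExpired as e:
    # e.cmd and e.timeout describe what was run and the budget that expired.
    print(f"'{' '.join(e.cmd)}' timed out after {e.timeout} seconds")
```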
@@ -811,7 +817,7 @@ def compile_dispatches(
candidate_trackers[i]
),
check=False,
-timeout=tuning_client.get_dispatch_compile_timeout_s(),
+timeout_seconds=tuning_client.get_dispatch_compile_timeout_s(),
),
candidate_id=i,
)
@@ -991,7 +997,7 @@ def benchmark_dispatches(
candidate_trackers[i]
),
check=False,
-timeout=tuning_client.get_dispatch_benchmark_timeout_s(),
+timeout_seconds=tuning_client.get_dispatch_benchmark_timeout_s(),
),
candidate_id=i,
command_need_device_id=True,
@@ -1071,7 +1077,7 @@ def compile_models(
RunPack(
command=tuning_client.get_model_compile_command(candidate_trackers[i]),
check=False,
-timeout=tuning_client.get_model_compile_timeout_s(),
+timeout_seconds=tuning_client.get_model_compile_timeout_s(),
),
candidate_id=i,
)
@@ -1272,7 +1278,7 @@ def benchmark_models(
candidate_trackers[i]
),
check=False,
-timeout=tuning_client.get_dispatch_benchmark_timeout_s(),
+timeout_seconds=tuning_client.get_dispatch_benchmark_timeout_s(),
),
candidate_id=i,
command_need_device_id=True,
@@ -1298,7 +1304,7 @@
candidate_trackers[0]
),
check=False,
-timeout=tuning_client.get_model_benchmark_timeout_s(),
+timeout_seconds=tuning_client.get_model_benchmark_timeout_s(),
),
candidate_id=0,
command_need_device_id=True,
