Update llama tests for block size 32 (#696)
The block_seq_stride default is changing from 16 to 32, so this PR updates the
tests to pass the block_seq_stride flag explicitly and to use the new numpy
inputs for block size 32 so the benchmarks run correctly. This PR also removes
the decomposed fp16 tests, which are no longer needed.
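
For illustration, a minimal sketch of how a test might opt into the new stride
via the updated export helper (assuming the diffed class below is sharktank's
ExportArtifacts; constructor arguments other than those visible in the diff
are hypothetical):

    from sharktank.utils.export_artifacts import ExportArtifacts

    # Hypothetical invocation; paths and target values are placeholders.
    artifacts = ExportArtifacts(
        irpa_path="/path/to/llama3_8b_fp16.irpa",  # assumed argument name
        batch_size=4,                              # assumed argument name
        iree_hip_target="gfx942",                  # assumed argument name
        iree_hal_target_backends="rocm",
        attention_kernel="torch",
        tensor_parallelism_size=1,
        block_seq_stride=32,  # new optional argument; None keeps the exporter default
    )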

---------

Signed-off-by: aviator19941 <[email protected]>
aviator19941 authored Dec 16, 2024
1 parent 4f542ac commit ba78824
Showing 2 changed files with 32 additions and 188 deletions.
sharktank/sharktank/utils/export_artifacts.py (4 additions, 1 deletion)
@@ -92,6 +92,7 @@ def __init__(
         iree_hal_target_backends: str,
         attention_kernel: str,
         tensor_parallelism_size: int,
+        block_seq_stride: Optional[int] = None,
     ):
         self.sharktank_dir = str(
             Path(os.path.dirname(os.path.abspath(__file__))).parent.parent.parent
@@ -102,6 +103,7 @@ def __init__(
         self.iree_hal_target_backends = iree_hal_target_backends
         self.attention_kernel = attention_kernel
         self.tensor_parallelism_size = tensor_parallelism_size
+        self.block_seq_stride = block_seq_stride
 
     def timeit(func):
         def wrapper(*args, **kwargs):
@@ -184,6 +186,8 @@ def export_to_mlir(
         if self.attention_kernel in ["decomposed", "torch"]:
             export_args.append("--attention-kernel")
             export_args.append(self.attention_kernel)
+        if self.block_seq_stride:
+            export_args.append(f"--block-seq-stride={self.block_seq_stride}")
 
         cwd = self.sharktank_dir
         cmd = subprocess.list2cmdline(export_args)
@@ -280,7 +284,6 @@ def iree_benchmark_vmfb(
         benchmark_args += [
             "iree-benchmark-module",
             "--hip_use_streams=true",
-            "--hip_allow_inline_execution=true",
             "--device_allocator=caching",
             f"--module={vmfb_name}",
         ]
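
To make the new plumbing concrete, here is a self-contained sketch of the
conditional added in export_to_mlir above; the exporter module path is an
assumption (only the --block-seq-stride flag handling is confirmed by this
diff):

    from typing import List, Optional

    def build_export_args(block_seq_stride: Optional[int] = None) -> List[str]:
        # Mirror the diffed logic: append the flag only when a stride is
        # explicitly provided, so omitting it keeps the exporter default.
        export_args = [
            "python3",
            "-m",
            "sharktank.examples.export_paged_llm_v1",  # assumed module path
        ]
        if block_seq_stride:
            export_args.append(f"--block-seq-stride={block_seq_stride}")
        return export_args

    assert "--block-seq-stride=32" in build_export_args(32)
    assert all("block-seq-stride" not in arg for arg in build_export_args())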

