
Commit 9bc64ee

reverted some unneeded changes
rsuderman committed Dec 12, 2024
1 parent 0469c25 commit 9bc64ee
Showing 2 changed files with 2 additions and 2 deletions.
sharktank/sharktank/examples/export_paged_llm_v1.py (1 addition, 1 deletion)
@@ -74,7 +74,7 @@ def main():
         tensor_parallelism_size=tensor_parallelism_size,
         use_hf=False,
         static_tables=False,  # Rely on the compiler for hoisting tables.
-        kv_cache_type="paged" if args.bs == [1] else "paged",
+        kv_cache_type="direct" if args.bs == [1] else "paged",
         attention_kernel=args.attention_kernel,
     )
     llama_config.fake_quant = args.fake_quant
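For readers outside the diff context: the restored line picks the KV-cache implementation from the requested batch sizes. A minimal sketch of that selection, assuming args.bs is a list of batch sizes as in the exporter; the helper name select_kv_cache_type is hypothetical and not part of sharktank:

# Hypothetical helper for illustration only; it mirrors the restored
# expression above and is not part of the sharktank codebase.
def select_kv_cache_type(bs: list[int]) -> str:
    # Exporting for a single batch size of 1 uses the simpler "direct"
    # cache; any other batch configuration falls back to the "paged" cache.
    return "direct" if bs == [1] else "paged"

assert select_kv_cache_type([1]) == "direct"
assert select_kv_cache_type([1, 4]) == "paged"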
sharktank/sharktank/layers/configs/llm_configs.py (1 addition, 1 deletion)
@@ -144,7 +144,7 @@ class LlamaModelConfig:
 
     # Block sequence stride for a paged KV cache. This must divide evenly
     # into the context length.
-    block_seq_stride: int = 32
+    block_seq_stride: int = 16
 
     # Either "paged" or "direct".
     kv_cache_type: str = "paged"
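The comment above states a constraint worth making concrete: block_seq_stride must divide the context length evenly, since a paged KV cache is addressed in fixed-size blocks. A minimal check, assuming hypothetical names (validate_block_seq_stride, context_length) not taken from the repo:

# Illustrative only; validate_block_seq_stride is a hypothetical helper,
# not part of sharktank. It enforces the divisibility rule above.
def validate_block_seq_stride(context_length: int, block_seq_stride: int = 16) -> int:
    """Return the number of cache blocks per full-length sequence."""
    if context_length % block_seq_stride != 0:
        raise ValueError(
            f"block_seq_stride={block_seq_stride} must divide evenly into "
            f"context_length={context_length}"
        )
    return context_length // block_seq_stride

# e.g. a 4096-token context with the reverted stride of 16 gives 256 blocks.
assert validate_block_seq_stride(4096, 16) == 256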
