4-bit cache support is now active
psych0v0yager committed Mar 8, 2024
1 parent e33d344 · commit be528af
Showing 1 changed file with 4 additions and 3 deletions.
outlines/models/exllamav2.py (4 additions & 3 deletions)
@@ -143,10 +143,11 @@ def exl2(
"""

try:
from exllamav2 import ( # , ExLlamaV2Cache_Q4
from exllamav2 import (
ExLlamaV2,
ExLlamaV2Cache,
ExLlamaV2Cache_8bit,
ExLlamaV2Cache_Q4,
ExLlamaV2Config,
)
from transformers import AutoTokenizer
@@ -209,8 +210,8 @@ def exl2(

     if cache_8bit:
         cache = ExLlamaV2Cache_8bit(model, lazy=not model.loaded)
-    # elif cache_q4:
-    #     cache = ExLlamaV2Cache_Q4(model, lazy = not model.loaded)
+    elif cache_q4:
+        cache = ExLlamaV2Cache_Q4(model, lazy=not model.loaded)
     else:
         cache = ExLlamaV2Cache(model, lazy=not model.loaded)

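The activated branch selects ExLlamaV2Cache_Q4 whenever a cache_q4 flag is set, alongside the existing 8-bit and FP16 options. A minimal usage sketch follows; it assumes the loader is exposed as outlines.models.exl2 and that cache_q4 is a keyword argument of that function (this diff only shows the branch consuming the flag, so the exact signature is an assumption):

import outlines.models as models

# Load an EXL2-quantized model with the newly activated 4-bit KV cache.
# `model_path` is a placeholder; `cache_q4=True` routes cache construction
# through the ExLlamaV2Cache_Q4 branch added by this commit.
model = models.exl2(
    model_path="path/to/exl2-quantized-model",
    cache_q4=True,
)

Note the precedence in the branch: cache_8bit wins if both flags are set, the FP16 ExLlamaV2Cache remains the default, and every variant passes lazy=not model.loaded so cache allocation is deferred until the model weights are loaded.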
