Skip to content

Commit

Permalink
Improve
Browse files (browse the repository at this point in the history)
  • Loading branch information
patrickvonplaten committed Jul 20, 2024
1 parent 58230b4 commit 64d0d70
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/mistral_inference/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,7 @@
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.base import Tokenizer
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.tokens.tokenizers.tekken import Tekkenizer, SpecialTokenPolicy
from mistral_common.tokens.tokenizers.sentencepiece import is_sentencepiece
from mistral_common.tokens.tokenizers.tekken import is_tekken

Expand All @@ -36,6 +37,9 @@ def load_tokenizer(model_path: Path) -> MistralTokenizer:

mistral_tokenizer = MistralTokenizer.from_file(str(model_path / tokenizer[0]))

if isinstance(mistral_tokenizer.instruct_tokenizer.tokenizer, Tekkenizer):
mistral_tokenizer.instruct_tokenizer.tokenizer.special_token_policy = SpecialTokenPolicy.KEEP

logging.info(f"Loaded tokenizer of type {mistral_tokenizer.instruct_tokenizer.__class__}")

return mistral_tokenizer
Expand Down

0 comments on commit 64d0d70

Please sign in to comment.