Skip to content

Commit

Permalink
removing tensorflow_text for aarch64 compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
rdyro committed Oct 30, 2024
1 parent 2e1ebad commit b750691
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
7 changes: 4 additions & 3 deletions MaxText/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing import Dict, Iterable, Union, Literal, Sequence, Collection, List
from pathlib import Path
import tensorflow as tf
import tensorflow_text as tftxt
import sentencepiece as sp
import max_logging
import tiktoken
from tiktoken.load import load_tiktoken_bpe
Expand Down Expand Up @@ -191,7 +191,8 @@ def __init__(self, model_path: str, add_bos: bool, add_eos: bool):
max_logging.log(f"Tokenizer path: {model_path}")
with tf.io.gfile.GFile(model_path, "rb") as model_fp:
sp_model = model_fp.read()
self.sp_tokenizer = tftxt.SentencepieceTokenizer(model=sp_model, add_bos=add_bos, add_eos=add_eos, reverse=False)
# this tokenizer is ONLY compatible with previous tftxt sp tokenizer if reverse=False
self.sp_tokenizer = sp.SentencePieceProcessor(model_proto=sp_model, add_bos=add_bos, add_eos=add_eos, reverse=False)

def encode(self, s: str) -> List[int]:
return self.sp_tokenizer.tokenize(s)
Expand Down Expand Up @@ -223,5 +224,5 @@ def _process_string(string_tensor):
if isinstance(tokenizer, TikTokenTokenizer):
features[k] = tf.py_function(_process_string, [features[k]], Tout=[tf.int32])[0]
elif isinstance(tokenizer, SentencePieceTokenizer):
features[k] = tokenizer.encode(features[k])
features[k] = tf.py_function(_process_string, [features[k]], Tout=[tf.int32])[0]
return features
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ pyink
pre-commit
pytype
sentencepiece==0.1.97
tensorflow-text>=2.13.0
tensorflow>=2.13.0
tensorflow-datasets
tensorboardx
Expand Down

0 comments on commit b750691

Please sign in to comment.