A tokenizer based on @lenml/tokenizers.
import { fromPreTrained } from "@lenml/tokenizer-aya_expanse";
// Create the tokenizer through the package's fromPreTrained() factory.
const tokenizer = fromPreTrained();

// Sample sentence shared by both calls below.
const sampleText = "Hello, my dog is cute";

// encode(): no second text (null) and special tokens explicitly requested.
const encodedWithSpecialTokens = tokenizer.encode(sampleText, null, {
  add_special_tokens: true,
});
console.log("encode()", encodedWithSpecialTokens);

// _encode_text(): underscore-prefixed, so presumably an internal helper;
// its result is printed as-is for comparison with encode().
const encodedText = tokenizer._encode_text(sampleText);
console.log("_encode_text", encodedText);
The complete API parameters and usage can be found in the transformers.js tokenizers documentation.
Apache-2.0