Skip to content

Commit

Permalink
Fix title on mobile + add model functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Demmel committed Dec 18, 2023
1 parent 01eecc2 commit 77dc523
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 20 deletions.
5 changes: 5 additions & 0 deletions public/tokenizer.css
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ body {
background-color: #3c3c43;
}
}
/* On viewports up to 640px wide, hide the images inside the h1 heading. */
@media (max-width: 640px) {
h1 img {
display: none;
}
}
h1 {
display: flex;
justify-content: space-around;
Expand Down
6 changes: 6 additions & 0 deletions public/tokenizer.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@ <h1><img src="favicons/token.svg" alt="Token"><img src="favicons/token.svg" alt=
<p>
A pure JavaScript tokenizer running in your browser that can load <code>tokenizer.json</code> and <code>tokenizer_config.json</code> from any repository on Hugging Face. You can use it to count tokens and compare how different large language model vocabularies work. It's also useful for debugging prompt templates.
</p>
<!-- TODO: take text from URL params? -->
<textarea id="textInput" name="textInput" autofocus placeholder="Enter the text you want to tokenize" style="height: 5em;">
[INST] <<SYS>>
You are a friendly Llama.
<</SYS>>

Do you spit at people?[/INST]</textarea>
<ul id="models"></ul>
<div id="addModel">
<!-- TODO: add styling -->
<input placeholder="mistralai/Mixtral-8x7B-v0.1" />
<button>Add tokenizer from HuggingFace</button>
</div>
</body>
<script type="module" async src="tokenizer.js"></script>
</html>
49 changes: 29 additions & 20 deletions public/tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const COLOURS = [

let models = []

// TODO: take model list from URL params?
function loadModels() {
const storedModels = localStorage.getItem(KEY_MODELS)
try {
Expand All @@ -30,10 +31,10 @@ function loadModels() {
'mistralai/Mistral-7B-v0.1',
'hf-internal-testing/llama-tokenizer',
'deepseek-ai/deepseek-coder-6.7b-instruct',
'microsoft/phi-1_5',
'01-ai/Yi-34B',
'Xenova/bert-base-cased',
'Xenova/t5-small',
// 'microsoft/phi-1_5',
// '01-ai/Yi-34B',
// 'Xenova/bert-base-cased',
// 'Xenova/t5-small',
// 'obvious/error',
// 'meta-llama/Llama-2-7b-chat-hf',
]
Expand All @@ -45,6 +46,10 @@ function saveModels() {
localStorage.setItem(KEY_MODELS, JSON.stringify(models))
}

/**
 * Persist `name` as an additional entry in the stored model list.
 *
 * Only localStorage (under KEY_MODELS) is written; the in-memory `models`
 * array is not modified here, so the stored list has to be re-read for the
 * new entry to become visible to the rest of the page.
 *
 * Surrounding whitespace is trimmed, and blank names or names already present
 * in `models` are ignored so the stored list never gains an empty or
 * duplicate entry.
 *
 * @param {string} name - Model identifier, e.g. 'mistralai/Mixtral-8x7B-v0.1'.
 */
function addModel(name) {
  const trimmedName = String(name ?? '').trim()
  // Nothing to add: empty input or a model that is already listed.
  if (trimmedName === '' || models.includes(trimmedName)) {
    return
  }
  localStorage.setItem(KEY_MODELS, JSON.stringify([...models, trimmedName]))
}

loadModels()

const loadedModels = {}
Expand All @@ -62,36 +67,32 @@ textInput.addEventListener('input', (event) => {
})

async function loadTokenizers() {
modelsList.innerHTML = ''

console.log('Loading models...')
for (const model of models) {
if (!(model in loadedModels)) {
try {
console.log('Loading model: ' + model)
loadedModels[model] = await AutoTokenizer.from_pretrained(model)
} catch (error) {
console.error('Model loading error:' + error)
loadedModels[model] = { error }
}
// some tokenizers strip spaces, let's prevent it so we can render text with the token numbers

console.log('Loaded model', loadedModels[model])
// some tokenizers strip spaces, let's prevent it so we can render them with the token numbers
if (loadedModels[model]?.decoder?.decoders?.at(-1)?.config?.type === 'Strip') {
loadedModels[model].decoder.decoders.pop()
}
}

const newModelListItem = document.createElement('li')
newModelListItem.dataset.model = model
// TODO: add delete button
// TODO: make it possible to reorder them?
// TODO: add token count to each box
modelsList.appendChild(newModelListItem)

updateTokens()
const newModelListItem = document.createElement('li')
newModelListItem.dataset.model = model
// TODO: add delete button
// TODO: make it possible to reorder them?
modelsList.appendChild(newModelListItem)
}
}

// const addModelListItem = document.createElement('li')
// addModelListItem.id = 'addModel'
// addModelListItem.innerHTML = `<input /><button class="addModel">Add</button>`
// modelsList.appendChild(addModelListItem)
updateTokens()
}

const renderTokenAndText = (acc, { token, text }, index) => {
Expand Down Expand Up @@ -132,3 +133,11 @@ function updateTokens() {
}

await loadTokenizers()

// Container holding the model-name input and the "add" button.
const addModelBox = document.getElementById('addModel')

/**
 * Click handler for the "add" button: stores the typed model name, re-reads
 * the model list, loads the tokenizers and then scrolls to the page bottom.
 */
async function handleAddModelClick() {
  const modelNameInput = addModelBox.querySelector('input')
  addModel(modelNameInput.value)
  loadModels()
  await loadTokenizers()
  // Scroll only after the tokenizers have finished loading.
  window.scrollTo(0, document.body.scrollHeight)
}

addModelBox.querySelector('button').addEventListener('click', handleAddModelClick)

1 comment on commit 77dc523

@vercel
Copy link

@vercel vercel bot commented on 77dc523 Dec 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.