diff --git a/MaxText/scratch_code/golden_mistral-7b_export.ipynb b/MaxText/scratch_code/golden_mistral-7b_export.ipynb new file mode 100644 index 000000000..18cbcf9b5 --- /dev/null +++ b/MaxText/scratch_code/golden_mistral-7b_export.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0d13ebbb", + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu\n", + "!pip3 install tokenizers -U\n", + "!pip3 install transformers -U" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6a8a4bb6", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM\n", + "import jsonlines" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ff804403", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "587cc338332e42cd8438f831d6fcf2f7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/996 [00:00