Update torchchat.py #26

Workflow file for this run

.github/workflows/compile-dtype.yml at 191f929

	name: Compile-dtype main

	on:
	push:
	branches:
	- main
	pull_request:
	workflow_dispatch:

	jobs:
	run-tinystories:
	strategy:
	matrix:
	runner: [ubuntu-latest, macos-14]
	runs-on: ${{matrix.runner}}
	steps:
	- name: Checkout repo
	uses: actions/checkout@v2
	- name: Setup Python
	uses: actions/setup-python@v2
	with:
	python-version: 3.11
	- name: Print machine info
	run: \|
	uname -a
	if [ $(uname -s) == Darwin ]; then
	sysctl machdep.cpu.brand_string
	sysctl machdep.cpu.core_count
	fi
	- name: Install requirements
	run: \|
	pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
	pip install -r requirements.txt
	- name: Download checkpoints
	run: \|
	mkdir -p checkpoints/stories15M
	pushd checkpoints/stories15M
	wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
	wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
	popd
	- name: Run inference
	run: \|
Check failure on line 42 in .github/workflows/compile-dtype.yml View workflow run for this annotation GitHub Actions / .github/workflows/compile-dtype.yml Invalid workflow file `You have an error in your yaml syntax on line 42`
	export MODEL_PATH=checkpoints/stories15M/stories15M.pt
	export MODEL_NAME=stories15M
	export MODEL_DIR=/tmp
	for DTYPE in bfloat16 float16 float32; do
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
	cat ./output_eager
	python generate.py --dtype ${DTYPE} --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
	cat ./output_compiled
	python export.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
	cat ./output_aoti

	echo "******************************************"
	echo "***** Emb: channel-wise quantized ****"
	echo "******************************************"
	python generate.py --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
	cat ./output_eager
	python generate.py --dtype ${DTYPE} --compile --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
	cat ./output_compiled
	python export.py --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
	cat ./output_aoti

	echo "******************************************"
	echo "****** Emb: group-wise quantized *****"
	echo "******************************************"
	python generate.py --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
	cat ./output_eager
	python generate.py --dtype ${DTYPE} --compile --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
	cat ./output_compiled
	python export.py --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
	cat ./output_aoti

	echo "******************************************"
	echo "***** INT8 channel-wise quantized ****"
	echo "******************************************"
	python generate.py --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
	cat ./output_eager
	python generate.py --dtype ${DTYPE} --compile --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
	cat ./output_compiled
	python export.py --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
	cat ./output_aoti

	echo "******************************************"
	echo "****** INT8 group-wise quantized *****"
	echo "******************************************"
	python generate.py --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
	cat ./output_eager
	python generate.py --dtype ${DTYPE} --compile --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
	cat ./output_compiled
	python export.py --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
	cat ./output_aoti

	echo "******************************************"
	echo "****** INT4 group-wise quantized *****"
	echo "******************************************"

	python generate.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
	cat ./output_eager
	python generate.py --dtype ${DTYPE} --compile --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
	cat ./output_compiled
	python export.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
	if [ $(uname -s) == Linux ]; then
	echo "skipping INT4 groupwise quantization because AOTI fails"
	else
	python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
	cat ./output_aoti
	fi

	echo "tests complete for ${DTYPE}"
	done

	echo "tests complete for all dtypes!"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Update torchchat.py #26

Workflow file

Update torchchat.py #26

Jobs

Run details

Workflow file for this run

GitHub Actions / .github/workflows/compile-dtype.yml