diff --git a/.github/workflows/compile-bf16.yml b/.github/workflows/compile-dtype.yml
similarity index 98%
rename from .github/workflows/compile-bf16.yml
rename to .github/workflows/compile-dtype.yml
index c255e3a60..ec99f9e3f 100644
--- a/.github/workflows/compile-bf16.yml
+++ b/.github/workflows/compile-dtype.yml
@@ -11,7 +11,7 @@ jobs:
   run-tinystories:
     strategy:
       matrix:
-        runner: [ubuntu-latest, macos-14, macos-12]
+        runner: [ubuntu-latest, macos-14]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout repo
@@ -102,9 +102,6 @@ jobs:
           echo "******************************************"
           echo "******** INT4 group-wise quantized *******"
           echo "******************************************"
-          if [ ${DTYPE} == float16 ]; then
-            DTYPE=bfloat16
-          fi
           python generate.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
           cat ./output_eager
 
diff --git a/.github/workflows/eager-dtype.yml b/.github/workflows/eager-dtype.yml
new file mode 100644
index 000000000..f8f564b7b
--- /dev/null
+++ b/.github/workflows/eager-dtype.yml
@@ -0,0 +1,87 @@
+# Runs generate.py (no compile/export flags) on the stories15M checkpoint for
+# each dtype: once unquantized, then once per quantization scheme listed below.
+name: Eager-dtype main
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  run-tinystories:
+    strategy:
+      matrix:
+        runner: [macos-12]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v2
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          # Quoted so the version is passed as a string, not parsed as a float.
+          python-version: "3.11"
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ $(uname -s) == Darwin ]; then
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Install requirements
+        run: |
+          pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+          pip install -r requirements.txt
+      - name: Download checkpoints
+        run: |
+          mkdir -p checkpoints/stories15M
+          pushd checkpoints/stories15M
+          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
+          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+          popd
+      - name: Run inference
+        run: |
+          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
+          export MODEL_NAME=stories15M
+          export MODEL_DIR=/tmp
+          # Every run overwrites ./output_eager; cat echoes it into the job log.
+          for DTYPE in bfloat16 float16 float32; do
+            python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+            cat ./output_eager
+
+            echo "******************************************"
+            echo "******* Emb: channel-wise quantized ******"
+            echo "******************************************"
+            python generate.py --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+            cat ./output_eager
+
+            echo "******************************************"
+            echo "******** Emb: group-wise quantized *******"
+            echo "******************************************"
+            python generate.py --dtype ${DTYPE} --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+            cat ./output_eager
+
+            echo "******************************************"
+            echo "******* INT8 channel-wise quantized ******"
+            echo "******************************************"
+            python generate.py --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+            cat ./output_eager
+
+            echo "******************************************"
+            echo "******** INT8 group-wise quantized *******"
+            echo "******************************************"
+            python generate.py --dtype ${DTYPE} --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+            cat ./output_eager
+
+            echo "******************************************"
+            echo "******** INT4 group-wise quantized *******"
+            echo "******************************************"
+            python generate.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+            cat ./output_eager
+
+            echo "tests complete for ${DTYPE}"
+          done
+
+          echo "tests complete for all dtypes!"