From 39a41a53b067b159bb6633739296c51a1df3a3c4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 4 Jul 2024 20:44:32 +0300 Subject: [PATCH 1/6] py : switch to snake_case ggml-ci --- README.md | 8 +++--- ci/run.sh | 4 +-- convert_hf_to_gguf.py | 10 +++---- convert_hf_to_gguf_update.py | 26 +++++++++---------- docs/HOWTO-add-model.md | 2 +- ...egacy-llama.py => convert_legacy_llama.py} | 0 ...=> convert_finetune_checkpoint_to_gguf.py} | 0 ...ple.py => json_schema_pydantic_example.py} | 0 examples/llava/MobileVLM-README.md | 14 +++++----- examples/llava/README.md | 20 +++++++------- ...uf.py => convert_image_encoder_to_gguf.py} | 0 .../{llava-surgery.py => llava_surgery.py} | 0 ...lava-surgery-v2.py => llava_surgery_v2.py} | 0 ...=> pydantic_models_to_grammar_examples.py} | 0 ...egex-to-grammar.py => regex_to_grammar.py} | 0 examples/{server-embd.py => server_embd.py} | 0 ...py => convert_train_checkpoint_to_gguf.py} | 0 gguf-py/README.md | 2 +- ...nvert-endian.py => gguf_convert_endian.py} | 0 .../scripts/{gguf-dump.py => gguf_dump.py} | 0 ...f-new-metadata.py => gguf_new_metadata.py} | 0 ...f-set-metadata.py => gguf_set_metadata.py} | 0 scripts/check-requirements.sh | 10 +++---- scripts/convert-gg.sh | 26 ------------------- scripts/pod-llama.sh | 14 +++++----- 25 files changed, 55 insertions(+), 81 deletions(-) rename examples/{convert-legacy-llama.py => convert_legacy_llama.py} (100%) rename examples/finetune/{convert-finetune-checkpoint-to-gguf.py => convert_finetune_checkpoint_to_gguf.py} (100%) rename examples/{json-schema-pydantic-example.py => json_schema_pydantic_example.py} (100%) rename examples/llava/{convert-image-encoder-to-gguf.py => convert_image_encoder_to_gguf.py} (100%) rename examples/llava/{llava-surgery.py => llava_surgery.py} (100%) rename examples/llava/{llava-surgery-v2.py => llava_surgery_v2.py} (100%) rename examples/{pydantic-models-to-grammar-examples.py => pydantic_models_to_grammar_examples.py} (100%) rename examples/{regex-to-grammar.py => regex_to_grammar.py} (100%) rename examples/{server-embd.py => server_embd.py} (100%) rename examples/train-text-from-scratch/{convert-train-checkpoint-to-gguf.py => convert_train_checkpoint_to_gguf.py} (100%) rename gguf-py/scripts/{gguf-convert-endian.py => gguf_convert_endian.py} (100%) rename gguf-py/scripts/{gguf-dump.py => gguf_dump.py} (100%) rename gguf-py/scripts/{gguf-new-metadata.py => gguf_new_metadata.py} (100%) rename gguf-py/scripts/{gguf-set-metadata.py => gguf_set_metadata.py} (100%) delete mode 100755 scripts/convert-gg.sh diff --git a/README.md b/README.md index 3569b2bbb5e34..0cc1d10f4cb08 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) ### Hot topics -- **`convert.py` has been deprecated and moved to `examples/convert-legacy-llama.py`, please use `convert-hf-to-gguf.py`** https://github.com/ggerganov/llama.cpp/pull/7430 +- **`convert.py` has been deprecated and moved to `examples/convert_legacy_llama.py`, please use `convert_hf_to_gguf.py`** https://github.com/ggerganov/llama.cpp/pull/7430 - Initial Flash-Attention support: https://github.com/ggerganov/llama.cpp/pull/5021 - BPE pre-tokenization support has been added: https://github.com/ggerganov/llama.cpp/pull/6920 - MoE memory layout has been updated - reconvert models for `mmap` support and regenerate `imatrix` https://github.com/ggerganov/llama.cpp/pull/6387 @@ -636,8 +636,8 @@ Building the program with BLAS support may lead to some performance improvements To obtain the official LLaMA 2 weights please see the Obtaining and using the Facebook LLaMA 2 model section. There is also a large selection of pre-quantized `gguf` models available on Hugging Face. -Note: `convert.py` has been moved to `examples/convert-legacy-llama.py` and shouldn't be used for anything other than `Llama/Llama2/Mistral` models and their derivatives. -It does not support LLaMA 3, you can use `convert-hf-to-gguf.py` with LLaMA 3 downloaded from Hugging Face. +Note: `convert.py` has been moved to `examples/convert_legacy_llama.py` and shouldn't be used for anything other than `Llama/Llama2/Mistral` models and their derivatives. +It does not support LLaMA 3, you can use `convert_hf_to_gguf.py` with LLaMA 3 downloaded from Hugging Face. ```bash # obtain the official LLaMA model weights and place them in ./models @@ -654,7 +654,7 @@ ls ./models python3 -m pip install -r requirements.txt # convert the model to ggml FP16 format -python3 convert-hf-to-gguf.py models/mymodel/ +python3 convert_hf_to_gguf.py models/mymodel/ # quantize the model to 4-bits (using Q4_K_M method) ./llama-quantize ./models/mymodel/ggml-model-f16.gguf ./models/mymodel/ggml-model-Q4_K_M.gguf Q4_K_M diff --git a/ci/run.sh b/ci/run.sh index 067ac405b412a..f03fd72caddc3 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -421,7 +421,7 @@ function gg_run_pythia_1_4b { (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log - python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf model_f16="${path_models}/ggml-model-f16.gguf" model_q8_0="${path_models}/ggml-model-q8_0.gguf" @@ -553,7 +553,7 @@ function gg_run_pythia_2_8b { (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log - python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf model_f16="${path_models}/ggml-model-f16.gguf" model_q8_0="${path_models}/ggml-model-q8_0.gguf" diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index bdc336644e626..7261c1736c34c 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -404,7 +404,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]: return tokens, toktypes, tokpre - # NOTE: this function is generated by convert-hf-to-gguf-update.py + # NOTE: this function is generated by convert_hf_to_gguf_update.py # do not modify it manually! # ref: https://github.com/ggerganov/llama.cpp/pull/6920 # Marker: Start get_vocab_base_pre @@ -424,7 +424,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: res = None - # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script + # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script # or pull the latest version of the model from Huggingface # don't edit the hashes manually! if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5": @@ -499,9 +499,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: logger.warning("**************************************************************************************") logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!") logger.warning("** There are 2 possible reasons for this:") - logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet") + logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet") logger.warning("** - the pre-tokenization config has changed upstream") - logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.") + logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.") logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920") logger.warning("**") logger.warning(f"** chkhsh: {chkhsh}") @@ -1161,7 +1161,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # So we rearrange them here,, so that we have n_head query weights # followed by n_head_kv key weights followed by n_head_kv value weights, # in contiguous fashion. - # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py + # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert_hf_to_gguf.py if "query_key_value" in name: n_head = self.find_hparam(["num_attention_heads", "n_head"]) diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 21a3062554578..344d034fcad7a 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # This script downloads the tokenizer models of the specified models from Huggingface and -# generates the get_vocab_base_pre() function for convert-hf-to-gguf.py +# generates the get_vocab_base_pre() function for convert_hf_to_gguf.py # # This is necessary in order to analyze the type of pre-tokenizer used by the model and # provide the necessary information to llama.cpp via the GGUF header in order to implement @@ -15,9 +15,9 @@ # - Add a new model to the "models" list # - Run the script with your huggingface token: # -# python3 convert-hf-to-gguf-update.py +# python3 convert_hf_to_gguf-update.py # -# - Copy-paste the generated get_vocab_base_pre() function into convert-hf-to-gguf.py +# - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py # - Update llama.cpp with the new pre-tokenizer if necessary # # TODO: generate tokenizer tests for llama.cpp @@ -37,7 +37,7 @@ from transformers import AutoTokenizer logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger("convert-hf-to-gguf-update") +logger = logging.getLogger("convert_hf_to_gguf-update") sess = requests.Session() @@ -56,10 +56,10 @@ class TOKENIZER_TYPE(IntEnum): token = sys.argv[1] if not token.startswith("hf_"): logger.info("Huggingface token seems invalid") - logger.info("Usage: python convert-hf-to-gguf-update.py ") + logger.info("Usage: python convert_hf_to_gguf-update.py ") sys.exit(1) else: - logger.info("Usage: python convert-hf-to-gguf-update.py ") + logger.info("Usage: python convert_hf_to_gguf-update.py ") sys.exit(1) # TODO: add models here, base models preferred @@ -134,7 +134,7 @@ def download_model(model): logger.error(f"Failed to download model {model['name']}. Error: {e}") -# generate the source code for the convert-hf-to-gguf.py:get_vocab_base_pre() function: +# generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function: src_ifs = "" for model in models: @@ -201,7 +201,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: res = None - # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script + # NOTE: if you get an error here, you need to update the convert_hf_to_gguf-update.py script # or pull the latest version of the model from Huggingface # don't edit the hashes manually! {src_ifs} @@ -210,9 +210,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: logger.warning("**************************************************************************************") logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!") logger.warning("** There are 2 possible reasons for this:") - logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet") + logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet") logger.warning("** - the pre-tokenization config has changed upstream") - logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.") + logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.") logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920") logger.warning("**") logger.warning(f"** chkhsh: {{chkhsh}}") @@ -226,7 +226,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: return res """ -convert_py_pth = pathlib.Path("convert-hf-to-gguf.py") +convert_py_pth = pathlib.Path("convert_hf_to_gguf.py") convert_py = convert_py_pth.read_text(encoding="utf-8") convert_py = re.sub( r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)", @@ -237,7 +237,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: convert_py_pth.write_text(convert_py, encoding="utf-8") -logger.info("+++ convert-hf-to-gguf.py was updated") +logger.info("+++ convert_hf_to_gguf.py was updated") # generate tests for each tokenizer model @@ -343,6 +343,6 @@ def get_vocab_base_pre(self, tokenizer) -> str: for model in models: name = model["name"] - print(f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only") # noqa: NP100 + print(f"python3 convert_hf_to_gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only") # noqa: NP100 logger.info("\n") diff --git a/docs/HOWTO-add-model.md b/docs/HOWTO-add-model.md index 3eec077ea7bd9..a2a8168045c20 100644 --- a/docs/HOWTO-add-model.md +++ b/docs/HOWTO-add-model.md @@ -17,7 +17,7 @@ Also, it is important to check that the examples and main ggml backends (CUDA, M ### 1. Convert the model to GGUF This step is done in python with a `convert` script using the [gguf](https://pypi.org/project/gguf/) library. -Depending on the model architecture, you can use either [convert-hf-to-gguf.py](../convert-hf-to-gguf.py) or [examples/convert-legacy-llama.py](../examples/convert-legacy-llama.py) (for `llama/llama2` models in `.pth` format). +Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert-legacy-llama.py](../examples/convert-legacy-llama.py) (for `llama/llama2` models in `.pth` format). The convert script reads the model configuration, tokenizer, tensor names+data and converts them to GGUF metadata and tensors. diff --git a/examples/convert-legacy-llama.py b/examples/convert_legacy_llama.py similarity index 100% rename from examples/convert-legacy-llama.py rename to examples/convert_legacy_llama.py diff --git a/examples/finetune/convert-finetune-checkpoint-to-gguf.py b/examples/finetune/convert_finetune_checkpoint_to_gguf.py similarity index 100% rename from examples/finetune/convert-finetune-checkpoint-to-gguf.py rename to examples/finetune/convert_finetune_checkpoint_to_gguf.py diff --git a/examples/json-schema-pydantic-example.py b/examples/json_schema_pydantic_example.py similarity index 100% rename from examples/json-schema-pydantic-example.py rename to examples/json_schema_pydantic_example.py diff --git a/examples/llava/MobileVLM-README.md b/examples/llava/MobileVLM-README.md index f6c619c87df55..06a65fba4787a 100644 --- a/examples/llava/MobileVLM-README.md +++ b/examples/llava/MobileVLM-README.md @@ -30,16 +30,16 @@ git clone https://huggingface.co/mtgv/MobileVLM-1.7B git clone https://huggingface.co/openai/clip-vit-large-patch14-336 ``` -2. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: +2. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: ```sh -python ./examples/llava/llava-surgery.py -m path/to/MobileVLM-1.7B +python ./examples/llava/llava_surgery.py -m path/to/MobileVLM-1.7B ``` -3. Use `convert-image-encoder-to-gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF: +3. Use `convert_image_encoder_to_gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF: ```sh -python ./examples/llava/convert-image-encoder-to-gguf \ +python ./examples/llava/convert_image_encoder_to_gguf \ -m path/to/clip-vit-large-patch14-336 \ --llava-projector path/to/MobileVLM-1.7B/llava.projector \ --output-dir path/to/MobileVLM-1.7B \ @@ -47,17 +47,17 @@ python ./examples/llava/convert-image-encoder-to-gguf \ ``` ```sh -python ./examples/llava/convert-image-encoder-to-gguf \ +python ./examples/llava/convert_image_encoder_to_gguf \ -m path/to/clip-vit-large-patch14-336 \ --llava-projector path/to/MobileVLM-1.7B_V2/llava.projector \ --output-dir path/to/MobileVLM-1.7B_V2 \ --projector-type ldpv2 ``` -4. Use `examples/convert-legacy-llama.py` to convert the LLaMA part of LLaVA to GGUF: +4. Use `examples/convert_legacy_llama.py` to convert the LLaMA part of LLaVA to GGUF: ```sh -python ./examples/convert-legacy-llama.py path/to/MobileVLM-1.7B +python ./examples/convert_legacy_llama.py path/to/MobileVLM-1.7B ``` 5. Use `quantize` to convert LLaMA part's DataType from `fp16` to `q4_k` diff --git a/examples/llava/README.md b/examples/llava/README.md index f4554de676e4c..012451361763c 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -38,22 +38,22 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336 pip install -r examples/llava/requirements.txt ``` -3. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: +3. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: ```sh -python ./examples/llava/llava-surgery.py -m ../llava-v1.5-7b +python ./examples/llava/llava_surgery.py -m ../llava-v1.5-7b ``` -4. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF: +4. Use `convert_image_encoder_to_gguf.py` to convert the LLaVA image encoder to GGUF: ```sh -python ./examples/llava/convert-image-encoder-to-gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b +python ./examples/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b ``` -5. Use `examples/convert-legacy-llama.py` to convert the LLaMA part of LLaVA to GGUF: +5. Use `examples/convert_legacy_llama.py` to convert the LLaMA part of LLaVA to GGUF: ```sh -python ./examples/convert-legacy-llama.py ../llava-v1.5-7b --skip-unknown +python ./examples/convert_legacy_llama.py ../llava-v1.5-7b --skip-unknown ``` Now both the LLaMA part and the image encoder are in the `llava-v1.5-7b` directory. @@ -70,9 +70,9 @@ git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b pip install -r examples/llava/requirements.txt ``` -3) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models: +3) Use `llava_surgery_v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models: ```console -python examples/llava/llava-surgery-v2.py -C -m ../llava-v1.6-vicuna-7b/ +python examples/llava/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/ ``` - you will find a llava.projector and a llava.clip file in your model directory @@ -86,13 +86,13 @@ curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.jso 5) Create the visual gguf model: ```console -python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision +python ./examples/llava/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision ``` - This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP 6) Then convert the model to gguf format: ```console -python ./examples/convert-legacy-llama.py ../llava-v1.6-vicuna-7b/ --skip-unknown +python ./examples/convert_legacy_llama.py ../llava-v1.6-vicuna-7b/ --skip-unknown ``` 7) And finally we can run the llava cli using the 1.6 model version: diff --git a/examples/llava/convert-image-encoder-to-gguf.py b/examples/llava/convert_image_encoder_to_gguf.py similarity index 100% rename from examples/llava/convert-image-encoder-to-gguf.py rename to examples/llava/convert_image_encoder_to_gguf.py diff --git a/examples/llava/llava-surgery.py b/examples/llava/llava_surgery.py similarity index 100% rename from examples/llava/llava-surgery.py rename to examples/llava/llava_surgery.py diff --git a/examples/llava/llava-surgery-v2.py b/examples/llava/llava_surgery_v2.py similarity index 100% rename from examples/llava/llava-surgery-v2.py rename to examples/llava/llava_surgery_v2.py diff --git a/examples/pydantic-models-to-grammar-examples.py b/examples/pydantic_models_to_grammar_examples.py similarity index 100% rename from examples/pydantic-models-to-grammar-examples.py rename to examples/pydantic_models_to_grammar_examples.py diff --git a/examples/regex-to-grammar.py b/examples/regex_to_grammar.py similarity index 100% rename from examples/regex-to-grammar.py rename to examples/regex_to_grammar.py diff --git a/examples/server-embd.py b/examples/server_embd.py similarity index 100% rename from examples/server-embd.py rename to examples/server_embd.py diff --git a/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py b/examples/train-text-from-scratch/convert_train_checkpoint_to_gguf.py similarity index 100% rename from examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py rename to examples/train-text-from-scratch/convert_train_checkpoint_to_gguf.py diff --git a/gguf-py/README.md b/gguf-py/README.md index a04c22759793d..2d8d03eee5e59 100644 --- a/gguf-py/README.md +++ b/gguf-py/README.md @@ -3,7 +3,7 @@ This is a Python package for writing binary files in the [GGUF](https://github.com/ggerganov/ggml/pull/302) (GGML Universal File) format. -See [convert-llama-hf-to-gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/convert-hf-to-gguf.py) +See [convert-llama-hf-to-gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) as an example for its usage. ## Installation diff --git a/gguf-py/scripts/gguf-convert-endian.py b/gguf-py/scripts/gguf_convert_endian.py similarity index 100% rename from gguf-py/scripts/gguf-convert-endian.py rename to gguf-py/scripts/gguf_convert_endian.py diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf_dump.py similarity index 100% rename from gguf-py/scripts/gguf-dump.py rename to gguf-py/scripts/gguf_dump.py diff --git a/gguf-py/scripts/gguf-new-metadata.py b/gguf-py/scripts/gguf_new_metadata.py similarity index 100% rename from gguf-py/scripts/gguf-new-metadata.py rename to gguf-py/scripts/gguf_new_metadata.py diff --git a/gguf-py/scripts/gguf-set-metadata.py b/gguf-py/scripts/gguf_set_metadata.py similarity index 100% rename from gguf-py/scripts/gguf-set-metadata.py rename to gguf-py/scripts/gguf_set_metadata.py diff --git a/scripts/check-requirements.sh b/scripts/check-requirements.sh index 69a08c8410768..27f264999a165 100755 --- a/scripts/check-requirements.sh +++ b/scripts/check-requirements.sh @@ -97,9 +97,9 @@ check_requirements() { } check_convert_script() { - local py=$1 # e.g. ./convert-hf-to-gguf.py - local pyname=${py##*/} # e.g. convert-hf-to-gguf.py - pyname=${pyname%.py} # e.g. convert-hf-to-gguf + local py=$1 # e.g. convert_hf_to_gguf.py + local pyname=${py##*/} # e.g. convert_hf_to_gguf.py + pyname=${pyname%.py} # e.g. convert_hf_to_gguf info "$py: beginning check" @@ -166,9 +166,9 @@ if (( do_cleanup )); then rm -rf -- "$all_venv" fi -check_convert_script examples/convert-legacy-llama.py +check_convert_script examples/convert_legacy_llama.py for py in convert_*.py; do - # skip convert-hf-to-gguf-update.py + # skip convert_hf_to_gguf_update.py # TODO: the check is failing for some reason: # https://github.com/ggerganov/llama.cpp/actions/runs/8875330981/job/24364557177?pr=6920 [[ $py == convert_hf_to_gguf_update.py ]] && continue diff --git a/scripts/convert-gg.sh b/scripts/convert-gg.sh deleted file mode 100755 index 8a016843290b9..0000000000000 --- a/scripts/convert-gg.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -set -e - -# LLaMA v1 -python3 examples/convert-legacy-llama.py ../llama1/7B --outfile models/llama-7b/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../llama1/13B --outfile models/llama-13b/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../llama1/30B --outfile models/llama-30b/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../llama1/65B --outfile models/llama-65b/ggml-model-f16.gguf --outtype f16 - -# LLaMA v2 -python3 examples/convert-legacy-llama.py ../llama2/llama-2-7b --outfile models/llama-7b-v2/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../llama2/llama-2-13b --outfile models/llama-13b-v2/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../llama2/llama-2-70b --outfile models/llama-70b-v2/ggml-model-f16.gguf --outtype f16 - -# Code Llama -python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-7b/ --outfile models/codellama-7b/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-13b/ --outfile models/codellama-13b/ggml-model-f16.gguf --outtype f16 -python3 examples/convert-legacy-llama.py ../codellama/CodeLlama-34b/ --outfile models/codellama-34b/ggml-model-f16.gguf --outtype f16 - -# Falcon -python3 convert-falcon-hf-to-gguf.py ../falcon/falcon-7b 1 -mv -v ../falcon/falcon-7b/ggml-model-f16.gguf models/falcon-7b/ggml-model-f16.gguf - -python3 convert-falcon-hf-to-gguf.py ../falcon/falcon-40b 1 -mv -v ../falcon/falcon-40b/ggml-model-f16.gguf models/falcon-40b/ggml-model-f16.gguf diff --git a/scripts/pod-llama.sh b/scripts/pod-llama.sh index 0d6d4032d8a9e..6e56e1ed0908c 100644 --- a/scripts/pod-llama.sh +++ b/scripts/pod-llama.sh @@ -75,7 +75,7 @@ if [ "$1" -eq "1" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k @@ -90,7 +90,7 @@ if [ "$1" -eq "2" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k @@ -105,7 +105,7 @@ if [ "$1" -eq "3" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k @@ -120,7 +120,7 @@ if [ "$1" -eq "4" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k @@ -135,7 +135,7 @@ if [ "$1" -eq "5" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k @@ -150,7 +150,7 @@ if [ "$1" -eq "6" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k @@ -165,7 +165,7 @@ if [ "$1" -eq "7" ]; then cd /workspace/llama.cpp - python3 examples/convert-legacy-llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16 + python3 examples/convert_legacy_llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16 ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0 ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k From d8f2da6b9fcd5a60b83ac4f0b626a72d969ab053 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 4 Jul 2024 20:47:03 +0300 Subject: [PATCH 2/6] cont ggml-ci --- gguf-py/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gguf-py/README.md b/gguf-py/README.md index 2d8d03eee5e59..bc46d6e1dd708 100644 --- a/gguf-py/README.md +++ b/gguf-py/README.md @@ -3,7 +3,7 @@ This is a Python package for writing binary files in the [GGUF](https://github.com/ggerganov/ggml/pull/302) (GGML Universal File) format. -See [convert-llama-hf-to-gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) +See [convert_hf_to_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) as an example for its usage. ## Installation @@ -15,13 +15,13 @@ pip install gguf [examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model. -[scripts/gguf-dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-dump.py) — Dumps a GGUF file's metadata to the console. +[scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console. -[scripts/gguf-set-metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-set-metadata.py) — Allows changing simple metadata values in a GGUF file by key. +[scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key. -[scripts/gguf-convert-endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-convert-endian.py) — Allows converting the endianness of GGUF files. +[scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files. -[scripts/gguf-new-metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-new-metadata.py) — Copies a GGUF file with added/modified/removed metadata values. +[scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values. ## Development Maintainers who participate in development of this package are advised to install it in editable mode: From c172b322c2e915c3bbc8f49fce52d9bc23640565 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 4 Jul 2024 22:28:19 +0300 Subject: [PATCH 3/6] cont ggml-ci --- ci/run.sh | 2 +- convert_hf_to_gguf.py | 2 +- convert_hf_to_gguf_update.py | 10 +++++----- docs/HOWTO-add-model.md | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index f03fd72caddc3..9703b77ce51db 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -287,7 +287,7 @@ function gg_run_open_llama_7b_v2 { (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log - python3 ../examples/convert-legacy-llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf model_f16="${path_models}/ggml-model-f16.gguf" model_q8_0="${path_models}/ggml-model-q8_0.gguf" diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7261c1736c34c..d1422c4138b94 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1161,7 +1161,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # So we rearrange them here,, so that we have n_head query weights # followed by n_head_kv key weights followed by n_head_kv value weights, # in contiguous fashion. - # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert_hf_to_gguf.py + # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-gguf.py if "query_key_value" in name: n_head = self.find_hparam(["num_attention_heads", "n_head"]) diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 344d034fcad7a..e4165ae2d977c 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -15,7 +15,7 @@ # - Add a new model to the "models" list # - Run the script with your huggingface token: # -# python3 convert_hf_to_gguf-update.py +# python3 convert_hf_to_gguf_update.py # # - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py # - Update llama.cpp with the new pre-tokenizer if necessary @@ -37,7 +37,7 @@ from transformers import AutoTokenizer logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger("convert_hf_to_gguf-update") +logger = logging.getLogger("convert_hf_to_gguf_update") sess = requests.Session() @@ -56,10 +56,10 @@ class TOKENIZER_TYPE(IntEnum): token = sys.argv[1] if not token.startswith("hf_"): logger.info("Huggingface token seems invalid") - logger.info("Usage: python convert_hf_to_gguf-update.py ") + logger.info("Usage: python convert_hf_to_gguf_update.py ") sys.exit(1) else: - logger.info("Usage: python convert_hf_to_gguf-update.py ") + logger.info("Usage: python convert_hf_to_gguf_update.py ") sys.exit(1) # TODO: add models here, base models preferred @@ -201,7 +201,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: res = None - # NOTE: if you get an error here, you need to update the convert_hf_to_gguf-update.py script + # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script # or pull the latest version of the model from Huggingface # don't edit the hashes manually! {src_ifs} diff --git a/docs/HOWTO-add-model.md b/docs/HOWTO-add-model.md index a2a8168045c20..87093cedd9ab6 100644 --- a/docs/HOWTO-add-model.md +++ b/docs/HOWTO-add-model.md @@ -17,7 +17,7 @@ Also, it is important to check that the examples and main ggml backends (CUDA, M ### 1. Convert the model to GGUF This step is done in python with a `convert` script using the [gguf](https://pypi.org/project/gguf/) library. -Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert-legacy-llama.py](../examples/convert-legacy-llama.py) (for `llama/llama2` models in `.pth` format). +Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert_legacy_llama.py](../examples/convert_legacy_llama.py) (for `llama/llama2` models in `.pth` format). The convert script reads the model configuration, tokenizer, tensor names+data and converts them to GGUF metadata and tensors. From 3e3cc7102ff8c1ef2fcdf4897759958665398e16 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 4 Jul 2024 22:36:36 +0300 Subject: [PATCH 4/6] cont : fix link --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index d1422c4138b94..ed54905932044 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1161,7 +1161,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # So we rearrange them here,, so that we have n_head query weights # followed by n_head_kv key weights followed by n_head_kv value weights, # in contiguous fashion. - # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-gguf.py + # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py if "query_key_value" in name: n_head = self.find_hparam(["num_attention_heads", "n_head"]) From 902de8826b325afbdb5eda4c6895b6e588bede09 Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Thu, 4 Jul 2024 16:08:15 -0400 Subject: [PATCH 5/6] gguf-py : use snake_case in scripts entrypoint export --- examples/json_schema_pydantic_example.py | 2 +- gguf-py/scripts/__init__.py | 17 ++++------------- scripts/check-requirements.sh | 2 +- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/examples/json_schema_pydantic_example.py b/examples/json_schema_pydantic_example.py index 2a24f81189fb1..c7ca7b8d9041a 100644 --- a/examples/json_schema_pydantic_example.py +++ b/examples/json_schema_pydantic_example.py @@ -1,7 +1,7 @@ # Usage: #! ./llama-server -m some-model.gguf & #! pip install pydantic -#! python json-schema-pydantic-example.py +#! python json_schema_pydantic_example.py from pydantic import BaseModel, Extra, TypeAdapter from annotated_types import MinLen diff --git a/gguf-py/scripts/__init__.py b/gguf-py/scripts/__init__.py index 1ad45639a62ea..f9d29cb691d62 100644 --- a/gguf-py/scripts/__init__.py +++ b/gguf-py/scripts/__init__.py @@ -1,13 +1,4 @@ -import os - -from importlib import import_module - - -os.environ["NO_LOCAL_GGUF"] = "TRUE" - -gguf_convert_endian_entrypoint = import_module("scripts.gguf-convert-endian").main -gguf_dump_entrypoint = import_module("scripts.gguf-dump").main -gguf_set_metadata_entrypoint = import_module("scripts.gguf-set-metadata").main -gguf_new_metadata_entrypoint = import_module("scripts.gguf-new-metadata").main - -del import_module, os +from .gguf_convert_endian import main as gguf_convert_endian_entrypoint +from .gguf_dump import main as gguf_dump_entrypoint +from .gguf_set_metadata import main as gguf_set_metadata_entrypoint +from .gguf_new_metadata import main as gguf_new_metadata_entrypoint diff --git a/scripts/check-requirements.sh b/scripts/check-requirements.sh index 27f264999a165..48f924c02d32c 100755 --- a/scripts/check-requirements.sh +++ b/scripts/check-requirements.sh @@ -97,7 +97,7 @@ check_requirements() { } check_convert_script() { - local py=$1 # e.g. convert_hf_to_gguf.py + local py=$1 # e.g. ./convert_hf_to_gguf.py local pyname=${py##*/} # e.g. convert_hf_to_gguf.py pyname=${pyname%.py} # e.g. convert_hf_to_gguf From 91deef460613255c815320b1c567628b189a50c5 Mon Sep 17 00:00:00 2001 From: Francis Couture-Harpin Date: Thu, 4 Jul 2024 16:16:05 -0400 Subject: [PATCH 6/6] py : rename requirements for convert_legacy_llama.py Needed for scripts/check-requirements.sh --- examples/llava/requirements.txt | 2 +- requirements.txt | 2 +- requirements/requirements-convert_hf_to_gguf.txt | 2 +- requirements/requirements-convert_hf_to_gguf_update.txt | 2 +- ...t-legacy-llama.txt => requirements-convert_legacy_llama.txt} | 0 requirements/requirements-convert_llama_ggml_to_gguf.txt | 2 +- 6 files changed, 5 insertions(+), 5 deletions(-) rename requirements/{requirements-convert-legacy-llama.txt => requirements-convert_legacy_llama.txt} (100%) diff --git a/examples/llava/requirements.txt b/examples/llava/requirements.txt index 21149d6fe1494..4713f0a3460f4 100644 --- a/examples/llava/requirements.txt +++ b/examples/llava/requirements.txt @@ -1,3 +1,3 @@ --r ../../requirements/requirements-convert-legacy-llama.txt +-r ../../requirements/requirements-convert_legacy_llama.txt pillow~=10.2.0 torch~=2.2.1 diff --git a/requirements.txt b/requirements.txt index 1eca1a13f999e..52456c2e6fd24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # Package versions must stay compatible across all top-level python scripts. # --r ./requirements/requirements-convert-legacy-llama.txt +-r ./requirements/requirements-convert_legacy_llama.txt -r ./requirements/requirements-convert_hf_to_gguf.txt -r ./requirements/requirements-convert_hf_to_gguf_update.txt diff --git a/requirements/requirements-convert_hf_to_gguf.txt b/requirements/requirements-convert_hf_to_gguf.txt index a7112f39655ee..653355c07d68c 100644 --- a/requirements/requirements-convert_hf_to_gguf.txt +++ b/requirements/requirements-convert_hf_to_gguf.txt @@ -1,2 +1,2 @@ --r ./requirements-convert-legacy-llama.txt +-r ./requirements-convert_legacy_llama.txt torch~=2.2.1 diff --git a/requirements/requirements-convert_hf_to_gguf_update.txt b/requirements/requirements-convert_hf_to_gguf_update.txt index a7112f39655ee..653355c07d68c 100644 --- a/requirements/requirements-convert_hf_to_gguf_update.txt +++ b/requirements/requirements-convert_hf_to_gguf_update.txt @@ -1,2 +1,2 @@ --r ./requirements-convert-legacy-llama.txt +-r ./requirements-convert_legacy_llama.txt torch~=2.2.1 diff --git a/requirements/requirements-convert-legacy-llama.txt b/requirements/requirements-convert_legacy_llama.txt similarity index 100% rename from requirements/requirements-convert-legacy-llama.txt rename to requirements/requirements-convert_legacy_llama.txt diff --git a/requirements/requirements-convert_llama_ggml_to_gguf.txt b/requirements/requirements-convert_llama_ggml_to_gguf.txt index e80c29012a674..afe2747d448d4 100644 --- a/requirements/requirements-convert_llama_ggml_to_gguf.txt +++ b/requirements/requirements-convert_llama_ggml_to_gguf.txt @@ -1 +1 @@ --r ./requirements-convert-legacy-llama.txt +-r ./requirements-convert_legacy_llama.txt