Skip to content

Commit

Permalink
build(python): Package scripts with PEP 517 compliance
Browse files Browse the repository at this point in the history
  • Loading branch information
ditsuke committed Jul 2, 2024
1 parent 023b880 commit 8cce8a4
Show file tree
Hide file tree
Showing 9 changed files with 1,661 additions and 35 deletions.
11 changes: 6 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,14 @@ examples/server/*.mjs.hpp

# Python

__pycache__
.venv
/Pipfile
dist
poetry.lock
/.venv
/__pycache__/
*/poetry.lock
poetry.toml

# Nix
/result

# Test binaries
/tests/test-backend-ops
/tests/test-double-float
Expand Down
Empty file added __init__.py
Empty file.
File renamed without changes.
158 changes: 128 additions & 30 deletions convert-hf-to-gguf-update.py → convert_hf_to_gguf_update.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TOKENIZER_TYPE(IntEnum):

# TODO: this string has to exercise as much pre-tokenizer functionality as possible
# will be updated with time - contributions welcome
chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
chktxt = "\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````\"\"\"\"......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL"

if len(sys.argv) == 2:
token = sys.argv[1]
Expand All @@ -63,29 +63,121 @@ class TOKENIZER_TYPE(IntEnum):

# TODO: add models here, base models preferred
models = [
{"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
{"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
{"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
{"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
{"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
{"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
{"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
{"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
{"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
{"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
{"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
{"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
{"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
{"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
{"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
{"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
{"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
{"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
{"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
{"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
{"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
{"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
{
"name": "llama-spm",
"tokt": TOKENIZER_TYPE.SPM,
"repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf",
},
{
"name": "llama-bpe",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B",
},
{
"name": "phi-3",
"tokt": TOKENIZER_TYPE.SPM,
"repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
},
{
"name": "deepseek-llm",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base",
},
{
"name": "deepseek-coder",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base",
},
{
"name": "falcon",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/tiiuae/falcon-7b",
},
{
"name": "bert-bge",
"tokt": TOKENIZER_TYPE.WPM,
"repo": "https://huggingface.co/BAAI/bge-small-en-v1.5",
},
{
"name": "mpt",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/mosaicml/mpt-7b",
},
{
"name": "starcoder",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/bigcode/starcoder2-3b",
},
{
"name": "gpt-2",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/openai-community/gpt2",
},
{
"name": "stablelm2",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b",
},
{
"name": "refact",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/smallcloudai/Refact-1_6-base",
},
{
"name": "command-r",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01",
},
{
"name": "qwen2",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/Qwen/Qwen1.5-7B",
},
{
"name": "olmo",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf",
},
{
"name": "dbrx",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/databricks/dbrx-base",
},
{
"name": "jina-v2-en",
"tokt": TOKENIZER_TYPE.WPM,
"repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en",
}, # WPM!
{
"name": "jina-v2-es",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es",
},
{
"name": "jina-v2-de",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de",
},
{
"name": "smaug-bpe",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct",
},
{
"name": "poro-chat",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/LumiOpen/Poro-34B-chat",
},
{
"name": "jina-v2-code",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code",
},
{
"name": "viking",
"tokt": TOKENIZER_TYPE.BPE,
"repo": "https://huggingface.co/LumiOpen/Viking-7B",
}, # Also used for Viking 13B and 33B
]


Expand All @@ -94,7 +186,7 @@ def download_file_with_auth(url, token, save_path):
response = sess.get(url, headers=headers)
response.raise_for_status()
os.makedirs(os.path.dirname(save_path), exist_ok=True)
with open(save_path, 'wb') as f:
with open(save_path, "wb") as f:
f.write(response.content)
logger.info(f"File {save_path} downloaded successfully")

Expand Down Expand Up @@ -144,7 +236,9 @@ def download_model(model):
try:
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
except OSError as e:
logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
logger.error(
f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}"
)
continue # Skip to the next model if the tokenizer can't be loaded

chktok = tokenizer.encode(chktxt)
Expand All @@ -164,13 +258,15 @@ def download_model(model):
pre_tokenizer = cfg["pre_tokenizer"]
logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
if "ignore_merges" in cfg["model"]:
logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
logger.info(
"ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4)
)

logger.info("")

src_ifs += f" if chkhsh == \"{chkhsh}\":\n"
src_ifs += f' if chkhsh == "{chkhsh}":\n'
src_ifs += f" # ref: {model['repo']}\n"
src_ifs += f" res = \"{name}\"\n"
src_ifs += f' res = "{name}"\n'

src_func = f"""
def get_vocab_base_pre(self, tokenizer) -> str:
Expand Down Expand Up @@ -326,6 +422,8 @@ def get_vocab_base_pre(self, tokenizer) -> str:
for model in models:
name = model["name"]

print(f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only") # noqa: NP100
print(
f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only"
) # noqa: NP100

logger.info("\n")
File renamed without changes.
149 changes: 149 additions & 0 deletions convert_lora_to_ggml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
from __future__ import annotations

import json
import os
import struct
import sys
from pathlib import Path
from typing import Any, BinaryIO, Sequence

import numpy as np
import torch

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
import gguf

# Maps a numpy dtype name to the integer type code that write_tensor_header
# packs into each tensor header. Only the dtypes listed here can be written.
NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}


def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    """Write the ggml LoRA file header to ``fout``.

    The header consists of the reversed magic ``b"ggla"`` followed by three
    values packed with ``struct`` format ``"i"``: the file version (1),
    ``params["r"]`` and ``params["lora_alpha"]``.

    Args:
        fout: Binary stream positioned where the header should be written.
        params: Adapter configuration; must provide ``"r"`` and ``"lora_alpha"``.

    Raises:
        AssertionError: If ``lora_alpha`` cannot be converted to ``int``
            without losing information. Nothing is written in that case.
    """
    # https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int
    # but some models ship a float value instead
    # let's convert to int, but fail if lossless conversion is not possible
    lora_alpha = params["lora_alpha"]
    lora_alpha_int = int(lora_alpha)
    if lora_alpha_int != lora_alpha:
        # Raised explicitly (rather than via `assert`) so the check still runs
        # under `python -O`; the exception type and message are kept as before.
        raise AssertionError("cannot convert float to int losslessly")

    # Validation happens before any write so a rejected config never leaves a
    # partially written header behind.
    fout.write(b"ggla"[::-1])  # magic (ggml lora)
    fout.write(struct.pack("i", 1))  # file version
    fout.write(struct.pack("i", params["r"]))
    fout.write(struct.pack("i", lora_alpha_int))


def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_type: np.dtype[Any]) -> None:
    """Write the header that precedes one tensor's data in the output file.

    The header holds, in order: the number of dimensions, the byte length of
    the UTF-8 encoded name and the type code looked up in
    ``NUMPY_TYPE_TO_FTYPE`` (all packed as ``"i"``), then the dimensions in
    reverse order, then the encoded name. Afterwards the stream position is
    moved forward to the next multiple of 32.

    Args:
        fout: Seekable binary stream to write to.
        name: Tensor name to store.
        shape: Tensor dimensions.
        data_type: numpy dtype of the tensor; its ``name`` must be a key of
            ``NUMPY_TYPE_TO_FTYPE``.

    Raises:
        KeyError: If ``data_type.name`` is not in ``NUMPY_TYPE_TO_FTYPE``.
    """
    encoded_name = name.encode("utf-8")
    n_dims = len(shape)
    ftype = NUMPY_TYPE_TO_FTYPE[data_type.name]

    fout.write(struct.pack("iii", n_dims, len(encoded_name), ftype))
    # Dimensions are stored last-to-first.
    fout.write(struct.pack(f"{n_dims}i", *reversed(shape)))
    fout.write(encoded_name)

    # Skip ahead so whatever is written next starts on a 32-byte boundary.
    aligned_pos = (fout.tell() + 31) // 32 * 32
    fout.seek(aligned_pos)


if __name__ == '__main__':
    # Script entry point: read a HuggingFace PEFT LoRA adapter from the
    # directory given as the first argument and write it next to the inputs
    # as "ggml-adapter-model.bin".
    if len(sys.argv) < 2:
        print(f"Usage: python {sys.argv[0]} <path> [arch]")
        print(
            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
        )
        print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
        sys.exit(1)

    input_json = os.path.join(sys.argv[1], "adapter_config.json")
    input_model = os.path.join(sys.argv[1], "adapter_model.bin")
    output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")

    if os.path.exists(input_model):
        # NOTE(security): torch.load unpickles the checkpoint, which can execute
        # arbitrary code. Only run this on adapters from a trusted source
        # (consider weights_only=True where the installed torch supports it).
        model = torch.load(input_model, map_location="cpu")
    else:
        input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file
        model = load_file(input_model, device="cpu")

    # Use the second argument as the architecture whenever it is present, so
    # trailing extra arguments no longer cause a silent fallback to "llama".
    arch_name = sys.argv[2] if len(sys.argv) >= 3 else "llama"

    if arch_name not in gguf.MODEL_ARCH_NAMES.values():
        print(f"Error: unsupported architecture {arch_name}")
        sys.exit(1)

    # Reverse lookup: first architecture key whose display name matches.
    arch = next(key for key, value in gguf.MODEL_ARCH_NAMES.items() if value == arch_name)
    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(input_json, "r", encoding="utf-8") as f:
        params = json.load(f)

    # The optional keys below are read with .get() so that a config which
    # omits them is treated as "not set" instead of dying with a KeyError.
    peft_type = params.get("peft_type")
    if peft_type != "LORA":
        print(f"Error: unsupported adapter type {peft_type}, expected LORA")
        sys.exit(1)

    if params.get("fan_in_fan_out") is True:
        print("Error: param fan_in_fan_out is not supported")
        sys.exit(1)

    if params.get("bias") not in (None, "none"):
        print("Error: param bias is not supported")
        sys.exit(1)

    # TODO: these seem to be layers that have been trained but without lora.
    # doesn't seem widely used but eventually should be supported
    if params.get("modules_to_save"):
        print("Error: param modules_to_save is not supported")
        sys.exit(1)

    # PEFT tensor-name suffix -> suffix appended to the mapped tensor name.
    lora_suffix_map = {
        ".lora_A.weight": ".weight.loraA",
        ".lora_B.weight": ".weight.loraB",
    }
    prefix = "base_model.model."

    # Mode "wb" already truncates an existing file, so no explicit truncate().
    with open(output_path, "wb") as fout:
        write_file_header(fout, params)
        for k, v in model.items():
            orig_k = k
            if k.endswith(".default.weight"):
                k = k.replace(".default.weight", ".weight")
            if k in ["llama_proj.weight", "llama_proj.bias"]:
                continue
            if k.endswith("lora_A.weight"):
                # lora_A keeps float16/float32 as-is (anything else becomes
                # float32) and is written transposed.
                if v.dtype != torch.float16 and v.dtype != torch.float32:
                    v = v.float()
                v = v.T
            else:
                # Every other tensor is converted to float32.
                v = v.float()

            t = v.detach().numpy()

            if k.startswith(prefix):
                k = k[len(prefix):]

            # Find the matching suffix by itself rather than slicing by the
            # length of the first one, so suffixes need not be equally long.
            suffix = next((s for s in lora_suffix_map if k.endswith(s)), None)
            if suffix is None:
                print(f"Error: unrecognized tensor name {orig_k}")
                sys.exit(1)
            k = k[: -len(suffix)]

            tname = name_map.get_name(k)
            if tname is None:
                print(f"Error: could not map tensor name {orig_k}")
                print(" Note: the arch parameter must be specified if the model is not llama")
                sys.exit(1)

            tname += lora_suffix_map[suffix]

            print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
            write_tensor_header(fout, tname, t.shape, t.dtype)
            t.tofile(fout)

    print(f"Converted {input_json} and {input_model} to {output_path}")

Loading

0 comments on commit 8cce8a4

Please sign in to comment.