Commit 1d921c0

convert models with sparse threshold

hodlen committed Dec 18, 2023
1 parent 7fff20a commit 1d921c0
Showing 5 changed files with 44 additions and 2 deletions.
33 changes: 32 additions & 1 deletion convert.py
@@ -3,6 +3,7 @@
 import argparse
 import concurrent.futures
+import dataclasses
 import enum
 import faulthandler
 import functools
@@ -138,6 +139,28 @@ def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType:
 # hparams loading
 #
 
+@dataclass
+class PredictorParams:
+    sparse_threshold: float | None = None
+
+    @staticmethod
+    def loadPredictorJson(model: LazyModel, config_path: Path) -> PredictorParams:
+        config = json.load(open(config_path))
+        return PredictorParams(
+            sparse_threshold = config.get("sparse_threshold"),
+        )
+
+    @staticmethod
+    def load(model_plus: ModelPlus) -> PredictorParams:
+        config_path = model_plus.paths[0].parent / "config.json"
+
+        if config_path.exists():
+            params = PredictorParams.loadPredictorJson(model_plus.model, config_path)
+        else:
+            params = PredictorParams()
+
+        return params
+
 @dataclass
 class Params:
     n_vocab: int
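Note: PredictorParams reads the threshold from the MLP predictor's own config.json (resolved next to the predictor weights), not from the base model. A minimal stand-alone sketch of that lookup, with an illustrative directory name and threshold value:

```python
# Assumed layout (illustrative):
#   mlp-predictor/config.json  ->  {"sparse_threshold": 0.5}
import json
from pathlib import Path

config_path = Path("mlp-predictor") / "config.json"

# Mirrors PredictorParams.load: a missing file or key degrades to None.
threshold = None
if config_path.exists():
    threshold = json.load(open(config_path)).get("sparse_threshold")
print(threshold)  # 0.5 here; None if the file or key is absent
```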
@@ -160,6 +183,9 @@ class Params:
     # path to the directory containing the model files
     path_model: Path | None = None
 
+    # MLP predictor parameters
+    predictor_params: PredictorParams = dataclasses.field(default_factory=PredictorParams)
+
     @staticmethod
     def guessed(model: LazyModel) -> Params:
         # try transformer naming first
@@ -843,6 +869,9 @@ def add_meta_arch(self, params: Params) -> None:
         if params.ftype is not None:
             self.gguf.add_file_type(params.ftype)
 
+        if params.predictor_params.sparse_threshold is not None:
+            self.gguf.add_sparse_threshold(params.predictor_params.sparse_threshold)
+
     def add_meta_vocab(self, vocab: Vocab) -> None:
         tokens = []
         scores = []
@@ -1181,10 +1210,13 @@ def main(args_in: list[str] | None = None) -> None:
 
     if not args.vocab_only:
         model_plus = load_some_model(args.model)
+        params = Params.load(model_plus)
         mlp_predictor_plus = load_mlp_model(args.mlp_model)
+        params.predictor_params = PredictorParams.load(mlp_predictor_plus)
         model_plus = merge_multifile_models([model_plus, mlp_predictor_plus])
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
+        params = Params.load(model_plus)
 
     if args.dump:
         do_dump_model(model_plus)
@@ -1193,7 +1225,6 @@
     if args.bigendian:
         endianess = gguf.GGUFEndian.BIG
 
-    params = Params.load(model_plus)
     if params.n_ctx == -1:
         if args.ctx is None:
             raise Exception("The model doesn't have a context size, and you didn't specify one with --ctx\n"
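Note: the last two hunks move Params.load ahead of the predictor handling; the late call is deleted because re-running it would rebuild params and silently drop the freshly attached predictor_params. A toy illustration of the clobbering hazard, with stand-in names:

```python
from dataclasses import dataclass, field

@dataclass
class Predictor:
    threshold: float | None = None

@dataclass
class Hparams:
    predictor: Predictor = field(default_factory=Predictor)

# Correct order, as in the hunks above: build hparams, then attach.
params = Hparams()
params.predictor = Predictor(0.5)

# The removed late reload would have discarded the attachment:
params = Hparams()
assert params.predictor.threshold is None  # threshold lost
```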
3 changes: 3 additions & 0 deletions gguf-py/gguf/constants.py
@@ -70,6 +70,9 @@ class Tokenizer:
         ADD_EOS = "tokenizer.ggml.add_eos_token"
         HF_JSON = "tokenizer.huggingface.json"
         RWKV = "tokenizer.rwkv.world"
+
+    class PowerInfer:
+        SPARSE_THRESHOLD = "powerinfer.sparse_threshold"
 
 
 #
3 changes: 3 additions & 0 deletions gguf-py/gguf/gguf_writer.py
@@ -399,6 +399,9 @@ def add_add_bos_token(self, value: bool) -> None:
     def add_add_eos_token(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_EOS, value)
 
+    def add_sparse_threshold(self, value: float) -> None:
+        self.add_float32(Keys.PowerInfer.SPARSE_THRESHOLD, value)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:
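A minimal sketch of the new writer hook, assuming the patched gguf-py from ./gguf-py; the file name, architecture, and threshold value are illustrative, and the write sequence follows the gguf-py API of that era:

```python
import gguf

# Write a metadata-only GGUF carrying the new key.
writer = gguf.GGUFWriter("sparse-meta.gguf", arch="llama")  # illustrative names
writer.add_sparse_threshold(0.5)  # float32 under "powerinfer.sparse_threshold"

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()  # no tensors were added; section stays empty
writer.close()
```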
5 changes: 5 additions & 0 deletions llama.cpp
@@ -314,6 +314,8 @@ static std::map<llm_kv, std::string> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
     { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
     { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
+
+    { LLM_KV_SPARSE_THRESHOLD, "powerinfer.sparse_threshold" },
 };
 
 struct LLM_KV {
@@ -2624,6 +2626,9 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     if (vocab.special_sep_id != -1) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].text.c_str() ); }
     if (vocab.special_pad_id != -1) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].text.c_str() ); }
     if (vocab.linefeed_id != -1) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, vocab.linefeed_id, vocab.id_to_token[vocab.linefeed_id].text.c_str() ); }
+
+    // sparse inference
+    LLAMA_LOG_INFO("%s: sparse_pred_threshold = %.2f\n", __func__, hparams.sparse_pred_threshold);
 }
 
 
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
 numpy==1.24.4
 sentencepiece==0.1.98
-gguf>=0.1.0
+-e ./gguf-py
