Merge IK main 2023-08-12
Nexesenex committed Aug 12, 2024
1 parent 6e02327 commit ed8f227
Showing 43 changed files with 13,666 additions and 803 deletions.
2 changes: 1 addition & 1 deletion .devops/llama-server.Dockerfile
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev
+    apt-get install -y build-essential git libcurl4-openssl-dev curl

WORKDIR /app

4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -7,6 +7,10 @@ set(CMAKE_WARN_UNUSED_CLI YES)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

+set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
+set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES 0)
+set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS 0)
+
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
1 change: 1 addition & 0 deletions LICENSE
@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2023-2024 The ggml authors
+Copyright (c) 2024 Iwan Kawrakow

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
28 changes: 24 additions & 4 deletions Makefile
@@ -249,7 +249,7 @@ endif
# keep standard at C11 and C++11
MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon
MK_CFLAGS = -std=c11 -fPIC
-MK_CXXFLAGS = -std=c++11 -fPIC
+MK_CXXFLAGS = -std=c++17 -fPIC
MK_NVCCFLAGS = -std=c++11

ifdef LLAMA_NO_CCACHE
@@ -326,9 +326,9 @@ ifdef LLAMA_DEBUG
endif
else
MK_CPPFLAGS += -DNDEBUG
-MK_CFLAGS += -O3 -g
-MK_CXXFLAGS += -O3 -g
-MK_NVCCFLAGS += -O3 -g
+MK_CFLAGS += -O3
+MK_CXXFLAGS += -O3
+MK_NVCCFLAGS += -O3
endif

ifdef LLAMA_SANITIZE_THREAD
@@ -572,6 +572,12 @@ ifdef GGML_NVPL
OBJ_GGML += ggml/src/ggml-blas.o
endif # GGML_NVPL

+OBJ_GGML += ggml/src/iqk/iqk_quantize.o
+ifndef GGML_NO_IQKMULMAT
+MK_CPPFLAGS += -DGGML_USE_IQK_MULMAT
+OBJ_GGML += ggml/src/iqk/iqk_mul_mat.o
+endif

ifndef GGML_NO_LLAMAFILE
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
OBJ_GGML += ggml/src/llamafile/sgemm.o
@@ -1058,6 +1064,20 @@ ggml/src/ggml-blas.o: \
ggml/include/ggml-blas.h
$(CXX) $(CXXFLAGS) -c $< -o $@

+ggml/src/iqk/iqk_quantize.o: \
+	ggml/src/iqk/iqk_quantize.cpp \
+	ggml/src/iqk/iqk_quantize.h \
+	ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/include/ggml.h ggml/src/ggml-impl.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+ifndef GGML_NO_IQKMULMAT
+ggml/src/iqk/iqk_mul_mat.o: \
+	ggml/src/iqk/iqk_mul_mat.cpp \
+	ggml/src/iqk/iqk_mul_mat.h \
+	ggml/src/iqk/iqk_quantize.h \
+	ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/include/ggml.h ggml/src/ggml-impl.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+endif # GGML_NO_IQKMULMAT

ifndef GGML_NO_LLAMAFILE
ggml/src/llamafile/sgemm.o: \
ggml/src/llamafile/sgemm.cpp \
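Note: with the Makefile hunks above, ggml/src/iqk/iqk_quantize.o is always built into OBJ_GGML, while iqk_mul_mat.o and the -DGGML_USE_IQK_MULMAT define are skipped only when GGML_NO_IQKMULMAT is set. So a plain "make" enables the iqk matrix-multiplication path, and, by analogy with the existing GGML_NO_LLAMAFILE toggle, something like "make GGML_NO_IQKMULMAT=1" should opt out (an assumption; only the ifndef guard is visible in this diff).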
708 changes: 247 additions & 461 deletions README.md

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions common/common.cpp
@@ -1296,6 +1296,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
        params.process_output = true;
        return true;
    }
+    if (arg == "--output-tensor-name") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.output_tensor_name = argv[i];
+        return true;
+    }
    if (arg == "--no-ppl") {
        params.compute_ppl = false;
        return true;
1 change: 1 addition & 0 deletions common/common.h
@@ -246,6 +246,7 @@ struct gpt_params {

// imatrix params
std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file
+    std::string output_tensor_name = "output.weight"; // name of the output tensor

int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
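Note: together with the common.cpp hunk above, this adds an --output-tensor-name option whose value lands in gpt_params::output_tensor_name, defaulting to "output.weight". Presumably this lets the imatrix tool locate the model's output tensor by name instead of hard-coding it; that use is an inference, since the imatrix changes themselves are not rendered here.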
49 changes: 33 additions & 16 deletions convert_hf_to_gguf.py
@@ -1617,23 +1617,40 @@ def weight_quant(self, weight):
return weight.type(dtype), scale.type(torch.float32)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        new_name = self.map_tensor_name(name)
-
-        if any(self.match_model_tensor_name(new_name, key, bid) for key in [
-            gguf.MODEL_TENSOR.ATTN_Q,
-            gguf.MODEL_TENSOR.ATTN_K,
-            gguf.MODEL_TENSOR.ATTN_V,
-            gguf.MODEL_TENSOR.ATTN_OUT,
-            gguf.MODEL_TENSOR.FFN_UP,
-            gguf.MODEL_TENSOR.FFN_DOWN,
-            gguf.MODEL_TENSOR.FFN_GATE,
-        ]):
-            # transform weight into 1/0/-1 (in fp32)
+        # transform weight into 1/0/-1 (in fp32)
+        if name.endswith(("q_proj.weight", "k_proj.weight", "v_proj.weight",
+                          "down_proj.weight", "up_proj.weight", "gate_proj.weight",
+                          "o_proj.weight")):
            weight_torch, scale_torch = self.weight_quant(data_torch)
-            yield (new_name, weight_torch)
-            yield (new_name.removesuffix(".weight") + ".scale", scale_torch)
-        else:
-            yield (new_name, data_torch)
+
+        tensors: list[tuple[str, Tensor]] = []
+
+        if name.endswith("q_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid, suffix=".scale"), scale_torch))
+        elif name.endswith("k_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid, suffix=".scale"), scale_torch))
+        elif name.endswith("v_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid, suffix=".scale"), scale_torch))
+        elif name.endswith("o_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid, suffix=".scale"), scale_torch))
+        elif name.endswith("up_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid, suffix=".scale"), scale_torch))
+        elif name.endswith("down_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid, suffix=".scale"), scale_torch))
+        elif name.endswith("gate_proj.weight"):
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), weight_torch))
+            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid, suffix=".scale"), scale_torch))
+
+        if len(tensors) == 0:
+            tensors.append((self.map_tensor_name(name), data_torch))
+
+        return tensors


@Model.register("GrokForCausalLM")
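Note: weight_quant (its tail is visible at the top of this hunk) returns a ternarized weight plus an fp32 scale, which modify_tensors then emits as paired .weight/.scale tensors. A minimal sketch of such a 1/0/-1 transform, assuming BitNet-style mean-absolute scaling (an assumption; the full body of weight_quant is not shown in this diff):

import torch

def ternarize(weight: torch.Tensor, eps: float = 1e-5):
    # Round a weight matrix to {-1, 0, +1} plus one fp32 scale per tensor.
    # Assumption: mean-abs scaling in the style of BitNet b1.58; the real
    # weight_quant in convert_hf_to_gguf.py may differ in detail.
    scale = weight.abs().mean().clamp(min=eps)     # per-tensor fp32 scale
    quant = (weight / scale).round().clamp(-1, 1)  # entries in {-1, 0, 1}
    return quant.to(torch.float32), scale.reshape(1).to(torch.float32)

# usage: q holds the ternary values; q * s approximately reconstructs the weight
q, s = ternarize(torch.randn(4, 4))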