self merge ok
ngxson committed Mar 3, 2024
1 parent 6573043 commit a032bb6
Showing 4 changed files with 248 additions and 137 deletions.
107 changes: 96 additions & 11 deletions examples/merge/config.example.txt
@@ -7,17 +7,19 @@
# Supported verbs:
# - linear: merge linearly, parameters: source_layer,source_layer,scale,scale
# - slerp: spherical linear interpolation, parameters: source_layer,source_layer,t
# - copy: copy from which model, which layer
# - repeat: repeat a layer in the same output model (to reduce file size)
#
# For example:


#########################
# Example:

# This is the first layer of the output model:
# For all tensors, we want slerp(model[0].layer[0], model[1].layer[0], 0.1)
# Except for the "attn_output" tensor, where we want t=0.5 instead of t=0.1

output layer 0
all slerp 0,0,0.9
attn_output slerp 0,0,0.9
all slerp 0,0,0.1
attn_output slerp 0,0,0.5

# For the next layer, we want: model[0].layer[1]*0.6 + model[1].layer[1]*0.4
# Except for the "attn_output" tensor, where we want to use slerp with t=0.9
@@ -26,13 +28,96 @@ output layer 1
all linear 1,1,0.6,0.4
attn_output slerp 1,1,0.9

output layer 2
all linear 2,2,1.0,0.0
# For the next layer, we want to copy from model[0].layer[2]

# repeat the first layers defined earlier in this file
output layer 2
all copy 0,2

output layer 3
all repeat 0
all copy 0,3

# For the next layer, we want to copy from model[1].layer[4]

output layer 4
all repeat 1
all copy 1,4

output layer 5
all copy 1,5

output layer 6
all linear 6,6,0.1,0.9

output layer 7
all linear 7,7,0.1,0.9

output layer 8
all linear 8,8,0.1,0.9

output layer 9
all linear 9,9,0.1,0.9

output layer 10
all linear 10,10,0.1,0.9

output layer 11
all linear 11,11,0.1,0.9

output layer 12
all linear 12,12,0.1,0.9

output layer 13
all linear 13,13,0.3333,0.6666

output layer 14
all linear 14,14,0.3333,0.6666

output layer 15
all linear 15,15,0.3333,0.6666

output layer 16
all linear 16,16,0.3333,0.6666

output layer 17
all linear 17,17,0.3333,0.6666

output layer 18
all linear 18,18,0.3333,0.6666

output layer 19
all linear 19,19,0.3333,0.6666

output layer 20
all slerp 20,20,0.8

output layer 21
all slerp 21,21,0.8

output layer 22
all slerp 22,22,0.8

output layer 23
all slerp 23,23,0.8

output layer 24
all slerp 24,24,0.8

output layer 25
all slerp 25,25,0.8

output layer 26
all slerp 26,26,0.8

output layer 27
all slerp 27,27,0.8

output layer 28
all slerp 28,28,0.8

output layer 29
all slerp 29,29,0.8

output layer 30
all slerp 30,30,0.8

output layer 31
all slerp 31,31,0.8
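
For reference, the two merge verbs used throughout the config boil down to standard formulas: linear computes an element-wise weighted sum, out = scale0*model0_tensor + scale1*model1_tensor, while slerp interpolates along the arc between the two tensors viewed as flat vectors. The sketch below illustrates that math on plain float vectors; it is not code from this commit, and the helper names (merge_linear, merge_slerp) are made up for illustration.

#include <cmath>
#include <cstddef>
#include <vector>

// linear: element-wise weighted sum of the two source tensors
static std::vector<float> merge_linear(const std::vector<float> & a,
                                       const std::vector<float> & b,
                                       float scale0, float scale1) {
    std::vector<float> out(a.size());
    for (size_t i = 0; i < a.size(); i++) {
        out[i] = scale0 * a[i] + scale1 * b[i];
    }
    return out;
}

// slerp: spherical interpolation between the two tensors treated as flat
// vectors; falls back to plain lerp when they are nearly parallel
static std::vector<float> merge_slerp(const std::vector<float> & a,
                                      const std::vector<float> & b,
                                      float t) {
    double dot = 0.0, norm_a = 0.0, norm_b = 0.0;
    for (size_t i = 0; i < a.size(); i++) {
        dot    += (double) a[i] * b[i];
        norm_a += (double) a[i] * a[i];
        norm_b += (double) b[i] * b[i];
    }
    double cos_theta = dot / (std::sqrt(norm_a) * std::sqrt(norm_b) + 1e-12);
    std::vector<float> out(a.size());
    if (std::fabs(cos_theta) > 0.9995) {
        // nearly parallel vectors: plain linear interpolation
        for (size_t i = 0; i < a.size(); i++) {
            out[i] = (1.0f - t) * a[i] + t * b[i];
        }
        return out;
    }
    double theta = std::acos(cos_theta);
    double w0 = std::sin((1.0 - t) * theta) / std::sin(theta);
    double w1 = std::sin(t * theta) / std::sin(theta);
    for (size_t i = 0; i < a.size(); i++) {
        out[i] = (float)(w0 * a[i] + w1 * b[i]);
    }
    return out;
}

In the layer 0 example above, "all slerp 0,0,0.1" would correspond to merge_slerp(tensor from model 0 layer 0, tensor from model 1 layer 0, 0.1).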
38 changes: 31 additions & 7 deletions examples/merge/parser.hpp
@@ -125,7 +125,8 @@ static std::vector<struct llama_merge_inst> parse_config(std::string & config_pa
struct llama_merge_inst ins;
ins.method = LLAMA_MERGE_COPY;
strcpy(ins.name, name.c_str());
strcpy(ins.srcs[0], name.c_str());
strcpy(ins.srcs[0], name.c_str()); // always take the first model
strcpy(ins.srcs[1], "");
instructions.push_back(ins);
} else {
// tensor belongs to a layer
@@ -177,7 +178,7 @@ static std::vector<struct llama_merge_inst> parse_config(std::string & config_pa

auto parts = str_split(line, " ");
if (parts.size() != 3) {
raise_err(i_line, "does not follow format: \"target (space) verb (space) arguments\"");
raise_err(i_line, "does not follow format: \"target (space) verb (space) parameters\"");
}

auto target = parts[0];
@@ -197,7 +198,7 @@ static std::vector<struct llama_merge_inst> parse_config(std::string & config_pa

auto linear = [&](struct llama_merge_inst & ins, std::string unit) {
if (params.size() != 4) {
raise_err(i_line, "verb \"linear\" requires exactly 4 params");
raise_err(i_line, "verb \"linear\" requires exactly 4 parameters");
}
ins.method = LLAMA_MERGE_LINEAR;
int src0 = std::stoi(params[0]);
@@ -211,7 +212,7 @@ static std::vector<struct llama_merge_inst> parse_config(std::string & config_pa

auto slerp = [&](struct llama_merge_inst & ins, std::string unit) {
if (params.size() != 3) {
raise_err(i_line, "verb \"slerp\" requires exactly 3 params");
raise_err(i_line, "verb \"slerp\" requires exactly 3 parameters");
}
ins.method = LLAMA_MERGE_SLERP;
int src0 = std::stoi(params[0]);
@@ -222,14 +223,33 @@ static std::vector<struct llama_merge_inst> parse_config(std::string & config_pa
is_layer_empty = false;
};

auto repeat = [&](struct llama_merge_inst & ins, std::string unit) {
/*auto repeat = [&](struct llama_merge_inst & ins, std::string unit) {
if (params.size() != 1) {
raise_err(i_line, "verb \"repeat\" requires exactly 1 param");
raise_err(i_line, "verb \"repeat\" requires exactly 1 parameter");
}
ins.method = LLAMA_MERGE_REPEAT;
int src0 = std::stoi(params[0]);
strcpy(ins.srcs[0], get_tensor_name(src0, unit).c_str());
is_layer_empty = false;
};*/

auto copy = [&](struct llama_merge_inst & ins, std::string unit) {
if (params.size() != 2) {
raise_err(i_line, "verb \"copy\" requires exactly 2 parameters");
}
ins.method = LLAMA_MERGE_COPY;
int model = std::stoi(params[0]);
int layer = std::stoi(params[1]);
if (model == 0) {
strcpy(ins.srcs[0], get_tensor_name(layer, unit).c_str());
strcpy(ins.srcs[1], "");
} else if (model == 1) {
strcpy(ins.srcs[0], "");
strcpy(ins.srcs[1], get_tensor_name(layer, unit).c_str());
} else {
raise_err(i_line, "can only copy from model 0 or 1");
}
is_layer_empty = false;
};

auto apply_verb = [&](struct llama_merge_inst & ins, std::string unit) {
@@ -238,12 +258,16 @@ static std::vector<struct llama_merge_inst> parse_config(std::string & config_pa
} else if (verb == "slerp") {
slerp(ins, unit);
} else if (verb == "repeat") {
repeat(ins, unit);
// repeat(ins, unit);
raise_err(i_line, "repeat is currently not supported");
} else if (verb == "copy") {
copy(ins, unit);
} else {
raise_err(i_line, "invalid verb: " + verb);
}
};

// TODO: what if user does not use "all"? we may miss some tensors?
if (target == "all") {
for (auto & u : units) {
apply_verb(layer[u], u);
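
For context when reading the parser changes: the instructions it emits are llama_merge_inst records that the merge code elsewhere in this commit consumes. The actual struct lives in header changes not shown on this page; the sketch below is only a reconstruction inferred from the fields the parser touches (method, name, srcs), and the array sizes and the scales field are guesses.

// Hypothetical reconstruction, inferred from parser.hpp only; the real
// definition is in this commit's header changes and may differ.
enum llama_merge_method {
    LLAMA_MERGE_COPY,    // take the tensor from one source model as-is
    LLAMA_MERGE_LINEAR,  // scale0*model0 + scale1*model1, element-wise
    LLAMA_MERGE_SLERP,   // spherical interpolation with parameter t
    LLAMA_MERGE_REPEAT,  // referenced only by the commented-out repeat handler
};

struct llama_merge_inst {
    enum llama_merge_method method;
    char  name[128];      // tensor name in the output model (size is a guess)
    char  srcs[2][128];   // source tensor name in model 0 / model 1, "" if unused
    float scales[2];      // guessed field for the linear weights / slerp t
};

Under this reading, the new copy verb maps a config line such as "all copy 1,4" to srcs[0] = "" and srcs[1] = the layer-4 tensor name in the second model, which is what the copy lambda above does.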
