From f86a979b831174df5b40252d59d7483053206a3d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Sat, 16 Nov 2024 03:14:16 +0100 Subject: [PATCH 1/2] clean up; add gemma-2-2b-it --- .../snr_results_google-gemma-2-2b-it.json | 319 ++++++++++++++++++ ...Storm-8B_unfrozenparameters_50percent.yaml | 159 --------- ...Instruct_unfrozenparameters_50percent.yaml | 159 --------- 3 files changed, 319 insertions(+), 318 deletions(-) create mode 100644 model_snr_results/snr_results_google-gemma-2-2b-it.json delete mode 100644 snr_results_akjindal53244-Llama-3.1-Storm-8B_unfrozenparameters_50percent.yaml delete mode 100644 snr_results_meta-llama-Meta-Llama-3.1-8B-Instruct_unfrozenparameters_50percent.yaml diff --git a/model_snr_results/snr_results_google-gemma-2-2b-it.json b/model_snr_results/snr_results_google-gemma-2-2b-it.json new file mode 100644 index 0000000..d39275c --- /dev/null +++ b/model_snr_results/snr_results_google-gemma-2-2b-it.json @@ -0,0 +1,319 @@ +{ + "mlp.down_proj": { + "model.layers.4.mlp.down_proj": 33.30361557006836, + "model.layers.2.mlp.down_proj": 28.84607696533203, + "model.layers.6.mlp.down_proj": 19.587360382080078, + "model.layers.15.mlp.down_proj": 15.88519287109375, + "model.layers.24.mlp.down_proj": 15.06785774230957, + "model.layers.14.mlp.down_proj": 13.604269027709961, + "model.layers.17.mlp.down_proj": 13.215081214904785, + "model.layers.25.mlp.down_proj": 13.187742233276367, + "model.layers.22.mlp.down_proj": 12.597224235534668, + "model.layers.23.mlp.down_proj": 11.809103965759277, + "model.layers.20.mlp.down_proj": 11.670610427856445, + "model.layers.5.mlp.down_proj": 11.197671890258789, + "model.layers.21.mlp.down_proj": 9.488790512084961, + "model.layers.7.mlp.down_proj": 9.305631637573242, + "model.layers.18.mlp.down_proj": 9.030320167541504, + "model.layers.13.mlp.down_proj": 8.854863166809082, + "model.layers.3.mlp.down_proj": 8.736675262451172, + "model.layers.0.mlp.down_proj": 8.105010032653809, + "model.layers.19.mlp.down_proj": 7.772491455078125, + "model.layers.10.mlp.down_proj": 7.373418807983398, + "model.layers.16.mlp.down_proj": 7.208968639373779, + "model.layers.8.mlp.down_proj": 6.769254207611084, + "model.layers.11.mlp.down_proj": 6.636877536773682, + "model.layers.12.mlp.down_proj": 5.944761753082275, + "model.layers.9.mlp.down_proj": 5.525699615478516, + "model.layers.1.mlp.down_proj": 4.299622535705566 + }, + "mlp.up_proj": { + "model.layers.4.mlp.up_proj": 170.2850799560547, + "model.layers.2.mlp.up_proj": 124.66849517822266, + "model.layers.3.mlp.up_proj": 112.8239517211914, + "model.layers.6.mlp.up_proj": 48.43644332885742, + "model.layers.1.mlp.up_proj": 41.71646499633789, + "model.layers.5.mlp.up_proj": 32.67828369140625, + "model.layers.0.mlp.up_proj": 28.067331314086914, + "model.layers.7.mlp.up_proj": 28.041343688964844, + "model.layers.17.mlp.up_proj": 26.711233139038086, + "model.layers.15.mlp.up_proj": 21.886568069458008, + "model.layers.10.mlp.up_proj": 19.634605407714844, + "model.layers.9.mlp.up_proj": 16.594703674316406, + "model.layers.18.mlp.up_proj": 16.550199508666992, + "model.layers.12.mlp.up_proj": 15.803447723388672, + "model.layers.11.mlp.up_proj": 15.281010627746582, + "model.layers.16.mlp.up_proj": 15.040379524230957, + "model.layers.8.mlp.up_proj": 14.842131614685059, + "model.layers.19.mlp.up_proj": 13.117680549621582, + "model.layers.14.mlp.up_proj": 11.41967487335205, + "model.layers.20.mlp.up_proj": 11.295614242553711, + "model.layers.21.mlp.up_proj": 10.831307411193848, + "model.layers.23.mlp.up_proj": 9.982320785522461, + "model.layers.13.mlp.up_proj": 9.852916717529297, + "model.layers.24.mlp.up_proj": 7.668051719665527, + "model.layers.22.mlp.up_proj": 7.442222595214844, + "model.layers.25.mlp.up_proj": 4.745918273925781 + }, + "self_attn.k_proj": { + "model.layers.20.self_attn.k_proj": 1.2623848915100098, + "model.layers.21.self_attn.k_proj": 1.126978874206543, + "model.layers.2.self_attn.k_proj": 1.0654395818710327, + "model.layers.1.self_attn.k_proj": 1.0484901666641235, + "model.layers.3.self_attn.k_proj": 0.9912707805633545, + "model.layers.19.self_attn.k_proj": 0.9845684170722961, + "model.layers.18.self_attn.k_proj": 0.9728671312332153, + "model.layers.4.self_attn.k_proj": 0.9639020562171936, + "model.layers.6.self_attn.k_proj": 0.9274349808692932, + "model.layers.8.self_attn.k_proj": 0.9095296263694763, + "model.layers.15.self_attn.k_proj": 0.9011291265487671, + "model.layers.17.self_attn.k_proj": 0.9004068970680237, + "model.layers.23.self_attn.k_proj": 0.8472215533256531, + "model.layers.5.self_attn.k_proj": 0.8334033489227295, + "model.layers.12.self_attn.k_proj": 0.8052268028259277, + "model.layers.14.self_attn.k_proj": 0.7967967391014099, + "model.layers.24.self_attn.k_proj": 0.7792221903800964, + "model.layers.9.self_attn.k_proj": 0.7442198395729065, + "model.layers.22.self_attn.k_proj": 0.738936185836792, + "model.layers.16.self_attn.k_proj": 0.7356035113334656, + "model.layers.13.self_attn.k_proj": 0.7267259955406189, + "model.layers.7.self_attn.k_proj": 0.7121332883834839, + "model.layers.25.self_attn.k_proj": 0.7094318270683289, + "model.layers.10.self_attn.k_proj": 0.6790370345115662, + "model.layers.11.self_attn.k_proj": 0.6573328971862793, + "model.layers.0.self_attn.k_proj": 0.5670744776725769 + }, + "self_attn.o_proj": { + "model.layers.25.self_attn.o_proj": 0.3131503760814667, + "model.layers.6.self_attn.o_proj": 0.30267348885536194, + "model.layers.5.self_attn.o_proj": 0.29917699098587036, + "model.layers.4.self_attn.o_proj": 0.294284462928772, + "model.layers.1.self_attn.o_proj": 0.29038190841674805, + "model.layers.15.self_attn.o_proj": 0.279990553855896, + "model.layers.7.self_attn.o_proj": 0.2798372209072113, + "model.layers.3.self_attn.o_proj": 0.27829504013061523, + "model.layers.2.self_attn.o_proj": 0.27560076117515564, + "model.layers.12.self_attn.o_proj": 0.27154284715652466, + "model.layers.8.self_attn.o_proj": 0.25681808590888977, + "model.layers.11.self_attn.o_proj": 0.2539152503013611, + "model.layers.17.self_attn.o_proj": 0.2529059052467346, + "model.layers.0.self_attn.o_proj": 0.25265467166900635, + "model.layers.13.self_attn.o_proj": 0.25026363134384155, + "model.layers.22.self_attn.o_proj": 0.24831214547157288, + "model.layers.10.self_attn.o_proj": 0.24782003462314606, + "model.layers.14.self_attn.o_proj": 0.2316737025976181, + "model.layers.9.self_attn.o_proj": 0.2291463017463684, + "model.layers.16.self_attn.o_proj": 0.2279209941625595, + "model.layers.19.self_attn.o_proj": 0.2066410332918167, + "model.layers.23.self_attn.o_proj": 0.20168422162532806, + "model.layers.20.self_attn.o_proj": 0.20063552260398865, + "model.layers.21.self_attn.o_proj": 0.1952311396598816, + "model.layers.24.self_attn.o_proj": 0.18264922499656677, + "model.layers.18.self_attn.o_proj": 0.1780945062637329 + }, + "self_attn.q_proj": { + "model.layers.14.self_attn.q_proj": 0.5165186524391174, + "model.layers.6.self_attn.q_proj": 0.4943715035915375, + "model.layers.17.self_attn.q_proj": 0.47298240661621094, + "model.layers.16.self_attn.q_proj": 0.4636949300765991, + "model.layers.12.self_attn.q_proj": 0.4446524381637573, + "model.layers.10.self_attn.q_proj": 0.4427404999732971, + "model.layers.13.self_attn.q_proj": 0.4392520785331726, + "model.layers.15.self_attn.q_proj": 0.42862948775291443, + "model.layers.7.self_attn.q_proj": 0.42357802391052246, + "model.layers.19.self_attn.q_proj": 0.4219931662082672, + "model.layers.11.self_attn.q_proj": 0.4105197787284851, + "model.layers.18.self_attn.q_proj": 0.40164801478385925, + "model.layers.21.self_attn.q_proj": 0.397067129611969, + "model.layers.4.self_attn.q_proj": 0.3742890954017639, + "model.layers.20.self_attn.q_proj": 0.36900317668914795, + "model.layers.2.self_attn.q_proj": 0.3575708866119385, + "model.layers.23.self_attn.q_proj": 0.3568076491355896, + "model.layers.22.self_attn.q_proj": 0.351676344871521, + "model.layers.9.self_attn.q_proj": 0.3485497236251831, + "model.layers.1.self_attn.q_proj": 0.3282804489135742, + "model.layers.5.self_attn.q_proj": 0.315373957157135, + "model.layers.24.self_attn.q_proj": 0.28249993920326233, + "model.layers.3.self_attn.q_proj": 0.28181594610214233, + "model.layers.8.self_attn.q_proj": 0.26795536279678345, + "model.layers.0.self_attn.q_proj": 0.20543718338012695, + "model.layers.25.self_attn.q_proj": 0.19488219916820526 + }, + "self_attn.v_proj": { + "model.layers.18.self_attn.v_proj": 8.96899127960205, + "model.layers.19.self_attn.v_proj": 5.689997673034668, + "model.layers.4.self_attn.v_proj": 5.57612419128418, + "model.layers.25.self_attn.v_proj": 5.519944190979004, + "model.layers.21.self_attn.v_proj": 4.606760025024414, + "model.layers.15.self_attn.v_proj": 4.523311614990234, + "model.layers.7.self_attn.v_proj": 4.476346015930176, + "model.layers.3.self_attn.v_proj": 4.074836254119873, + "model.layers.17.self_attn.v_proj": 4.035794734954834, + "model.layers.2.self_attn.v_proj": 3.826523780822754, + "model.layers.24.self_attn.v_proj": 3.7527880668640137, + "model.layers.20.self_attn.v_proj": 3.703176498413086, + "model.layers.14.self_attn.v_proj": 3.437045097351074, + "model.layers.5.self_attn.v_proj": 2.7114996910095215, + "model.layers.23.self_attn.v_proj": 2.575815200805664, + "model.layers.1.self_attn.v_proj": 2.421088218688965, + "model.layers.13.self_attn.v_proj": 2.371032476425171, + "model.layers.11.self_attn.v_proj": 2.1319024562835693, + "model.layers.8.self_attn.v_proj": 1.7037322521209717, + "model.layers.9.self_attn.v_proj": 1.6977472305297852, + "model.layers.16.self_attn.v_proj": 1.5405595302581787, + "model.layers.12.self_attn.v_proj": 1.520254135131836, + "model.layers.6.self_attn.v_proj": 1.4515656232833862, + "model.layers.10.self_attn.v_proj": 1.4224656820297241, + "model.layers.0.self_attn.v_proj": 1.3283824920654297, + "model.layers.22.self_attn.v_proj": 0.9253509640693665 + }, + "mlp.gate_proj": { + "model.layers.1.mlp.gate_proj": 22.19009780883789, + "model.layers.2.mlp.gate_proj": 21.98990249633789, + "model.layers.0.mlp.gate_proj": 17.114286422729492, + "model.layers.3.mlp.gate_proj": 16.797019958496094, + "model.layers.4.mlp.gate_proj": 11.603490829467773, + "model.layers.6.mlp.gate_proj": 6.690526008605957, + "model.layers.5.mlp.gate_proj": 6.205199241638184, + "model.layers.24.mlp.gate_proj": 4.2970757484436035, + "model.layers.21.mlp.gate_proj": 4.259458541870117, + "model.layers.23.mlp.gate_proj": 4.240945816040039, + "model.layers.20.mlp.gate_proj": 4.202367782592773, + "model.layers.12.mlp.gate_proj": 4.017270565032959, + "model.layers.25.mlp.gate_proj": 3.916248321533203, + "model.layers.10.mlp.gate_proj": 3.850186586380005, + "model.layers.19.mlp.gate_proj": 3.7126221656799316, + "model.layers.17.mlp.gate_proj": 3.680795431137085, + "model.layers.15.mlp.gate_proj": 3.6055760383605957, + "model.layers.22.mlp.gate_proj": 3.5673747062683105, + "model.layers.18.mlp.gate_proj": 3.4207923412323, + "model.layers.13.mlp.gate_proj": 3.4158849716186523, + "model.layers.14.mlp.gate_proj": 3.331536054611206, + "model.layers.9.mlp.gate_proj": 3.2925198078155518, + "model.layers.11.mlp.gate_proj": 3.2574970722198486, + "model.layers.16.mlp.gate_proj": 3.216805934906006, + "model.layers.7.mlp.gate_proj": 3.2015552520751953, + "model.layers.8.mlp.gate_proj": 2.326204299926758 + }, + "input_layernorm": { + "model.layers.0.input_layernorm": Infinity, + "model.layers.1.input_layernorm": Infinity, + "model.layers.2.input_layernorm": Infinity, + "model.layers.3.input_layernorm": Infinity, + "model.layers.4.input_layernorm": Infinity, + "model.layers.5.input_layernorm": Infinity, + "model.layers.6.input_layernorm": Infinity, + "model.layers.7.input_layernorm": Infinity, + "model.layers.8.input_layernorm": Infinity, + "model.layers.9.input_layernorm": Infinity, + "model.layers.10.input_layernorm": Infinity, + "model.layers.11.input_layernorm": Infinity, + "model.layers.12.input_layernorm": Infinity, + "model.layers.13.input_layernorm": Infinity, + "model.layers.14.input_layernorm": Infinity, + "model.layers.15.input_layernorm": Infinity, + "model.layers.16.input_layernorm": Infinity, + "model.layers.17.input_layernorm": Infinity, + "model.layers.18.input_layernorm": Infinity, + "model.layers.19.input_layernorm": Infinity, + "model.layers.20.input_layernorm": Infinity, + "model.layers.21.input_layernorm": Infinity, + "model.layers.22.input_layernorm": Infinity, + "model.layers.23.input_layernorm": Infinity, + "model.layers.24.input_layernorm": Infinity, + "model.layers.25.input_layernorm": Infinity + }, + "lm_head": { + "lm_head": 3.2275562286376953 + }, + "model.embed_tokens": { + "model.embed_tokens": 3.2275562286376953 + }, + "model.norm": { + "model.norm": Infinity + }, + "post_attention_layernorm": { + "model.layers.0.post_attention_layernorm": Infinity, + "model.layers.1.post_attention_layernorm": Infinity, + "model.layers.2.post_attention_layernorm": Infinity, + "model.layers.3.post_attention_layernorm": Infinity, + "model.layers.4.post_attention_layernorm": Infinity, + "model.layers.5.post_attention_layernorm": Infinity, + "model.layers.6.post_attention_layernorm": Infinity, + "model.layers.7.post_attention_layernorm": Infinity, + "model.layers.8.post_attention_layernorm": Infinity, + "model.layers.9.post_attention_layernorm": Infinity, + "model.layers.10.post_attention_layernorm": Infinity, + "model.layers.11.post_attention_layernorm": Infinity, + "model.layers.12.post_attention_layernorm": Infinity, + "model.layers.13.post_attention_layernorm": Infinity, + "model.layers.14.post_attention_layernorm": Infinity, + "model.layers.15.post_attention_layernorm": Infinity, + "model.layers.16.post_attention_layernorm": Infinity, + "model.layers.17.post_attention_layernorm": Infinity, + "model.layers.18.post_attention_layernorm": Infinity, + "model.layers.19.post_attention_layernorm": Infinity, + "model.layers.20.post_attention_layernorm": Infinity, + "model.layers.21.post_attention_layernorm": Infinity, + "model.layers.22.post_attention_layernorm": Infinity, + "model.layers.23.post_attention_layernorm": Infinity, + "model.layers.24.post_attention_layernorm": Infinity, + "model.layers.25.post_attention_layernorm": Infinity + }, + "post_feedforward_layernorm": { + "model.layers.0.post_feedforward_layernorm": Infinity, + "model.layers.1.post_feedforward_layernorm": Infinity, + "model.layers.2.post_feedforward_layernorm": Infinity, + "model.layers.3.post_feedforward_layernorm": Infinity, + "model.layers.4.post_feedforward_layernorm": Infinity, + "model.layers.5.post_feedforward_layernorm": Infinity, + "model.layers.6.post_feedforward_layernorm": Infinity, + "model.layers.7.post_feedforward_layernorm": Infinity, + "model.layers.8.post_feedforward_layernorm": Infinity, + "model.layers.9.post_feedforward_layernorm": Infinity, + "model.layers.10.post_feedforward_layernorm": Infinity, + "model.layers.11.post_feedforward_layernorm": Infinity, + "model.layers.12.post_feedforward_layernorm": Infinity, + "model.layers.13.post_feedforward_layernorm": Infinity, + "model.layers.14.post_feedforward_layernorm": Infinity, + "model.layers.15.post_feedforward_layernorm": Infinity, + "model.layers.16.post_feedforward_layernorm": Infinity, + "model.layers.17.post_feedforward_layernorm": Infinity, + "model.layers.18.post_feedforward_layernorm": Infinity, + "model.layers.19.post_feedforward_layernorm": Infinity, + "model.layers.20.post_feedforward_layernorm": Infinity, + "model.layers.21.post_feedforward_layernorm": Infinity, + "model.layers.22.post_feedforward_layernorm": Infinity, + "model.layers.23.post_feedforward_layernorm": Infinity, + "model.layers.24.post_feedforward_layernorm": Infinity, + "model.layers.25.post_feedforward_layernorm": Infinity + }, + "pre_feedforward_layernorm": { + "model.layers.0.pre_feedforward_layernorm": Infinity, + "model.layers.1.pre_feedforward_layernorm": Infinity, + "model.layers.2.pre_feedforward_layernorm": Infinity, + "model.layers.3.pre_feedforward_layernorm": Infinity, + "model.layers.4.pre_feedforward_layernorm": Infinity, + "model.layers.5.pre_feedforward_layernorm": Infinity, + "model.layers.6.pre_feedforward_layernorm": Infinity, + "model.layers.7.pre_feedforward_layernorm": Infinity, + "model.layers.8.pre_feedforward_layernorm": Infinity, + "model.layers.9.pre_feedforward_layernorm": Infinity, + "model.layers.10.pre_feedforward_layernorm": Infinity, + "model.layers.11.pre_feedforward_layernorm": Infinity, + "model.layers.12.pre_feedforward_layernorm": Infinity, + "model.layers.13.pre_feedforward_layernorm": Infinity, + "model.layers.14.pre_feedforward_layernorm": Infinity, + "model.layers.15.pre_feedforward_layernorm": Infinity, + "model.layers.16.pre_feedforward_layernorm": Infinity, + "model.layers.17.pre_feedforward_layernorm": Infinity, + "model.layers.18.pre_feedforward_layernorm": Infinity, + "model.layers.19.pre_feedforward_layernorm": Infinity, + "model.layers.20.pre_feedforward_layernorm": Infinity, + "model.layers.21.pre_feedforward_layernorm": Infinity, + "model.layers.22.pre_feedforward_layernorm": Infinity, + "model.layers.23.pre_feedforward_layernorm": Infinity, + "model.layers.24.pre_feedforward_layernorm": Infinity, + "model.layers.25.pre_feedforward_layernorm": Infinity + } + diff --git a/snr_results_akjindal53244-Llama-3.1-Storm-8B_unfrozenparameters_50percent.yaml b/snr_results_akjindal53244-Llama-3.1-Storm-8B_unfrozenparameters_50percent.yaml deleted file mode 100644 index da9ece9..0000000 --- a/snr_results_akjindal53244-Llama-3.1-Storm-8B_unfrozenparameters_50percent.yaml +++ /dev/null @@ -1,159 +0,0 @@ -unfrozen_parameters: -- ^lm_head.weight$ -- ^model.embed_tokens.weight$ -# input_layernorm layers -- model.layers.0.input_layernorm -- model.layers.1.input_layernorm -- model.layers.2.input_layernorm -- model.layers.3.input_layernorm -- model.layers.4.input_layernorm -- model.layers.5.input_layernorm -- model.layers.6.input_layernorm -- model.layers.7.input_layernorm -- model.layers.8.input_layernorm -- model.layers.9.input_layernorm -- model.layers.10.input_layernorm -- model.layers.11.input_layernorm -- model.layers.12.input_layernorm -- model.layers.13.input_layernorm -- model.layers.14.input_layernorm -- model.layers.15.input_layernorm -# lm_head layers -# mlp.down_proj layers -- model.layers.0.mlp.down_proj -- model.layers.1.mlp.down_proj -- model.layers.30.mlp.down_proj -- model.layers.2.mlp.down_proj -- model.layers.21.mlp.down_proj -- model.layers.29.mlp.down_proj -- model.layers.22.mlp.down_proj -- model.layers.5.mlp.down_proj -- model.layers.4.mlp.down_proj -- model.layers.20.mlp.down_proj -- model.layers.23.mlp.down_proj -- model.layers.19.mlp.down_proj -- model.layers.3.mlp.down_proj -- model.layers.17.mlp.down_proj -- model.layers.6.mlp.down_proj -- model.layers.31.mlp.down_proj -# mlp.gate_proj layers -- model.layers.1.mlp.gate_proj -- model.layers.2.mlp.gate_proj -- model.layers.3.mlp.gate_proj -- model.layers.4.mlp.gate_proj -- model.layers.0.mlp.gate_proj -- model.layers.25.mlp.gate_proj -- model.layers.26.mlp.gate_proj -- model.layers.5.mlp.gate_proj -- model.layers.24.mlp.gate_proj -- model.layers.28.mlp.gate_proj -- model.layers.23.mlp.gate_proj -- model.layers.27.mlp.gate_proj -- model.layers.21.mlp.gate_proj -- model.layers.22.mlp.gate_proj -- model.layers.29.mlp.gate_proj -- model.layers.20.mlp.gate_proj -# mlp.up_proj layers -- model.layers.4.mlp.up_proj -- model.layers.3.mlp.up_proj -- model.layers.0.mlp.up_proj -- model.layers.7.mlp.up_proj -- model.layers.5.mlp.up_proj -- model.layers.6.mlp.up_proj -- model.layers.2.mlp.up_proj -- model.layers.1.mlp.up_proj -- model.layers.8.mlp.up_proj -- model.layers.14.mlp.up_proj -- model.layers.12.mlp.up_proj -- model.layers.9.mlp.up_proj -- model.layers.15.mlp.up_proj -- model.layers.17.mlp.up_proj -- model.layers.13.mlp.up_proj -- model.layers.19.mlp.up_proj -# model.embed_tokens layers -# model.norm layers -# post_attention_layernorm layers -- model.layers.0.post_attention_layernorm -- model.layers.1.post_attention_layernorm -- model.layers.2.post_attention_layernorm -- model.layers.3.post_attention_layernorm -- model.layers.4.post_attention_layernorm -- model.layers.5.post_attention_layernorm -- model.layers.6.post_attention_layernorm -- model.layers.7.post_attention_layernorm -- model.layers.8.post_attention_layernorm -- model.layers.9.post_attention_layernorm -- model.layers.10.post_attention_layernorm -- model.layers.11.post_attention_layernorm -- model.layers.12.post_attention_layernorm -- model.layers.13.post_attention_layernorm -- model.layers.14.post_attention_layernorm -- model.layers.15.post_attention_layernorm -# self_attn.k_proj layers -- model.layers.29.self_attn.k_proj -- model.layers.25.self_attn.k_proj -- model.layers.23.self_attn.k_proj -- model.layers.28.self_attn.k_proj -- model.layers.21.self_attn.k_proj -- model.layers.19.self_attn.k_proj -- model.layers.22.self_attn.k_proj -- model.layers.20.self_attn.k_proj -- model.layers.24.self_attn.k_proj -- model.layers.31.self_attn.k_proj -- model.layers.27.self_attn.k_proj -- model.layers.26.self_attn.k_proj -- model.layers.17.self_attn.k_proj -- model.layers.11.self_attn.k_proj -- model.layers.18.self_attn.k_proj -- model.layers.14.self_attn.k_proj -# self_attn.o_proj layers -- model.layers.14.self_attn.o_proj -- model.layers.7.self_attn.o_proj -- model.layers.5.self_attn.o_proj -- model.layers.11.self_attn.o_proj -- model.layers.6.self_attn.o_proj -- model.layers.24.self_attn.o_proj -- model.layers.9.self_attn.o_proj -- model.layers.13.self_attn.o_proj -- model.layers.10.self_attn.o_proj -- model.layers.12.self_attn.o_proj -- model.layers.8.self_attn.o_proj -- model.layers.25.self_attn.o_proj -- model.layers.21.self_attn.o_proj -- model.layers.23.self_attn.o_proj -- model.layers.15.self_attn.o_proj -- model.layers.16.self_attn.o_proj -# self_attn.q_proj layers -- model.layers.8.self_attn.q_proj -- model.layers.13.self_attn.q_proj -- model.layers.9.self_attn.q_proj -- model.layers.14.self_attn.q_proj -- model.layers.10.self_attn.q_proj -- model.layers.11.self_attn.q_proj -- model.layers.0.self_attn.q_proj -- model.layers.15.self_attn.q_proj -- model.layers.1.self_attn.q_proj -- model.layers.6.self_attn.q_proj -- model.layers.5.self_attn.q_proj -- model.layers.7.self_attn.q_proj -- model.layers.12.self_attn.q_proj -- model.layers.16.self_attn.q_proj -- model.layers.17.self_attn.q_proj -- model.layers.26.self_attn.q_proj -# self_attn.v_proj layers -- model.layers.26.self_attn.v_proj -- model.layers.17.self_attn.v_proj -- model.layers.3.self_attn.v_proj -- model.layers.28.self_attn.v_proj -- model.layers.29.self_attn.v_proj -- model.layers.21.self_attn.v_proj -- model.layers.15.self_attn.v_proj -- model.layers.16.self_attn.v_proj -- model.layers.20.self_attn.v_proj -- model.layers.25.self_attn.v_proj -- model.layers.6.self_attn.v_proj -- model.layers.23.self_attn.v_proj -- model.layers.4.self_attn.v_proj -- model.layers.1.self_attn.v_proj -- model.layers.14.self_attn.v_proj -- model.layers.22.self_attn.v_proj diff --git a/snr_results_meta-llama-Meta-Llama-3.1-8B-Instruct_unfrozenparameters_50percent.yaml b/snr_results_meta-llama-Meta-Llama-3.1-8B-Instruct_unfrozenparameters_50percent.yaml deleted file mode 100644 index fa0000e..0000000 --- a/snr_results_meta-llama-Meta-Llama-3.1-8B-Instruct_unfrozenparameters_50percent.yaml +++ /dev/null @@ -1,159 +0,0 @@ -unfrozen_parameters: -- ^lm_head.weight$ -- ^model.embed_tokens.weight$ -# input_layernorm layers -- model.layers.0.input_layernorm -- model.layers.1.input_layernorm -- model.layers.2.input_layernorm -- model.layers.3.input_layernorm -- model.layers.4.input_layernorm -- model.layers.5.input_layernorm -- model.layers.6.input_layernorm -- model.layers.7.input_layernorm -- model.layers.8.input_layernorm -- model.layers.9.input_layernorm -- model.layers.10.input_layernorm -- model.layers.11.input_layernorm -- model.layers.12.input_layernorm -- model.layers.13.input_layernorm -- model.layers.14.input_layernorm -- model.layers.15.input_layernorm -# lm_head layers -# mlp.down_proj layers -- model.layers.1.mlp.down_proj -- model.layers.0.mlp.down_proj -- model.layers.30.mlp.down_proj -- model.layers.2.mlp.down_proj -- model.layers.21.mlp.down_proj -- model.layers.22.mlp.down_proj -- model.layers.29.mlp.down_proj -- model.layers.5.mlp.down_proj -- model.layers.4.mlp.down_proj -- model.layers.20.mlp.down_proj -- model.layers.23.mlp.down_proj -- model.layers.19.mlp.down_proj -- model.layers.3.mlp.down_proj -- model.layers.17.mlp.down_proj -- model.layers.6.mlp.down_proj -- model.layers.31.mlp.down_proj -# mlp.gate_proj layers -- model.layers.1.mlp.gate_proj -- model.layers.2.mlp.gate_proj -- model.layers.3.mlp.gate_proj -- model.layers.4.mlp.gate_proj -- model.layers.0.mlp.gate_proj -- model.layers.25.mlp.gate_proj -- model.layers.26.mlp.gate_proj -- model.layers.5.mlp.gate_proj -- model.layers.24.mlp.gate_proj -- model.layers.28.mlp.gate_proj -- model.layers.23.mlp.gate_proj -- model.layers.27.mlp.gate_proj -- model.layers.21.mlp.gate_proj -- model.layers.22.mlp.gate_proj -- model.layers.29.mlp.gate_proj -- model.layers.20.mlp.gate_proj -# mlp.up_proj layers -- model.layers.4.mlp.up_proj -- model.layers.3.mlp.up_proj -- model.layers.0.mlp.up_proj -- model.layers.7.mlp.up_proj -- model.layers.5.mlp.up_proj -- model.layers.6.mlp.up_proj -- model.layers.2.mlp.up_proj -- model.layers.1.mlp.up_proj -- model.layers.8.mlp.up_proj -- model.layers.14.mlp.up_proj -- model.layers.12.mlp.up_proj -- model.layers.9.mlp.up_proj -- model.layers.15.mlp.up_proj -- model.layers.17.mlp.up_proj -- model.layers.13.mlp.up_proj -- model.layers.19.mlp.up_proj -# model.embed_tokens layers -# model.norm layers -# post_attention_layernorm layers -- model.layers.0.post_attention_layernorm -- model.layers.1.post_attention_layernorm -- model.layers.2.post_attention_layernorm -- model.layers.3.post_attention_layernorm -- model.layers.4.post_attention_layernorm -- model.layers.5.post_attention_layernorm -- model.layers.6.post_attention_layernorm -- model.layers.7.post_attention_layernorm -- model.layers.8.post_attention_layernorm -- model.layers.9.post_attention_layernorm -- model.layers.10.post_attention_layernorm -- model.layers.11.post_attention_layernorm -- model.layers.12.post_attention_layernorm -- model.layers.13.post_attention_layernorm -- model.layers.14.post_attention_layernorm -- model.layers.15.post_attention_layernorm -# self_attn.k_proj layers -- model.layers.29.self_attn.k_proj -- model.layers.25.self_attn.k_proj -- model.layers.23.self_attn.k_proj -- model.layers.28.self_attn.k_proj -- model.layers.21.self_attn.k_proj -- model.layers.19.self_attn.k_proj -- model.layers.22.self_attn.k_proj -- model.layers.20.self_attn.k_proj -- model.layers.24.self_attn.k_proj -- model.layers.31.self_attn.k_proj -- model.layers.27.self_attn.k_proj -- model.layers.26.self_attn.k_proj -- model.layers.17.self_attn.k_proj -- model.layers.11.self_attn.k_proj -- model.layers.14.self_attn.k_proj -- model.layers.18.self_attn.k_proj -# self_attn.o_proj layers -- model.layers.14.self_attn.o_proj -- model.layers.7.self_attn.o_proj -- model.layers.5.self_attn.o_proj -- model.layers.11.self_attn.o_proj -- model.layers.6.self_attn.o_proj -- model.layers.24.self_attn.o_proj -- model.layers.9.self_attn.o_proj -- model.layers.13.self_attn.o_proj -- model.layers.10.self_attn.o_proj -- model.layers.12.self_attn.o_proj -- model.layers.8.self_attn.o_proj -- model.layers.25.self_attn.o_proj -- model.layers.21.self_attn.o_proj -- model.layers.23.self_attn.o_proj -- model.layers.15.self_attn.o_proj -- model.layers.16.self_attn.o_proj -# self_attn.q_proj layers -- model.layers.8.self_attn.q_proj -- model.layers.13.self_attn.q_proj -- model.layers.9.self_attn.q_proj -- model.layers.14.self_attn.q_proj -- model.layers.10.self_attn.q_proj -- model.layers.11.self_attn.q_proj -- model.layers.0.self_attn.q_proj -- model.layers.15.self_attn.q_proj -- model.layers.1.self_attn.q_proj -- model.layers.6.self_attn.q_proj -- model.layers.5.self_attn.q_proj -- model.layers.7.self_attn.q_proj -- model.layers.12.self_attn.q_proj -- model.layers.16.self_attn.q_proj -- model.layers.17.self_attn.q_proj -- model.layers.26.self_attn.q_proj -# self_attn.v_proj layers -- model.layers.26.self_attn.v_proj -- model.layers.17.self_attn.v_proj -- model.layers.3.self_attn.v_proj -- model.layers.28.self_attn.v_proj -- model.layers.29.self_attn.v_proj -- model.layers.21.self_attn.v_proj -- model.layers.15.self_attn.v_proj -- model.layers.16.self_attn.v_proj -- model.layers.20.self_attn.v_proj -- model.layers.25.self_attn.v_proj -- model.layers.6.self_attn.v_proj -- model.layers.23.self_attn.v_proj -- model.layers.4.self_attn.v_proj -- model.layers.1.self_attn.v_proj -- model.layers.14.self_attn.v_proj -- model.layers.22.self_attn.v_proj From 3d0f3c42e25459bc3cb89bc7f9fe2030f4a02ca5 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Sat, 16 Nov 2024 09:30:09 +0100 Subject: [PATCH 2/2] add also phi 3.5 mini --- ...sults_microsoft-Phi-3.5-mini-instruct.json | 214 ++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 model_snr_results/snr_results_microsoft-Phi-3.5-mini-instruct.json diff --git a/model_snr_results/snr_results_microsoft-Phi-3.5-mini-instruct.json b/model_snr_results/snr_results_microsoft-Phi-3.5-mini-instruct.json new file mode 100644 index 0000000..fb98c7f --- /dev/null +++ b/model_snr_results/snr_results_microsoft-Phi-3.5-mini-instruct.json @@ -0,0 +1,214 @@ +{ + "input_layernorm": { + "model.layers.0.input_layernorm": Infinity, + "model.layers.1.input_layernorm": Infinity, + "model.layers.2.input_layernorm": Infinity, + "model.layers.3.input_layernorm": Infinity, + "model.layers.4.input_layernorm": Infinity, + "model.layers.5.input_layernorm": Infinity, + "model.layers.6.input_layernorm": Infinity, + "model.layers.7.input_layernorm": Infinity, + "model.layers.8.input_layernorm": Infinity, + "model.layers.9.input_layernorm": Infinity, + "model.layers.10.input_layernorm": Infinity, + "model.layers.11.input_layernorm": Infinity, + "model.layers.12.input_layernorm": Infinity, + "model.layers.13.input_layernorm": Infinity, + "model.layers.14.input_layernorm": Infinity, + "model.layers.15.input_layernorm": Infinity, + "model.layers.16.input_layernorm": Infinity, + "model.layers.17.input_layernorm": Infinity, + "model.layers.18.input_layernorm": Infinity, + "model.layers.19.input_layernorm": Infinity, + "model.layers.20.input_layernorm": Infinity, + "model.layers.21.input_layernorm": Infinity, + "model.layers.22.input_layernorm": Infinity, + "model.layers.23.input_layernorm": Infinity, + "model.layers.24.input_layernorm": Infinity, + "model.layers.25.input_layernorm": Infinity, + "model.layers.26.input_layernorm": Infinity, + "model.layers.27.input_layernorm": Infinity, + "model.layers.28.input_layernorm": Infinity, + "model.layers.29.input_layernorm": Infinity, + "model.layers.30.input_layernorm": Infinity, + "model.layers.31.input_layernorm": Infinity + }, + "lm_head": { + "lm_head": 69.15093994140625 + }, + "mlp.down_proj": { + "model.layers.2.mlp.down_proj": 0.9352746605873108, + "model.layers.3.mlp.down_proj": 0.9051517844200134, + "model.layers.1.mlp.down_proj": 0.7663238644599915, + "model.layers.23.mlp.down_proj": 0.7655190825462341, + "model.layers.4.mlp.down_proj": 0.7617908716201782, + "model.layers.26.mlp.down_proj": 0.7487379312515259, + "model.layers.25.mlp.down_proj": 0.7464706301689148, + "model.layers.24.mlp.down_proj": 0.7204372882843018, + "model.layers.28.mlp.down_proj": 0.7196976542472839, + "model.layers.27.mlp.down_proj": 0.6962915658950806, + "model.layers.22.mlp.down_proj": 0.6947131156921387, + "model.layers.29.mlp.down_proj": 0.6657569408416748, + "model.layers.5.mlp.down_proj": 0.6114027500152588, + "model.layers.21.mlp.down_proj": 0.6042328476905823, + "model.layers.30.mlp.down_proj": 0.5964826941490173, + "model.layers.6.mlp.down_proj": 0.5820637345314026, + "model.layers.20.mlp.down_proj": 0.5010251998901367, + "model.layers.19.mlp.down_proj": 0.47812408208847046, + "model.layers.7.mlp.down_proj": 0.4773608148097992, + "model.layers.8.mlp.down_proj": 0.4373893737792969, + "model.layers.18.mlp.down_proj": 0.4199794828891754, + "model.layers.9.mlp.down_proj": 0.40802717208862305, + "model.layers.16.mlp.down_proj": 0.39150694012641907, + "model.layers.17.mlp.down_proj": 0.38244763016700745, + "model.layers.10.mlp.down_proj": 0.364518940448761, + "model.layers.11.mlp.down_proj": 0.35167938470840454, + "model.layers.13.mlp.down_proj": 0.33682316541671753, + "model.layers.14.mlp.down_proj": 0.32477253675460815, + "model.layers.15.mlp.down_proj": 0.3225097358226776, + "model.layers.31.mlp.down_proj": 0.32002490758895874, + "model.layers.12.mlp.down_proj": 0.31769993901252747, + "model.layers.0.mlp.down_proj": 0.19427147507667542 + }, + "mlp.gate_up_proj": { + "model.layers.31.mlp.gate_up_proj": 53.814605712890625, + "model.layers.4.mlp.gate_up_proj": 37.674842834472656, + "model.layers.3.mlp.gate_up_proj": 35.983978271484375, + "model.layers.5.mlp.gate_up_proj": 35.65141296386719, + "model.layers.6.mlp.gate_up_proj": 27.75414276123047, + "model.layers.2.mlp.gate_up_proj": 20.59932518005371, + "model.layers.30.mlp.gate_up_proj": 20.19803237915039, + "model.layers.9.mlp.gate_up_proj": 19.5028018951416, + "model.layers.28.mlp.gate_up_proj": 18.861576080322266, + "model.layers.8.mlp.gate_up_proj": 18.611696243286133, + "model.layers.29.mlp.gate_up_proj": 18.294103622436523, + "model.layers.7.mlp.gate_up_proj": 17.676464080810547, + "model.layers.27.mlp.gate_up_proj": 13.531187057495117, + "model.layers.11.mlp.gate_up_proj": 11.151909828186035, + "model.layers.26.mlp.gate_up_proj": 11.042325973510742, + "model.layers.10.mlp.gate_up_proj": 8.631453514099121, + "model.layers.25.mlp.gate_up_proj": 7.959994316101074, + "model.layers.16.mlp.gate_up_proj": 7.0104851722717285, + "model.layers.24.mlp.gate_up_proj": 6.684956073760986, + "model.layers.23.mlp.gate_up_proj": 5.915026664733887, + "model.layers.15.mlp.gate_up_proj": 5.774378299713135, + "model.layers.14.mlp.gate_up_proj": 5.465820789337158, + "model.layers.22.mlp.gate_up_proj": 5.3470778465271, + "model.layers.12.mlp.gate_up_proj": 5.190053939819336, + "model.layers.21.mlp.gate_up_proj": 4.597593307495117, + "model.layers.18.mlp.gate_up_proj": 4.574717998504639, + "model.layers.13.mlp.gate_up_proj": 4.124696731567383, + "model.layers.20.mlp.gate_up_proj": 4.038638591766357, + "model.layers.19.mlp.gate_up_proj": 3.957982063293457, + "model.layers.17.mlp.gate_up_proj": 3.574976682662964, + "model.layers.1.mlp.gate_up_proj": 0.5177408456802368, + "model.layers.0.mlp.gate_up_proj": 0.31232842803001404 + }, + "model.embed_tokens": { + "model.embed_tokens": Infinity + }, + "model.norm": { + "model.norm": Infinity + }, + "post_attention_layernorm": { + "model.layers.0.post_attention_layernorm": Infinity, + "model.layers.1.post_attention_layernorm": Infinity, + "model.layers.2.post_attention_layernorm": Infinity, + "model.layers.3.post_attention_layernorm": Infinity, + "model.layers.4.post_attention_layernorm": Infinity, + "model.layers.5.post_attention_layernorm": Infinity, + "model.layers.6.post_attention_layernorm": Infinity, + "model.layers.7.post_attention_layernorm": Infinity, + "model.layers.8.post_attention_layernorm": Infinity, + "model.layers.9.post_attention_layernorm": Infinity, + "model.layers.10.post_attention_layernorm": Infinity, + "model.layers.11.post_attention_layernorm": Infinity, + "model.layers.12.post_attention_layernorm": Infinity, + "model.layers.13.post_attention_layernorm": Infinity, + "model.layers.14.post_attention_layernorm": Infinity, + "model.layers.15.post_attention_layernorm": Infinity, + "model.layers.16.post_attention_layernorm": Infinity, + "model.layers.17.post_attention_layernorm": Infinity, + "model.layers.18.post_attention_layernorm": Infinity, + "model.layers.19.post_attention_layernorm": Infinity, + "model.layers.20.post_attention_layernorm": Infinity, + "model.layers.21.post_attention_layernorm": Infinity, + "model.layers.22.post_attention_layernorm": Infinity, + "model.layers.23.post_attention_layernorm": Infinity, + "model.layers.24.post_attention_layernorm": Infinity, + "model.layers.25.post_attention_layernorm": Infinity, + "model.layers.26.post_attention_layernorm": Infinity, + "model.layers.27.post_attention_layernorm": Infinity, + "model.layers.28.post_attention_layernorm": Infinity, + "model.layers.29.post_attention_layernorm": Infinity, + "model.layers.30.post_attention_layernorm": Infinity, + "model.layers.31.post_attention_layernorm": Infinity + }, + "self_attn.o_proj": { + "model.layers.0.self_attn.o_proj": 0.12121370434761047, + "model.layers.1.self_attn.o_proj": 0.12061744183301926, + "model.layers.10.self_attn.o_proj": 0.10316251218318939, + "model.layers.11.self_attn.o_proj": 0.10039299726486206, + "model.layers.9.self_attn.o_proj": 0.09593994915485382, + "model.layers.3.self_attn.o_proj": 0.09419731050729752, + "model.layers.19.self_attn.o_proj": 0.09406369924545288, + "model.layers.8.self_attn.o_proj": 0.09309512376785278, + "model.layers.4.self_attn.o_proj": 0.09227702021598816, + "model.layers.15.self_attn.o_proj": 0.09132996946573257, + "model.layers.21.self_attn.o_proj": 0.09022396802902222, + "model.layers.16.self_attn.o_proj": 0.0901850163936615, + "model.layers.14.self_attn.o_proj": 0.09010837227106094, + "model.layers.2.self_attn.o_proj": 0.08998695015907288, + "model.layers.6.self_attn.o_proj": 0.08903370797634125, + "model.layers.20.self_attn.o_proj": 0.08843923360109329, + "model.layers.12.self_attn.o_proj": 0.084518663585186, + "model.layers.5.self_attn.o_proj": 0.08444256335496902, + "model.layers.17.self_attn.o_proj": 0.08404343575239182, + "model.layers.18.self_attn.o_proj": 0.0802607536315918, + "model.layers.23.self_attn.o_proj": 0.07823388278484344, + "model.layers.22.self_attn.o_proj": 0.07819005846977234, + "model.layers.7.self_attn.o_proj": 0.0773889571428299, + "model.layers.13.self_attn.o_proj": 0.07384383678436279, + "model.layers.24.self_attn.o_proj": 0.06991618126630783, + "model.layers.28.self_attn.o_proj": 0.06686124205589294, + "model.layers.26.self_attn.o_proj": 0.06276919692754745, + "model.layers.25.self_attn.o_proj": 0.062214791774749756, + "model.layers.29.self_attn.o_proj": 0.06140708923339844, + "model.layers.30.self_attn.o_proj": 0.05870771408081055, + "model.layers.27.self_attn.o_proj": 0.05600262060761452, + "model.layers.31.self_attn.o_proj": 0.05066452547907829 + }, + "self_attn.qkv_proj": { + "model.layers.23.self_attn.qkv_proj": 0.2959131598472595, + "model.layers.24.self_attn.qkv_proj": 0.2941616475582123, + "model.layers.22.self_attn.qkv_proj": 0.2844860851764679, + "model.layers.26.self_attn.qkv_proj": 0.2664036452770233, + "model.layers.27.self_attn.qkv_proj": 0.2614766061306, + "model.layers.25.self_attn.qkv_proj": 0.2562023103237152, + "model.layers.28.self_attn.qkv_proj": 0.2548952102661133, + "model.layers.29.self_attn.qkv_proj": 0.2481124997138977, + "model.layers.31.self_attn.qkv_proj": 0.24758334457874298, + "model.layers.20.self_attn.qkv_proj": 0.24697288870811462, + "model.layers.18.self_attn.qkv_proj": 0.22055192291736603, + "model.layers.21.self_attn.qkv_proj": 0.21936123073101044, + "model.layers.19.self_attn.qkv_proj": 0.212538942694664, + "model.layers.30.self_attn.qkv_proj": 0.21087859570980072, + "model.layers.17.self_attn.qkv_proj": 0.1822361797094345, + "model.layers.16.self_attn.qkv_proj": 0.16475369036197662, + "model.layers.7.self_attn.qkv_proj": 0.16432476043701172, + "model.layers.6.self_attn.qkv_proj": 0.15850073099136353, + "model.layers.15.self_attn.qkv_proj": 0.15458078682422638, + "model.layers.14.self_attn.qkv_proj": 0.15215390920639038, + "model.layers.12.self_attn.qkv_proj": 0.15206561982631683, + "model.layers.13.self_attn.qkv_proj": 0.1442185789346695, + "model.layers.10.self_attn.qkv_proj": 0.14281435310840607, + "model.layers.8.self_attn.qkv_proj": 0.14161457121372223, + "model.layers.5.self_attn.qkv_proj": 0.13123807311058044, + "model.layers.11.self_attn.qkv_proj": 0.1260766088962555, + "model.layers.9.self_attn.qkv_proj": 0.12384241819381714, + "model.layers.4.self_attn.qkv_proj": 0.11245359480381012, + "model.layers.3.self_attn.qkv_proj": 0.07319162786006927, + "model.layers.2.self_attn.qkv_proj": 0.07126714289188385, + "model.layers.1.self_attn.qkv_proj": 0.06750696897506714, + "model.layers.0.self_attn.qkv_proj": 0.05644218623638153 + } \ No newline at end of file