Skip to content

Commit d013aa5

Browse files
committed
vulkan : sync (llama/0)
ggml-ci
1 parent 8852b01 commit d013aa5

8 files changed

+275
-9
lines changed

cmake/ggml-config.cmake.in

+7-2
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
112112

113113
string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
114114
if(is_cpu_variant)
115-
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
115+
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
116116
set_target_properties(ggml::${_ggml_backend}
117117
PROPERTIES
118118
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
@@ -124,7 +124,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
124124
endif()
125125

126126
else()
127-
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
127+
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
128128
set_target_properties(ggml::${_ggml_backend}
129129
PROPERTIES
130130
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
@@ -139,6 +139,11 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
139139
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
140140
endforeach()
141141

142+
list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
143+
set_target_properties(ggml::ggml
144+
PROPERTIES
145+
INTERFACE_LINK_LIBRARIES "${GGML_INTERFACE_LINK_LIBRARIES}")
146+
142147
add_library(ggml::all INTERFACE IMPORTED)
143148
set_target_properties(ggml::all
144149
PROPERTIES

src/ggml-vulkan/ggml-vulkan.cpp

+108-7
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#version 450
2+
3+
#include "generic_head.comp"
4+
#include "types.comp"
5+
6+
#extension GL_EXT_control_flow_attributes : enable
7+
// Number of invocations per workgroup; also the size of the shared reduction arrays.
#define BLOCK_SIZE 512
8+
9+
layout(local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;
10+
11+
// data_a: incoming gradient g, data_b: forward-pass input x, data_d: output gradient.
layout (binding = 0) readonly buffer G {A_TYPE data_a[];};
12+
layout (binding = 1) readonly buffer X {B_TYPE data_b[];};
13+
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
14+
15+
// Per-invocation partial sums of x*x and x*g, reduced in place to index 0.
shared FLOAT_TYPE sum_xx[BLOCK_SIZE];
16+
shared FLOAT_TYPE sum_xg[BLOCK_SIZE];
17+
18+
// Backward pass of RMS norm: each workgroup processes one row of p.KX elements.
// NOTE(review): `p` is not declared in this file; presumably it is the push-constant
// block from generic_head.comp - confirm.
void main() {
19+
// Flatten the 3D workgroup id into a row index (262144 = 512 * 512).
const uint row = gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x;
20+
const uint tid = gl_LocalInvocationID.x;
21+
22+
// Compute derivative of x[i]/norm(x) = g[i]/norm(x) - x[i] dot(x,g)/KX / norm(x)^3,
// where norm(x) = sqrt(sum(x^2)/KX + eps)
23+
24+
// per-invocation partial sums, one shared-memory slot per invocation
25+
sum_xx[tid] = FLOAT_TYPE(0.0f);
26+
sum_xg[tid] = FLOAT_TYPE(0.0f);
27+
28+
// Each invocation accumulates columns tid, tid + BLOCK_SIZE, ... of this row.
[[unroll]] for (uint col = tid; col < p.KX; col += BLOCK_SIZE) {
29+
const FLOAT_TYPE gi = FLOAT_TYPE(data_a[row*p.KX + col]);
30+
const FLOAT_TYPE xi = FLOAT_TYPE(data_b[row*p.KX + col]);
31+
sum_xx[tid] += xi * xi;
32+
sum_xg[tid] += xi * gi;
33+
}
34+
35+
// sum up partial sums and write back result
// (pairwise reduction in shared memory: the active range halves each step,
// with a barrier after every step so all partial results are visible)
36+
barrier();
37+
[[unroll]] for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1) {
38+
if (tid < s) {
39+
sum_xx[tid] += sum_xx[tid + s];
40+
sum_xg[tid] += sum_xg[tid + s];
41+
}
42+
barrier();
43+
}
44+
45+
// After the reduction, index 0 holds the full-row sums.
// scale_g = 1/norm(x); scale_x = -dot(x,g) / (KX * norm(x)^3), written as
// -scale_g * sum_xg / (sum_xx + KX*eps).
const FLOAT_TYPE eps = FLOAT_TYPE(p.param1);
46+
const FLOAT_TYPE mean = sum_xx[0] / FLOAT_TYPE(p.KX);
47+
const FLOAT_TYPE scale_g = inversesqrt(mean + eps);
48+
const FLOAT_TYPE scale_x = -scale_g * sum_xg[0] / (sum_xx[0] + FLOAT_TYPE(p.KX) * eps);
49+
50+
// Write d[i] = scale_g * g[i] + scale_x * x[i] for every column of the row.
[[unroll]] for (uint col = tid; col < p.KX; col += BLOCK_SIZE) {
51+
data_d[row*p.KX + col] = D_TYPE(
52+
scale_g * FLOAT_TYPE(data_a[row*p.KX + col]) +
53+
scale_x * FLOAT_TYPE(data_b[row*p.KX + col]));
54+
}
55+
}

src/ggml-vulkan/vulkan-shaders/rope_head.comp

+5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ layout (push_constant) uniform parameter {
2929
uint s1;
3030
uint s2;
3131
int sections[4];
32+
uint is_back;
3233
} p;
3334

3435
float rope_yarn_ramp(const float low, const float high, const uint i0) {
@@ -48,6 +49,10 @@ void rope_yarn(const float theta_extrap, const uint i0, out float cos_theta, out
4849
// Get n-d magnitude scaling corrected for interpolation
4950
mscale *= 1.0f + 0.1f * log(1.0f / p.freq_scale);
5051
}
52+
// Backpropagation uses inverted rotation
53+
if (p.is_back != 0) {
54+
theta = -theta;
55+
}
5156
cos_theta = cos(theta) * mscale;
5257
sin_theta = sin(theta) * mscale;
5358
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#version 450
2+
3+
#include "generic_head.comp"
4+
#include "types.comp"
5+
6+
#extension GL_EXT_control_flow_attributes : enable
7+
8+
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
9+
10+
// data_a: input values, data_d: output values.
layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
11+
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
12+
13+
// Element-wise logistic sigmoid: data_d[i] = 1 / (1 + exp(-data_a[i])).
// NOTE(review): `p` is not declared in this file; presumably it is the push-constant
// block from generic_head.comp - confirm.
void main() {
14+
// Flatten the 3D global invocation id into a linear element index (262144 = 512 * 512).
const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
15+
16+
// Invocations past the element count p.KX have nothing to do.
if (i >= p.KX) {
17+
return;
18+
}
19+
data_d[i] = D_TYPE(1. / (1 + exp(-1. *data_a[i])));
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#version 450
2+
3+
#include "generic_head.comp"
4+
#include "types.comp"
5+
6+
#extension GL_EXT_control_flow_attributes : enable
7+
8+
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
9+
10+
// data_g: incoming gradient, data_x: forward-pass input, data_d: output gradient.
layout (binding = 0) readonly buffer G {A_TYPE data_g[];};
11+
layout (binding = 1) readonly buffer X {B_TYPE data_x[];};
12+
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
13+
14+
// Element-wise backward pass of SiLU: data_d[i] = data_g[i] * dSiLU/dx(data_x[i]).
// NOTE(review): `p` is not declared in this file; presumably it is the push-constant
// block from generic_head.comp - confirm.
void main() {
15+
// Flatten the 3D global invocation id into a linear element index (262144 = 512 * 512).
const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
16+
17+
// Invocations past the element count p.KX have nothing to do.
if (i >= p.KX) {
18+
return;
19+
}
20+
21+
// Compute derivative of SiLU(x): 1/(1+exp(-x)) + x*exp(-x)/(1+exp(-x))^2
// With s = 1/(1+exp(-x)) this equals s + x*s*(1 - s), the form used below.
22+
23+
const float xi = float(data_x[i]);
24+
const float s = 1.0f / (1.0f + exp(-xi));
25+
data_d[i] = D_TYPE(data_g[i] * (s + xi * s * (1 - s)));
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#version 450
2+
3+
#extension GL_EXT_control_flow_attributes : enable
4+
5+
#include "generic_head.comp"
6+
#include "types.comp"
7+
8+
// Workgroup size along x is a specialization constant (id 0), defaulting to 32;
// it also sizes the shared reduction array.
layout(constant_id = 0) const uint BLOCK_SIZE = 32;
9+
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
10+
11+
// In this shader Y = softmax(X) and X is not provided as input.
12+
13+
// data_g: incoming gradient, data_y: softmax output from the forward pass,
// data_d: output gradient.
layout (binding = 0) readonly buffer G {A_TYPE data_g[];};
14+
layout (binding = 1) readonly buffer Y {B_TYPE data_y[];};
15+
layout (binding = 2) buffer D {D_TYPE data_d[];};
16+
17+
// Per-invocation partial sums of y*g, reduced in place to index 0.
shared FLOAT_TYPE sum_yg[BLOCK_SIZE];
18+
19+
// Backward pass of softmax: each workgroup processes one row of p.KX elements and
// writes d[i] = scale * (g[i] - dot(y, g)) * y[i].
// NOTE(review): `p` is not declared in this file; presumably it is the push-constant
// block from generic_head.comp - confirm.
void main() {
20+
// Flatten the 3D workgroup id into a row index (262144 = 512 * 512).
const uint row = gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x;
21+
const uint tid = gl_LocalInvocationID.x;
22+
23+
FLOAT_TYPE scale = p.param1;
24+
25+
// per-invocation partial sums, one shared-memory slot per invocation
26+
sum_yg[tid] = FLOAT_TYPE(0.0f);
27+
28+
// Each invocation accumulates columns tid, tid + BLOCK_SIZE, ... of this row.
[[unroll]] for (uint col = tid; col < p.KX; col += BLOCK_SIZE) {
29+
const FLOAT_TYPE gi = FLOAT_TYPE(data_g[row*p.KX + col]);
30+
const FLOAT_TYPE yi = FLOAT_TYPE(data_y[row*p.KX + col]);
31+
sum_yg[tid] += yi * gi;
32+
}
33+
34+
// sum up partial sums and write back result
// (pairwise reduction in shared memory: the active range halves each step,
// with a barrier after every step so all partial results are visible)
35+
barrier();
36+
[[unroll]] for (uint s = BLOCK_SIZE / 2; s > 0; s >>= 1) {
37+
if (tid < s) {
38+
sum_yg[tid] += sum_yg[tid + s];
39+
}
40+
barrier();
41+
}
42+
43+
// After the reduction, index 0 holds dot(y, g) for the whole row.
const FLOAT_TYPE dot_yg = sum_yg[0];
44+
45+
[[unroll]] for (uint col = tid; col < p.KX; col += BLOCK_SIZE) {
46+
data_d[row*p.KX + col] = D_TYPE(scale
47+
* (FLOAT_TYPE(data_g[row*p.KX + col]) - dot_yg)
48+
* FLOAT_TYPE(data_y[row*p.KX + col]));
49+
}
50+
}

src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ void process_shaders() {
433433
string_to_spv("norm_f32", "norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
434434
string_to_spv("group_norm_f32", "group_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
435435
string_to_spv("rms_norm_f32", "rms_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
436+
string_to_spv("rms_norm_back_f32", "rms_norm_back.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
436437

437438
string_to_spv("cpy_f32_f32", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
438439
string_to_spv("cpy_f32_f16", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}});
@@ -483,14 +484,17 @@ void process_shaders() {
483484
string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
484485
string_to_spv("gelu_quick_f32", "gelu_quick.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
485486
string_to_spv("silu_f32", "silu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
487+
string_to_spv("silu_back_f32", "silu_back.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
486488
string_to_spv("relu_f32", "relu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
487489
string_to_spv("leaky_relu_f32", "leaky_relu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
488490
string_to_spv("tanh_f32", "tanh.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
491+
string_to_spv("sigmoid_f32", "sigmoid.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
489492

490493
string_to_spv("diag_mask_inf_f32", "diag_mask_inf.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
491494

492495
string_to_spv("soft_max_f32", "soft_max.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
493496
string_to_spv("soft_max_f32_f16", "soft_max.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}));
497+
string_to_spv("soft_max_back_f32", "soft_max_back.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
494498

495499
string_to_spv("rope_norm_f32", "rope_norm.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
496500
string_to_spv("rope_norm_f16", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});

0 commit comments

Comments
 (0)