Skip to content

Commit 0a421c0

Browse files
committed
ggml-cpu : check src[1] dims are 2 for repack
This commit adds an additional check for src[1] dimensions to be 2 when determining if a tensor supports repacking.

The motivation for this change is to ensure that both source tensors are strictly 2D before using repack. The repack implementation does not support broadcasting in dimensions 2 and 3, which occurs when src1 has more dimensions than src0 (like when nr != [1,1] in test-backend-ops.cpp).

Without this check, operations with broadcasting would use repack and produce incorrect results, because repack assumes that no broadcasting takes place in dimensions 2 and 3. With this check, broadcasting operations fall back to the standard CPU implementation, which correctly handles the index mapping (i02 = i12/r2, i03 = i13/r3).

This fixes test failures like:
```console
MUL_MAT(type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1])
MUL_MAT(type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2])
```
which were consistently failing across all architectures (x86, ARM, macOS) with high NMSE values (~0.4-0.7).
1 parent cdf0349 commit 0a421c0

File tree

1 file changed

+13
-38
lines changed

1 file changed

+13
-38
lines changed

ggml/src/ggml-cpu/repack.cpp

Lines changed: 13 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1869,10 +1869,13 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
18691869
return nullptr;
18701870
}
18711871

1872-
static bool supports_tensor(const struct ggml_tensor * op) {
1872+
static bool repack_supports_op(const struct ggml_tensor * op) {
18731873
if (op->op == GGML_OP_MUL_MAT &&
18741874
op->src[0]->buffer &&
1875-
(ggml_n_dims(op->src[0]) == 2) && ggml_repack_get_optimal_repack_type(op->src[0])) {
1875+
(ggml_n_dims(op->src[0]) == 2) &&
1876+
(ggml_n_dims(op->src[1]) == 2) &&
1877+
op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type() &&
1878+
ggml_repack_get_optimal_repack_type(op->src[0])) {
18761879

18771880
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
18781881
return false;
@@ -1882,8 +1885,12 @@ static bool supports_tensor(const struct ggml_tensor * op) {
18821885
return true;
18831886
}
18841887

1885-
} else if (op->op == GGML_OP_MUL_MAT_ID && op->src[0]->buffer &&
1886-
(ggml_n_dims(op->src[0]) == 3) && ggml_repack_get_optimal_repack_type(op->src[0])) {
1888+
} else if (op->op == GGML_OP_MUL_MAT_ID &&
1889+
op->src[0]->buffer &&
1890+
(ggml_n_dims(op->src[0]) == 3) &&
1891+
(ggml_n_dims(op->src[1]) == 2) &&
1892+
op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type() &&
1893+
ggml_repack_get_optimal_repack_type(op->src[0])) {
18871894

18881895
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
18891896
return false;
@@ -1902,7 +1909,7 @@ static enum ggml_status ggml_backend_cpu_repack_buffer_init_tensor(ggml_backend_
19021909
tensor->buffer = buffer;
19031910
}
19041911

1905-
if (supports_tensor(tensor)) {
1912+
if (repack_supports_op(tensor)) {
19061913
tensor->src[0]->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type(tensor->src[0]));
19071914
tensor->src[0]->buffer = buffer;
19081915
}
@@ -1953,39 +1960,7 @@ static size_t ggml_backend_cpu_repack_buffer_type_get_alignment(ggml_backend_buf
19531960
namespace ggml::cpu::repack {
19541961
class extra_buffer_type : ggml::cpu::extra_buffer_type {
19551962
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
1956-
if ( op->op == GGML_OP_MUL_MAT &&
1957-
op->src[0]->buffer &&
1958-
(ggml_n_dims(op->src[0]) == 2) &&
1959-
op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type() &&
1960-
ggml_repack_get_optimal_repack_type(op->src[0])
1961-
) {
1962-
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
1963-
return false;
1964-
}
1965-
if (op->src[1]->type == GGML_TYPE_F32) {
1966-
return true;
1967-
}
1968-
//if (op->src[1]->type == GGML_TYPE_Q8_0) {
1969-
// return true;
1970-
//}
1971-
// may be possible if Q8_0 packed...
1972-
} else if (op->op == GGML_OP_MUL_MAT_ID
1973-
&& op->src[0]->buffer
1974-
&& (ggml_n_dims(op->src[0]) == 3)
1975-
&& op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type()
1976-
&& ggml_repack_get_optimal_repack_type(op->src[0])
1977-
) {
1978-
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
1979-
return false;
1980-
}
1981-
if (op->src[1]->type == GGML_TYPE_F32) {
1982-
return true;
1983-
}
1984-
//if (op->src[1]->type == GGML_TYPE_Q8_0) {
1985-
// return true;
1986-
//}
1987-
}
1988-
return false;
1963+
return repack_supports_op(op);
19891964
}
19901965

19911966
ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {

0 commit comments

Comments
 (0)