From bfaa676b0841617d4ef3596e63aca6be1a8eb1b5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 12 Jun 2024 15:24:20 +0300 Subject: [PATCH] ggml : improve ggml_is_contiguous logic (#7856) * ggml : improve ggml_is_contiguous logic ggml-ci * ggml : support more contiguous cases ggml-ci --- ggml.c | 75 +++++++++++++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/ggml.c b/ggml.c index 1fc77743bc7b9..5fb9e9a327d22 100644 --- a/ggml.c +++ b/ggml.c @@ -3212,35 +3212,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) { return tensor->nb[0] > tensor->nb[1]; } -GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) { - static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); +static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) { + size_t next_nb = ggml_type_size(tensor->type); + if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) { + return false; + } + next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + if (tensor->ne[i] != 1) { + if (i > n) { + if (tensor->nb[i] != next_nb) { + return false; + } + next_nb *= tensor->ne[i]; + } else { + // this dimension does not need to be contiguous + next_nb = tensor->ne[i]*tensor->nb[i]; + } + } + } + return true; +} - return - tensor->nb[0] == ggml_type_size(tensor->type) && - tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && - tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; +GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) { + return ggml_is_contiguous_0(tensor); } GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) { - return ggml_is_contiguous(tensor); + return ggml_is_contiguous_n(tensor, 0); } GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) { - static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); - - return - tensor->nb[0] == ggml_type_size(tensor->type) && - tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; + return ggml_is_contiguous_n(tensor, 1); } GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) { - static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); - - return - tensor->nb[0] == ggml_type_size(tensor->type) && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; + return ggml_is_contiguous_n(tensor, 2); } GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) { @@ -3272,20 +3279,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return - (t0->ne[0] == t1->ne[0] ) && - (t0->ne[1] == t1->ne[1] ) && - (t0->ne[2] == t1->ne[2] ) && - (t0->ne[3] == t1->ne[3] ); + (t0->ne[0] == t1->ne[0]) && + (t0->ne[1] == t1->ne[1]) && + (t0->ne[2] == t1->ne[2]) && + (t0->ne[3] == t1->ne[3]); } bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return - (t0->nb[0] == t1->nb[0] ) && - (t0->nb[1] == t1->nb[1] ) && - (t0->nb[2] == t1->nb[2] ) && - (t0->nb[3] == t1->nb[3] ); + (t0->nb[0] == t1->nb[0]) && + (t0->nb[1] == t1->nb[1]) && + (t0->nb[2] == t1->nb[2]) && + (t0->nb[3] == t1->nb[3]); } // check if t1 can be represented as a repeatition of t0 @@ -4078,32 +4085,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) { switch (tensor->type) { case GGML_TYPE_I8: { - GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); return ((int8_t *)(tensor->data))[i]; } case GGML_TYPE_I16: { - GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); return ((int16_t *)(tensor->data))[i]; } case GGML_TYPE_I32: { - GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); return ((int32_t *)(tensor->data))[i]; } case GGML_TYPE_F16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); } case GGML_TYPE_BF16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t)); return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]); } case GGML_TYPE_F32: { - GGML_ASSERT(tensor->nb[0] == sizeof(float)); return ((float *)(tensor->data))[i]; } default: @@ -4125,32 +4126,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) { switch (tensor->type) { case GGML_TYPE_I8: { - GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); ((int8_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I16: { - GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); ((int16_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I32: { - GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); ((int32_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_F16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); } break; case GGML_TYPE_BF16: { - GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t)); ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value); } break; case GGML_TYPE_F32: { - GGML_ASSERT(tensor->nb[0] == sizeof(float)); ((float *)(tensor->data))[i] = value; } break; default: @@ -7343,7 +7338,7 @@ struct ggml_tensor * ggml_add_rel_pos_inplace( return ggml_add_rel_pos_impl(ctx, a, pw, ph, true); } -// gmml_unary +// ggml_unary static struct ggml_tensor * ggml_unary_impl( struct ggml_context * ctx,