@@ -595,7 +595,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
595595}
596596
597597static bool ggml_gallocr_is_allocated (ggml_gallocr_t galloc , struct ggml_tensor * t ) {
598- return t -> data != NULL || ggml_gallocr_hash_get (galloc , t )-> allocated ;
598+ return t -> data != NULL // tensor data already set externally
599+ || t -> buffer // tensor on external buffer (but not yet allocated)
600+ || ggml_gallocr_is_own (galloc , t ); // tensor will be allocated by galloc
599601}
600602
601603static void ggml_gallocr_allocate_node (ggml_gallocr_t galloc , struct ggml_tensor * node , int buffer_id ) {
@@ -791,7 +793,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
791793 }
792794}
793795
794- bool ggml_gallocr_reserve_n (ggml_gallocr_t galloc , struct ggml_cgraph * graph , const int * node_buffer_ids , const int * leaf_buffer_ids ) {
796+ static bool ggml_gallocr_reserve_n_impl (
797+ ggml_gallocr_t galloc , struct ggml_cgraph * graph , const int * node_buffer_ids , const int * leaf_buffer_ids , bool no_alloc ) {
795798 size_t min_hash_size = graph -> n_nodes + graph -> n_leafs ;
796799 // add 25% margin to avoid hash collisions
797800 min_hash_size += min_hash_size / 4 ;
@@ -893,21 +896,41 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
893896 if (realloc ) {
894897#ifndef NDEBUG
895898 size_t cur_size = galloc -> buffers [i ] ? ggml_vbuffer_size (galloc -> buffers [i ]) : 0 ;
896- GGML_LOG_DEBUG ("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size / 1024.0 / 1024.0 , new_size / 1024.0 / 1024.0 );
899+ GGML_LOG_DEBUG ("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" ,
900+ __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size / 1024.0 / 1024.0 , new_size / 1024.0 / 1024.0 );
897901#endif
898902
899903 ggml_vbuffer_free (galloc -> buffers [i ]);
900- galloc -> buffers [i ] = ggml_vbuffer_alloc (galloc -> bufts [i ], galloc -> buf_tallocs [i ], GGML_BACKEND_BUFFER_USAGE_COMPUTE );
901- if (galloc -> buffers [i ] == NULL ) {
902- GGML_LOG_ERROR ("%s: failed to allocate %s buffer of size %zu\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), new_size );
903- return false;
904+ if (no_alloc ) {
905+ galloc -> buffers [i ] = NULL ;
906+ } else {
907+ galloc -> buffers [i ] = ggml_vbuffer_alloc (galloc -> bufts [i ], galloc -> buf_tallocs [i ], GGML_BACKEND_BUFFER_USAGE_COMPUTE );
908+ if (galloc -> buffers [i ] == NULL ) {
909+ GGML_LOG_ERROR ("%s: failed to allocate %s buffer of size %zu\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), new_size );
910+ return false;
911+ }
904912 }
905913 }
906914 }
907915
908916 return true;
909917}
910918
919+ void ggml_gallocr_reserve_n_size (
920+ ggml_gallocr_t galloc , struct ggml_cgraph * graph , const int * node_buffer_ids , const int * leaf_buffer_ids , size_t * sizes ) {
921+ GGML_ASSERT (ggml_gallocr_reserve_n_impl (galloc , graph , node_buffer_ids , leaf_buffer_ids , /*no_alloc =*/ true));
922+ for (int i = 0 ; i < galloc -> n_buffers ; i ++ ) {
923+ sizes [i ] = 0 ;
924+ for (int c = 0 ; c < galloc -> buf_tallocs [i ]-> n_chunks ; c ++ ) {
925+ sizes [i ] += galloc -> buf_tallocs [i ]-> chunks [c ]-> max_size ;
926+ }
927+ }
928+ }
929+
930+ bool ggml_gallocr_reserve_n (ggml_gallocr_t galloc , struct ggml_cgraph * graph , const int * node_buffer_ids , const int * leaf_buffer_ids ) {
931+ return ggml_gallocr_reserve_n_impl (galloc , graph , node_buffer_ids , leaf_buffer_ids , /*no_alloc =*/ false);
932+ }
933+
911934bool ggml_gallocr_reserve (ggml_gallocr_t galloc , struct ggml_cgraph * graph ) {
912935 return ggml_gallocr_reserve_n (galloc , graph , NULL , NULL );
913936}
@@ -1110,14 +1133,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
11101133 return true;
11111134}
11121135
1113- ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft (struct ggml_context * ctx , ggml_backend_buffer_type_t buft ) {
1136+ static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl (
1137+ struct ggml_context * ctx , ggml_backend_buffer_type_t buft , size_t * nbytes_total , bool no_alloc ) {
11141138 GGML_ASSERT (ggml_get_no_alloc (ctx ) == true);
11151139
11161140 size_t alignment = ggml_backend_buft_get_alignment (buft );
11171141 size_t max_size = ggml_backend_buft_get_max_size (buft );
11181142
11191143 ggml_backend_buffer_t * buffers = NULL ;
11201144 size_t n_buffers = 0 ;
1145+ * nbytes_total = 0 ;
11211146
11221147 size_t cur_buf_size = 0 ;
11231148 struct ggml_tensor * first = ggml_get_first_tensor (ctx );
@@ -1129,10 +1154,11 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11291154
11301155 if (cur_buf_size > 0 && (cur_buf_size + this_size ) > max_size ) {
11311156 // allocate tensors in the current buffer
1132- if (!alloc_tensor_range (ctx , first , t , buft , cur_buf_size , & buffers , & n_buffers )) {
1157+ if (!no_alloc && ! alloc_tensor_range (ctx , first , t , buft , cur_buf_size , & buffers , & n_buffers )) {
11331158 return NULL ;
11341159 }
11351160 first = t ;
1161+ * nbytes_total += cur_buf_size ;
11361162 cur_buf_size = this_size ;
11371163 } else {
11381164 cur_buf_size += this_size ;
@@ -1141,15 +1167,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11411167
11421168 // allocate remaining tensors
11431169 if (cur_buf_size > 0 ) {
1144- if (!alloc_tensor_range (ctx , first , NULL , buft , cur_buf_size , & buffers , & n_buffers )) {
1170+ * nbytes_total += cur_buf_size ;
1171+ if (!no_alloc && !alloc_tensor_range (ctx , first , NULL , buft , cur_buf_size , & buffers , & n_buffers )) {
11451172 return NULL ;
11461173 }
11471174 }
11481175
1176+ if (no_alloc ) {
1177+ return NULL ;
1178+ }
1179+
11491180 if (n_buffers == 0 ) {
11501181#ifndef NDEBUG
11511182 GGML_LOG_DEBUG ("%s: all tensors in the context are already allocated\n" , __func__ );
11521183#endif
1184+ GGML_ASSERT (!buffers );
11531185 return NULL ;
11541186 }
11551187
@@ -1159,10 +1191,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11591191 } else {
11601192 buffer = ggml_backend_multi_buffer_alloc_buffer (buffers , n_buffers );
11611193 }
1162- free (buffers );
1194+ if (buffers ) {
1195+ free (buffers ); // can be NULL if context is empty or no_alloc
1196+ }
11631197 return buffer ;
11641198}
11651199
1200+ size_t ggml_backend_alloc_ctx_tensors_from_buft_size (struct ggml_context * ctx , ggml_backend_buffer_type_t buft ) {
1201+ size_t nbytes_total = 0 ;
1202+ ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl (ctx , buft , & nbytes_total , /*no_alloc=*/ true);
1203+ GGML_ASSERT (!buf );
1204+ return nbytes_total ;
1205+ }
1206+
1207+ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft (struct ggml_context * ctx , ggml_backend_buffer_type_t buft ) {
1208+ size_t nbytes_total = 0 ;
1209+ return ggml_backend_alloc_ctx_tensors_from_buft_impl (ctx , buft , & nbytes_total , /*no_alloc =*/ false);
1210+ }
1211+
11661212ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors (struct ggml_context * ctx , ggml_backend_t backend ) {
11671213 return ggml_backend_alloc_ctx_tensors_from_buft (ctx , ggml_backend_get_default_buffer_type (backend ));
11681214}