diff --git a/.github/workflows/test-cpu-variants.yml b/.github/workflows/test-cpu-variants.yml
new file mode 100644
index 0000000000000..1c02a1fd731dd
--- /dev/null
+++ b/.github/workflows/test-cpu-variants.yml
@@ -0,0 +1,70 @@
+name: Test CPU Variants
+
+on:
+  workflow_dispatch:
+    inputs:
+      operation:
+        description: 'Operation to test (e.g., MUL_MAT or full spec)'
+        required: false
+        default: 'MUL_MAT'
+        type: string
+      variant:
+        description: 'CPU variant to test (leave empty to list available variants)'
+        required: false
+        default: ''
+        type: string
+
+jobs:
+  test-cpu-variant-sve:
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
+          sudo apt-get update
+          sudo apt-get install -y build-essential gcc-14 g++-14
+          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+          sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+          gcc --version
+
+      - name: Build with CPU reference backend
+        run: |
+          cmake -B build -S . \
+            -DGGML_CPU_REF_BACKEND=ON \
+            -DGGML_CPU_ALL_VARIANTS=ON \
+            -DGGML_CPU_REPACK=ON \
+            -DGGML_NATIVE=OFF \
+            -DGGML_BACKEND_DL=ON \
+            -DGGML_BLAS=OFF \
+            -DLLAMA_CURL=OFF \
+            -DCMAKE_BUILD_TYPE=Release
+
+          cmake --build build -j8
+
+      - name: List available CPU variants
+        run: |
+          echo "Available CPU variants:"
+          ./build/bin/test-backend-ops cpu-variants --list
+
+      - name: Test CPU variant
+        if: ${{ inputs.variant != '' }}
+        run: |
+          echo "Testing variant: ${{ inputs.variant }}"
+          echo "Operation: ${{ inputs.operation }}"
+          ./build/bin/test-backend-ops cpu-variants \
+            --variant "${{ inputs.variant }}" \
+            -o "${{ inputs.operation }}"
+
+      - name: Instructions
+        if: ${{ inputs.variant == '' }}
+        run: |
+          echo "=========================================="
+          echo "No variant specified - only listed available variants above"
+          echo "To test a specific variant, re-run this workflow with:"
+          echo "  - variant: one of the variants listed above"
+          echo "  - operation: your operation string (default: MUL_MAT)"
+          echo "=========================================="
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 6ce52ffc6698b..9bcee3e6b2645 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -252,8 +252,9 @@ set   (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
 set   (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
 
 # extra artifacts
-option(GGML_BUILD_TESTS    "ggml: build tests"    ${GGML_STANDALONE})
-option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
+option(GGML_BUILD_TESTS     "ggml: build tests"                             ${GGML_STANDALONE})
+option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
+option(GGML_BUILD_EXAMPLES  "ggml: build examples"                          ${GGML_STANDALONE})
 
 #
 # dependencies
@@ -283,7 +284,9 @@ add_subdirectory(src)
 
 if (GGML_BUILD_TESTS)
     enable_testing()
-    add_subdirectory(tests)
+    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests")
+        add_subdirectory(tests)
+    endif ()
 endif ()
 
 if (GGML_BUILD_EXAMPLES)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index f1b740785914e..29daeaa0a4c1d 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -245,6 +245,10 @@ extern "C" {
     // Load all known backends from dynamic libraries
     GGML_API void ggml_backend_load_all(void);
    GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
+    // Load all variants for a backend and register them
+    GGML_API void ggml_backend_load_all_variants(const char * name);
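+    // Load a single variant of a backend (e.g. name "cpu", variant "ref")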
+    GGML_API void ggml_backend_load_variant(const char * name, const char * variant);
 
     //
     // Backend scheduler
diff --git a/ggml/include/ggml-cpu.h b/ggml/include/ggml-cpu.h
index 9edd485136972..004acea318d4c 100644
--- a/ggml/include/ggml-cpu.h
+++ b/ggml/include/ggml-cpu.h
@@ -75,6 +75,7 @@ extern "C" {
 
     //
     // x86
+    GGML_BACKEND_API int ggml_cpu_has_sse2       (void);
     GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
     GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
     GGML_BACKEND_API int ggml_cpu_has_avx        (void);
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index c8f3d8596427c..09b7abc3c0c6f 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -388,6 +388,33 @@ ggml_add_backend(WebGPU)
 ggml_add_backend(zDNN)
 ggml_add_backend(OpenCL)
 
+if (GGML_CPU_REF_BACKEND)
+    if (NOT GGML_BACKEND_DL)
+        message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL")
+    endif()
+    set(GGML_SYSTEM_ARCH "cpu-ref")
+    set(GGML_LLAMAFILE OFF)
+    set(GGML_CPU_HBM OFF)
+    set(GGML_OPENMP OFF)
+    set(GGML_CPU_KLEIDIAI OFF)
+    set(GGML_CPU_REPACK OFF)
+    set(GGML_ACCELERATE OFF)
+
+    ggml_add_cpu_backend_variant(ref)
+
+    if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM|AARCH64")
+        target_compile_options(ggml-cpu-ref PRIVATE
+            -U__ARM_NEON
+            -U__ARM_FEATURE_FMA
+            -U__ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+            -U__ARM_FEATURE_DOTPROD
+            -U__ARM_FEATURE_MATMUL_INT8
+            -U__ARM_FEATURE_SVE
+        )
+    endif()
+    target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
+endif()
+
 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
     target_compile_features   (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 136afec748d96..69287cd335b56 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -605,4 +605,77 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     if (backend_path) {
         ggml_backend_load(backend_path);
     }
+#ifdef GGML_USE_CPU_REF
+    ggml_backend_load_best("cpu-ref", silent, dir_path);
+#endif
+}
+
+void ggml_backend_load_all_variants(const char * name) {
+    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
+    const fs::path name_path      = fs::u8path(name);
+    const fs::path file_prefix    = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
+    const fs::path file_extension = backend_filename_extension();
+
+    std::vector<fs::path> search_paths;
+#ifdef GGML_BACKEND_DIR
+    search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
+#endif
+    // default search paths: executable directory, current directory
+    search_paths.push_back(get_executable_path());
+    search_paths.push_back(fs::current_path());
+
+    for (const auto & search_path : search_paths) {
+        if (!fs::exists(search_path)) {
+            GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
+            continue;
+        }
+        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
+        for (const auto & entry : dir_it) {
+            if (entry.is_regular_file()) {
+                auto filename = entry.path().filename();
+                auto ext      = entry.path().extension();
+                if (filename.native().find(file_prefix.native()) == 0 && ext == file_extension) {
+                    fs::path path = search_path / filename;
+                    ggml_backend_reg_t backend = get_reg().load_backend(path, false);
+                    if (backend == nullptr) {
+                        GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(entry.path()).c_str());
+                    }
+                }
+            }
+        }
+    }
+}
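+
+// Loads a single backend variant; e.g. ggml_backend_load_variant("cpu", "ref")
+// resolves [lib]ggml-cpu-ref.[so|dll] in the same search paths used above.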
+void ggml_backend_load_variant(const char * name, const char * variant) {
+    const fs::path name_path       = fs::u8path(name);
+    const fs::path variant_path    = fs::u8path(variant);
+    const fs::path file_prefix     = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
+    const fs::path target_filename = file_prefix.native() + variant_path.native() + backend_filename_extension().native();
+
+    std::vector<fs::path> search_paths;
+#ifdef GGML_BACKEND_DIR
+    search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
+#endif
+    // default search paths: executable directory, current directory
+    search_paths.push_back(get_executable_path());
+    search_paths.push_back(fs::current_path());
+
+    for (const auto & search_path : search_paths) {
+        if (!fs::exists(search_path)) {
+            GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
+            continue;
+        }
+
+        fs::path full_path = search_path / target_filename;
+        if (fs::exists(full_path) && fs::is_regular_file(full_path)) {
+            ggml_backend_reg_t backend = get_reg().load_backend(full_path, false);
+            if (backend == nullptr) {
+                GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(full_path).c_str());
+            } else {
+                return;
+            }
+        }
+    }
 }
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 42041b717aa22..cbb81ec1e1a1e 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
     target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
     target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
 
+    if (tag_name)
+        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}")
+    else()
+        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU")
+    endif()
+
     if (APPLE AND GGML_ACCELERATE)
         find_library(ACCELERATE_FRAMEWORK Accelerate)
         if (ACCELERATE_FRAMEWORK)
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index eded6eb77ed69..ca4c66cca457a 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -3443,6 +3443,14 @@ int ggml_cpu_has_llamafile(void) {
 #endif
 }
 
+int ggml_cpu_has_sse2(void) {
+#if defined(__SSE2__)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_sse3(void) {
 #if defined(__SSE3__)
     return 1;
diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp
index 3191faaa4cd92..e7e151a622757 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -108,7 +108,7 @@ struct ggml_backend_cpu_context {
 };
 
 static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
-    return "CPU";
+    return GGML_CPU_VARIANT_NAME;
 
     GGML_UNUSED(backend);
 }
@@ -337,7 +337,7 @@ struct ggml_backend_cpu_device_context {
 };
 
 static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
-    return "CPU";
+    return GGML_CPU_VARIANT_NAME;
 
     GGML_UNUSED(dev);
 }
@@ -516,6 +516,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
     ggml_cpu_init();
 
     std::vector<ggml_backend_feature> features;
+    if (ggml_cpu_has_sse2()) {
+        features.push_back({ "SSE2", "1" });
+    }
     if (ggml_cpu_has_sse3()) {
         features.push_back({ "SSE3", "1" });
     }
diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp
index f531d21e23224..4419ee0c3659e 100644
--- a/ggml/src/ggml-cpu/repack.cpp
+++ b/ggml/src/ggml-cpu/repack.cpp
@@ -1920,7 +1920,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
     bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
         if (    op->op == GGML_OP_MUL_MAT &&
                 op->src[0]->buffer &&
-                (ggml_n_dims(op->src[0]) == 2) &&
+                ggml_n_dims(op->src[0]) == 2 &&
                 ggml_n_dims(op->src[1]) <= 2 &&
                 op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type() &&
                 ggml_repack_get_optimal_repack_type(op->src[0])
                 ) {
@@ -1936,7 +1936,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
             //  may be possible if Q8_0 packed...
         } else if (op->op == GGML_OP_MUL_MAT_ID
                 && op->src[0]->buffer
-                && (ggml_n_dims(op->src[0]) == 3)
+                && ggml_n_dims(op->src[0]) == 3
                 && ggml_n_dims(op->src[1]) <= 2
                 && op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type()
                 && ggml_repack_get_optimal_repack_type(op->src[0])
                 ) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d9cc5e933f4ce..3310902f4a05c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -202,6 +202,9 @@ if (NOT LLAMA_SANITIZE_ADDRESS)
 endif()
 llama_build_and_test(test-gguf.cpp)
 llama_build_and_test(test-backend-ops.cpp)
+target_sources(test-backend-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src/ggml.c)
+target_compile_definitions(test-backend-ops PRIVATE GGML_BUILD GGML_VERSION=\"${GGML_VERSION}\" GGML_COMMIT=\"${GGML_COMMIT}\")
+target_include_directories(test-backend-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
 
 llama_build_and_test(test-model-load-cancel.cpp  LABEL "model")
 llama_build_and_test(test-autorelease.cpp        LABEL "model")
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index c1e45972e54ca..84dffef1f268e 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -20,6 +20,8 @@
 #include <ggml-cpu.h>
 #include <ggml-opt.h>
+#include "ggml-impl.h"
+
 #include <algorithm>
 #include <array>
 #include <cfloat>
@@ -39,6 +41,7 @@
 #include <string>
 #include <thread>
 #include <vector>
+#include <unordered_map>
 
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     size_t nels = ggml_nelements(tensor);
@@ -315,6 +318,8 @@ static bool isinf_or_max(float f) {
     return _isinf(f) || f == FLT_MAX || f == -FLT_MAX;
 }
 
+using extra_buffer_map_t = std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t>;
+
 static bool ggml_is_view_op(enum ggml_op op) {
     return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
 }
@@ -324,6 +329,7 @@ enum test_mode {
     MODE_TEST,
     MODE_PERF,
     MODE_GRAD,
     MODE_SUPPORT,
+    MODE_CPU_VARIANTS,
 };
 
 // Output format support similar to llama-bench
@@ -1083,7 +1089,260 @@ struct test_case {
         }
     }
 
-    bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_names_filter, printer * output_printer) {
+    struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) {
+        struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor);
+        for (int i = 0; i < GGML_MAX_DIMS; i++) {
+            dup->nb[i] = tensor->nb[i];
+        }
+        return dup;
+    }
+
+    struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
+            struct ggml_context * ctx_allocated, struct ggml_context * ctx_unallocated, struct ggml_tensor * src) {
+
+        GGML_ASSERT(src != NULL);
+        GGML_ASSERT(src->data && "graph must be allocated");
+
+        size_t id = ggml_hash_insert(&hash_set, src);
+        if (id == GGML_HASHSET_ALREADY_EXISTS) {
+            return node_copies[ggml_hash_find(&hash_set, src)];
+        }
+
+        struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
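+        // tensors that own their storage go into ctx_allocated; views are duplicated
+        // into ctx_unallocated and get their storage later via ggml_backend_view_init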
+        if (src->view_src != NULL) {
+            dst->view_src = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, src->view_src);
+            dst->view_offs = src->view_offs;
+        }
+        dst->op = src->op;
+        memcpy(dst->op_params, src->op_params, sizeof(dst->op_params));
+        ggml_set_name(dst, src->name);
+
+        // copy src
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            struct ggml_tensor * s = src->src[i];
+            if (s == NULL) {
+                continue;
+            }
+            dst->src[i] = graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, s);
+        }
+
+        node_copies[id] = dst;
+        return dst;
+    }
+
+    void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
+        size_t id = ggml_hash_find(hash_set, src);
+        if (node_init[id]) {
+            return;
+        }
+        node_init[id] = true;
+
+        struct ggml_tensor * dst = node_copies[id];
+        if (dst->view_src != NULL) {
+            graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
+            enum ggml_status status = ggml_backend_view_init(dst);
+            GGML_ASSERT(status == GGML_STATUS_SUCCESS);
+        }
+        else {
+            ggml_backend_tensor_copy(src, dst);
+        }
+
+        // init src
+        for (int i = 0; i < GGML_MAX_SRC; i++) {
+            struct ggml_tensor * s = src->src[i];
+            if (s == NULL) {
+                continue;
+            }
+            graph_copy_init_tensor(hash_set, node_copies, node_init, s);
+        }
+    }
+
+    static void try_assign_extra_buffer(struct ggml_tensor * node_copy, const extra_buffer_map_t & extra_buf_map) {
+        struct ggml_tensor * src0_copy = node_copy->src[0];
+        ggml_backend_buffer_t org_buf = src0_copy->buffer;
+
+        for (const auto & [buft, buf] : extra_buf_map) {
+            // Initialize the tensor in the extra buffer
+            if (ggml_backend_buffer_init_tensor(buf, src0_copy) != GGML_STATUS_SUCCESS) {
+                continue;
+            }
+
+            if (!src0_copy->extra) {
+                continue;
+            }
+
+            // Temporarily assign buffer so we can call ggml_backend_dev_supports_op
+            src0_copy->buffer = buf;
+
+            ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft);
+            // Check if extra buffer type supports the operation
+            if (dev && ggml_backend_dev_supports_op(dev, node_copy)) {
+                return;
+            } else {
+                src0_copy->buffer = org_buf; // Restore original buffer if not supported
+            }
+        }
+    }
+
+    struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
+        GGML_ASSERT(graph);
+        struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
+        struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
+        bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0]));
+
+        struct ggml_init_params params = {
+            /* .mem_size   = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
+            /* .mem_buffer = */ NULL,
+            /* .no_alloc   = */ true
+        };
+
+        struct ggml_context * ctx_allocated   = ggml_init(params);
+        struct ggml_context * ctx_unallocated = ggml_init(params);
+
+        if (ctx_allocated == NULL || ctx_unallocated == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate context for graph copy\n", __func__);
+            ggml_hash_set_free(&hash_set);
+            free(node_copies);
+            free(node_init);
+            ggml_free(ctx_allocated);
+            ggml_free(ctx_unallocated);
+            return {
+                /* .buffer          = */ NULL,
+                /* .ctx_allocated   = */ NULL,
+                /* .ctx_unallocated = */ NULL,
+                /* .graph           = */ NULL,
+            };
+        }
+
+        // dup nodes
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
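+            // recursively duplicate this node and all of its sources into the copy contexts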
+            graph_copy_dup_tensor(hash_set, node_copies, ctx_allocated, ctx_unallocated, node);
+        }
+
+        // allocate nodes
+        ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
+
+        if (buffer == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__);
+            ggml_hash_set_free(&hash_set);
+            free(node_copies);
+            free(node_init);
+            ggml_free(ctx_allocated);
+            ggml_free(ctx_unallocated);
+            for (auto buft : extra_buf_map) {
+                ggml_backend_buffer_free(buft.second);
+            }
+            return {
+                /* .buffer          = */ NULL,
+                /* .ctx_allocated   = */ NULL,
+                /* .ctx_unallocated = */ NULL,
+                /* .graph           = */ NULL,
+            };
+        }
+
+        //printf("copy buffer size: %zu MB\n", ggml_backend_buffer_get_size(buffer) / 1024 / 1024);
+
+        // copy data and init views
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+
+            // Handle extra buffer types (before graph_copy_init_tensor)
+            if (node->op != GGML_OP_NONE && !ggml_is_view_op(node->op) && node->src[0]) {
+                size_t id = ggml_hash_find(&hash_set, node);
+                try_assign_extra_buffer(node_copies[id], extra_buf_map);
+            }
+
+            graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
+        }
+
+        // build graph copy
+        struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
+        for (int i = 0; i < graph->n_nodes; i++) {
+            struct ggml_tensor * node = graph->nodes[i];
+            struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
+            graph_copy->nodes[i] = node_copy;
+        }
+        graph_copy->n_nodes = graph->n_nodes;
+
+        ggml_hash_set_free(&hash_set);
+        free(node_copies);
+        free(node_init);
+
+        return {
+            /* .buffer          = */ buffer,
+            /* .ctx_allocated   = */ ctx_allocated,
+            /* .ctx_unallocated = */ ctx_unallocated,
+            /* .graph           = */ graph_copy,
+        };
+    }
+
+    bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2,
+            struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data,
+            struct ggml_tensor * test_node,
+            std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
+        struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph, extra_buf_map);
+        if (copy.buffer == NULL) {
+            return false;
+        }
+
+        struct ggml_cgraph * g1 = graph;
+        struct ggml_cgraph * g2 = copy.graph;
+
+        assert(g1->n_nodes == g2->n_nodes);
+
+        if (test_node != nullptr) {
+            // Compute the whole graph and only test the output for a specific tensor
+            ggml_backend_graph_compute(backend1, g1);
+            ggml_backend_graph_compute(backend2, g2);
+
+            int test_node_idx = -1;
+            for (int i = 0; i < g1->n_nodes; i++) {
+                struct ggml_tensor * t1 = g1->nodes[i];
+                if (t1 == test_node) {
+                    test_node_idx = i;
+                    break;
+                }
+            }
+            GGML_ASSERT(test_node_idx != -1);
+
+            callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+        } else {
+            for (int i = 0; i < g1->n_nodes; i++) {
+                struct ggml_tensor * t1 = g1->nodes[i];
+                struct ggml_tensor * t2 = g2->nodes[i];
+
+                assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
+
+                struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
+                struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
+
+                ggml_backend_graph_compute(backend1, &g1v);
+                ggml_backend_graph_compute(backend2, &g2v);
+
+                if (ggml_is_view_op(t1->op)) {
+                    continue;
+                }
+
+                // compare results, calculate rms etc
+                if (!callback(i, t1, t2, user_data)) {
+                    break;
+                }
+            }
+        }
+        ggml_backend_graph_copy_free(copy);
+
+        return true;
+    }
+
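+    // Builds the graph on backend1, copies it to backend2 and compares the per-node
+    // outputs; extra_buf_map optionally places src0 tensors in extra buffer types.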
+    bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_names_filter, printer * output_printer,
+              std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map) {
         mode = MODE_TEST;
 
         ggml_init_params params = {
@@ -1137,12 +1396,6 @@ struct test_case {
 
         // allocate
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
-        if (buf == NULL) {
-            printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
-            ggml_free(ctx);
-            return false;
-        }
-
         // build graph
         ggml_build_forward_expand(gf, out);
@@ -1229,7 +1482,8 @@ struct test_case {
             GGML_UNUSED(index);
         };
 
-        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
+        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback,
+                                                               &ud, run_whole_graph() ? out : nullptr, extra_buf_map);
 
         ggml_backend_buffer_free(buf);
 
@@ -6961,7 +7215,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_names_filter,
 
     size_t n_ok = 0;
     for (auto & test : test_cases) {
-        if (test->eval(backend, backend_cpu, op_names_filter, output_printer)) {
+        if (test->eval(backend, backend_cpu, op_names_filter, output_printer, {})) {
             n_ok++;
         }
     }
@@ -7099,18 +7353,164 @@ static void show_test_coverage() {
     printf("  Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0);
 }
 
+static void print_backend_features(ggml_backend_t backend) {
+    auto device = ggml_backend_get_device(backend);
+    auto reg    = ggml_backend_dev_backend_reg(device);
+    auto name   = ggml_backend_dev_name(device);
+    auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
+    if (get_features_fn) {
+        ggml_backend_feature * features = get_features_fn(reg);
+        printf("%s features:\n", name);
+        if (features->name == nullptr) {
+            printf("  (no features reported)\n");
+        } else {
+            for (; features->name; features++) {
+                printf("  %s = %s\n", features->name, features->value);
+            }
+        }
+    }
+}
+
+static extra_buffer_map_t load_cpu_extra_bufts() {
+    auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+    auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
+
+    std::unordered_map<ggml_backend_buffer_type_t, ggml_backend_buffer_t> extra_buf_map;
+    auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
+        ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
+    if (ggml_backend_dev_get_extra_bufts_fn) {
+        ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn(cpu_dev);
+        while (extra_bufts && *extra_bufts) {
+            // TODO: What should the size be here? Do extra buffer types need a size even?
+            // We need to have a value larger than 0 to avoid the dummy backend buffer being used.
+            extra_buf_map[*extra_bufts] = ggml_backend_buft_alloc_buffer(*extra_bufts, 1);
+            ++extra_bufts;
+        }
+    }
+    return extra_buf_map;
+}
+
+static bool test_cpu_variant(const char * variant_name, const char * op_names_filter,
+                             const char * params_filter, printer * output_printer) {
+    // Load the variant first so that the extra buffer types created use only that
+    // backend, since the features of the backend can determine whether the extra
+    // buffer types are enabled or not.
+    ggml_backend_load_variant("cpu", std::string(variant_name).substr(4).c_str());
+
+    // Load extra buffer types and allocate a buffer from each type.
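+    // (these allow the comparison to exercise extra-buffer paths such as repack)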
+    auto extra_buf_map = load_cpu_extra_bufts();
+
+    printf("\n");
+    for (auto buft : extra_buf_map) {
+        printf("Using extra buffer type: %s\n", ggml_backend_buft_name(buft.first));
+    }
+    printf("\n");
+
+    std::string backend_ref_name = "CPU-ref";
+    ggml_backend_load_variant("cpu", std::string(backend_ref_name).substr(4).c_str());
+
+    ggml_backend_t backend_ref = ggml_backend_init_by_name(backend_ref_name.c_str(), nullptr);
+    if (backend_ref == nullptr) {
+        printf("Error: CPU-ref backend not found. Make sure it's built and available.\n");
+        return false;
+    }
+    print_backend_features(backend_ref);
+
+    ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr);
+    if (backend_variant == nullptr) {
+        printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name);
+        printf("Use --list to see available variants.\n");
+        ggml_backend_free(backend_ref);
+        return false;
+    }
+    print_backend_features(backend_variant);
+
+    printf("Testing CPU variant '%s' against '%s' backend...\n\n", variant_name, backend_ref_name.c_str());
+
+    auto test_cases = make_test_cases_eval();
+
+    if (params_filter != nullptr) {
+        std::regex regex(params_filter);
+        auto it = test_cases.begin();
+        while (it != test_cases.end()) {
+            std::string test_params = (*it)->vars();
+            if (!std::regex_search(test_params, regex)) {
+                it = test_cases.erase(it);
+            } else {
+                it++;
+            }
+        }
+    }
+
+    size_t n_ok = 0;
+    for (auto & test : test_cases) {
+        // Switch the order so that we copy from the reference backend to the
+        // variant backend.
+        if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer, extra_buf_map)) {
+            n_ok++;
+        }
+    }
+
+    output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false));
+
+    ggml_backend_free(backend_variant);
+    ggml_backend_free(backend_ref);
+
+    for (auto buft : extra_buf_map) {
+        ggml_backend_buffer_free(buft.second);
+    }
+
+    return n_ok == test_cases.size();
+}
+
+static void list_cpu_variants() {
+    std::unordered_map<std::string, std::string> variant_names;
+    ggml_backend_load_all_variants("cpu");
+
+    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
+        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
+        if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
+            const char * name = ggml_backend_dev_name(dev);
+            if (strcmp(name, "CPU-ref") != 0) {
+                variant_names.emplace(name, ggml_backend_dev_description(dev));
+            }
+        }
+    }
+
+    if (variant_names.size() == 0) {
+        printf("No CPU backend variants found. To enable CPU variants, rebuild with:\n");
+        printf("  cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n");
+        return;
+    }
+
+    printf("CPU variants:\n");
+    for (const auto & it : variant_names) {
+        printf("  %-15s - %s\n", it.first.c_str(), it.second.c_str());
+    }
+}
+
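+// Example invocations of the cpu-variants mode (variant names as reported by --list):
+//   test-backend-ops cpu-variants --list
+//   test-backend-ops cpu-variants --variant CPU-sse42 -o MUL_MAT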
\"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n"); printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); printf(" --list-ops lists all available GGML operations\n"); + printf(" --list-cpu-variants lists all available CPU backend variants\n"); printf(" --show-coverage shows test coverage\n"); + printf(" cpu-variants mode options:\n"); + printf(" --list lists available CPU variants on this system\n"); + printf(" --variant test specific CPU variant against cpu-ref backend\n"); } int main(int argc, char ** argv) { @@ -7119,6 +7511,7 @@ int main(int argc, char ** argv) { const char * op_names_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; + const char * cpu_variant_name = nullptr; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -7129,6 +7522,8 @@ int main(int argc, char ** argv) { mode = MODE_GRAD; } else if (strcmp(argv[i], "support") == 0) { mode = MODE_SUPPORT; + } else if (strcmp(argv[i], "cpu-variants") == 0) { + mode = MODE_CPU_VARIANTS; } else if (strcmp(argv[i], "-o") == 0) { if (i + 1 < argc) { op_names_filter = argv[++i]; @@ -7163,6 +7558,16 @@ int main(int argc, char ** argv) { } else if (strcmp(argv[i], "--list-ops") == 0) { list_all_ops(); return 0; + } else if (strcmp(argv[i], "--list") == 0) { + list_cpu_variants(); + return 0; + } else if (strcmp(argv[i], "--variant") == 0) { + if (i + 1 < argc) { + cpu_variant_name = argv[++i]; + } else { + usage(argv); + return 1; + } } else if (strcmp(argv[i], "--show-coverage") == 0) { show_test_coverage(); return 0; @@ -7172,8 +7577,6 @@ int main(int argc, char ** argv) { } } - // load and enumerate backends - ggml_backend_load_all(); // Create printer for output format std::unique_ptr output_printer = create_printer(output_format); @@ -7181,6 +7584,19 @@ int main(int argc, char ** argv) { output_printer->print_header(); } + if (mode == MODE_CPU_VARIANTS) { + if (cpu_variant_name == nullptr) { + printf("Error: cpu-variants mode requires --variant or --list\n"); + usage(argv); + return 1; + } + + return test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1; + } + + // load and enumerate backends + ggml_backend_load_all(); + output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count())); size_t n_ok = 0;