From a5096626ef286015f0081b5fe0edc9ac89d156df Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 15 Sep 2025 12:07:15 +0200 Subject: [PATCH 1/6] ggml : add CPU backend reference implementation (wip) This commit introduces a CPU reference implementation for GGML, designed primarily for testing and validation purposes. The motivation for this addition is to have a pure C CPU backend implementation that does not use any hardware-specific optimizations or intrinsics. This will allow for testing the CPU backend variants against the reference implementation to ensure correctness --- ggml/CMakeLists.txt | 1 + ggml/src/CMakeLists.txt | 9 +++ ggml/src/ggml-backend-reg.cpp | 3 + ggml/src/ggml-cpu/CMakeLists.txt | 6 ++ ggml/src/ggml-cpu/ggml-cpu.cpp | 2 +- tests/CMakeLists.txt | 9 +++ tests/test-backend-ops.cpp | 113 ++++++++++++++++++++++++++++++- 7 files changed, 141 insertions(+), 2 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index d06464f5eba5e..31187913c235c 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -220,6 +220,7 @@ set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file # extra artifacts option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) +option(GGML_CPU_REF "ggml: build reference CPU backend for testing" OFF) option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) # diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 2b5b8169d7513..d833edbb1e738 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -385,6 +385,15 @@ ggml_add_backend(WebGPU) ggml_add_backend(zDNN) ggml_add_backend(OpenCL) +if (GGML_CPU_REF_BACKEND) + if (NOT GGML_BACKEND_DL) + message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL") + endif() + set(DGGML_CPU_GENERIC ON) + ggml_add_cpu_backend_variant_impl(ref) + target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF) +endif() + foreach (target ggml-base ggml) target_include_directories(${target} PUBLIC $ $) 
target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 7002cb07e0015..3d7c5c943ca79 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -596,4 +596,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) { if (backend_path) { ggml_backend_load(backend_path); } +#ifdef GGML_USE_CPU_REF + ggml_backend_load_best("cpu-ref", silent, dir_path); +#endif } diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 369905750754f..83032e0ca11d9 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name) target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17) target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu) + if (tag_name) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}") + else() + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU") + endif() + if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) if (ACCELERATE_FRAMEWORK) diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index 2b81f8b9afa22..a44dd4a62a1d0 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -327,7 +327,7 @@ struct ggml_backend_cpu_device_context { }; static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) { - return "CPU"; + return GGML_CPU_VARIANT_NAME; GGML_UNUSED(dev); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 91719577564a9..cbb61a4737d98 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -199,6 +199,15 @@ endif() llama_build_and_test(test-gguf.cpp) llama_build_and_test(test-backend-ops.cpp) +if (GGML_CPU_REF_BACKEND) + if (WIN32) + set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/ggml-cpu-ref.dll") 
+ else() + set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/libggml-cpu-ref.so") + endif() + target_compile_definitions(test-backend-ops PRIVATE GGML_CPU_REF_BACKEND_PATH="${GGML_CPU_REF_BACKEND_PATH}") +endif() + llama_build_and_test(test-model-load-cancel.cpp LABEL "model") llama_build_and_test(test-autorelease.cpp LABEL "model") diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index b54a1a4e823f9..01f28d88143a1 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -39,6 +39,7 @@ #include #include #include +#include static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { size_t nels = ggml_nelements(tensor); @@ -324,6 +325,7 @@ enum test_mode { MODE_PERF, MODE_GRAD, MODE_SUPPORT, + MODE_CPU_VARIANTS, }; // Output format support similar to llama-bench @@ -6880,18 +6882,99 @@ static void show_test_coverage() { printf(" Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0); } +static bool test_cpu_variant(const char * variant_name, const char * op_names_filter, + const char * params_filter, printer * output_printer) { + + ggml_backend_t backend_ref = ggml_backend_init_by_name("CPU-ref", nullptr); + if (backend_ref == nullptr) { + printf("Error: CPU-ref backend not found. 
Make sure it's built and available.\n"); + return false; + } + + ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr); + if (backend_variant == nullptr) { + printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name); + printf("Use --list to see available variants.\n"); + ggml_backend_free(backend_ref); + return false; + } + + printf("Testing CPU variant '%s' against cpu-ref backend...\n\n", variant_name); + + auto test_cases = make_test_cases_eval(); + + if (params_filter != nullptr) { + std::regex regex(params_filter); + auto it = test_cases.begin(); + while (it != test_cases.end()) { + std::string test_params = (*it)->vars(); + if (!std::regex_search(test_params, regex)) { + it = test_cases.erase(it); + } else { + it++; + } + } + } + + size_t n_ok = 0; + for (auto & test : test_cases) { + if (test->eval(backend_variant, backend_ref, op_names_filter, output_printer)) { + n_ok++; + } + } + + output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false)); + + ggml_backend_free(backend_variant); + ggml_backend_free(backend_ref); + + return n_ok == test_cases.size(); +} + +static void list_cpu_variants() { + std::unordered_map variant_names; + for (size_t i = 0; i < ggml_backend_reg_count(); i++) { + ggml_backend_reg_t reg = ggml_backend_reg_get(i); + if (strstr(ggml_backend_reg_name(reg), "CPU") != nullptr) { + for (size_t j = 0; j < ggml_backend_reg_dev_count(reg); j++) { + ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, j); + const char * name = ggml_backend_dev_name(dev); + if (strcmp(name, "CPU-ref") != 0) { + variant_names.emplace(name, ggml_backend_dev_description(dev)); + } + } + } + } + + if (variant_names.size() == 0) { + printf("No CPU backend variants found. 
To enable CPU variants, rebuild with:\n"); + printf(" cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n"); + return; + } + + printf("CPU variants:\n"); + for (const auto & it : variant_names) { + printf(" %-15s - %s\n", it.first.c_str(), it.second.c_str()); + } +} + static void usage(char ** argv) { - printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--show-coverage]\n", argv[0]); + printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--list-cpu-variants] [--show-coverage]\n", argv[0]); printf(" valid modes:\n"); printf(" - test (default, compare with CPU backend for correctness)\n"); printf(" - grad (compare gradients from backpropagation with method of finite differences)\n"); printf(" - perf (performance evaluation)\n"); printf(" - support (probe backend operation support)\n"); + printf(" - cpu-variants (test CPU variants against cpu-ref backend)\n"); printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc),\n"); printf(" optionally including the full test case string (e.g. 
\"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n"); printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); printf(" --list-ops lists all available GGML operations\n"); + printf(" --list-cpu-variants lists all available CPU backend variants\n"); printf(" --show-coverage shows test coverage\n"); + printf(" cpu-variants mode options:\n"); + printf(" --list lists available CPU variants on this system\n"); + printf(" --variant test specific CPU variant against cpu-ref backend\n"); } int main(int argc, char ** argv) { @@ -6900,6 +6983,8 @@ int main(int argc, char ** argv) { const char * op_names_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; + const char * cpu_variant_name = nullptr; + bool list_variants_flag = false; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -6910,6 +6995,8 @@ int main(int argc, char ** argv) { mode = MODE_GRAD; } else if (strcmp(argv[i], "support") == 0) { mode = MODE_SUPPORT; + } else if (strcmp(argv[i], "cpu-variants") == 0) { + mode = MODE_CPU_VARIANTS; } else if (strcmp(argv[i], "-o") == 0) { if (i + 1 < argc) { op_names_filter = argv[++i]; @@ -6944,6 +7031,15 @@ int main(int argc, char ** argv) { } else if (strcmp(argv[i], "--list-ops") == 0) { list_all_ops(); return 0; + } else if (strcmp(argv[i], "--list") == 0) { + list_variants_flag = true; + } else if (strcmp(argv[i], "--variant") == 0) { + if (i + 1 < argc) { + cpu_variant_name = argv[++i]; + } else { + usage(argv); + return 1; + } } else if (strcmp(argv[i], "--show-coverage") == 0) { show_test_coverage(); return 0; @@ -6962,6 +7058,21 @@ int main(int argc, char ** argv) { output_printer->print_header(); } + if (mode == MODE_CPU_VARIANTS) { + if (list_variants_flag) { + list_cpu_variants(); + return 0; + } + + if (cpu_variant_name == nullptr) { + printf("Error: cpu-variants mode requires --variant or --list\n"); + usage(argv); + return 1; + } + + return 
test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1; + } + output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count())); size_t n_ok = 0; From adb57c9f082550f078c755a5dfbd4fbbae0105eb Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 17 Sep 2025 10:06:10 +0200 Subject: [PATCH 2/6] move list_cpu_variants() to be called directly --- tests/test-backend-ops.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 01f28d88143a1..c25d092652889 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -6932,6 +6932,8 @@ static bool test_cpu_variant(const char * variant_name, const char * op_names_fi } static void list_cpu_variants() { + ggml_backend_load_all(); + std::unordered_map variant_names; for (size_t i = 0; i < ggml_backend_reg_count(); i++) { ggml_backend_reg_t reg = ggml_backend_reg_get(i); @@ -6984,7 +6986,6 @@ int main(int argc, char ** argv) { const char * backend_filter = nullptr; const char * params_filter = nullptr; const char * cpu_variant_name = nullptr; - bool list_variants_flag = false; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -7032,7 +7033,8 @@ int main(int argc, char ** argv) { list_all_ops(); return 0; } else if (strcmp(argv[i], "--list") == 0) { - list_variants_flag = true; + list_cpu_variants(); + return 0; } else if (strcmp(argv[i], "--variant") == 0) { if (i + 1 < argc) { cpu_variant_name = argv[++i]; @@ -7059,11 +7061,6 @@ int main(int argc, char ** argv) { } if (mode == MODE_CPU_VARIANTS) { - if (list_variants_flag) { - list_cpu_variants(); - return 0; - } - if (cpu_variant_name == nullptr) { printf("Error: cpu-variants mode requires --variant or --list\n"); usage(argv); From d7fd378c9ea446d28542e8dabfdd1f0e78a000da Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Tue, 23 Sep 2025 12:43:31 +0200 Subject: [PATCH 3/6] fix GGML_CPU_REF 
option name in CMakeLists.txt --- ggml/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 31187913c235c..7fbc07f9a9852 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -219,9 +219,9 @@ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen") # extra artifacts -option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) -option(GGML_CPU_REF "ggml: build reference CPU backend for testing" OFF) -option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) +option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) +option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF) +option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) # # dependencies From 8530df56a7b450ed14f732ba238f81a05ce5ccdc Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 24 Sep 2025 15:12:24 +0200 Subject: [PATCH 4/6] set GGML_SYSTEM_ARCH to cpu-ref This commit sets the `GGML_SYSTEM_ARCH` variable in the CMakeLists.txt file to `cpu-ref` to force a generic CPU architecture.
--- ggml/src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index d833edbb1e738..6b3c63e2188ed 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -389,7 +389,7 @@ if (GGML_CPU_REF_BACKEND) if (NOT GGML_BACKEND_DL) message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL") endif() - set(DGGML_CPU_GENERIC ON) + set(GGML_SYSTEM_ARCH "cpu-ref") ggml_add_cpu_backend_variant_impl(ref) target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF) endif() From 32f8fc3b4d909aaf1ed96b1495084990ed25c78c Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 24 Sep 2025 16:36:52 +0200 Subject: [PATCH 5/6] disable GGML_LLAMAFILE for cpu ref backend --- ggml/src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 6b3c63e2188ed..9279792c8d146 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -390,6 +390,7 @@ if (GGML_CPU_REF_BACKEND) message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL") endif() set(GGML_SYSTEM_ARCH "cpu-ref") + set(GGML_LLAMAFILE OFF) ggml_add_cpu_backend_variant_impl(ref) target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF) endif() From 9601e7aad37c9be2ecee2828dad61e29e2ae52d8 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 25 Sep 2025 05:53:00 +0200 Subject: [PATCH 6/6] disable HBM, OpenMP, KleidiAI for CPU ref backend. --- ggml/src/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 9279792c8d146..2d33ce8628549 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -391,6 +391,9 @@ if (GGML_CPU_REF_BACKEND) endif() set(GGML_SYSTEM_ARCH "cpu-ref") set(GGML_LLAMAFILE OFF) + set(GGML_CPU_HBM OFF) + set(GGML_OPENMP OFF) + set(GGML_CPU_KLEIDIAI OFF) ggml_add_cpu_backend_variant_impl(ref) target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF) endif()