diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index d06464f5eba5e..31187913c235c 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -220,6 +220,7 @@ set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file
 
 # extra artifacts
 option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
+option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
 option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
 
 #
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 2b5b8169d7513..d833edbb1e738 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -385,6 +385,18 @@ ggml_add_backend(WebGPU)
 ggml_add_backend(zDNN)
 ggml_add_backend(OpenCL)
 
+# reference CPU backend variant ("ref"); configuration is rejected unless GGML_BACKEND_DL is enabled
+if (GGML_CPU_REF_BACKEND)
+    if (NOT GGML_BACKEND_DL)
+        message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL")
+    endif()
+    # NOTE(review): "DGGML_CPU_GENERIC" looks like a leftover "-D" command-line prefix; as written this
+    # sets a CMake variable literally named DGGML_CPU_GENERIC - confirm which variable is actually read
+    set(DGGML_CPU_GENERIC ON)
+    ggml_add_cpu_backend_variant_impl(ref)
+    target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
+endif()
+
 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC $ $)
     target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 7002cb07e0015..3d7c5c943ca79 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -596,4 +596,8 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     if (backend_path) {
         ggml_backend_load(backend_path);
     }
+#ifdef GGML_USE_CPU_REF
+    // GGML_USE_CPU_REF is defined on the ggml target when GGML_CPU_REF_BACKEND is enabled
+    ggml_backend_load_best("cpu-ref", silent, dir_path);
+#endif
 }
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 369905750754f..83032e0ca11d9 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -52,6 +52,13 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
     target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
     target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
 
+    # device name compiled into this variant: "CPU-<tag_name>" when a tag is given, plain "CPU" otherwise
+    if (tag_name)
+        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}")
+    else()
+        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU")
+    endif()
+
     if (APPLE AND GGML_ACCELERATE)
         find_library(ACCELERATE_FRAMEWORK Accelerate)
         if (ACCELERATE_FRAMEWORK)
diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp
index 2b81f8b9afa22..a44dd4a62a1d0 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -327,7 +327,8 @@ struct ggml_backend_cpu_device_context {
 };
 
 static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
-    return "CPU";
+    // GGML_CPU_VARIANT_NAME is a compile definition set in ggml-cpu/CMakeLists.txt ("CPU" or "CPU-<tag>")
+    return GGML_CPU_VARIANT_NAME;
 
     GGML_UNUSED(dev);
 }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91719577564a9..cbb61a4737d98 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -199,6 +199,16 @@ endif()
 llama_build_and_test(test-gguf.cpp)
 llama_build_and_test(test-backend-ops.cpp)
 
+# give test-backend-ops the build-tree path of the reference CPU backend library as a compile definition
+if (GGML_CPU_REF_BACKEND)
+    if (WIN32)
+        set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/ggml-cpu-ref.dll")
+    else()
+        set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/libggml-cpu-ref.so")
+    endif()
+    target_compile_definitions(test-backend-ops PRIVATE GGML_CPU_REF_BACKEND_PATH="${GGML_CPU_REF_BACKEND_PATH}")
+endif()
+
 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")
 
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index b54a1a4e823f9..c25d092652889 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include <unordered_map>
 
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     size_t nels = ggml_nelements(tensor);
@@ -324,6 +325,7 @@ enum test_mode {
     MODE_PERF,
     MODE_GRAD,
     MODE_SUPPORT,
+    MODE_CPU_VARIANTS, // compare one CPU backend variant against the CPU-ref backend
 };
 
 // Output format support similar to llama-bench
@@ -6880,18 +6882,107 @@ static void show_test_coverage() {
     printf(" Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0);
 }
 
+// Runs the eval test cases on the CPU variant `variant_name`, using the "CPU-ref" backend as the reference.
+// params_filter (may be nullptr) is a regex searched in each test's vars(); non-matching tests are dropped.
+// op_names_filter is forwarded unchanged to each test's eval(); a summary is printed via output_printer.
+// Returns true only if both backends could be initialized and every remaining test case reported success.
+static bool test_cpu_variant(const char * variant_name, const char * op_names_filter,
+                             const char * params_filter, printer * output_printer) {
+
+    ggml_backend_t backend_ref = ggml_backend_init_by_name("CPU-ref", nullptr);
+    if (backend_ref == nullptr) {
+        printf("Error: CPU-ref backend not found. Make sure it's built and available.\n");
+        return false;
+    }
+
+    ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr);
+    if (backend_variant == nullptr) {
+        printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name);
+        printf("Use --list to see available variants.\n");
+        ggml_backend_free(backend_ref);
+        return false;
+    }
+
+    printf("Testing CPU variant '%s' against cpu-ref backend...\n\n", variant_name);
+
+    auto test_cases = make_test_cases_eval();
+
+    if (params_filter != nullptr) {
+        std::regex regex(params_filter);
+        auto it = test_cases.begin();
+        while (it != test_cases.end()) {
+            std::string test_params = (*it)->vars();
+            if (!std::regex_search(test_params, regex)) {
+                it = test_cases.erase(it);
+            } else {
+                ++it;
+            }
+        }
+    }
+
+    size_t n_ok = 0;
+    for (auto & test : test_cases) {
+        if (test->eval(backend_variant, backend_ref, op_names_filter, output_printer)) {
+            n_ok++;
+        }
+    }
+
+    output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false));
+
+    ggml_backend_free(backend_variant);
+    ggml_backend_free(backend_ref);
+
+    return n_ok == test_cases.size();
+}
+
+// Loads all backends, then prints name and description of every device that belongs to a registry
+// whose name contains "CPU", leaving out the "CPU-ref" device itself.
+static void list_cpu_variants() {
+    ggml_backend_load_all();
+
+    std::unordered_map<std::string, std::string> variant_names;
+    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
+        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
+        if (strstr(ggml_backend_reg_name(reg), "CPU") != nullptr) {
+            for (size_t j = 0; j < ggml_backend_reg_dev_count(reg); j++) {
+                ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, j);
+                const char * name = ggml_backend_dev_name(dev);
+                if (strcmp(name, "CPU-ref") != 0) {
+                    variant_names.emplace(name, ggml_backend_dev_description(dev));
+                }
+            }
+        }
+    }
+
+    if (variant_names.empty()) {
+        printf("No CPU backend variants found. To enable CPU variants, rebuild with:\n");
+        printf(" cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n");
+        return;
+    }
+
+    printf("CPU variants:\n");
+    for (const auto & it : variant_names) {
+        printf(" %-15s - %s\n", it.first.c_str(), it.second.c_str());
+    }
+}
+
 static void usage(char ** argv) {
-    printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--show-coverage]\n", argv[0]);
+    printf("Usage: %s [mode] [-o <op>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops] [--list-cpu-variants] [--show-coverage]\n", argv[0]);
     printf(" valid modes:\n");
     printf(" - test (default, compare with CPU backend for correctness)\n");
     printf(" - grad (compare gradients from backpropagation with method of finite differences)\n");
     printf(" - perf (performance evaluation)\n");
     printf(" - support (probe backend operation support)\n");
+    printf(" - cpu-variants (test CPU variants against cpu-ref backend)\n");
     printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc),\n");
     printf(" optionally including the full test case string (e.g. \"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n");
     printf(" --output specifies output format (default: console, options: console, sql, csv)\n");
     printf(" --list-ops lists all available GGML operations\n");
+    printf(" --list-cpu-variants lists all available CPU backend variants\n");
     printf(" --show-coverage shows test coverage\n");
+    printf(" cpu-variants mode options:\n");
+    printf(" --list lists available CPU variants on this system\n");
+    printf(" --variant <name> test specific CPU variant against cpu-ref backend\n");
 }
 
 int main(int argc, char ** argv) {
@@ -6900,6 +6991,7 @@ int main(int argc, char ** argv) {
     const char * op_names_filter = nullptr;
     const char * backend_filter = nullptr;
     const char * params_filter = nullptr;
+    const char * cpu_variant_name = nullptr;
 
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "test") == 0) {
@@ -6910,6 +7002,8 @@ int main(int argc, char ** argv) {
             mode = MODE_GRAD;
         } else if (strcmp(argv[i], "support") == 0) {
             mode = MODE_SUPPORT;
+        } else if (strcmp(argv[i], "cpu-variants") == 0) {
+            mode = MODE_CPU_VARIANTS;
         } else if (strcmp(argv[i], "-o") == 0) {
             if (i + 1 < argc) {
                 op_names_filter = argv[++i];
@@ -6944,6 +7038,16 @@ int main(int argc, char ** argv) {
         } else if (strcmp(argv[i], "--list-ops") == 0) {
             list_all_ops();
             return 0;
+        } else if (strcmp(argv[i], "--list") == 0 || strcmp(argv[i], "--list-cpu-variants") == 0) {
+            list_cpu_variants();
+            return 0;
+        } else if (strcmp(argv[i], "--variant") == 0) {
+            if (i + 1 < argc) {
+                cpu_variant_name = argv[++i];
+            } else {
+                usage(argv);
+                return 1;
+            }
         } else if (strcmp(argv[i], "--show-coverage") == 0) {
             show_test_coverage();
             return 0;
@@ -6962,6 +7066,17 @@ int main(int argc, char ** argv) {
         output_printer->print_header();
     }
 
+    // cpu-variants mode runs only the variant-vs-reference comparison and exits with its result
+    if (mode == MODE_CPU_VARIANTS) {
+        if (cpu_variant_name == nullptr) {
+            printf("Error: cpu-variants mode requires --variant or --list\n");
+            usage(argv);
+            return 1;
+        }
+
+        return test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1;
+    }
+
     output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count()));
 
     size_t n_ok = 0;