Skip to content

Commit a509662

Browse files
committed
ggml : add CPU backend reference implementation (wip)
This commit introduces a CPU reference implementation for GGML, designed primarily for testing and validation purposes. The motivation for this addition is to have a pure C CPU backend implementation that does not use any hardware-specific optimizations or intrinsics. This will allow for testing the CPU backend variants against the reference implementation to ensure correctness.
1 parent 51abc96 commit a509662

File tree

7 files changed

+141
-2
lines changed

7 files changed

+141
-2
lines changed

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file
220220

221221
# extra artifacts
222222
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
223+
# The option name must match the `if (GGML_CPU_REF_BACKEND)` checks in
# ggml/src/CMakeLists.txt and tests/CMakeLists.txt, otherwise it has no effect.
option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
223224
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
224225

225226
#

ggml/src/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,15 @@ ggml_add_backend(WebGPU)
385385
ggml_add_backend(zDNN)
386386
ggml_add_backend(OpenCL)
387387

388+
# Optional reference CPU backend (variant tag "ref"), intended for testing.
# Configuration fails unless GGML_BACKEND_DL is enabled as well.
if (GGML_CPU_REF_BACKEND)
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL")
    endif()
    # Was `set(DGGML_CPU_GENERIC ON)`: the leading "D" is the -D command-line
    # prefix and is not part of a CMake variable name.
    # NOTE(review): confirm the ggml-cpu build actually consumes GGML_CPU_GENERIC.
    set(GGML_CPU_GENERIC ON)
    ggml_add_cpu_backend_variant_impl(ref)
    # Enables the GGML_USE_CPU_REF code path when compiling the ggml target.
    target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
endif()
396+
388397
foreach (target ggml-base ggml)
389398
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
390399
target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump

ggml/src/ggml-backend-reg.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,4 +596,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
596596
if (backend_path) {
597597
ggml_backend_load(backend_path);
598598
}
599+
#ifdef GGML_USE_CPU_REF
600+
ggml_backend_load_best("cpu-ref", silent, dir_path);
601+
#endif
599602
}

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
5252
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
5353
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
5454

55+
if (tag_name)
56+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}")
57+
else()
58+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU")
59+
endif()
60+
5561
if (APPLE AND GGML_ACCELERATE)
5662
find_library(ACCELERATE_FRAMEWORK Accelerate)
5763
if (ACCELERATE_FRAMEWORK)

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ struct ggml_backend_cpu_device_context {
327327
};
328328

329329
// Fallback for builds that do not pass GGML_CPU_VARIANT_NAME on the compile
// line: keep the name this device reported before variants were named.
#ifndef GGML_CPU_VARIANT_NAME
#define GGML_CPU_VARIANT_NAME "CPU"
#endif

// Returns the name of the CPU device, i.e. the GGML_CPU_VARIANT_NAME string
// literal selected at build time. The device handle itself is not inspected.
static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
    return GGML_CPU_VARIANT_NAME;

    GGML_UNUSED(dev);
}

tests/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,15 @@ endif()
199199
llama_build_and_test(test-gguf.cpp)
200200
llama_build_and_test(test-backend-ops.cpp)
201201

202+
# When the reference CPU backend is enabled, hand test-backend-ops the location
# of the ggml-cpu-ref library under ${CMAKE_BINARY_DIR}/bin through the
# GGML_CPU_REF_BACKEND_PATH compile definition.
if (GGML_CPU_REF_BACKEND)
    # The library file name differs between Windows and every other platform.
    if (NOT WIN32)
        set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/libggml-cpu-ref.so")
    else()
        set(GGML_CPU_REF_BACKEND_PATH "${CMAKE_BINARY_DIR}/bin/ggml-cpu-ref.dll")
    endif()
    target_compile_definitions(test-backend-ops PRIVATE GGML_CPU_REF_BACKEND_PATH="${GGML_CPU_REF_BACKEND_PATH}")
endif()
210+
202211
llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
203212
llama_build_and_test(test-autorelease.cpp LABEL "model")
204213

tests/test-backend-ops.cpp

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <string_view>
4040
#include <thread>
4141
#include <vector>
42+
#include <unordered_map>
4243

4344
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
4445
size_t nels = ggml_nelements(tensor);
@@ -324,6 +325,7 @@ enum test_mode {
324325
MODE_PERF,
325326
MODE_GRAD,
326327
MODE_SUPPORT,
328+
MODE_CPU_VARIANTS,
327329
};
328330

329331
// Output format support similar to llama-bench
@@ -6880,18 +6882,99 @@ static void show_test_coverage() {
68806882
printf(" Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0);
68816883
}
68826884

6885+
static bool test_cpu_variant(const char * variant_name, const char * op_names_filter,
6886+
const char * params_filter, printer * output_printer) {
6887+
6888+
ggml_backend_t backend_ref = ggml_backend_init_by_name("CPU-ref", nullptr);
6889+
if (backend_ref == nullptr) {
6890+
printf("Error: CPU-ref backend not found. Make sure it's built and available.\n");
6891+
return false;
6892+
}
6893+
6894+
ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr);
6895+
if (backend_variant == nullptr) {
6896+
printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name);
6897+
printf("Use --list to see available variants.\n");
6898+
ggml_backend_free(backend_ref);
6899+
return false;
6900+
}
6901+
6902+
printf("Testing CPU variant '%s' against cpu-ref backend...\n\n", variant_name);
6903+
6904+
auto test_cases = make_test_cases_eval();
6905+
6906+
if (params_filter != nullptr) {
6907+
std::regex regex(params_filter);
6908+
auto it = test_cases.begin();
6909+
while (it != test_cases.end()) {
6910+
std::string test_params = (*it)->vars();
6911+
if (!std::regex_search(test_params, regex)) {
6912+
it = test_cases.erase(it);
6913+
} else {
6914+
it++;
6915+
}
6916+
}
6917+
}
6918+
6919+
size_t n_ok = 0;
6920+
for (auto & test : test_cases) {
6921+
if (test->eval(backend_variant, backend_ref, op_names_filter, output_printer)) {
6922+
n_ok++;
6923+
}
6924+
}
6925+
6926+
output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false));
6927+
6928+
ggml_backend_free(backend_variant);
6929+
ggml_backend_free(backend_ref);
6930+
6931+
return n_ok == test_cases.size();
6932+
}
6933+
6934+
static void list_cpu_variants() {
6935+
std::unordered_map<std::string, std::string> variant_names;
6936+
for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
6937+
ggml_backend_reg_t reg = ggml_backend_reg_get(i);
6938+
if (strstr(ggml_backend_reg_name(reg), "CPU") != nullptr) {
6939+
for (size_t j = 0; j < ggml_backend_reg_dev_count(reg); j++) {
6940+
ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, j);
6941+
const char * name = ggml_backend_dev_name(dev);
6942+
if (strcmp(name, "CPU-ref") != 0) {
6943+
variant_names.emplace(name, ggml_backend_dev_description(dev));
6944+
}
6945+
}
6946+
}
6947+
}
6948+
6949+
if (variant_names.size() == 0) {
6950+
printf("No CPU backend variants found. To enable CPU variants, rebuild with:\n");
6951+
printf(" cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n");
6952+
return;
6953+
}
6954+
6955+
printf("CPU variants:\n");
6956+
for (const auto & it : variant_names) {
6957+
printf(" %-15s - %s\n", it.first.c_str(), it.second.c_str());
6958+
}
6959+
}
6960+
68836961
// Prints the command-line help for this test binary to stdout.
// argv[0] is used as the program name in the synopsis line.
static void usage(char ** argv) {
    // `--list-cpu-variants` is intentionally not advertised: the option handling
    // added to main() recognizes `--list` and `--variant <name>` for the
    // cpu-variants mode, so the synopsis names those instead.
    printf("Usage: %s [mode] [-o <op,..>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops] [--show-coverage] [--list] [--variant <name>]\n", argv[0]);
    printf(" valid modes:\n");
    printf(" - test (default, compare with CPU backend for correctness)\n");
    printf(" - grad (compare gradients from backpropagation with method of finite differences)\n");
    printf(" - perf (performance evaluation)\n");
    printf(" - support (probe backend operation support)\n");
    printf(" - cpu-variants (test CPU variants against cpu-ref backend)\n");
    printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc),\n");
    printf(" optionally including the full test case string (e.g. \"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n");
    printf(" --output specifies output format (default: console, options: console, sql, csv)\n");
    printf(" --list-ops lists all available GGML operations\n");
    printf(" --show-coverage shows test coverage\n");
    printf(" cpu-variants mode options:\n");
    printf(" --list lists available CPU variants on this system\n");
    printf(" --variant <name> test specific CPU variant against cpu-ref backend\n");
}
68966979

68976980
int main(int argc, char ** argv) {
@@ -6900,6 +6983,8 @@ int main(int argc, char ** argv) {
69006983
const char * op_names_filter = nullptr;
69016984
const char * backend_filter = nullptr;
69026985
const char * params_filter = nullptr;
6986+
const char * cpu_variant_name = nullptr;
6987+
bool list_variants_flag = false;
69036988

69046989
for (int i = 1; i < argc; i++) {
69056990
if (strcmp(argv[i], "test") == 0) {
@@ -6910,6 +6995,8 @@ int main(int argc, char ** argv) {
69106995
mode = MODE_GRAD;
69116996
} else if (strcmp(argv[i], "support") == 0) {
69126997
mode = MODE_SUPPORT;
6998+
} else if (strcmp(argv[i], "cpu-variants") == 0) {
6999+
mode = MODE_CPU_VARIANTS;
69137000
} else if (strcmp(argv[i], "-o") == 0) {
69147001
if (i + 1 < argc) {
69157002
op_names_filter = argv[++i];
@@ -6944,6 +7031,15 @@ int main(int argc, char ** argv) {
69447031
} else if (strcmp(argv[i], "--list-ops") == 0) {
69457032
list_all_ops();
69467033
return 0;
7034+
} else if (strcmp(argv[i], "--list") == 0) {
7035+
list_variants_flag = true;
7036+
} else if (strcmp(argv[i], "--variant") == 0) {
7037+
if (i + 1 < argc) {
7038+
cpu_variant_name = argv[++i];
7039+
} else {
7040+
usage(argv);
7041+
return 1;
7042+
}
69477043
} else if (strcmp(argv[i], "--show-coverage") == 0) {
69487044
show_test_coverage();
69497045
return 0;
@@ -6962,6 +7058,21 @@ int main(int argc, char ** argv) {
69627058
output_printer->print_header();
69637059
}
69647060

7061+
if (mode == MODE_CPU_VARIANTS) {
7062+
if (list_variants_flag) {
7063+
list_cpu_variants();
7064+
return 0;
7065+
}
7066+
7067+
if (cpu_variant_name == nullptr) {
7068+
printf("Error: cpu-variants mode requires --variant <name> or --list\n");
7069+
usage(argv);
7070+
return 1;
7071+
}
7072+
7073+
return test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1;
7074+
}
7075+
69657076
output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count()));
69667077

69677078
size_t n_ok = 0;

0 commit comments

Comments
 (0)