From d262c7931d5d70c13d0e113c08d4927a77dff8ac Mon Sep 17 00:00:00 2001
From: safranowith
Date: Sun, 14 Sep 2025 18:37:16 +0300
Subject: [PATCH 1/2] ops: add CEIL operator support for CPU and SYCL

---
 docs/ops.md                         |  1 +
 docs/ops/CPU.csv                    |  4 +++
 docs/ops/SYCL.csv                   |  4 +++
 ggml/include/ggml.h                 |  9 +++++++
 ggml/src/ggml-cpu/ops.cpp           |  4 +++
 ggml/src/ggml-cpu/unary-ops.cpp     |  9 +++++++
 ggml/src/ggml-cpu/unary-ops.h       |  1 +
 ggml/src/ggml-sycl/element_wise.cpp | 29 +++++++++++++++++++++
 ggml/src/ggml-sycl/element_wise.hpp |  2 ++
 ggml/src/ggml-sycl/ggml-sycl.cpp    |  4 +++
 ggml/src/ggml.c                     | 18 ++++++++++++-
 tests/test-backend-ops.cpp          | 40 +++++++++++++++++++++++++++++
 12 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/docs/ops.md b/docs/ops.md
index 0047ef3fa5e53..84ee36a7b2162 100644
--- a/docs/ops.md
+++ b/docs/ops.md
@@ -22,6 +22,7 @@ Legend:
 | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
 | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
+| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
 | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
 | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ |
 | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
diff --git a/docs/ops/CPU.csv b/docs/ops/CPU.csv
index 21e0d1b3c9117..84a873f707fed 100644
--- a/docs/ops/CPU.csv
+++ b/docs/ops/CPU.csv
@@ -1,6 +1,8 @@
 "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
 "CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
@@ -61,6 +63,8 @@
 "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
+"CPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
 "CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU"
 "CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU"
diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv
index 5d022ee91aa7a..8c993c97330de 100644
--- a/docs/ops/SYCL.csv
+++ b/docs/ops/SYCL.csv
@@ -1,6 +1,8 @@
 "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"
 "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
+"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
+"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
 "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
 "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
@@ -61,6 +63,8 @@
 "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
 "SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
 "SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
+"SYCL0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index b7b472c56ec61..aa84c65c50f58 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -574,6 +574,7 @@ extern "C" { GGML_UNARY_OP_HARDSIGMOID, GGML_UNARY_OP_EXP, GGML_UNARY_OP_GELU_ERF, + GGML_UNARY_OP_CEIL, GGML_UNARY_OP_COUNT, }; @@ -1147,6 +1148,14 @@ extern "C" { GGML_API struct ggml_tensor * ggml_exp_inplace( struct ggml_context * ctx, struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ceil( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ceil_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); // gated linear unit ops // A: n columns, r rows, diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 212e52ef6a1c8..7643ace0083f8 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -9767,6 +9767,10 @@ void ggml_compute_forward_unary( { ggml_compute_forward_exp(params, dst); } break; + case GGML_UNARY_OP_CEIL: + { + ggml_compute_forward_ceil(params, dst); + } break; default: { GGML_ABORT("fatal error"); diff --git a/ggml/src/ggml-cpu/unary-ops.cpp b/ggml/src/ggml-cpu/unary-ops.cpp index 4fce569b3bfc8..da6980d5f309d 100644 --- a/ggml/src/ggml-cpu/unary-ops.cpp +++ b/ggml/src/ggml-cpu/unary-ops.cpp @@ -64,6 +64,10 @@ static inline float op_log(float x) { return logf(x); } +static inline float op_ceil(float x) { + return ceilf(x); +} + template static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) { constexpr auto src0_to_f32 = type_conversion_table::to_f32; @@ -184,3 +188,8 @@ void ggml_compute_forward_cos(const ggml_compute_params * params, ggml_tensor * void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * dst) { unary_op(params, dst); } + +void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) { + unary_op(params, dst); +} + diff --git a/ggml/src/ggml-cpu/unary-ops.h b/ggml/src/ggml-cpu/unary-ops.h index b1ade2c8e341f..63fd26620f9b1 100644 --- a/ggml/src/ggml-cpu/unary-ops.h +++ b/ggml/src/ggml-cpu/unary-ops.h @@ -22,6 +22,7 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst); #ifdef __cplusplus } diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index c2da2fb48ad28..abcc851f82d7b 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -156,6 +156,11 @@ static void unary_op_sgn_kernel(const T * x, T * dst, const int k, const sycl::n dst[i] = op_sgn(x[i]); } } +template +static __dpct_inline__ T op_ceil(T x) { + return sycl::ceil(x); +} + template static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { @@ -304,6 +309,13 @@ static void 
     }
 }
 
+template<typename T>
+static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_ceil(x[i]);
+    }
+}
+
 template <typename T>
 static void upscale(const T *x, T *dst, const int nb00, const int nb01, const int nb02, const int nb03,
                     const int ne10, const int ne11,
@@ -944,6 +956,19 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
         }, min_val, max_val);
 }
 
+static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_ceil_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
 static inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
@@ -1168,3 +1193,7 @@ void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
     ggml_sycl_op_geglu_quick(ctx, dst);
 }
+void ggml_sycl_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_ceil(ctx, dst);
+}
diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp
index 50749e87d783e..af2aebc8d1810 100644
--- a/ggml/src/ggml-sycl/element_wise.hpp
+++ b/ggml/src/ggml-sycl/element_wise.hpp
@@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
+void ggml_sycl_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+
 void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index e06ec613fc81f..b36de04f8d51d 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -3636,6 +3636,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
         case GGML_UNARY_OP_ELU:
             ggml_sycl_elu(ctx, dst);
             break;
+        case GGML_UNARY_OP_CEIL:
+            ggml_sycl_ceil(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -4190,6 +4193,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
             case GGML_UNARY_OP_SGN:
             case GGML_UNARY_OP_ABS:
             case GGML_UNARY_OP_ELU:
+            case GGML_UNARY_OP_CEIL:
 #if defined (GGML_SYCL_F16)
                 return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
 #else
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 50dc1aa24fff5..a813a8c99cf4c 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1143,9 +1143,10 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "HARDSIGMOID",
     "EXP",
     "GELU_ERF",
+    "CEIL",
 };
 
-static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
+static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");
 
 
 static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
@@ -5743,6 +5744,21 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
     return result;
 }
 
+// ggml_ceil
+
+struct ggml_tensor * ggml_ceil(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
+}
+
+struct ggml_tensor * ggml_ceil_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
+}
+
+
 // opt_step_adamw
 
 struct ggml_tensor * ggml_opt_step_adamw(
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index b54a1a4e823f9..bc8a1f3a66912 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3524,6 +3524,45 @@ struct test_log : public test_case {
     }
 };
 
+// GGML_UNARY_OP_CEIL
+struct test_ceil : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_ceil(ggml_type type = GGML_TYPE_F32,
+              std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_ceil(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+
+    float grad_eps() override {
+        return 1.0f;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
 // GGML_OP_SIN
 struct test_sin : public test_case {
     const ggml_type type;
@@ -6328,6 +6367,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_sqr(type));
         test_cases.emplace_back(new test_sqrt(type));
         test_cases.emplace_back(new test_log(type));
+        test_cases.emplace_back(new test_ceil(type));
         test_cases.emplace_back(new test_sin(type));
         test_cases.emplace_back(new test_cos(type));
         test_cases.emplace_back(new test_clamp(type));

From 3881731d5106e4d028b89927937610b51df69e46 Mon Sep 17 00:00:00 2001
From: safranowith
Date: Tue, 16 Sep 2025 13:56:09 +0300
Subject: [PATCH 2/2] Fix EditorConfig errors

---
 ggml/include/ggml.h             | 2 +-
 ggml/src/ggml-cpu/unary-ops.cpp | 4 ++--
 tests/test-backend-ops.cpp      | 2 +-
 vendor/miniaudio/miniaudio.h    | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index aa84c65c50f58..a00f604600121 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1148,7 +1148,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_exp_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
-
+
     GGML_API struct ggml_tensor * ggml_ceil(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
diff --git a/ggml/src/ggml-cpu/unary-ops.cpp b/ggml/src/ggml-cpu/unary-ops.cpp
index da6980d5f309d..816dae68b3027 100644
--- a/ggml/src/ggml-cpu/unary-ops.cpp
+++ b/ggml/src/ggml-cpu/unary-ops.cpp
@@ -65,8 +65,8 @@ static inline float op_log(float x) {
 }
 
 static inline float op_ceil(float x) {
-    return ceilf(x);
-}
+    return ceilf(x);
+}
 
 template <float (*op)(float), typename src0_t, typename dst_t>
 static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index bc8a1f3a66912..9f32f92d0989c 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3555,7 +3555,7 @@ struct test_ceil : public test_case {
     }
 
     float grad_eps() override {
-        return 1.0f;
+        return 1.0f;
     }
 
     bool grad_precise() override {
diff --git a/vendor/miniaudio/miniaudio.h b/vendor/miniaudio/miniaudio.h
index c74bebeb3c725..53499f9a2cd5c 100644
--- a/vendor/miniaudio/miniaudio.h
+++ b/vendor/miniaudio/miniaudio.h
@@ -28227,7 +28227,7 @@ static ma_result ma_device_start__alsa(ma_device* pDevice)
     }
 
     if (pDevice->type == ma_device_type_playback || pDevice->type == ma_device_type_duplex) {
-        /*
+        /*
         When data is written to the device we wait for the device to get ready to receive data with poll(). In my testing
        I have observed that poll() can sometimes block forever unless the device is started explicitly with snd_pcm_start()
        or some data is written with snd_pcm_writei().
@@ -34520,7 +34520,7 @@ static ma_result ma_device_init_internal__coreaudio(ma_context* pContext, ma_dev
         #endif
         }
 
-
+
         status = ((ma_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)(pData->audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, sizeof(bestFormat));
         if (status != noErr) {
             ((ma_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)(pData->audioUnit);
@@ -38526,7 +38526,7 @@ static ma_result ma_device_reinit__aaudio(ma_device* pDevice, ma_device_type dev
             ma_device_stop(pDevice);    /* Do a full device stop so we set internal state correctly. */
         }
     }
-
+
     result = MA_SUCCESS;
 
 done:
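
Reviewer note, not part of the patch series: a minimal sketch of how the new
operator can be exercised through the public ggml API. It assumes a build that
includes this PR plus the standard ggml.h / ggml-cpu.h entry points; the tensor
size, input values, and thread count are illustrative only.

    /* ceil_demo.c - apply ggml_ceil to a small F32 tensor on the CPU backend */
    #include "ggml.h"
    #include "ggml-cpu.h"
    #include <stdio.h>

    int main(void) {
        /* small context; no_alloc=false so tensor data lives in ctx memory */
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        float * in = (float *) a->data;
        in[0] = -1.5f; in[1] = -1.0f; in[2] = 0.2f; in[3] = 1.5f;

        /* operator added by this PR */
        struct ggml_tensor * out = ggml_ceil(ctx, a);

        /* build and run the forward graph on the CPU backend */
        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, out);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

        const float * res = (const float *) out->data;
        printf("%g %g %g %g\n", res[0], res[1], res[2], res[3]); /* expect: -1 -1 1 2 */

        ggml_free(ctx);
        return 0;
    }

The same graph runs on the SYCL backend via the usual ggml-backend scheduling
path, since ggml_backend_sycl_device_supports_op now reports GGML_UNARY_OP_CEIL
as supported for contiguous F32 (and F16 with GGML_SYCL_F16) tensors.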