metal : adjust ops API

ggerganov · ggerganov · commit ff06f862e814 · 2025-09-16T15:09:52.000+03:00
ggml-ci
diff --git a/ggml/src/ggml-metal/ggml-metal-context.m b/ggml/src/ggml-metal/ggml-metal-context.m
@@ -1408,11 +1408,11 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                     // src1 is a row
                     GGML_ASSERT(ne11 == 1);
 
-                    pipeline = ggml_metal_op_bin_get_pipeline(ctx, dst->op, n_fuse, true);
+                    pipeline = ggml_metal_op_bin_get_pipeline(node->op, ctx, n_fuse, true);
 
                     bcast_row = true;
                 } else {
-                    pipeline = ggml_metal_op_bin_get_pipeline(ctx, dst->op, n_fuse, false);
+                    pipeline = ggml_metal_op_bin_get_pipeline(node->op, ctx, n_fuse, false);
                 }
 
                 if (n_fuse > 1) {
@@ -1602,7 +1602,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                 };
 
                 //const id<MTLComputePipelineState> pipeline = ctx->pipelines[GGML_METAL_PIPELINE_TYPE_ADD].pipeline;
-                const id<MTLComputePipelineState> pipeline = ggml_metal_op_bin_get_pipeline(ctx, GGML_OP_ADD, 1, false);
+                const id<MTLComputePipelineState> pipeline = ggml_metal_op_bin_get_pipeline(GGML_OP_ADD, ctx, 1, false);
 
                 [encoder setComputePipelineState:pipeline];
                 [encoder setBytes:&args length:sizeof(args) atIndex:0];
@@ -3517,7 +3517,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                     }
                 }
 
-                const id<MTLComputePipelineState> pipeline = ggml_metal_op_rms_norm_get_pipeline(ctx, node, n_fuse);
+                const id<MTLComputePipelineState> pipeline = ggml_metal_op_rms_norm_get_pipeline(node, ctx, n_fuse);
 
                 int nth = 32; // SIMD width
 
@@ -4257,7 +4257,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                         /*.logit_softcap =*/ logit_softcap,
                     };
 
-                    id<MTLComputePipelineState> pipeline = ggml_metal_op_flash_attn_ext_get_pipeline(ctx, node, has_mask, has_sinks, has_bias, has_scap, nsg);
+                    id<MTLComputePipelineState> pipeline = ggml_metal_op_flash_attn_ext_get_pipeline(node, ctx, has_mask, has_sinks, has_bias, has_scap, nsg);
 
                     [encoder setComputePipelineState:pipeline];
                     [encoder setBytes:&args length:sizeof(args)     atIndex:0];
@@ -4372,7 +4372,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                         /*.logit_softcap =*/ logit_softcap,
                     };
 
-                    id<MTLComputePipelineState> pipeline = ggml_metal_op_flash_attn_ext_vec_get_pipeline(ctx, node, has_mask, has_sinks, has_bias, has_scap, nsg, nwg);
+                    id<MTLComputePipelineState> pipeline = ggml_metal_op_flash_attn_ext_vec_get_pipeline(node, ctx, has_mask, has_sinks, has_bias, has_scap, nsg, nwg);
 
                     GGML_ASSERT(nsg*32 <= (int) pipeline.maxTotalThreadsPerThreadgroup);
 
@@ -4426,7 +4426,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                                 nrows,
                             };
 
-                            id<MTLComputePipelineState> pipeline0 = ggml_metal_op_flash_attn_ext_vec_reduce_get_pipeline(ctx, node, ne20, nwg);
+                            id<MTLComputePipelineState> pipeline0 = ggml_metal_op_flash_attn_ext_vec_reduce_get_pipeline(node, ctx, ne20, nwg);
 
                             [encoder setComputePipelineState:pipeline0];
                             [encoder setBytes:&args0   length:sizeof(args0) atIndex:0];
diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp
@@ -48,8 +48,8 @@ size_t ggml_metal_op_flash_attn_ext_extra_tmp(const ggml_tensor * op) {
 }
 
 ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_get_pipeline(
-        ggml_metal_t ctx,
         ggml_tensor * op,
+        ggml_metal_t ctx,
         bool    has_mask,
         bool    has_sinks,
         bool    has_bias,
@@ -107,8 +107,8 @@ ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_get_pipeline(
 }
 
 ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_get_pipeline(
-        ggml_metal_t ctx,
         ggml_tensor * op,
+        ggml_metal_t ctx,
         bool    has_mask,
         bool    has_sinks,
         bool    has_bias,
@@ -168,8 +168,8 @@ ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_get_pipeline(
 }
 
 ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_reduce_get_pipeline(
-        ggml_metal_t ctx,
         ggml_tensor * op,
+        ggml_metal_t ctx,
         int32_t dv,
         int32_t nwg) {
     char base[256];
@@ -198,8 +198,8 @@ ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_reduce_get_pipeline(
 }
 
 ggml_metal_pipeline_t ggml_metal_op_bin_get_pipeline(
-        ggml_metal_t ctx,
         enum ggml_op op,
+        ggml_metal_t ctx,
         int32_t n_fuse,
         bool row) {
     char base[256];
@@ -231,8 +231,8 @@ ggml_metal_pipeline_t ggml_metal_op_bin_get_pipeline(
 }
 
 ggml_metal_pipeline_t ggml_metal_op_rms_norm_get_pipeline(
-        ggml_metal_t ctx,
         ggml_tensor * op,
+        ggml_metal_t ctx,
         int32_t n_fuse) {
     char base[256];
     char name[256];
diff --git a/ggml/src/ggml-metal/ggml-metal-ops.h b/ggml/src/ggml-metal/ggml-metal-ops.h
@@ -18,17 +18,17 @@ bool ggml_metal_op_flash_attn_ext_use_vec(const struct ggml_tensor * op);
 size_t ggml_metal_op_flash_attn_ext_extra_tmp(const struct ggml_tensor * op);
 
 ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_get_pipeline(
-        ggml_metal_t ctx,
         struct ggml_tensor * op,
+        ggml_metal_t ctx,
         bool    has_mask,
         bool    has_sinks,
         bool    has_bias,
         bool    has_scap,
         int32_t nsg);
 
 ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_get_pipeline(
-        ggml_metal_t ctx,
         struct ggml_tensor * op,
+        ggml_metal_t ctx,
         bool    has_mask,
         bool    has_sinks,
         bool    has_bias,
@@ -37,20 +37,20 @@ ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_get_pipeline(
         int32_t nwg);
 
 ggml_metal_pipeline_t ggml_metal_op_flash_attn_ext_vec_reduce_get_pipeline(
-        ggml_metal_t ctx,
         struct ggml_tensor * op,
+        ggml_metal_t ctx,
         int32_t dv,
         int32_t nwg);
 
 ggml_metal_pipeline_t ggml_metal_op_bin_get_pipeline(
-        ggml_metal_t ctx,
         enum ggml_op op,
+        ggml_metal_t ctx,
         int32_t n_fuse,
         bool row);
 
 ggml_metal_pipeline_t ggml_metal_op_rms_norm_get_pipeline(
-        ggml_metal_t ctx,
         struct ggml_tensor * op,
+        ggml_metal_t ctx,
         int32_t n_fuse);
 
 #ifdef __cplusplus
diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
@@ -689,6 +689,7 @@ static void * ggml_backend_metal_get_proc_address(ggml_backend_reg_t reg, const
 
     GGML_UNUSED(reg);
 }
+
 static ggml_backend_reg_i ggml_backend_metal_reg_i = {
     /* .get_name         = */ ggml_backend_metal_reg_get_name,
     /* .device_count     = */ ggml_backend_metal_reg_device_count,

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -689,6 +689,7 @@ static void * ggml_backend_metal_get_proc_address(ggml_backend_reg_t reg, const` |
| `689` | `689` | |
| `690` | `690` | `GGML_UNUSED(reg);` |
| `691` | `691` | `}` |
| | `692` | `+` |
| `692` | `693` | `static ggml_backend_reg_i ggml_backend_metal_reg_i = {` |
| `693` | `694` | `/* .get_name = */ ggml_backend_metal_reg_get_name,` |
| `694` | `695` | `/* .device_count = */ ggml_backend_metal_reg_device_count,` |