diff --git a/ggml/src/ggml-cuda/moe-expert-reduce.cu b/ggml/src/ggml-cuda/moe-expert-reduce.cu
index a97c5d573bbef..2e5a2ebe0749a 100644
--- a/ggml/src/ggml-cuda/moe-expert-reduce.cu
+++ b/ggml/src/ggml-cuda/moe-expert-reduce.cu
@@ -96,7 +96,12 @@ static void launch_moe_expert_reduce(ggml_backend_cuda_context & ctx,
 }
 
 bool ggml_cuda_should_use_moe_expert_reduce(const ggml_cgraph * cgraph, int start_index, int end_index) {
-    const ggml_tensor * mul = cgraph->nodes[start_index];
+    const ggml_tensor * mul     = cgraph->nodes[start_index];
+    const ggml_tensor * experts = mul->src[0];
+
+    if (experts->ne[2] != 1) {
+        return false;
+    }
 
     if (mul->op != GGML_OP_MUL || !ggml_is_contiguous(mul->src[0]) || !ggml_is_contiguous(mul->src[1])) {
         return false;