From 65cd2d5764c04929de0673930ba2277a853ad780 Mon Sep 17 00:00:00 2001
From: mattsinc-qti
Date: Fri, 3 Oct 2025 16:04:43 -0700
Subject: [PATCH] [QNN-EP] Apply Softmax layout transformation for GPU

- Transposes are inserted for Softmax with axis != output_rank-1 for the
  HTP backend.
- The GPU backend also has this requirement on the axis param, so this
  change enables the layout transformation for the GPU as well.
---
 .../providers/qnn/builder/opbuilder/softmax_op_builder.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
index c2211ce35ff59..4a5f4e9d8f888 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
@@ -152,7 +152,7 @@ Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_
                                                      std::vector<std::string>&& input_names,
                                                      const logging::Logger& logger,
                                                      bool do_op_validation) const {
-  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const bool is_qpu_backend = IsQpuBackend(qnn_model_wrapper.GetQnnBackendType());
   const std::string& op_type = node_unit.OpType();
   const auto& outputs = node_unit.Outputs();
   const std::string& orig_output_name = outputs[0].node_arg.Name();
@@ -202,7 +202,7 @@ Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_
                                                 do_op_validation,
                                                 false,
                                                 is_graph_output));
-  } else if (is_npu_backend && axis != static_cast<int32_t>(output_rank) - 1) {
+  } else if (is_qpu_backend && axis != static_cast<int32_t>(output_rank) - 1) {
     std::string transpose_input_name = utils::GetUniqueName(orig_output_name, "_transpose");

     std::vector<uint32_t> transpose_input_shape = output_info.shape;