From 1f09921cb39562a913ed6d13f08716d7eee05c93 Mon Sep 17 00:00:00 2001 From: Uwe Dolinsky Date: Wed, 5 Mar 2025 17:30:24 +0000 Subject: [PATCH 1/5] [SYCL][NATIVECPU] always inline kernels with -O3 --- llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h | 3 +++ llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp | 2 +- llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp | 6 ++++++ .../check_device_code/native_cpu/kernelhandler-scalar.cpp | 7 +++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h index 4e4c48cd64e3e..15cba7afa28af 100644 --- a/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h @@ -23,7 +23,10 @@ class ModulePass; class PrepareSYCLNativeCPUPass : public PassInfoMixin { + const bool O3; + public: + PrepareSYCLNativeCPUPass(bool O3) : O3(O3) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index b30b6c41c2b99..9838bbc578c4a 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -113,7 +113,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( MPM.addPass(compiler::utils::ReplaceLocalModuleScopeVariablesPass()); MPM.addPass(AlwaysInlinerPass()); #endif - MPM.addPass(PrepareSYCLNativeCPUPass()); + MPM.addPass(PrepareSYCLNativeCPUPass(OptLevel == OptimizationLevel::O3)); #ifdef NATIVECPU_USE_OCK MPM.addPass(compiler::utils::DefineMuxBuiltinsPass()); #endif diff --git a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp index 57718429e533a..2d69a244935c5 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp @@ -401,6 +401,12 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M, for (auto &OldF : OldKernels) { auto *NewF = cloneFunctionAndAddParam(OldF, StatePtrType, CurrentStatePointerTLS); + if (O3 && !NewF->hasFnAttribute(Attribute::NoInline)) { + if (!NewF->hasFnAttribute(Attribute::AlwaysInline)) + NewF->addFnAttr(Attribute::AlwaysInline); + // Set internal linkage to enable removal of kernel after inlining + NewF->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage); + } NewF->takeName(OldF); OldF->replaceAllUsesWith(NewF); OldF->eraseFromParent(); diff --git a/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp b/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp index f26a966803948..d8ca7b2e1d3a7 100644 --- a/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp +++ b/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp @@ -1,5 +1,6 @@ // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -S -emit-llvm -o %t_temp.ll %s // RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s +// RUN: %clangxx -mllvm -sycl-native-cpu-backend -O3 -S -emit-llvm -o - %t_temp.ll | FileCheck %s --check-prefix=CHECK-O3 #include #include @@ -51,3 +52,9 @@ int main() { // CHECK-DAG: @_ZTS6init_aIjE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, i32 {{.*}}%2, ptr {{.*}}%3){{.*}} // CHECK-DAG: @_ZTS6init_aIfE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, float {{.*}}%2, ptr {{.*}}%3){{.*}} // CHECK-DAG: @_ZTS6init_aIdE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, double {{.*}}%2, ptr {{.*}}%3){{.*}} + +// CHECK-O3-NOT: @{{.*}}.NativeCPUKernel +// CHECK-O3-DAG: define void @_ZTS6init_aIiE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIjE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIfE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIdE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { From d9129a01d8a9ac49b83de50100b8a43fcc641d8f Mon Sep 17 00:00:00 2001 From: Uwe Dolinsky Date: Mon, 10 Mar 2025 12:42:51 +0000 Subject: [PATCH 2/5] [SYCL][NATIVECPU] fix lit test to account for more properties occuring in upstream CI --- .../check_device_code/native_cpu/kernelhandler-scalar.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp b/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp index d8ca7b2e1d3a7..916a8e869a7c2 100644 --- a/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp +++ b/sycl/test/check_device_code/native_cpu/kernelhandler-scalar.cpp @@ -54,7 +54,7 @@ int main() { // CHECK-DAG: @_ZTS6init_aIdE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, double {{.*}}%2, ptr {{.*}}%3){{.*}} // CHECK-O3-NOT: @{{.*}}.NativeCPUKernel -// CHECK-O3-DAG: define void @_ZTS6init_aIiE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { -// CHECK-O3-DAG: define void @_ZTS6init_aIjE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { -// CHECK-O3-DAG: define void @_ZTS6init_aIfE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { -// CHECK-O3-DAG: define void @_ZTS6init_aIdE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) %1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIiE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIjE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIfE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} { +// CHECK-O3-DAG: define void @_ZTS6init_aIdE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} { From 4fc7da6119b7fa9ae6580975f23beff8bd4fa030 Mon Sep 17 00:00:00 2001 From: Uwe Dolinsky Date: Tue, 23 Sep 2025 19:37:39 +0100 Subject: [PATCH 3/5] [NATIVECPU] use LinkOnceODRLinkage on kernels --- llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp | 3 +-- sycl/test/check_device_code/native_cpu/local_module_scope.cpp | 2 +- sycl/test/check_device_code/native_cpu/native_cpu_builtins.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp index c89902c9730b2..3d136b95407ce 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp @@ -403,11 +403,10 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M, for (auto &OldF : OldKernels) { auto *NewF = cloneFunctionAndAddParam(OldF, StatePtrType, CurrentStatePointerTLS); + NewF->setLinkage(llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage); if (O3 && !NewF->hasFnAttribute(Attribute::NoInline)) { if (!NewF->hasFnAttribute(Attribute::AlwaysInline)) NewF->addFnAttr(Attribute::AlwaysInline); - // Set internal linkage to enable removal of kernel after inlining - NewF->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage); } NewF->takeName(OldF); OldF->replaceAllUsesWith(NewF); diff --git a/sycl/test/check_device_code/native_cpu/local_module_scope.cpp b/sycl/test/check_device_code/native_cpu/local_module_scope.cpp index 917c6e757d7db..ed4de3d5f3b93 100644 --- a/sycl/test/check_device_code/native_cpu/local_module_scope.cpp +++ b/sycl/test/check_device_code/native_cpu/local_module_scope.cpp @@ -7,7 +7,7 @@ // find the original function after this pass is run // CHECK: %localVarTypes = type { ptr addrspace(1) } -// CHECK: define void @_ZTS4TestILi1ELi4EiE.NativeCPUKernel{{.*}} #[[ATTR:[0-9]*]] +// CHECK: define linkonce_odr void @_ZTS4TestILi1ELi4EiE.NativeCPUKernel{{.*}} #[[ATTR:[0-9]*]] // CHECK: alloca %localVarTypes // CHECK: attributes #[[ATTR]] = {{.*}} "mux-orig-fn"="_ZTS4TestILi1ELi4EiE" diff --git a/sycl/test/check_device_code/native_cpu/native_cpu_builtins.cpp b/sycl/test/check_device_code/native_cpu/native_cpu_builtins.cpp index c9972eb5c606a..72c12588368f8 100644 --- a/sycl/test/check_device_code/native_cpu/native_cpu_builtins.cpp +++ b/sycl/test/check_device_code/native_cpu/native_cpu_builtins.cpp @@ -26,7 +26,7 @@ int main() { // CHECK: call{{.*}}__dpcpp_nativecpu_get_global_id(i32 0, ptr addrspace(1) %2) // CHECK-NOT: @llvm.threadlocal - // CHECK-TL: define void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI5Test1EE.NativeCPUKernel({{.*}} + // CHECK-TL: define linkonce_odr void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI5Test1EE.NativeCPUKernel({{.*}} // CHECK-TL-NEXT:entry: // CHECK-TL-NEXT: %[[VAL1:.*]] = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @_ZL28nativecpu_thread_local_state) // CHECK-TL-NEXT: %[[VAL2:.*]] = load ptr addrspace(1), ptr addrspace(1) %[[VAL1]], align 8 From 5f4632171b7eeb00a84e45be5e9ca7d92b2e8a6a Mon Sep 17 00:00:00 2001 From: Uwe Dolinsky Date: Thu, 25 Sep 2025 10:57:17 +0100 Subject: [PATCH 4/5] [NATIVECPU] pass OptimizationLevel --- llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h | 5 +++-- llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp | 2 +- llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) mode change 100644 => 100755 llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp diff --git a/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h index 15cba7afa28af..47f3ef1f56f0c 100644 --- a/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h @@ -16,6 +16,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Passes/OptimizationLevel.h" namespace llvm { @@ -23,10 +24,10 @@ class ModulePass; class PrepareSYCLNativeCPUPass : public PassInfoMixin { - const bool O3; + const OptimizationLevel OptLevel; public: - PrepareSYCLNativeCPUPass(bool O3) : O3(O3) {} + PrepareSYCLNativeCPUPass(OptimizationLevel OL) : OptLevel(OL) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index 89e7cb6288b8c..44862689988d8 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -109,7 +109,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( MPM.addPass(compiler::utils::ReplaceLocalModuleScopeVariablesPass()); MPM.addPass(AlwaysInlinerPass()); #endif - MPM.addPass(PrepareSYCLNativeCPUPass(OptLevel == OptimizationLevel::O3)); + MPM.addPass(PrepareSYCLNativeCPUPass(OptLevel)); #ifdef NATIVECPU_USE_OCK MPM.addPass(compiler::utils::DefineMuxBuiltinsPass()); #endif diff --git a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp old mode 100644 new mode 100755 index 3d136b95407ce..43d1326e76fc1 --- a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp @@ -404,7 +404,8 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M, auto *NewF = cloneFunctionAndAddParam(OldF, StatePtrType, CurrentStatePointerTLS); NewF->setLinkage(llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage); - if (O3 && !NewF->hasFnAttribute(Attribute::NoInline)) { + if (OptLevel == OptimizationLevel::O3 && + !NewF->hasFnAttribute(Attribute::NoInline)) { if (!NewF->hasFnAttribute(Attribute::AlwaysInline)) NewF->addFnAttr(Attribute::AlwaysInline); } From e6b90e1c45e3266daa6631c3f0e9a248f1d5054b Mon Sep 17 00:00:00 2001 From: Uwe Dolinsky Date: Thu, 25 Sep 2025 11:03:53 +0100 Subject: [PATCH 5/5] [NATIVECPU] file mode fix --- llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp diff --git a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp old mode 100755 new mode 100644