Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ class ModulePass;

class PrepareSYCLNativeCPUPass
: public PassInfoMixin<PrepareSYCLNativeCPUPass> {
const bool O3;

public:
PrepareSYCLNativeCPUPass(bool O3) : O3(O3) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
MPM.addPass(compiler::utils::ReplaceLocalModuleScopeVariablesPass());
MPM.addPass(AlwaysInlinerPass());
#endif
MPM.addPass(PrepareSYCLNativeCPUPass());
MPM.addPass(PrepareSYCLNativeCPUPass(OptLevel == OptimizationLevel::O3));
#ifdef NATIVECPU_USE_OCK
MPM.addPass(compiler::utils::DefineMuxBuiltinsPass());
#endif
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,11 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
for (auto &OldF : OldKernels) {
auto *NewF =
cloneFunctionAndAddParam(OldF, StatePtrType, CurrentStatePointerTLS);
NewF->setLinkage(llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage);
if (O3 && !NewF->hasFnAttribute(Attribute::NoInline)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be limited to -O3 or could we do if (!NewF->hasOptNone() && !NewF->hasFnAttribute(Attribute::NoInline)) ?

(If we do want it to be limited to -O3, I think it'd be slightly better to have OptLevel as the field that we store in the pass, and check OptLevel == OptimizationLevel::O3 here, but I'm thinking we want it at other levels too.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, do we know why the regular inliner doesn't handle this?

if (!NewF->hasFnAttribute(Attribute::AlwaysInline))
NewF->addFnAttr(Attribute::AlwaysInline);
}
NewF->takeName(OldF);
OldF->replaceAllUsesWith(NewF);
OldF->eraseFromParent();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: %clangxx -fsycl-device-only -fno-sycl-libspirv -Wno-unsafe-libspirv-not-linked -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -S -emit-llvm -o %t_temp.ll %s
// RUN: %clangxx -mllvm -sycl-native-cpu-backend -S -emit-llvm -o - %t_temp.ll | FileCheck %s
// RUN: %clangxx -mllvm -sycl-native-cpu-backend -O3 -S -emit-llvm -o - %t_temp.ll | FileCheck %s --check-prefix=CHECK-O3
#include <sycl/sycl.hpp>

#include <iostream>
Expand Down Expand Up @@ -51,3 +52,9 @@ int main() {
// CHECK-DAG: @_ZTS6init_aIjE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, i32 {{.*}}%2, ptr {{.*}}%3){{.*}}
// CHECK-DAG: @_ZTS6init_aIfE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, float {{.*}}%2, ptr {{.*}}%3){{.*}}
// CHECK-DAG: @_ZTS6init_aIdE.NativeCPUKernel(ptr {{.*}}%0, ptr {{.*}}%1, double {{.*}}%2, ptr {{.*}}%3){{.*}}

// CHECK-O3-NOT: @{{.*}}.NativeCPUKernel
// CHECK-O3-DAG: define void @_ZTS6init_aIiE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} {
// CHECK-O3-DAG: define void @_ZTS6init_aIjE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} {
// CHECK-O3-DAG: define void @_ZTS6init_aIfE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} {
// CHECK-O3-DAG: define void @_ZTS6init_aIdE.SYCLNCPU(ptr {{.*}}%0, ptr addrspace(1) {{.*}}%1) {{.*}} #{{.*}} {
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// find the original function after this pass is run

// CHECK: %localVarTypes = type { ptr addrspace(1) }
// CHECK: define void @_ZTS4TestILi1ELi4EiE.NativeCPUKernel{{.*}} #[[ATTR:[0-9]*]]
// CHECK: define linkonce_odr void @_ZTS4TestILi1ELi4EiE.NativeCPUKernel{{.*}} #[[ATTR:[0-9]*]]
// CHECK: alloca %localVarTypes
// CHECK: attributes #[[ATTR]] = {{.*}} "mux-orig-fn"="_ZTS4TestILi1ELi4EiE"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ int main() {
// CHECK: call{{.*}}__dpcpp_nativecpu_get_global_id(i32 0, ptr addrspace(1) %2)
// CHECK-NOT: @llvm.threadlocal

// CHECK-TL: define void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI5Test1EE.NativeCPUKernel({{.*}}
// CHECK-TL: define linkonce_odr void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI5Test1EE.NativeCPUKernel({{.*}}
// CHECK-TL-NEXT:entry:
// CHECK-TL-NEXT: %[[VAL1:.*]] = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @_ZL28nativecpu_thread_local_state)
// CHECK-TL-NEXT: %[[VAL2:.*]] = load ptr addrspace(1), ptr addrspace(1) %[[VAL1]], align 8
Expand Down