|
// Binding for the Thorin `cmpxchg_weak` intrinsic operating on a plain `&mut T`.
// Only for integer data types.
// Takes the target address, the comparison value, the replacement value, the
// success and failure orderings, and a scope string.
// NOTE(review): the `(T, bool)` result is presumably (observed value, success
// flag) - confirm against the Thorin backend.
#[import(cc = "thorin")]
fn cmpxchg_weak[T](
    _addr: &mut T,
    _cmp: T,
    _new: T,
    _success_order: u32,
    _failure_order: u32,
    _scope: &[u8]
) -> (T, bool);
|
// Binding for the Thorin `fence` intrinsic.
// Takes an ordering value and a scope string; produces no result.
// NOTE(review): the encoding of `_order` and the accepted `_scope` strings are
// defined by the backend - confirm before relying on specific values.
#[import(cc = "thorin")]
fn fence(
    _order: u32,
    _scope: &[u8]
) -> ();
|
// Binding for the Thorin `pe_info` intrinsic.
// Takes a byte string and a value of arbitrary type `T`; produces no result.
// NOTE(review): presumably reports `_val` (labelled by `_src`) during partial
// evaluation - confirm against the Thorin documentation.
#[import(cc = "thorin")]
fn pe_info[T](
    _src: &[u8],
    _val: T
) -> ();
|
15 |
| -#[import(cc = "thorin")] fn cuda(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
16 |
| -#[import(cc = "thorin")] fn nvvm(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
17 |
| -#[import(cc = "thorin")] fn opencl(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
18 |
| -#[import(cc = "thorin")] fn amdgpu_hsa(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
19 |
| -#[import(cc = "thorin")] fn amdgpu_pal(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
// Binding for the Thorin `cuda` intrinsic, exposed here as `cuda_with_lmem`.
// Parameters, in order: device index, grid dimensions, block dimensions,
// a local-memory argument, and the body to run.
// NOTE(review): `_lmem` is presumably the amount of local (shared) memory to
// make available to the launch, judging by the function name - confirm the
// unit against the Thorin backend.
#[import(cc = "thorin", name = "cuda")]
fn cuda_with_lmem(
    _dev: i32,
    _grid: (i32, i32, i32),
    _block: (i32, i32, i32),
    _lmem: i32,
    _body: fn() -> ()
) -> ();
// Binding for the Thorin `nvvm` intrinsic, exposed here as `nvvm_with_lmem`.
// Parameters, in order: device index, grid dimensions, block dimensions,
// a local-memory argument, and the body to run.
// NOTE(review): `_lmem` is presumably the amount of local (shared) memory to
// make available to the launch, judging by the function name - confirm the
// unit against the Thorin backend.
#[import(cc = "thorin", name = "nvvm")]
fn nvvm_with_lmem(
    _dev: i32,
    _grid: (i32, i32, i32),
    _block: (i32, i32, i32),
    _lmem: i32,
    _body: fn() -> ()
) -> ();
// Binding for the Thorin `opencl` intrinsic, exposed here as `opencl_with_lmem`.
// Parameters, in order: device index, grid dimensions, block dimensions,
// a local-memory argument, and the body to run.
// NOTE(review): `_lmem` is presumably the amount of local memory to make
// available to the launch, judging by the function name - confirm the unit
// against the Thorin backend.
#[import(cc = "thorin", name = "opencl")]
fn opencl_with_lmem(
    _dev: i32,
    _grid: (i32, i32, i32),
    _block: (i32, i32, i32),
    _lmem: i32,
    _body: fn() -> ()
) -> ();
// Binding for the Thorin `amdgpu_hsa` intrinsic, exposed here as
// `amdgpu_hsa_with_lmem`.
// Parameters, in order: device index, grid dimensions, block dimensions,
// a local-memory argument, and the body to run.
// NOTE(review): `_lmem` is presumably the amount of local memory to make
// available to the launch, judging by the function name - confirm the unit
// against the Thorin backend.
#[import(cc = "thorin", name = "amdgpu_hsa")]
fn amdgpu_hsa_with_lmem(
    _dev: i32,
    _grid: (i32, i32, i32),
    _block: (i32, i32, i32),
    _lmem: i32,
    _body: fn() -> ()
) -> ();
// Binding for the Thorin `amdgpu_pal` intrinsic, exposed here as
// `amdgpu_pal_with_lmem`.
// Parameters, in order: device index, grid dimensions, block dimensions,
// a local-memory argument, and the body to run.
// NOTE(review): `_lmem` is presumably the amount of local memory to make
// available to the launch, judging by the function name - confirm the unit
// against the Thorin backend.
#[import(cc = "thorin", name = "amdgpu_pal")]
fn amdgpu_pal_with_lmem(
    _dev: i32,
    _grid: (i32, i32, i32),
    _block: (i32, i32, i32),
    _lmem: i32,
    _body: fn() -> ()
) -> ();
// Binding for the Thorin `local_memory_base` intrinsic.
// Takes no arguments and yields a mutable byte-array pointer in address space 3.
// NOTE(review): presumably the start of the local memory handed to the
// `*_with_lmem` launch intrinsics - confirm against the Thorin backend.
#[import(cc = "thorin")]
fn local_memory_base() -> &mut addrspace(3)[u8];
// Binding for the Thorin `reserve_shared` intrinsic.
// Takes a size and yields a mutable array-of-`T` pointer in address space 3.
// NOTE(review): `_size` is presumably an element count rather than a byte
// count - confirm against the Thorin backend.
#[import(cc = "thorin")]
fn reserve_shared[T](
    _size: i32
) -> &mut addrspace(3)[T];
|
// Binding for the Thorin `hls` intrinsic.
// Takes a device index and a body function; produces no result.
#[import(cc = "thorin")]
fn hls(
    _dev: i32,
    _body: fn() -> ()
) -> ();
|
// Binding for the Thorin `pipeline` intrinsic, exposed here as `thorin_pipeline`.
// Only for the HLS/OpenCL backend.
// Takes an initiation interval, a lower and an upper bound, and a body that
// receives an `i32`.
// NOTE(review): whether `_upper` is inclusive or exclusive is not visible
// here - confirm against the backend.
#[import(cc = "thorin", name = "pipeline")]
fn thorin_pipeline(
    _initiation_interval: i32,
    _lower: i32,
    _upper: i32,
    _body: fn(i32) -> ()
) -> ();
|
|
// Variant of the Thorin `cmpxchg_weak` intrinsic whose target address lives in
// address space 1.
// Takes the target address, the comparison value, the replacement value, the
// success and failure orderings, and a scope string.
// NOTE(review): the `(T, bool)` result is presumably (observed value, success
// flag) - confirm against the Thorin backend.
#[import(cc = "thorin", name = "cmpxchg_weak")]
fn cmpxchg_weak_p1[T](
    _addr: &mut addrspace(1)T,
    _cmp: T,
    _new: T,
    _success_order: u32,
    _failure_order: u32,
    _scope: &[u8]
) -> (T, bool);
|
// Variant of the Thorin `cmpxchg_weak` intrinsic whose target address lives in
// address space 3.
// Takes the target address, the comparison value, the replacement value, the
// success and failure orderings, and a scope string.
// NOTE(review): the `(T, bool)` result is presumably (observed value, success
// flag) - confirm against the Thorin backend.
#[import(cc = "thorin", name = "cmpxchg_weak")]
fn cmpxchg_weak_p3[T](
    _addr: &mut addrspace(3)T,
    _cmp: T,
    _new: T,
    _success_order: u32,
    _failure_order: u32,
    _scope: &[u8]
) -> (T, bool);
|
38 | 39 |
|
// Convenience wrapper around `cuda_with_lmem` that keeps the four-argument
// call shape: forwards `dev`, `grid`, `block` and `body` unchanged and passes
// 0 for the extra `i32` argument.
fn @cuda(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) =
    cuda_with_lmem(
        dev,
        grid,
        block,
        0,
        body
    );
// Convenience wrapper around `nvvm_with_lmem` that keeps the four-argument
// call shape: forwards `dev`, `grid`, `block` and `body` unchanged and passes
// 0 for the extra `i32` argument.
fn @nvvm(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) =
    nvvm_with_lmem(
        dev,
        grid,
        block,
        0,
        body
    );
// Convenience wrapper around `opencl_with_lmem` that keeps the four-argument
// call shape: forwards `dev`, `grid`, `block` and `body` unchanged and passes
// 0 for the extra `i32` argument.
fn @opencl(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) =
    opencl_with_lmem(
        dev,
        grid,
        block,
        0,
        body
    );
// Convenience wrapper around `amdgpu_hsa_with_lmem` that keeps the
// four-argument call shape: forwards `dev`, `grid`, `block` and `body`
// unchanged and passes 0 for the extra `i32` argument.
fn @amdgpu_hsa(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) =
    amdgpu_hsa_with_lmem(
        dev,
        grid,
        block,
        0,
        body
    );
// Convenience wrapper around `amdgpu_pal_with_lmem` that keeps the
// four-argument call shape: forwards `dev`, `grid`, `block` and `body`
// unchanged and passes 0 for the extra `i32` argument.
fn @amdgpu_pal(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) =
    amdgpu_pal_with_lmem(
        dev,
        grid,
        block,
        0,
        body
    );
| 45 | + |
// Curried front end for `thorin_pipeline`: accepts the loop body first and
// yields a function of (initiation_interval, lower, upper) that forwards all
// four values to the intrinsic.
fn @pipeline(body: fn(i32) -> ()) =
    @|initiation_interval: i32, lower: i32, upper: i32|
        thorin_pipeline(
            initiation_interval,
            lower,
            upper,
            body
        );
|
// Curried front end for `thorin_parallel`: accepts the loop body first and
// yields a function of (num_threads, lower, upper) that forwards all four
// values to `thorin_parallel`.
fn @parallel(body: fn(i32) -> ()) =
    @|num_threads: i32, lower: i32, upper: i32|
        thorin_parallel(
            num_threads,
            lower,
            upper,
            body
        );
|
// Curried front end for `thorin_spawn`: accepts the body first and yields a
// zero-argument function that hands the body to `thorin_spawn` when called.
fn @spawn(body: fn() -> ()) =
    @||
        thorin_spawn(body);
|
0 commit comments