|
// Declarations imported with cc = "thorin": each gives a signature only, no body appears here.
// cmpxchg_weak takes an address, a compare value, a new value, separate success/failure
// ordering arguments and a scope string, and returns a (T, bool) pair.
// NOTE(review): presumably the pair is (observed value, success flag) — confirm against the backend.
12 | 12 | #[import(cc = "thorin")] fn cmpxchg_weak[T](_addr: &mut T, _cmp: T, _new: T, _success_order: u32, _failure_order: u32, _scope: &[u8]) -> (T, bool); // only for integer data types
|
// fence takes an ordering value and a scope string and returns unit.
13 | 13 | #[import(cc = "thorin")] fn fence(_order: u32, _scope: &[u8]) -> ();
|
// pe_info takes a byte string and a value of any type T and returns unit.
14 | 14 | #[import(cc = "thorin")] fn pe_info[T](_src: &[u8], _val: T) -> ();
|
// Removed side of the diff (rows marked `-`): direct imports of cuda, nvvm, opencl and amdgpu,
// each taking (_dev, _grid, _block, _body) with no lmem parameter.
15 |
| -#[import(cc = "thorin")] fn cuda(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
16 |
| -#[import(cc = "thorin")] fn nvvm(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
17 |
| -#[import(cc = "thorin")] fn opencl(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
18 |
| -#[import(cc = "thorin")] fn amdgpu(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _body: fn() -> ()) -> (); |
// Added side of the diff (rows marked `+`): the imported names "cuda", "nvvm", "opencl" and
// "amdgpu" are now bound to *_with_lmem declarations, whose signatures carry an extra
// `_lmem: i32` parameter between `_block` and `_body`.
// NOTE(review): `_lmem` is presumably a local-memory size; its units are not shown here — confirm.
| 15 | +#[import(cc = "thorin", name = "cuda")] fn cuda_with_lmem(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _lmem: i32, _body: fn() -> ()) -> (); |
| 16 | +#[import(cc = "thorin", name = "nvvm")] fn nvvm_with_lmem(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _lmem: i32, _body: fn() -> ()) -> (); |
| 17 | +#[import(cc = "thorin", name = "opencl")] fn opencl_with_lmem(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _lmem: i32, _body: fn() -> ()) -> (); |
| 18 | +#[import(cc = "thorin", name = "amdgpu")] fn amdgpu_with_lmem(_dev: i32, _grid: (i32, i32, i32), _block: (i32, i32, i32), _lmem: i32, _body: fn() -> ()) -> (); |
// local_memory takes no arguments and returns a mutable pointer to a u8 array in addrspace(3).
// NOTE(review): presumably this is the region requested through `_lmem` — confirm.
| 19 | +#[import(cc = "thorin")] fn local_memory() -> &mut addrspace(3)[u8]; |
// reserve_shared takes an i32 size and returns a mutable pointer to a T array in addrspace(3).
// NOTE(review): whether `_size` counts elements or bytes is not shown here — confirm.
19 | 20 | #[import(cc = "thorin")] fn reserve_shared[T](_size: i32) -> &mut addrspace(3)[T];
|
// hls takes a `_dev` value and a zero-argument body function and returns unit.
20 | 21 | #[import(cc = "thorin")] fn hls(_dev: i32, _body: fn() -> ()) -> ();
|
// thorin_pipeline is the local name for the import called "pipeline"; its body receives one i32.
21 | 22 | #[import(cc = "thorin", name = "pipeline")] fn thorin_pipeline(_initiation_interval: i32, _lower: i32, _upper: i32, _body: fn(i32) -> ()) -> (); // only for HLS/OpenCL backend
|
|
// Both declarations import the same name, "cmpxchg_weak", and differ only in the address
// space of `_addr`: addrspace(1) for the _p1 variant and addrspace(3) for the _p3 variant.
// The remaining parameters and the (T, bool) result match the plain cmpxchg_weak declaration.
35 | 36 | #[import(cc = "thorin", name = "cmpxchg_weak")] fn cmpxchg_weak_p1[T](_addr: &mut addrspace(1)T, _cmp: T, _new: T, _success_order: u32, _failure_order: u32, _scope: &[u8]) -> (T, bool);
|
36 | 37 | #[import(cc = "thorin", name = "cmpxchg_weak")] fn cmpxchg_weak_p3[T](_addr: &mut addrspace(3)T, _cmp: T, _new: T, _success_order: u32, _failure_order: u32, _scope: &[u8]) -> (T, bool);
|
37 | 38 |
|
// Four-argument wrappers named cuda, nvvm, opencl and amdgpu: each takes (dev, grid, block, body)
// and forwards to the matching *_with_lmem import, passing 0 as the lmem argument.
| 39 | +fn @cuda(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) = cuda_with_lmem(dev, grid, block, 0, body); |
| 40 | +fn @nvvm(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) = nvvm_with_lmem(dev, grid, block, 0, body); |
| 41 | +fn @opencl(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) = opencl_with_lmem(dev, grid, block, 0, body); |
| 42 | +fn @amdgpu(dev: i32, grid: (i32, i32, i32), block: (i32, i32, i32), body: fn() -> ()) = amdgpu_with_lmem(dev, grid, block, 0, body); |
| 43 | + |
// Body-first wrappers: each takes the body and returns a function that receives the remaining
// arguments and calls the corresponding thorin_* import with the body as the last argument.
// pipeline: the returned function takes (initiation_interval, lower, upper) and calls thorin_pipeline.
38 | 44 | fn @pipeline(body: fn(i32) -> ()) = @|initiation_interval: i32, lower: i32, upper: i32| thorin_pipeline(initiation_interval, lower, upper, body);
|
// parallel: the returned function takes (num_threads, lower, upper) and calls thorin_parallel,
// which is declared outside this excerpt.
39 | 45 | fn @parallel(body: fn(i32) -> ()) = @|num_threads: i32, lower: i32, upper: i32| thorin_parallel(num_threads, lower, upper, body);
|
// spawn: the returned function takes no arguments and calls thorin_spawn(body);
// thorin_spawn is declared outside this excerpt, so its result type is not visible here.
40 | 46 | fn @spawn(body: fn() -> ()) = @|| thorin_spawn(body);
|
0 commit comments