From 190060bf09776c43022c64f69b816a2fb78d641c Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Mon, 29 Jan 2024 15:45:21 +0200 Subject: [PATCH 01/18] feat: add cumulative sum CUDA kernels --- dev/generate-kernel-signatures.py | 4 + dev/generate-tests.py | 4 + kernel-test-data.json | 127 +++++++++++++++++++++++++- src/awkward/_connect/cuda/__init__.py | 4 + 4 files changed, 137 insertions(+), 2 deletions(-) diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 39f9ddb690..fadc840ebc 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -12,12 +12,16 @@ cuda_kernels_impl = [ + "awkward_Index_nones_as_index", "awkward_ListArray_min_range", "awkward_ListArray_validity", "awkward_BitMaskedArray_to_ByteMaskedArray", "awkward_ListArray_compact_offsets", "awkward_ListOffsetArray_flatten_offsets", "awkward_IndexedArray_overlay_mask", + "awkward_ByteMaskedArray_numnull", + "awkward_IndexedArray_numnull", + "awkward_IndexedArray_numnull_parents", "awkward_IndexedArray_numnull_unique_64", "awkward_NumpyArray_fill", "awkward_ListArray_fill", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index b98c4932b4..e1d7ef7041 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -652,12 +652,16 @@ def gencpuunittests(specdict): cuda_kernels_tests = [ + "awkward_Index_nones_as_index", "awkward_ListArray_min_range", "awkward_ListArray_validity", "awkward_BitMaskedArray_to_ByteMaskedArray", "awkward_ListArray_compact_offsets", "awkward_ListOffsetArray_flatten_offsets", "awkward_IndexedArray_overlay_mask", + "awkward_ByteMaskedArray_numnull", + "awkward_IndexedArray_numnull", + "awkward_IndexedArray_numnull_parents", "awkward_IndexedArray_numnull_unique_64", "awkward_NumpyArray_fill", "awkward_ListArray_fill", diff --git a/kernel-test-data.json b/kernel-test-data.json index 9c7dc92172..23e77a0df0 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -1138,7 +1138,7 @@ }, { "name": "awkward_ByteMaskedArray_numnull", - "status": false, + "status": true, "tests": [ { "error": false, @@ -1795,11 +1795,51 @@ }, { "name": "awkward_IndexedArray_numnull", - "status": false, + "status": true, "tests": [ { "error": false, "message": "", + "inputs": { + "fromindex": [1], + "lenindex": 1 + }, + "outputs": { + "numnull": [0] + } + }, + { + "error": false, + "inputs": { + "fromindex": [-1], + "lenindex": 1 + }, + "outputs": { + "numnull": [1] + } + }, + { + "error": false, + "inputs": { + "fromindex": [-1, -1, -1, -1], + "lenindex": 4 + }, + "outputs": { + "numnull": [4] + } + }, + { + "error": false, + "inputs": { + "fromindex": [0, -1, 2, -1, -1, -1, -1], + "lenindex": 7 + }, + "outputs": { + "numnull": [5] + } + }, + { + "error": false, "inputs": { "fromindex": [0, 1], "lenindex": 2 @@ -1942,6 +1982,89 @@ } ] }, + { + "name": "awkward_IndexedArray_numnull_parents", + "status": true, + "tests": [ + { + "error": false, + "inputs": { + "fromindex": [1], + "lenindex": 1 + }, + "outputs": { + "numnull": [0], + "tolength": [0] + } + }, + { + "error": false, + "inputs": { + "fromindex": [-1], + "lenindex": 1 + }, + "outputs": { + "numnull": [1], + "tolength": [1] + } + }, + { + "error": false, + "inputs": { + "fromindex": [-1, -1, -1, -1], + "lenindex": 4 + }, + "outputs": { + "numnull": [1, 1, 1, 1], + "tolength": [4] + } + }, + { + "error": false, + "inputs": { + "fromindex": [0, -1, 2, -1, -1, -1, -1], + "lenindex": 7 + }, + "outputs": { + "numnull": [0, 1, 0, 1, 1, 1, 1], + "tolength": [5] + } + }, + { + "error": false, + "inputs": { + "fromindex": [0, 1], + "lenindex": 2 + }, + "outputs": { + "numnull": [0, 0], + "tolength": [0] + } + }, + { + "error": false, + "inputs": { + "fromindex": [0, 1, 2, 3], + "lenindex": 4 + }, + "outputs": { + "numnull": [0, 0, 0, 0], + "tolength": [0] + } + }, + { + "error": false, + "inputs": { + "fromindex": [0, 1, -2, 3, -4, 5, -6], + "lenindex": 7 + }, + "outputs": { + "numnull": [0, 0, 1, 0, 1, 0, 1], + "tolength": [3] + } + } + ] + }, { "name": "awkward_IndexedArray_numnull_unique_64", "status": true, diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index b3e3857d9c..fc237f4611 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -71,7 +71,11 @@ def fetch_template_specializations(kernel_dict): # These cuda kernels consist of multiple kernels that don't have templated # specializations of the same name (e.g. '_a', '_b'). kernel_exclusions = [ + "awkward_Index_nones_as_index", "awkward_ByteMaskedArray_getitem_nextcarry", + "awkward_ByteMaskedArray_numnull", + "awkward_IndexedArray_numnull", + "awkward_IndexedArray_numnull_parents", "awkward_ByteMaskedArray_getitem_nextcarry_outindex", "awkward_ByteMaskedArray_reduce_next_64", "awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64", From 0299babb64277cf9d70d46d78e979f55f6b819cb Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Tue, 30 Jan 2024 10:31:25 +0100 Subject: [PATCH 02/18] feat: add CUDA kernels (need to be fixed) --- .../awkward_ByteMaskedArray_numnull.cu | 50 +++++++++++++++ .../awkward_Index_nones_as_index.cu | 56 +++++++++++++++++ .../awkward_IndexedArray_numnull.cu | 47 ++++++++++++++ .../awkward_IndexedArray_numnull_parents.cu | 61 +++++++++++++++++++ 4 files changed, 214 insertions(+) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu new file mode 100644 index 0000000000..8adc4a9c60 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu @@ -0,0 +1,50 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (numnull, mask, length, validwhen, invocation_index, err_code) = args +// scan_in_array = cupy.empty(length, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_numnull_a', numnull.dtype, mask.dtype]))(grid, block, (numnull, mask, length, validwhen, scan_in_array, invocation_index, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_numnull_b', numnull.dtype, mask.dtype]))(grid, block, (numnull, mask, length, validwhen, scan_in_array, invocation_index, err_code)) +// out["awkward_ByteMaskedArray_numnull_a", {dtype_specializations}] = None +// out["awkward_ByteMaskedArray_numnull_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ByteMaskedArray_numnull_a(T* numnull, + const C* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < length) { + *numnull = 0; + if ((mask[thread_id] != 0) != validwhen) { + scan_in_array[thread_id] = 1; + } + else { + scan_in_array[thread_id] = 0; + } + } + } +} + +template +__global__ void +awkward_ByteMaskedArray_numnull_b(T* numnull, + const C* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *numnull = scan_in_array[length - 1]; + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu new file mode 100644 index 0000000000..20e657f9cd --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu @@ -0,0 +1,56 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (toindex, length, invocation_index, err_code) = args +// scan_in_array = cupy.empty(length, dtype=cupy.int64) +// scan_in_array_n_non_null = cupy.empty(length, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_Index_nones_as_index_a", toindex.dtype]))(grid, block, (toindex, length, scan_in_array, scan_in_array_n_non_null, invocation_index, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array_n_non_null = inclusive_scan(grid, block, (scan_in_array_n_non_null, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_Index_nones_as_index_b", toindex.dtype]))(grid, block, (toindex, length, scan_in_array, scan_in_array_n_non_null, invocation_index, err_code)) +// out["awkward_Index_nones_as_index_a", {dtype_specializations}] = None +// out["awkward_Index_nones_as_index_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_Index_nones_as_index_a(T* toindex, + int64_t length, + int64_t* scan_in_array, + int64_t* scan_in_array_n_non_null, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < length) { + if (toindex[thread_id] != -1) { + scan_in_array[thread_id] = 1; + scan_in_array_n_non_null[thread_id] = 0; + } + else { + scan_in_array_n_non_null[thread_id] = 1; + scan_in_array[thread_id] = 0; + } + } + } +} + +template +__global__ void +awkward_Index_nones_as_index_b(T* toindex, + int64_t length, + int64_t* scan_in_array, + int64_t* scan_in_array_n_non_null, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t n_non_null = scan_in_array[length - 1]; + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < length) { + toindex[thread_id] == -1 ? toindex[thread_id] = (n_non_null + scan_in_array_n_non_null[thread_id] - 1): toindex[thread_id]; + } + } +} + +// fails for [-1] diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu new file mode 100644 index 0000000000..831742fcee --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu @@ -0,0 +1,47 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (numnull, fromindex, lenindex, invocation_index, err_code) = args +// scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_a', numnull.dtype, fromindex.dtype]))(grid, block, (numnull, fromindex, lenindex, scan_in_array, invocation_index, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_b', numnull.dtype, fromindex.dtype]))(grid, block, (numnull, fromindex, lenindex, scan_in_array, invocation_index, err_code)) +// out["awkward_IndexedArray_numnull_a", {dtype_specializations}] = None +// out["awkward_IndexedArray_numnull_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_IndexedArray_numnull_a(T* numnull, + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenindex) { + if (fromindex[thread_id] < 0) { + scan_in_array[thread_id] = 1; + } + else { + scan_in_array[thread_id] = 0; + } + } + } +} + +template +__global__ void +awkward_IndexedArray_numnull_b(T* numnull, + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *numnull = scan_in_array[lenindex - 1]; + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu new file mode 100644 index 0000000000..afbd0f0f19 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -0,0 +1,61 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (numnull, tolength, fromindex, lenindex, invocation_index, err_code) = args +// scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_parents_a', numnull.dtype, tolength.dtype, fromindex.dtype]))(grid, block, (numnull, tolength, fromindex, lenindex, scan_in_array, invocation_index, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_parents_b', numnull.dtype, tolength.dtype, fromindex.dtype]))(grid, block, (numnull, tolength, fromindex, lenindex, scan_in_array, invocation_index, err_code)) +// out["awkward_IndexedArray_numnull_parents_a", {dtype_specializations}] = None +// out["awkward_IndexedArray_numnull_parents_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_IndexedArray_numnull_parents_a(T* numnull, + U* tolength, + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenindex) { + if (fromindex[thread_id] < 0) { + scan_in_array[thread_id] = 1; + } + else { + scan_in_array[thread_id] = 0; + } + } + } +} + +template +__global__ void +awkward_IndexedArray_numnull_parents_b(T* numnull, + U* tolength, + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenindex) { + if (fromindex[thread_id] < 0) { + numnull[thread_id] = 1; + } + else { + numnull[thread_id] = 0; + } + } + *tolength = scan_in_array[lenindex - 1]; + } +} + +// fails for [-1] From 432a11d6d71c4ccdca650ac66511a3435403063b Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Tue, 30 Jan 2024 15:24:32 +0100 Subject: [PATCH 03/18] feat: add more kernels with cumulative sum --- dev/generate-kernel-signatures.py | 3 ++ dev/generate-tests.py | 3 ++ src/awkward/_connect/cuda/__init__.py | 3 ++ .../awkward_ByteMaskedArray_numnull.cu | 3 +- .../awkward_IndexedArray_numnull.cu | 22 ++++----- .../awkward_IndexedArray_numnull_parents.cu | 2 +- ...kward_ListArray_getitem_jagged_carrylen.cu | 46 +++++++++++++++++ ...ard_ListArray_getitem_next_range_counts.cu | 46 +++++++++++++++++ ...rd_ListArray_rpad_and_clip_length_axis1.cu | 49 +++++++++++++++++++ 9 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index fadc840ebc..f0a100dfa2 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -50,9 +50,12 @@ "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", "awkward_ListArray_getitem_jagged_expand", + "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_next_array_advanced", "awkward_ListArray_getitem_next_array", "awkward_ListArray_getitem_next_at", + "awkward_ListArray_getitem_next_range_counts", + "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index e1d7ef7041..32be2fdab6 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -690,9 +690,12 @@ def gencpuunittests(specdict): "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", "awkward_ListArray_getitem_jagged_expand", + "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_next_array_advanced", "awkward_ListArray_getitem_next_array", "awkward_ListArray_getitem_next_at", + "awkward_ListArray_getitem_next_range_counts", + "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index fc237f4611..5a7fb3987e 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -84,12 +84,15 @@ def fetch_template_specializations(kernel_dict): "awkward_IndexedArray_flatten_nextcarry", "awkward_IndexedArray_getitem_nextcarry", "awkward_IndexedArray_getitem_nextcarry_outindex", + "awkward_ListArray_getitem_next_range_counts", "awkward_IndexedArray_index_of_nulls", "awkward_IndexedArray_reduce_next_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "awkward_ListArray_compact_offsets", + "awkward_ListArray_getitem_jagged_carrylen", + "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", "awkward_reduce_count_64", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu index 8adc4a9c60..50a7fe0d7f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu @@ -24,7 +24,6 @@ awkward_ByteMaskedArray_numnull_a(T* numnull, int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { - *numnull = 0; if ((mask[thread_id] != 0) != validwhen) { scan_in_array[thread_id] = 1; } @@ -45,6 +44,6 @@ awkward_ByteMaskedArray_numnull_b(T* numnull, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *numnull = scan_in_array[length - 1]; + *numnull = (T)scan_in_array[length - 1]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu index 831742fcee..54ae39b47d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu @@ -14,11 +14,11 @@ template __global__ void awkward_IndexedArray_numnull_a(T* numnull, - const C* fromindex, - int64_t lenindex, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -36,12 +36,12 @@ awkward_IndexedArray_numnull_a(T* numnull, template __global__ void awkward_IndexedArray_numnull_b(T* numnull, - const C* fromindex, - int64_t lenindex, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *numnull = scan_in_array[lenindex - 1]; + *numnull = (T)scan_in_array[lenindex - 1]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu index afbd0f0f19..70c3f72571 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -54,7 +54,7 @@ awkward_IndexedArray_numnull_parents_b(T* numnull, numnull[thread_id] = 0; } } - *tolength = scan_in_array[lenindex - 1]; + *tolength = (T)scan_in_array[lenindex - 1]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu new file mode 100644 index 0000000000..1fd477b0e6 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu @@ -0,0 +1,46 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (carrylen, slicestarts, slicestops, sliceouterlen, invocation_index, err_code) = args +// scan_in_array = cupy.empty(sliceouterlen, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_carrylen_a", carrylen.dtype, slicestarts.dtype, slicestops.dtype]))(grid, block, (carrylen, slicestarts, slicestops, sliceouterlen, scan_in_array, invocation_index, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_carrylen_b", carrylen.dtype, slicestarts.dtype, slicestops.dtype]))(grid, block, (carrylen, slicestarts, slicestops, sliceouterlen, scan_in_array, invocation_index, err_code)) +// out["awkward_ListArray_getitem_jagged_carrylen_a", {dtype_specializations}] = None +// out["awkward_ListArray_getitem_jagged_carrylen_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListArray_getitem_jagged_carrylen_a(T* carrylen, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < sliceouterlen) { + scan_in_array[thread_id] = (T)(slicestops[thread_id] - slicestarts[thread_id]); + } + } +} + +template +__global__ void +awkward_ListArray_getitem_jagged_carrylen_b(T* carrylen, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *carrylen = scan_in_array[sliceouterlen - 1]; + } +} + +// fails for sliceouterlen = 1 \ No newline at end of file diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu new file mode 100644 index 0000000000..ff4e621321 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu @@ -0,0 +1,46 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (total, fromoffsets, lenstarts, invocation_total, err_code) = args +// scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_next_range_counts_a", total.dtype, fromoffsets.dtype]))(grid, block, (total, fromoffsets, lenstarts, scan_in_array, invocation_total, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_total, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_next_range_counts_b", total.dtype, fromoffsets.dtype]))(grid, block, (total, fromoffsets, lenstarts, scan_in_array, invocation_total, err_code)) +// out["awkward_ListArray_getitem_next_range_counts_a", {dtype_specializations}] = None +// out["awkward_ListArray_getitem_next_range_counts_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListArray_getitem_next_range_counts_a(T* total, + const C* fromoffsets, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_total, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenstarts) { + scan_in_array[thread_id] = (T)(fromoffsets[thread_id + 1] - fromoffsets[thread_id]); + } + } +} + +template +__global__ void +awkward_ListArray_getitem_next_range_counts_b(T* total, + const C* fromoffsets, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_total, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenstarts) { + *total = scan_in_array[lenstarts - 1]; + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu new file mode 100644 index 0000000000..54e2f8b6aa --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu @@ -0,0 +1,49 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (tomin, fromstarts, fromstops, target, lenstarts, invocation_index, err_code) = args +// scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_rpad_and_clip_length_axis1_a", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, target, lenstarts, scan_in_array, invocation_index, err_code)) +// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_rpad_and_clip_length_axis1_b", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, target, lenstarts, scan_in_array, invocation_index, err_code)) +// out["awkward_ListArray_rpad_and_clip_length_axis1_a", {dtype_specializations}] = None +// out["awkward_ListArray_rpad_and_clip_length_axis1_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListArray_rpad_and_clip_length_axis1_a(T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t target, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenstarts) { + int64_t rangeval = fromstops[thread_id] - fromstarts[thread_id]; + scan_in_array[thread_id] = (target > rangeval) ? target : rangeval; + } + } +} + +template +__global__ void +awkward_ListArray_rpad_and_clip_length_axis1_b(T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t target, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *tomin = scan_in_array[lenstarts - 1]; + } +} + +// fails for lenstarts = 1 \ No newline at end of file From fdcf96eb311fc1188555121948b33db57e94b6ad Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Wed, 31 Jan 2024 17:12:21 +0100 Subject: [PATCH 04/18] added exclusive_scan function and add new cuda kernels --- dev/generate-kernel-signatures.py | 3 + dev/generate-tests.py | 45 +++- kernel-test-data.json | 232 +++++++++++++++++- src/awkward/_connect/cuda/__init__.py | 3 + .../awkward_ByteMaskedArray_numnull.cu | 4 +- .../awkward_Index_nones_as_index.cu | 4 +- .../awkward_IndexedArray_numnull.cu | 4 +- .../awkward_IndexedArray_numnull_parents.cu | 9 +- ...kward_ListArray_getitem_jagged_carrylen.cu | 8 +- ...ard_ListArray_getitem_next_range_counts.cu | 28 +-- ...rd_ListArray_rpad_and_clip_length_axis1.cu | 6 +- ...d_RegularArray_reduce_local_nextparents.cu | 46 ++++ ...egularArray_reduce_nonlocal_preparenext.cu | 51 ++++ .../awkward_sorting_ranges_length.cu | 49 ++++ .../_connect/cuda/cuda_kernels/cuda_common.cu | 57 +++++ 15 files changed, 505 insertions(+), 44 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index f0a100dfa2..6bd43476cf 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -47,6 +47,8 @@ "awkward_RegularArray_getitem_next_range", "awkward_RegularArray_getitem_next_range_spreadadvanced", "awkward_RegularArray_getitem_next_array", + "awkward_RegularArray_reduce_local_nextparents", + "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", "awkward_ListArray_getitem_jagged_expand", @@ -93,6 +95,7 @@ "awkward_reduce_sum_bool", "awkward_reduce_prod_bool", "awkward_reduce_countnonzero", + "awkward_sorting_ranges_length", ] diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 32be2fdab6..c3ac69fdf8 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -35,6 +35,13 @@ def __init__(self, name, typename, direction, role="default"): self.role = role +no_role_kernels = [ + "awkward_NumpyArray_sort_asstrings_uint8", + "awkward_argsort", + "awkward_sort", +] + + class Specification: def __init__(self, templatized_kernel_name, spec, testdata, blacklisted): self.templatized_kernel_name = templatized_kernel_name @@ -51,6 +58,8 @@ def __init__(self, templatized_kernel_name, spec, testdata, blacklisted): ) if blacklisted: self.tests = [] + elif templatized_kernel_name in no_role_kernels: + self.tests = [] else: self.tests = self.gettests(testdata) @@ -185,6 +194,7 @@ def gettests(self, testdata): def readspec(): specdict = {} + specdict_unit = {} with open(os.path.join(CURRENT_DIR, "..", "kernel-specification.yml")) as f: loadfile = yaml.load(f, Loader=yaml.CSafeLoader) @@ -193,6 +203,13 @@ def readspec(): data = json.load(f)["tests"] for spec in indspec: + for childfunc in spec["specializations"]: + specdict_unit[childfunc["name"]] = Specification( + spec["name"], + childfunc, + data, + not spec["automatic-tests"], + ) if "def " in spec["definition"]: for childfunc in spec["specializations"]: specdict[childfunc["name"]] = Specification( @@ -201,7 +218,7 @@ def readspec(): data, not spec["automatic-tests"], ) - return specdict + return specdict, specdict_unit def getdtypes(args): @@ -215,6 +232,8 @@ def getdtypes(args): typename = typename + "_" if count == 1: dtypes.append("cupy." + typename) + elif count == 2: + dtypes.append("cupy." + typename) return dtypes @@ -239,7 +258,12 @@ def checkintrange(test_args, error, args): if "int" in typename or "uint" in typename: dtype = gettypename(typename) min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max - if "List" in typename: + if "List[List" in typename: + for row in val: + for data in row: + if not (min_val <= data <= max_val): + flag = False + elif "List" in typename: for data in val: if not (min_val <= data <= max_val): flag = False @@ -687,6 +711,8 @@ def gencpuunittests(specdict): "awkward_RegularArray_getitem_next_range", "awkward_RegularArray_getitem_next_range_spreadadvanced", "awkward_RegularArray_getitem_next_array", + "awkward_RegularArray_reduce_local_nextparents", + "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", "awkward_ListArray_getitem_jagged_expand", @@ -733,6 +759,7 @@ def gencpuunittests(specdict): "awkward_reduce_sum_bool", "awkward_reduce_prod_bool", "awkward_reduce_countnonzero", + "awkward_sorting_ranges_length", ] @@ -973,8 +1000,12 @@ def gencudaunittests(specdict): ) ) elif count == 2: - raise NotImplementedError - + f.write( + " " * 4 + + "{} = cupy.array({}, dtype=cupy.{})\n".format( + arg, val, typename + ) + ) cuda_string = ( "funcC = cupy_backend['" + spec.templatized_kernel_name @@ -1075,10 +1106,10 @@ def evalkernels(): if __name__ == "__main__": genpykernels() evalkernels() - specdict = readspec() + specdict, specdict_unit = readspec() genspectests(specdict) gencpukerneltests(specdict) - gencpuunittests(specdict) + gencpuunittests(specdict_unit) genunittests() gencudakerneltests(specdict) - gencudaunittests(specdict) + gencudaunittests(specdict_unit) diff --git a/kernel-test-data.json b/kernel-test-data.json index 23e77a0df0..b73947ab77 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -4427,6 +4427,79 @@ } ] }, + { + "name": "awkward_RegularArray_reduce_local_nextparents", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "size": 3, + "length": 2 + }, + "outputs": { + "nextparents": [0, 0, 0, 1, 1, 1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "size": 1, + "length": 1 + }, + "outputs": { + "nextparents": [0] + } + } + ] + }, + { + "name": "awkward_RegularArray_reduce_nonlocal_preparenext", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "parents": [0, 1], + "size": 3, + "length": 2 + }, + "outputs": { + "nextcarry": [0, 3, 1, 4, 2, 5], + "nextparents": [0, 3, 1, 4, 2, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [2, 4, 6], + "size": 3, + "length": 3 + }, + "outputs": { + "nextcarry": [0, 3, 6, 1, 4, 7, 2, 5, 8], + "nextparents": [6, 12, 18, 7, 13, 19, 8, 14, 20] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [0], + "size": 1, + "length": 1 + }, + "outputs": { + "nextcarry": [0], + "nextparents": [0] + } + } + ] + }, { "name": "awkward_RegularArray_localindex", "status": true, @@ -11772,6 +11845,72 @@ } ] }, + { + "name": "awkward_ListArray_getitem_jagged_carrylen", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [0, 2], + "slicestops": [0, 2], + "sliceouterlen": 2 + }, + "outputs": { + "carrylen": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [0, 2], + "slicestops": [2, 5], + "sliceouterlen": 2 + }, + "outputs": { + "carrylen": [5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [2], + "slicestops": [4], + "sliceouterlen": 1 + }, + "outputs": { + "carrylen": [2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [0, 1, 3, 5, 7], + "slicestops": [1, 3, 5, 7, 9], + "sliceouterlen": 5 + }, + "outputs": { + "carrylen": [9] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [1], + "slicestops": [1], + "sliceouterlen": 1 + }, + "outputs": { + "carrylen": [0] + } + } + ] + }, { "name": "awkward_ListArray_getitem_jagged_expand", "status": false, @@ -13743,6 +13882,25 @@ } ] }, + { + "name": "awkward_UnionArray_flatten_length", + "status": false, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromtags": [0, 0, 0, 0], + "fromindex": [0, 1, 2, 3], + "length": 4, + "offsetsraws": [[0, 1, 3, 5, 7], [1, 3, 5, 7, 9]] + }, + "outputs": { + "total_length": [7] + } + } + ] + }, { "name": "awkward_UnionArray_validity", "status": true, @@ -15814,7 +15972,7 @@ }, { "name": "awkward_ListArray_getitem_next_range_counts", - "status": false, + "status": true, "tests": [ { "error": false, @@ -15837,6 +15995,28 @@ "outputs": { "total": [9] } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0, 0, 0, 0], + "lenstarts": 3 + }, + "outputs": { + "total": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0, 3], + "lenstarts": 1 + }, + "outputs": { + "total": [3] + } } ] }, @@ -22064,6 +22244,56 @@ } } ] + }, + { + "name": "awkward_sorting_ranges_length", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "parents": [0, 1], + "parentslength": 2 + }, + "outputs": { + "tolength": [3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [0, 3, 6, 9], + "parentslength": 4 + }, + "outputs": { + "tolength": [5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [3, 3, 3, 3], + "parentslength": 4 + }, + "outputs": { + "tolength": [2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [2, 4, 4], + "parentslength": 3 + }, + "outputs": { + "tolength": [3] + } + } + ] } ] } diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 5a7fb3987e..40a5702889 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -94,6 +94,8 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_MaskedArray_getitem_next_jagged_project", + "awkward_RegularArray_reduce_local_nextparents", + "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_UnionArray_project", "awkward_reduce_count_64", "awkward_reduce_sum", @@ -106,6 +108,7 @@ def fetch_template_specializations(kernel_dict): "awkward_reduce_countnonzero", "awkward_reduce_max", "awkward_reduce_min", + "awkward_sorting_ranges_length", ] template_specializations = [] import re diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu index 50a7fe0d7f..d22d8709dc 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu @@ -5,7 +5,7 @@ // (numnull, mask, length, validwhen, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_numnull_a', numnull.dtype, mask.dtype]))(grid, block, (numnull, mask, length, validwhen, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_numnull_b', numnull.dtype, mask.dtype]))(grid, block, (numnull, mask, length, validwhen, scan_in_array, invocation_index, err_code)) // out["awkward_ByteMaskedArray_numnull_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_numnull_b", {dtype_specializations}] = None @@ -44,6 +44,6 @@ awkward_ByteMaskedArray_numnull_b(T* numnull, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *numnull = (T)scan_in_array[length - 1]; + *numnull = scan_in_array[length - 1]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu index 20e657f9cd..6ec8d96685 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu @@ -6,8 +6,8 @@ // scan_in_array = cupy.empty(length, dtype=cupy.int64) // scan_in_array_n_non_null = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Index_nones_as_index_a", toindex.dtype]))(grid, block, (toindex, length, scan_in_array, scan_in_array_n_non_null, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) -// scan_in_array_n_non_null = inclusive_scan(grid, block, (scan_in_array_n_non_null, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array_n_non_null = exclusive_scan(grid, block, (scan_in_array_n_non_null, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Index_nones_as_index_b", toindex.dtype]))(grid, block, (toindex, length, scan_in_array, scan_in_array_n_non_null, invocation_index, err_code)) // out["awkward_Index_nones_as_index_a", {dtype_specializations}] = None // out["awkward_Index_nones_as_index_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu index 54ae39b47d..bbd820c2f0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu @@ -5,7 +5,7 @@ // (numnull, fromindex, lenindex, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_a', numnull.dtype, fromindex.dtype]))(grid, block, (numnull, fromindex, lenindex, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_b', numnull.dtype, fromindex.dtype]))(grid, block, (numnull, fromindex, lenindex, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_numnull_a", {dtype_specializations}] = None // out["awkward_IndexedArray_numnull_b", {dtype_specializations}] = None @@ -42,6 +42,6 @@ awkward_IndexedArray_numnull_b(T* numnull, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *numnull = (T)scan_in_array[lenindex - 1]; + *numnull = scan_in_array[lenindex - 1]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu index 70c3f72571..0169e9e452 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -5,7 +5,7 @@ // (numnull, tolength, fromindex, lenindex, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_parents_a', numnull.dtype, tolength.dtype, fromindex.dtype]))(grid, block, (numnull, tolength, fromindex, lenindex, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_parents_b', numnull.dtype, tolength.dtype, fromindex.dtype]))(grid, block, (numnull, tolength, fromindex, lenindex, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_numnull_parents_a", {dtype_specializations}] = None // out["awkward_IndexedArray_numnull_parents_b", {dtype_specializations}] = None @@ -45,7 +45,9 @@ awkward_IndexedArray_numnull_parents_b(T* numnull, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - + if(thread_id == 0) { + *tolength = scan_in_array[lenindex - 1]; + } if (thread_id < lenindex) { if (fromindex[thread_id] < 0) { numnull[thread_id] = 1; @@ -54,8 +56,5 @@ awkward_IndexedArray_numnull_parents_b(T* numnull, numnull[thread_id] = 0; } } - *tolength = (T)scan_in_array[lenindex - 1]; } } - -// fails for [-1] diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu index 1fd477b0e6..c81638e0d4 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu @@ -5,7 +5,7 @@ // (carrylen, slicestarts, slicestops, sliceouterlen, invocation_index, err_code) = args // scan_in_array = cupy.empty(sliceouterlen, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_carrylen_a", carrylen.dtype, slicestarts.dtype, slicestops.dtype]))(grid, block, (carrylen, slicestarts, slicestops, sliceouterlen, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_carrylen_b", carrylen.dtype, slicestarts.dtype, slicestops.dtype]))(grid, block, (carrylen, slicestarts, slicestops, sliceouterlen, scan_in_array, invocation_index, err_code)) // out["awkward_ListArray_getitem_jagged_carrylen_a", {dtype_specializations}] = None // out["awkward_ListArray_getitem_jagged_carrylen_b", {dtype_specializations}] = None @@ -24,7 +24,7 @@ awkward_ListArray_getitem_jagged_carrylen_a(T* carrylen, int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < sliceouterlen) { - scan_in_array[thread_id] = (T)(slicestops[thread_id] - slicestarts[thread_id]); + scan_in_array[thread_id] = (T)(slicestops[thread_id] - slicestarts[thread_id]); } } } @@ -39,8 +39,6 @@ awkward_ListArray_getitem_jagged_carrylen_b(T* carrylen, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *carrylen = scan_in_array[sliceouterlen - 1]; + *carrylen = (T)scan_in_array[sliceouterlen - 1]; } } - -// fails for sliceouterlen = 1 \ No newline at end of file diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu index ff4e621321..7a3f3d18cd 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu @@ -5,7 +5,7 @@ // (total, fromoffsets, lenstarts, invocation_total, err_code) = args // scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_next_range_counts_a", total.dtype, fromoffsets.dtype]))(grid, block, (total, fromoffsets, lenstarts, scan_in_array, invocation_total, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_total, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_total, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_next_range_counts_b", total.dtype, fromoffsets.dtype]))(grid, block, (total, fromoffsets, lenstarts, scan_in_array, invocation_total, err_code)) // out["awkward_ListArray_getitem_next_range_counts_a", {dtype_specializations}] = None // out["awkward_ListArray_getitem_next_range_counts_b", {dtype_specializations}] = None @@ -14,11 +14,11 @@ template __global__ void awkward_ListArray_getitem_next_range_counts_a(T* total, - const C* fromoffsets, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_total, - uint64_t* err_code) { + const C* fromoffsets, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_total, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -31,16 +31,12 @@ awkward_ListArray_getitem_next_range_counts_a(T* total, template __global__ void awkward_ListArray_getitem_next_range_counts_b(T* total, - const C* fromoffsets, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_total, - uint64_t* err_code) { + const C* fromoffsets, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_total, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - - if (thread_id < lenstarts) { - *total = scan_in_array[lenstarts - 1]; - } + *total = scan_in_array[lenstarts - 1]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu index 54e2f8b6aa..1ed393afa1 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu @@ -5,7 +5,7 @@ // (tomin, fromstarts, fromstops, target, lenstarts, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_rpad_and_clip_length_axis1_a", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, target, lenstarts, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_rpad_and_clip_length_axis1_b", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, target, lenstarts, scan_in_array, invocation_index, err_code)) // out["awkward_ListArray_rpad_and_clip_length_axis1_a", {dtype_specializations}] = None // out["awkward_ListArray_rpad_and_clip_length_axis1_b", {dtype_specializations}] = None @@ -42,8 +42,6 @@ awkward_ListArray_rpad_and_clip_length_axis1_b(T* tomin, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *tomin = scan_in_array[lenstarts - 1]; + *tomin = scan_in_array[lenstarts - 1]; } } - -// fails for lenstarts = 1 \ No newline at end of file diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu new file mode 100644 index 0000000000..5f6b566339 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu @@ -0,0 +1,46 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (nextparents, size, length, invocation_index, err_code) = args +// scan_in_array = cupy.empty(length * size, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_local_nextparents_a', nextparents.dtype]))(grid, block, (nextparents, size, length, scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_local_nextparents_b', nextparents.dtype]))(grid, block, (nextparents, size, length, scan_in_array, invocation_index, err_code)) +// out["awkward_RegularArray_reduce_local_nextparents_a", {dtype_specializations}] = None +// out["awkward_RegularArray_reduce_local_nextparents_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_RegularArray_reduce_local_nextparents_a(T* nextparents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t len = length * size; + if (thread_id < len) { + scan_in_array[thread_id] = 1; + } + } +} + +template +__global__ void +awkward_RegularArray_reduce_local_nextparents_b(T* nextparents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / size; + int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % size; + if (thread_id < length && thready_id < size) { + nextparents[scan_in_array[thread_id * size + thready_id] - 1] = thread_id; + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu new file mode 100644 index 0000000000..2a970ea87d --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu @@ -0,0 +1,51 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (nextcarry, nextparents, parents, size, length, invocation_index, err_code) = args +// scan_in_array = cupy.empty(length * size, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_nonlocal_preparenext_a', nextcarry.dtype, nextparents.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, parents, size, length, scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_nonlocal_preparenext_b', nextcarry.dtype, nextparents.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, parents, size, length, scan_in_array, invocation_index, err_code)) +// out["awkward_RegularArray_reduce_nonlocal_preparenext_a", {dtype_specializations}] = None +// out["awkward_RegularArray_reduce_nonlocal_preparenext_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_RegularArray_reduce_nonlocal_preparenext_a(T* nextcarry, + C* nextparents, + const U* parents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thready_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t len = length * size; + if (thready_id < len) { + scan_in_array[thready_id] = 1; + } + } +} + +template +__global__ void +awkward_RegularArray_reduce_nonlocal_preparenext_b(T* nextcarry, + C* nextparents, + const U* parents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) / length; + int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) % length; + if (thread_id < length && thready_id < size) { + nextcarry[scan_in_array[thready_id * length + thread_id] - 1] = thread_id * size + thready_id; + nextparents[scan_in_array[thready_id * length + thread_id] - 1] = parents[thread_id] * size + thready_id; + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu new file mode 100644 index 0000000000..4dc66c0fbe --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu @@ -0,0 +1,49 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (tolength, parents, parentslength, invocation_index, err_code) = args +// scan_in_array = cupy.empty(parentslength, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_sorting_ranges_length_a', tolength.dtype, parents.dtype]))(grid, block, (tolength, parents, parentslength, scan_in_array, invocation_index, err_code)) +// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_sorting_ranges_length_b', tolength.dtype, parents.dtype]))(grid, block, (tolength, parents, parentslength, scan_in_array, invocation_index, err_code)) +// out["awkward_sorting_ranges_length_a", {dtype_specializations}] = None +// out["awkward_sorting_ranges_length_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_sorting_ranges_length_a(T* tolength, + const C* parents, + int64_t parentslength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id == 0 ) { + scan_in_array[thread_id] = 2; + } + if (thread_id > 0 && thread_id < parentslength) { + if (parents[thread_id - 1] != parents[thread_id]) { + scan_in_array[thread_id] = 1; + } + else { + scan_in_array[thread_id] = 0; + } + } + } +} + +template +__global__ void +awkward_sorting_ranges_length_b(T* tolength, + const C* parents, + int64_t parentslength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *tolength = scan_in_array[parentslength - 1]; + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu b/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu index 3c5cdf37f9..8a02094f34 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/cuda_common.cu @@ -46,6 +46,17 @@ typedef unsigned long long uintmax_t; // stride = stride * 2 // return d_final // out['inclusive_scan_kernel', cupy.int64] = inclusive_scan + +// def exclusive_scan(grid, block, args): +// print(args) +// (d_in, invocation_index, err_code) = args +// import math +// d_out = cupy.empty(len(d_in), dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['exclusive_scan_kernel', cupy.int64]))(grid, block, (d_in, d_out, len(d_in), invocation_index, err_code)) +// print(d_out) +// print("\n") +// return d_out +// out['exclusive_scan_kernel', cupy.int64] = exclusive_scan // END PYTHON template @@ -87,3 +98,49 @@ inclusive_scan_kernel(T* d_in, } } } + + +template +__global__ void +exclusive_scan_kernel(T* input, + T* output, + int64_t n, + uint64_t* invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + extern __shared__ int temp[1024*2]; + int tid = threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + temp[tid] = input[idx]; + } else { + temp[tid] = 0; + } + __syncthreads(); + + for (int stride = 1; stride <= 1024; stride *= 2) { + int index = (tid + 1) * stride * 2 - 1; + if (index < 2 * 1024) { + temp[index] += temp[index - stride]; + } + __syncthreads(); + } + + if (tid == 0) { + temp[2 * 1024 - 1] = 0; + } + __syncthreads(); + + for (int stride = 1024; stride > 0; stride /= 2) { + int index = (tid + 1) * stride * 2 - 1; + if (index + stride < 2 * 1024) { + temp[index + stride] += temp[index]; + } + __syncthreads(); + } + + if (idx < n) { + output[idx] = temp[tid]; + } + } +} From 69e2a6e8331f897e7043d8702b73d74953f702c5 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Wed, 31 Jan 2024 17:20:01 +0100 Subject: [PATCH 05/18] test: remove XFAIL for awkward_ByteMaskedArray_numnull --- tests-cuda/test_1276_cuda_num.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests-cuda/test_1276_cuda_num.py b/tests-cuda/test_1276_cuda_num.py index 2ce8b86b70..b283a4a835 100644 --- a/tests-cuda/test_1276_cuda_num.py +++ b/tests-cuda/test_1276_cuda_num.py @@ -14,7 +14,6 @@ pytest.skip(reason="too old Numba version", allow_module_level=True) -@pytest.mark.xfail(reason="unimplemented CUDA Kernels (awkward_ByteMaskedArray_numnull") def test_num_1(): content = ak.Array( ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] @@ -25,11 +24,8 @@ def test_num_1(): ) cuda_array = ak.to_backend(array, "cuda") assert ak.num(cuda_array, 0) == ak.num(array, 0) - with pytest.raises(NotImplementedError): - ak.num(cuda_array, 1) -@pytest.mark.xfail(reason="unimplemented CUDA Kernels (awkward_ByteMaskedArray_numnull") def test_num_2(): content = ak.Array( ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] @@ -40,8 +36,6 @@ def test_num_2(): ) cuda_array = ak.to_backend(array, "cuda") assert ak.num(cuda_array, 0) == ak.num(array, 0) - with pytest.raises(NotImplementedError): - ak.num(cuda_array, 1) def test_num_3(): From 9a735937c37d5beebc891f7c1457cce69c043f0f Mon Sep 17 00:00:00 2001 From: Manasvi Goyal Date: Wed, 31 Jan 2024 18:18:53 +0100 Subject: [PATCH 06/18] feat: add python kernel definition for awkward_sorting_ranges_length --- kernel-specification.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel-specification.yml b/kernel-specification.yml index 852db8c235..cfa38f2e82 100644 --- a/kernel-specification.yml +++ b/kernel-specification.yml @@ -5917,5 +5917,9 @@ kernels: - {name: parentslength, type: "int64_t", dir: in, role: default} description: null definition: | - Insert Python definition here + def awkward_sorting_ranges_length(tolength, parents, parentslength): + tolength[0] = 2 + for i in range(1, parentslength): + if parents[i - 1] != parents[i]: + tolength[0] = tolength[0] + 1 automatic-tests: false From d9e0ff75077ed40680e9fe49992f8bf5d24d75e6 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Thu, 1 Feb 2024 11:14:13 +0100 Subject: [PATCH 07/18] feat: use `cupy.cumsum` --- ...kward_ByteMaskedArray_getitem_nextcarry.cu | 2 +- ...eMaskedArray_getitem_nextcarry_outindex.cu | 2 +- .../awkward_ByteMaskedArray_numnull.cu | 2 +- .../awkward_ByteMaskedArray_reduce_next_64.cu | 8 +-- ...rray_reduce_next_nonlocal_nextshifts_64.cu | 43 ++++++++-------- ..._next_nonlocal_nextshifts_fromshifts_64.cu | 49 +++++++++---------- ...em_next_missing_jagged_getmaskstartstop.cu | 2 +- .../awkward_Index_nones_as_index.cu | 6 +-- .../awkward_IndexedArray_flatten_nextcarry.cu | 2 +- .../awkward_IndexedArray_getitem_nextcarry.cu | 2 +- ...IndexedArray_getitem_nextcarry_outindex.cu | 2 +- .../awkward_IndexedArray_index_of_nulls.cu | 2 +- .../awkward_IndexedArray_numnull.cu | 2 +- .../awkward_IndexedArray_numnull_parents.cu | 2 +- .../awkward_IndexedArray_reduce_next_64.cu | 6 +-- ...rray_reduce_next_nonlocal_nextshifts_64.cu | 37 +++++++------- ..._next_nonlocal_nextshifts_fromshifts_64.cu | 7 ++- ...xedOptionArray_rpad_and_clip_mask_axis1.cu | 2 +- ...kward_ListArray_getitem_jagged_carrylen.cu | 2 +- ...ard_ListArray_getitem_next_range_counts.cu | 2 +- ...rd_ListArray_rpad_and_clip_length_axis1.cu | 2 +- ...MaskedArray_getitem_next_jagged_project.cu | 2 +- ...d_RegularArray_reduce_local_nextparents.cu | 2 +- ...egularArray_reduce_nonlocal_preparenext.cu | 2 +- .../awkward_UnionArray_project.cu | 2 +- .../awkward_sorting_ranges_length.cu | 2 +- 26 files changed, 92 insertions(+), 102 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu index e43bbc3997..7e5a0eef7f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu @@ -5,7 +5,7 @@ // (tocarry, mask, length, validwhen, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_a', tocarry.dtype, mask.dtype]))(grid, block, (tocarry, mask, length, validwhen, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_b', tocarry.dtype, mask.dtype]))(grid, block, (tocarry, mask, length, validwhen, scan_in_array, invocation_index, err_code)) // out["awkward_ByteMaskedArray_getitem_nextcarry_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_getitem_nextcarry_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu index a661fb3aa7..c69ce2d14d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu @@ -5,7 +5,7 @@ // (tocarry, outindex, mask, length, validwhen, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_outindex_a', tocarry.dtype, outindex.dtype, mask.dtype]))(grid, block, (tocarry, outindex, mask, length, validwhen, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_outindex_b', tocarry.dtype, outindex.dtype, mask.dtype]))(grid, block, (tocarry, outindex, mask, length, validwhen, scan_in_array, invocation_index, err_code)) // out["awkward_ByteMaskedArray_getitem_nextcarry_outindex_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_getitem_nextcarry_outindex_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu index d22d8709dc..c685b12913 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu @@ -5,7 +5,7 @@ // (numnull, mask, length, validwhen, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_numnull_a', numnull.dtype, mask.dtype]))(grid, block, (numnull, mask, length, validwhen, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_numnull_b', numnull.dtype, mask.dtype]))(grid, block, (numnull, mask, length, validwhen, scan_in_array, invocation_index, err_code)) // out["awkward_ByteMaskedArray_numnull_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_numnull_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu index 4cf1c8b58d..b842e18683 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu @@ -4,9 +4,9 @@ // def f(grid, block, args): // (nextcarry, nextparents, outindex, mask, parents, length, validwhen, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) -// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_a', nextcarry.dtype, nextparents.dtype, outindex.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, length, validwhen, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, length, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_b', nextcarry.dtype, nextparents.dtype, outindex.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, length, validwhen, scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_a', nextcarry.dtype, nextparents.dtype, outindex.dtype, mask.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, length, validwhen, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_b', nextcarry.dtype, nextparents.dtype, outindex.dtype, mask.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, length, validwhen, scan_in_array, invocation_index, err_code)) // out["awkward_ByteMaskedArray_reduce_next_64_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_reduce_next_64_b", {dtype_specializations}] = None // END PYTHON @@ -29,6 +29,8 @@ awkward_ByteMaskedArray_reduce_next_64_a(T* nextcarry, if (thread_id < length) { if ((mask[thread_id] != 0) == validwhen) { scan_in_array[thread_id] = 1; + } else { + scan_in_array[thread_id] = 0; } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu index 867193a2cc..9481e4a661 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu @@ -6,24 +6,23 @@ // scan_in_array_k = cupy.empty(length, dtype=cupy.int64) // scan_in_array_nullsum = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) -// scan_in_array_k = inclusive_scan(grid, block, (scan_in_array_k, invocation_index, err_code)) -// scan_in_array_nullsum = inclusive_scan(grid, block, (scan_in_array_nullsum, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) +// scan_in_array_k = cupy.cumsum(scan_in_array_k) +// scan_in_array_nullsum = cupy.cumsum(scan_in_array_nullsum) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) // out["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b", {dtype_specializations}] = None // END PYTHON template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a( - T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a(T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -41,22 +40,20 @@ awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a( template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b( - T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b(T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { if ((mask[thread_id] != 0) == (valid_when != 0)) { - nextshifts[scan_in_array_k[thread_id] - 1] = - scan_in_array_nullsum[thread_id] - 1; + nextshifts[scan_in_array_k[thread_id] - 1] = scan_in_array_nullsum[thread_id]; } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu index 373460d7e3..6fb1ff705b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu @@ -5,26 +5,25 @@ // (nextshifts, mask, length, valid_when, shifts, invocation_index, err_code) = args // scan_in_array_k = cupy.empty(length, dtype=cupy.int64) // scan_in_array_nullsum = cupy.empty(length, dtype=cupy.int64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) -// scan_in_array_k = inclusive_scan(grid, block, (scan_in_array_k, invocation_index, err_code)) -// scan_in_array_nullsum = inclusive_scan(grid, block, (scan_in_array_nullsum, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a", nextshifts.dtype, mask.dtype, shifts.dtype]))(grid, block, (nextshifts, mask, length, valid_when, shifts, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) +// scan_in_array_k = cupy.cumsum(scan_in_array_k) +// scan_in_array_nullsum = cupy.cumsum(scan_in_array_nullsum) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b", nextshifts.dtype, mask.dtype, shifts.dtype]))(grid, block, (nextshifts, mask, length, valid_when, shifts, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) // out["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a", {dtype_specializations}] = None // out["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b", {dtype_specializations}] = None // END PYTHON template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a( - T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a(T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -42,23 +41,21 @@ awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a( template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b( - T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b(T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { if ((mask[thread_id] != 0) == (valid_when != 0)) { - nextshifts[scan_in_array_k[thread_id] - 1] = - shifts[thread_id] + (scan_in_array_nullsum[thread_id] - 1); + nextshifts[scan_in_array_k[thread_id] - 1] = shifts[thread_id] + scan_in_array_nullsum[thread_id]; } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu index 9f24700d5c..40fcb25548 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu @@ -5,7 +5,7 @@ // (index_in, offsets_in, mask_out, starts_out, stops_out, length, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a", index_in.dtype, offsets_in.dtype, mask_out.dtype, starts_out.dtype, stops_out.dtype]))(grid, block, (index_in, offsets_in, mask_out, starts_out, stops_out, length, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b", index_in.dtype, offsets_in.dtype, mask_out.dtype, starts_out.dtype, stops_out.dtype]))(grid, block, (index_in, offsets_in, mask_out, starts_out, stops_out, length, scan_in_array, invocation_index, err_code)) // out["awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a", {dtype_specializations}] = None // out["awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu index 6ec8d96685..ecea88ae39 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu @@ -6,8 +6,8 @@ // scan_in_array = cupy.empty(length, dtype=cupy.int64) // scan_in_array_n_non_null = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Index_nones_as_index_a", toindex.dtype]))(grid, block, (toindex, length, scan_in_array, scan_in_array_n_non_null, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) -// scan_in_array_n_non_null = exclusive_scan(grid, block, (scan_in_array_n_non_null, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// scan_in_array_n_non_null = cupy.cumsum(scan_in_array_n_non_null) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Index_nones_as_index_b", toindex.dtype]))(grid, block, (toindex, length, scan_in_array, scan_in_array_n_non_null, invocation_index, err_code)) // out["awkward_Index_nones_as_index_a", {dtype_specializations}] = None // out["awkward_Index_nones_as_index_b", {dtype_specializations}] = None @@ -52,5 +52,3 @@ awkward_Index_nones_as_index_b(T* toindex, } } } - -// fails for [-1] diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu index 2fdb9cd26c..459022618c 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu @@ -9,7 +9,7 @@ enum class INDEXEDARRAY_FLATTEN_NEXTCARRY_ERRORS { // (tocarry, fromindex, lenindex, lencontent, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_flatten_nextcarry_a", tocarry.dtype, fromindex.dtype]))(grid, block, (tocarry, fromindex, lenindex, lencontent, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_flatten_nextcarry_b", tocarry.dtype, fromindex.dtype]))(grid, block, (tocarry, fromindex, lenindex, lencontent, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_flatten_nextcarry_a", {dtype_specializations}] = None // out["awkward_IndexedArray_flatten_nextcarry_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu index f530a6bd45..a7ebe048d9 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu @@ -9,7 +9,7 @@ enum class INDEXEDARRAY_GETITEM_NEXTCARRY_ERRORS { // (tocarry, fromindex, lenindex, lencontent, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_getitem_nextcarry_a", tocarry.dtype, fromindex.dtype]))(grid, block, (tocarry, fromindex, lenindex, lencontent, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_getitem_nextcarry_b", tocarry.dtype, fromindex.dtype]))(grid, block, (tocarry, fromindex, lenindex, lencontent, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_getitem_nextcarry_a", {dtype_specializations}] = None // out["awkward_IndexedArray_getitem_nextcarry_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu index 237790a617..825efa2666 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu @@ -9,7 +9,7 @@ enum class INDEXEDARRAY_GETITEM_NEXTCARRY_OUTINDEX_ERRORS { // (tocarry, toindex, fromindex, lenindex, lencontent, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_getitem_nextcarry_outindex_a", tocarry.dtype, toindex.dtype, fromindex.dtype]))(grid, block, (tocarry, toindex, fromindex, lenindex, lencontent, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_getitem_nextcarry_outindex_b", tocarry.dtype, toindex.dtype, fromindex.dtype]))(grid, block, (tocarry, toindex, fromindex, lenindex, lencontent, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_getitem_nextcarry_outindex_a", {dtype_specializations}] = None // out["awkward_IndexedArray_getitem_nextcarry_outindex_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu index a5beadb17f..311d62a3df 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu @@ -5,7 +5,7 @@ // (toindex, fromindex, lenindex, parents, starts, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_index_of_nulls_a", toindex.dtype, fromindex.dtype, parents.dtype, starts.dtype]))(grid, block, (toindex, fromindex, lenindex, parents, starts, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_index_of_nulls_b", toindex.dtype, fromindex.dtype, parents.dtype, starts.dtype]))(grid, block, (toindex, fromindex, lenindex, parents, starts, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_index_of_nulls_a", {dtype_specializations}] = None // out["awkward_IndexedArray_index_of_nulls_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu index bbd820c2f0..210f90b557 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu @@ -5,7 +5,7 @@ // (numnull, fromindex, lenindex, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_a', numnull.dtype, fromindex.dtype]))(grid, block, (numnull, fromindex, lenindex, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_b', numnull.dtype, fromindex.dtype]))(grid, block, (numnull, fromindex, lenindex, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_numnull_a", {dtype_specializations}] = None // out["awkward_IndexedArray_numnull_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu index 0169e9e452..e30a7751f0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -5,7 +5,7 @@ // (numnull, tolength, fromindex, lenindex, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenindex, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_parents_a', numnull.dtype, tolength.dtype, fromindex.dtype]))(grid, block, (numnull, tolength, fromindex, lenindex, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_IndexedArray_numnull_parents_b', numnull.dtype, tolength.dtype, fromindex.dtype]))(grid, block, (numnull, tolength, fromindex, lenindex, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_numnull_parents_a", {dtype_specializations}] = None // out["awkward_IndexedArray_numnull_parents_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu index 32aff24eb4..230c5d8a94 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu @@ -4,9 +4,9 @@ // def f(grid, block, args): // (nextcarry, nextparents, outindex, index, parents, length, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_64_a", nextcarry.dtype, nextparents.dtype, outindex.dtype, index.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, outindex, index, parents, length, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_64_b", nextcarry.dtype, nextparents.dtype, outindex.dtype, index.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, outindex, index, parents, length, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_64_a", nextcarry.dtype, nextparents.dtype, outindex.dtype, index.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, outindex, index, parents, length, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_64_b", nextcarry.dtype, nextparents.dtype, outindex.dtype, index.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, outindex, index, parents, length, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedArray_reduce_next_64_a", {dtype_specializations}] = None // out["awkward_IndexedArray_reduce_next_64_b", {dtype_specializations}] = None // END PYTHON diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu index 7ad21c6b06..71de7aab0f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu @@ -6,8 +6,8 @@ // scan_in_array_k = cupy.empty(length, dtype=cupy.int64) // scan_in_array_nullsum = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a", nextshifts.dtype, index.dtype]))(grid, block, (nextshifts, index, length, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) -// scan_in_array_k = inclusive_scan(grid, block, (scan_in_array_k, invocation_index, err_code)) -// scan_in_array_nullsum = inclusive_scan(grid, block, (scan_in_array_nullsum, invocation_index, err_code)) +// scan_in_array_k = cupy.cumsum(scan_in_array_k) +// scan_in_array_nullsum = cupy.cumsum(scan_in_array_nullsum) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_b", nextshifts.dtype, index.dtype]))(grid, block, (nextshifts, index, length, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) // out["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a", {dtype_specializations}] = None // out["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_b", {dtype_specializations}] = None @@ -15,14 +15,13 @@ template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a( - T* nextshifts, - const C* index, - int64_t length, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a(T* nextshifts, + const C* index, + int64_t length, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -40,21 +39,19 @@ awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a( template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_b( - T* nextshifts, - const C* index, - int64_t length, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_b(T* nextshifts, + const C* index, + int64_t length, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { if (index[thread_id] >= 0) { - nextshifts[scan_in_array_k[thread_id] - 1] = - scan_in_array_nullsum[thread_id] - 1; + nextshifts[scan_in_array_k[thread_id] - 1] = scan_in_array_nullsum[thread_id]; } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu index a75a32d0cf..a61abd0e7d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu @@ -6,8 +6,8 @@ // scan_in_array_k = cupy.empty(length, dtype=cupy.int64) // scan_in_array_nullsum = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a", nextshifts.dtype, index.dtype, shifts.dtype]))(grid, block, (nextshifts, index, length, shifts, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) -// scan_in_array_k = inclusive_scan(grid, block, (scan_in_array_k, invocation_index, err_code)) -// scan_in_array_nullsum = inclusive_scan(grid, block, (scan_in_array_nullsum, invocation_index, err_code)) +// scan_in_array_k = cupy.cumsum(scan_in_array_k) +// scan_in_array_nullsum = cupy.cumsum(scan_in_array_nullsum) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b", nextshifts.dtype, index.dtype, shifts.dtype]))(grid, block, (nextshifts, index, length, shifts, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code)) // out["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a", {dtype_specializations}] = None // out["awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b", {dtype_specializations}] = None @@ -55,8 +55,7 @@ awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b( if (thread_id < length) { if (index[thread_id] >= 0) { - nextshifts[scan_in_array_k[thread_id] - 1] = - shifts[thread_id] + (scan_in_array_nullsum[thread_id] - 1); + nextshifts[scan_in_array_k[thread_id] - 1] = shifts[thread_id] + scan_in_array_nullsum[thread_id]; } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu index 5bcfd361c4..854eb3ccbb 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu @@ -5,7 +5,7 @@ // (toindex, frommask, length, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_a", toindex.dtype, frommask.dtype]))(grid, block, (toindex, frommask, length, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_b", toindex.dtype, frommask.dtype]))(grid, block, (toindex, frommask, length, scan_in_array, invocation_index, err_code)) // out["awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_a", {dtype_specializations}] = None // out["awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu index c81638e0d4..3675674da2 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu @@ -5,7 +5,7 @@ // (carrylen, slicestarts, slicestops, sliceouterlen, invocation_index, err_code) = args // scan_in_array = cupy.empty(sliceouterlen, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_carrylen_a", carrylen.dtype, slicestarts.dtype, slicestops.dtype]))(grid, block, (carrylen, slicestarts, slicestops, sliceouterlen, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_carrylen_b", carrylen.dtype, slicestarts.dtype, slicestops.dtype]))(grid, block, (carrylen, slicestarts, slicestops, sliceouterlen, scan_in_array, invocation_index, err_code)) // out["awkward_ListArray_getitem_jagged_carrylen_a", {dtype_specializations}] = None // out["awkward_ListArray_getitem_jagged_carrylen_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu index 7a3f3d18cd..e524ee546d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu @@ -5,7 +5,7 @@ // (total, fromoffsets, lenstarts, invocation_total, err_code) = args // scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_next_range_counts_a", total.dtype, fromoffsets.dtype]))(grid, block, (total, fromoffsets, lenstarts, scan_in_array, invocation_total, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_total, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_next_range_counts_b", total.dtype, fromoffsets.dtype]))(grid, block, (total, fromoffsets, lenstarts, scan_in_array, invocation_total, err_code)) // out["awkward_ListArray_getitem_next_range_counts_a", {dtype_specializations}] = None // out["awkward_ListArray_getitem_next_range_counts_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu index 1ed393afa1..20b8835907 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu @@ -5,7 +5,7 @@ // (tomin, fromstarts, fromstops, target, lenstarts, invocation_index, err_code) = args // scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_rpad_and_clip_length_axis1_a", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, target, lenstarts, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_rpad_and_clip_length_axis1_b", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, target, lenstarts, scan_in_array, invocation_index, err_code)) // out["awkward_ListArray_rpad_and_clip_length_axis1_a", {dtype_specializations}] = None // out["awkward_ListArray_rpad_and_clip_length_axis1_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu index 26372ebe46..dfc34956a8 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu @@ -5,7 +5,7 @@ // (index, starts_in, stops_in, starts_out, stops_out, length, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_MaskedArray_getitem_next_jagged_project_a", index.dtype, starts_in.dtype, stops_in.dtype, starts_out.dtype, stops_out.dtype]))(grid, block, (index, starts_in, stops_in, starts_out, stops_out, length, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_MaskedArray_getitem_next_jagged_project_b", index.dtype, starts_in.dtype, stops_in.dtype, starts_out.dtype, stops_out.dtype]))(grid, block, (index, starts_in, stops_in, starts_out, stops_out, length, scan_in_array, invocation_index, err_code)) // out["awkward_MaskedArray_getitem_next_jagged_project_a", {dtype_specializations}] = None // out["awkward_MaskedArray_getitem_next_jagged_project_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu index 5f6b566339..4dffc06d42 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu @@ -5,7 +5,7 @@ // (nextparents, size, length, invocation_index, err_code) = args // scan_in_array = cupy.empty(length * size, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_local_nextparents_a', nextparents.dtype]))(grid, block, (nextparents, size, length, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_local_nextparents_b', nextparents.dtype]))(grid, block, (nextparents, size, length, scan_in_array, invocation_index, err_code)) // out["awkward_RegularArray_reduce_local_nextparents_a", {dtype_specializations}] = None // out["awkward_RegularArray_reduce_local_nextparents_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu index 2a970ea87d..5daea340d2 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu @@ -5,7 +5,7 @@ // (nextcarry, nextparents, parents, size, length, invocation_index, err_code) = args // scan_in_array = cupy.empty(length * size, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_nonlocal_preparenext_a', nextcarry.dtype, nextparents.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, parents, size, length, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_reduce_nonlocal_preparenext_b', nextcarry.dtype, nextparents.dtype, parents.dtype]))(grid, block, (nextcarry, nextparents, parents, size, length, scan_in_array, invocation_index, err_code)) // out["awkward_RegularArray_reduce_nonlocal_preparenext_a", {dtype_specializations}] = None // out["awkward_RegularArray_reduce_nonlocal_preparenext_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu index 24ed394fbd..a0a9d7bde6 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu @@ -3,7 +3,7 @@ // (lenout, tocarry, fromtags, fromindex, length, which, invocation_index, err_code) = args // scan_in_array = cupy.empty(length, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_UnionArray_project_a", lenout.dtype, tocarry.dtype, fromtags.dtype, fromindex.dtype]))(grid, block, (lenout, tocarry, fromtags, fromindex, length, which, scan_in_array, invocation_index, err_code)) -// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_UnionArray_project_b", lenout.dtype, tocarry.dtype, fromtags.dtype, fromindex.dtype]))(grid, block, (lenout, tocarry, fromtags, fromindex, length, which, scan_in_array, invocation_index, err_code)) // out["awkward_UnionArray_project_a", {dtype_specializations}] = None // out["awkward_UnionArray_project_b", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu index 4dc66c0fbe..f69c754079 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu @@ -5,7 +5,7 @@ // (tolength, parents, parentslength, invocation_index, err_code) = args // scan_in_array = cupy.empty(parentslength, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_sorting_ranges_length_a', tolength.dtype, parents.dtype]))(grid, block, (tolength, parents, parentslength, scan_in_array, invocation_index, err_code)) -// scan_in_array = exclusive_scan(grid, block, (scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_sorting_ranges_length_b', tolength.dtype, parents.dtype]))(grid, block, (tolength, parents, parentslength, scan_in_array, invocation_index, err_code)) // out["awkward_sorting_ranges_length_a", {dtype_specializations}] = None // out["awkward_sorting_ranges_length_b", {dtype_specializations}] = None From 2f9cef3e5065ba488ed648032763dd60c118e4db Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Thu, 1 Feb 2024 11:15:21 +0100 Subject: [PATCH 08/18] test: remove XFAIL --- tests-cuda/test_1276_cuda_transfers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests-cuda/test_1276_cuda_transfers.py b/tests-cuda/test_1276_cuda_transfers.py index 75627b15b5..8886fd7b70 100644 --- a/tests-cuda/test_1276_cuda_transfers.py +++ b/tests-cuda/test_1276_cuda_transfers.py @@ -284,7 +284,6 @@ def test_tocuda_unimplementedkernels14(): assert ak.to_list(copyback_bytemaskedarray) == ak.to_list(bytemaskedarray) -@pytest.mark.xfail(reason="awkward_ListArray_broadcast_tooffsets is not implemented") def test_tocuda_unimplementedkernels15(): ioa = ak.contents.IndexedOptionArray( ak.index.Index32([-30, 19, 6, 7, -3, 21, 13, 22, 17, 9, -12, 16]), From 1b4249903605e2242b34ea0f1a9930c9be83487e Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Thu, 1 Feb 2024 16:21:07 +0100 Subject: [PATCH 09/18] fix: check all kernels for length = 0 --- dev/generate-kernel-signatures.py | 2 +- dev/generate-tests.py | 2 +- kernel-test-data.json | 1616 ++++++++++++++--- .../awkward_ByteMaskedArray_numnull.cu | 2 +- ...em_next_missing_jagged_getmaskstartstop.cu | 18 +- .../awkward_IndexedArray_numnull.cu | 2 +- .../awkward_IndexedArray_numnull_parents.cu | 4 +- .../awkward_IndexedArray_numnull_unique_64.cu | 15 - ...kward_ListArray_getitem_jagged_carrylen.cu | 4 +- ...ard_ListArray_getitem_next_range_counts.cu | 2 +- ...rd_ListArray_rpad_and_clip_length_axis1.cu | 2 +- ...Array_getitem_next_range_spreadadvanced.cu | 1 - .../awkward_UnionArray_project.cu | 3 +- .../awkward_sorting_ranges_length.cu | 4 +- 14 files changed, 1434 insertions(+), 243 deletions(-) delete mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 6bd43476cf..6343bfe570 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -22,7 +22,7 @@ "awkward_ByteMaskedArray_numnull", "awkward_IndexedArray_numnull", "awkward_IndexedArray_numnull_parents", - "awkward_IndexedArray_numnull_unique_64", + "awkward_IndexedArray_numnull_unique", "awkward_NumpyArray_fill", "awkward_ListArray_fill", "awkward_IndexedArray_fill", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index c3ac69fdf8..ad14a157c7 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -686,7 +686,7 @@ def gencpuunittests(specdict): "awkward_ByteMaskedArray_numnull", "awkward_IndexedArray_numnull", "awkward_IndexedArray_numnull_parents", - "awkward_IndexedArray_numnull_unique_64", + "awkward_IndexedArray_numnull_unique", "awkward_NumpyArray_fill", "awkward_ListArray_fill", "awkward_IndexedArray_fill", diff --git a/kernel-test-data.json b/kernel-test-data.json index b73947ab77..610b424d41 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -895,6 +895,17 @@ "name": "awkward_index_rpad_and_clip_axis0", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "target": 2 + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -1033,6 +1044,19 @@ "name": "awkward_BitMaskedArray_to_ByteMaskedArray", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "bitmasklength": 0, + "frombitmask": [], + "lsb_order": false, + "validwhen": false + }, + "outputs": { + "tobytemask": [] + } + }, { "error": false, "message": "", @@ -1078,6 +1102,18 @@ "name": "awkward_ByteMaskedArray_getitem_nextcarry", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mask": [], + "validwhen": false + }, + "outputs": { + "tocarry": [] + } + }, { "error": false, "message": "", @@ -1108,6 +1144,19 @@ "name": "awkward_ByteMaskedArray_getitem_nextcarry_outindex", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mask": [], + "validwhen": true + }, + "outputs": { + "outindex": [], + "tocarry": [] + } + }, { "error": false, "message": "", @@ -1140,6 +1189,18 @@ "name": "awkward_ByteMaskedArray_numnull", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mask": [], + "validwhen": false + }, + "outputs": { + "numnull": [0] + } + }, { "error": false, "message": "", @@ -1350,6 +1411,18 @@ "name": "awkward_ByteMaskedArray_toIndexedOptionArray", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mask": [], + "validwhen": false + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -1380,6 +1453,30 @@ "name": "awkward_IndexedArray_flatten_nextcarry", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "lencontent": 0, + "lenindex": 0 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": true, + "message": "index out of range", + "inputs": { + "fromindex": [0, 1], + "lencontent": 0, + "lenindex": 2 + }, + "outputs": { + "tocarry": [] + } + }, { "error": true, "message": "index out of range", @@ -1408,8 +1505,32 @@ }, { "name": "awkward_IndexedArray_getitem_nextcarry", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "lencontent": 0, + "lenindex": 0 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": true, + "message": "index out of range", + "inputs": { + "fromindex": [0, 1], + "lencontent": 0, + "lenindex": 2 + }, + "outputs": { + "tocarry": [] + } + }, { "error": true, "message": "index out of range", @@ -1750,8 +1871,34 @@ }, { "name": "awkward_IndexedArray_getitem_nextcarry_outindex", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "lencontent": 0, + "lenindex": 0 + }, + "outputs": { + "tocarry": [], + "toindex": [] + } + }, + { + "error": true, + "message": "index out of range", + "inputs": { + "fromindex": [0, 1, 2, 4], + "lencontent": 0, + "lenindex": 4 + }, + "outputs": { + "tocarry": [], + "toindex": [] + } + }, { "error": true, "message": "index out of range", @@ -1797,6 +1944,17 @@ "name": "awkward_IndexedArray_numnull", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "lenindex": 0 + }, + "outputs": { + "numnull": [0] + } + }, { "error": false, "message": "", @@ -1986,6 +2144,17 @@ "name": "awkward_IndexedArray_numnull_parents", "status": true, "tests": [ + { + "error": false, + "inputs": { + "fromindex": [], + "lenindex": 0 + }, + "outputs": { + "numnull": [], + "tolength": [0] + } + }, { "error": false, "inputs": { @@ -2066,7 +2235,7 @@ ] }, { - "name": "awkward_IndexedArray_numnull_unique_64", + "name": "awkward_IndexedArray_numnull_unique", "status": true, "tests": [ { @@ -2125,6 +2294,18 @@ "name": "awkward_IndexedArray_overlay_mask", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "length": 0, + "mask": [] + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -4431,6 +4612,39 @@ "name": "awkward_RegularArray_reduce_local_nextparents", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "size": 3, + "length": 0 + }, + "outputs": { + "nextparents": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "size": 0, + "length": 0 + }, + "outputs": { + "nextparents": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "size": 0, + "length": 2 + }, + "outputs": { + "nextparents": [] + } + }, { "error": false, "message": "", @@ -4459,6 +4673,45 @@ "name": "awkward_RegularArray_reduce_nonlocal_preparenext", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "parents": [], + "size": 3, + "length": 0 + }, + "outputs": { + "nextcarry": [], + "nextparents": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [], + "size": 0, + "length": 0 + }, + "outputs": { + "nextcarry": [], + "nextparents": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "parents": [0, 1], + "size": 0, + "length": 2 + }, + "outputs": { + "nextcarry": [], + "nextparents": [] + } + }, { "error": false, "message": "", @@ -4508,40 +4761,33 @@ "error": false, "message": "", "inputs": { - "length": 2, + "length": 0, "size": 3 }, "outputs": { - "toindex": [0, 1, 2, 0, 1, 2] + "toindex": [] } }, { "error": false, "message": "", "inputs": { - "length": 6, - "size": 5 + "length": 0, + "size": 0 }, "outputs": { - "toindex": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4] + "toindex": [] } - } - ] - }, - { - "name": "awkward_RegularArray_rpad_and_clip_axis1", - "status": true, - "tests": [ + }, { "error": false, "message": "", "inputs": { - "length": 3, - "size": 2, - "target": 2 + "length": 2, + "size": 0 }, "outputs": { - "toindex": [0, 1, 2, 3, 4, 5] + "toindex": [] } }, { @@ -4549,7 +4795,95 @@ "message": "", "inputs": { "length": 2, - "size": 3, + "size": 3 + }, + "outputs": { + "toindex": [0, 1, 2, 0, 1, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 6, + "size": 5 + }, + "outputs": { + "toindex": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4] + } + } + ] + }, + { + "name": "awkward_RegularArray_rpad_and_clip_axis1", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 3, + "size": 0, + "target": 2 + }, + "outputs": { + "toindex": [-1, -1, -1, -1, -1, -1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "size": 0, + "target": 0 + }, + "outputs": { + "toindex": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "size": 0, + "target": 2 + }, + "outputs": { + "toindex": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "size": 2, + "target": 2 + }, + "outputs": { + "toindex": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 3, + "size": 2, + "target": 2 + }, + "outputs": { + "toindex": [0, 1, 2, 3, 4, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 2, + "size": 3, "target": 3 }, "outputs": { @@ -4622,6 +4956,42 @@ "name": "awkward_RegularArray_getitem_carry", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromcarry": [0, 0], + "lencarry": 2, + "size": 0 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromcarry": [], + "lencarry": 0, + "size": 1 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromcarry": [], + "lencarry": 0, + "size": 0 + }, + "outputs": { + "tocarry": [] + } + }, { "error": false, "message": "", @@ -5156,6 +5526,71 @@ "name": "awkward_RegularArray_getitem_jagged_expand", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "regularlength": 0, + "regularsize": 0, + "singleoffsets": [0] + }, + "outputs": { + "multistarts": [], + "multistops": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "regularlength": 1, + "regularsize": 0, + "singleoffsets": [1] + }, + "outputs": { + "multistarts": [], + "multistops": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "regularlength": 0, + "regularsize": 0, + "singleoffsets": [0] + }, + "outputs": { + "multistarts": [], + "multistops": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "regularlength": 1, + "regularsize": 1, + "singleoffsets": [0, 2] + }, + "outputs": { + "multistarts": [0], + "multistops": [2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "regularlength": 2, + "regularsize": 1, + "singleoffsets": [0, 2] + }, + "outputs": { + "multistarts": [0, 0], + "multistops": [2, 2] + } + }, { "error": false, "message": "", @@ -8921,6 +9356,18 @@ "name": "awkward_ListArray_getitem_next_range_spreadadvanced", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromadvanced": [], + "fromoffsets": [0], + "lenstarts": 0 + }, + "outputs": { + "toadvanced": [] + } + }, { "error": false, "message": "", @@ -8963,6 +9410,42 @@ "name": "awkward_RegularArray_getitem_next_range_spreadadvanced", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromadvanced": [], + "length": 0, + "nextsize": 2 + }, + "outputs": { + "toadvanced": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromadvanced": [], + "length": 1, + "nextsize": 0 + }, + "outputs": { + "toadvanced": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromadvanced": [], + "length": 0, + "nextsize": 0 + }, + "outputs": { + "toadvanced": [] + } + }, { "error": false, "message": "", @@ -8999,11 +9482,11 @@ "inputs": { "inneroffsets": [0], "inneroffsetslen": 1, - "outeroffsets": [0, 0, 0, 0, 0], - "outeroffsetslen": 5 + "outeroffsets": [], + "outeroffsetslen": 0 }, "outputs": { - "tooffsets": [0, 0, 0, 0, 0] + "tooffsets": [] } }, { @@ -9424,6 +9907,20 @@ "name": "awkward_MaskedArray_getitem_next_jagged_project", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "length": 0, + "starts_in": [], + "stops_in": [] + }, + "outputs": { + "starts_out": [], + "stops_out": [] + } + }, { "error": false, "message": "", @@ -9888,6 +10385,19 @@ "name": "awkward_ByteMaskedArray_overlay_mask", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mymask": [], + "theirmask": [], + "validwhen": false + }, + "outputs": { + "tomask": [] + } + }, { "error": false, "message": "", @@ -9900,60 +10410,112 @@ "outputs": { "tomask": [0, 0] } - } - ] - }, - { - "name": "awkward_IndexedArray_flatten_none2empty", - "status": false, - "tests": [ + }, { "error": false, "message": "", "inputs": { - "offsets": [0, 1, 1, 6], - "offsetslength": 4, - "outindex": [0, 1, 2, 1], - "outindexlength": 4 + "length": 2, + "mymask": [0, 0], + "theirmask": [0, 0], + "validwhen": true }, "outputs": { - "outoffsets": [0, 1, 1, 6, 6] + "tomask": [1, 1] } }, { "error": false, "message": "", "inputs": { - "offsets": [0, 3, 3, 5], - "offsetslength": 4, - "outindex": [0, 1, 1, 1, 2], - "outindexlength": 5 + "length": 3, + "mymask": [1, 0], + "theirmask": [0, 1], + "validwhen": false }, "outputs": { - "outoffsets": [0, 3, 3, 3, 3, 5] + "tomask": [1, 1] } }, { "error": false, "message": "", "inputs": { - "offsets": [0, 3, 3, 4, 7], - "offsetslength": 5, - "outindex": [0, 1, 2, 1, 3], - "outindexlength": 5 + "length": 3, + "mymask": [0, 0], + "theirmask": [0, 1], + "validwhen": false }, "outputs": { - "outoffsets": [0, 3, 3, 4, 4, 7] + "tomask": [0, 1] } }, { "error": false, "message": "", "inputs": { - "offsets": [0, 3, 3, 5, 6, 6, 10], - "offsetslength": 7, - "outindex": [0, 1, 2, 3, 4, 1, 5], - "outindexlength": 7 + "length": 3, + "mymask": [1, 0], + "theirmask": [0, 0], + "validwhen": false + }, + "outputs": { + "tomask": [1, 0] + } + } + ] + }, + { + "name": "awkward_IndexedArray_flatten_none2empty", + "status": false, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "offsets": [0, 1, 1, 6], + "offsetslength": 4, + "outindex": [0, 1, 2, 1], + "outindexlength": 4 + }, + "outputs": { + "outoffsets": [0, 1, 1, 6, 6] + } + }, + { + "error": false, + "message": "", + "inputs": { + "offsets": [0, 3, 3, 5], + "offsetslength": 4, + "outindex": [0, 1, 1, 1, 2], + "outindexlength": 5 + }, + "outputs": { + "outoffsets": [0, 3, 3, 3, 3, 5] + } + }, + { + "error": false, + "message": "", + "inputs": { + "offsets": [0, 3, 3, 4, 7], + "offsetslength": 5, + "outindex": [0, 1, 2, 1, 3], + "outindexlength": 5 + }, + "outputs": { + "outoffsets": [0, 3, 3, 4, 4, 7] + } + }, + { + "error": false, + "message": "", + "inputs": { + "offsets": [0, 3, 3, 5, 6, 6, 10], + "offsetslength": 7, + "outindex": [0, 1, 2, 3, 4, 1, 5], + "outindexlength": 7 }, "outputs": { "outoffsets": [0, 3, 3, 5, 6, 6, 6, 10] @@ -10002,8 +10564,22 @@ }, { "name": "awkward_IndexedArray_reduce_next_64", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "length": 0, + "parents": [] + }, + "outputs": { + "nextcarry": [], + "nextparents": [], + "outindex": [] + } + }, { "error": false, "message": "", @@ -10164,6 +10740,45 @@ "name": "awkward_IndexedArray_simplify", "status": true, "tests": [ + { + "error": true, + "message": "index out of range", + "inputs": { + "innerindex": [], + "innerlength": 0, + "outerindex": [0, 3], + "outerlength": 2 + }, + "outputs": { + "toindex": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "innerindex": [], + "innerlength": 0, + "outerindex": [], + "outerlength": 0 + }, + "outputs": { + "toindex": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "innerindex": [0, 1], + "innerlength": 2, + "outerindex": [], + "outerlength": 0 + }, + "outputs": { + "toindex": [123, 123] + } + }, { "error": true, "message": "index out of range", @@ -10625,6 +11240,39 @@ "name": "awkward_IndexedArray_validity", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "isoption": true, + "lencontent": 3, + "length": 0 + }, + "outputs": {} + }, + { + "error": false, + "message": "", + "inputs": { + "index": [], + "isoption": true, + "lencontent": 0, + "length": 0 + }, + "outputs": {} + }, + { + "error": true, + "message": "index[i] >= len(content)", + "inputs": { + "index": [0, 1, 1, 1, 1, 3], + "isoption": true, + "lencontent": 0, + "length": 6 + }, + "outputs": {} + }, { "error": true, "message": "index[i] >= len(content)", @@ -11071,6 +11719,17 @@ "name": "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "frommask": [], + "length": 0 + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -11088,6 +11747,18 @@ "name": "awkward_index_rpad_and_clip_axis1", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "target": 1 + }, + "outputs": { + "tostarts": [], + "tostops": [] + } + }, { "error": false, "message": "", @@ -11359,8 +12030,56 @@ }, { "name": "awkward_ListArray_getitem_jagged_carrylen", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [], + "slicestops": [], + "sliceouterlen": 0 + }, + "outputs": { + "carrylen": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [0, 2], + "slicestops": [0, 2], + "sliceouterlen": 2 + }, + "outputs": { + "carrylen": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [2], + "slicestops": [4], + "sliceouterlen": 1 + }, + "outputs": { + "carrylen": [2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "slicestarts": [1], + "slicestops": [1], + "sliceouterlen": 1 + }, + "outputs": { + "carrylen": [0] + } + }, { "error": false, "message": "", @@ -11846,137 +12565,71 @@ ] }, { - "name": "awkward_ListArray_getitem_jagged_carrylen", - "status": true, + "name": "awkward_ListArray_getitem_jagged_expand", + "status": false, "tests": [ { - "error": false, - "message": "", + "error": true, + "message": "cannot fit jagged slice into nested list", "inputs": { - "slicestarts": [0, 2], - "slicestops": [0, 2], - "sliceouterlen": 2 + "fromstarts": [0, 2], + "fromstops": [2, 4], + "jaggedsize": 1, + "length": 2, + "singleoffsets": [0, 3, 4] }, "outputs": { - "carrylen": [0] + "multistarts": [0, 3, 0, 3], + "multistops": [3, 4, 3, 4], + "tocarry": [0, 1, 2, 3] } }, { - "error": false, - "message": "", + "error": true, + "message": "stops[i] < starts[i]", "inputs": { - "slicestarts": [0, 2], - "slicestops": [2, 5], - "sliceouterlen": 2 + "fromstarts": [5], + "fromstops": [4], + "jaggedsize": 2, + "length": 1, + "singleoffsets": [0, 3, 4] }, "outputs": { - "carrylen": [5] + "multistarts": [0, 3], + "multistops": [3, 4], + "tocarry": [2, 3] } }, { "error": false, "message": "", "inputs": { - "slicestarts": [2], - "slicestops": [4], - "sliceouterlen": 1 + "fromstarts": [0, 2], + "fromstops": [2, 4], + "jaggedsize": 2, + "length": 2, + "singleoffsets": [0, 3, 4] }, "outputs": { - "carrylen": [2] + "multistarts": [0, 3, 0, 3], + "multistops": [3, 4, 3, 4], + "tocarry": [0, 1, 2, 3] } }, { "error": false, "message": "", "inputs": { - "slicestarts": [0, 1, 3, 5, 7], - "slicestops": [1, 3, 5, 7, 9], - "sliceouterlen": 5 + "fromstarts": [2], + "fromstops": [4], + "jaggedsize": 2, + "length": 1, + "singleoffsets": [0, 3, 4] }, "outputs": { - "carrylen": [9] - } - }, - { - "error": false, - "message": "", - "inputs": { - "slicestarts": [1], - "slicestops": [1], - "sliceouterlen": 1 - }, - "outputs": { - "carrylen": [0] - } - } - ] - }, - { - "name": "awkward_ListArray_getitem_jagged_expand", - "status": false, - "tests": [ - { - "error": true, - "message": "cannot fit jagged slice into nested list", - "inputs": { - "fromstarts": [0, 2], - "fromstops": [2, 4], - "jaggedsize": 1, - "length": 2, - "singleoffsets": [0, 3, 4] - }, - "outputs": { - "multistarts": [0, 3, 0, 3], - "multistops": [3, 4, 3, 4], - "tocarry": [0, 1, 2, 3] - } - }, - { - "error": true, - "message": "stops[i] < starts[i]", - "inputs": { - "fromstarts": [5], - "fromstops": [4], - "jaggedsize": 2, - "length": 1, - "singleoffsets": [0, 3, 4] - }, - "outputs": { - "multistarts": [0, 3], - "multistops": [3, 4], - "tocarry": [2, 3] - } - }, - { - "error": false, - "message": "", - "inputs": { - "fromstarts": [0, 2], - "fromstops": [2, 4], - "jaggedsize": 2, - "length": 2, - "singleoffsets": [0, 3, 4] - }, - "outputs": { - "multistarts": [0, 3, 0, 3], - "multistops": [3, 4, 3, 4], - "tocarry": [0, 1, 2, 3] - } - }, - { - "error": false, - "message": "", - "inputs": { - "fromstarts": [2], - "fromstops": [4], - "jaggedsize": 2, - "length": 1, - "singleoffsets": [0, 3, 4] - }, - "outputs": { - "multistarts": [0, 3], - "multistops": [3, 4], - "tocarry": [2, 3] + "multistarts": [0, 3], + "multistops": [3, 4], + "tocarry": [2, 3] } } ] @@ -12173,6 +12826,19 @@ "name": "awkward_ListArray_rpad_and_clip_length_axis1", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [], + "fromstops": [], + "lenstarts": 0, + "target": 1 + }, + "outputs": { + "tomin": [0] + } + }, { "error": false, "message": "", @@ -12322,6 +12988,39 @@ "name": "awkward_ListArray_validity", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "lencontent": 0, + "length": 0, + "starts": [], + "stops": [] + }, + "outputs": {} + }, + { + "error": false, + "message": "", + "inputs": { + "lencontent": 2, + "length": 0, + "starts": [], + "stops": [] + }, + "outputs": {} + }, + { + "error": true, + "message": "stop[i] > len(content)", + "inputs": { + "lencontent": 0, + "length": 3, + "starts": [0, 0, 1], + "stops": [0, 1, 5] + }, + "outputs": {} + }, { "error": true, "message": "start[i] > stop[i]", @@ -13565,6 +14264,19 @@ "name": "awkward_IndexedArray_fill", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "base": 0, + "fromindex": [], + "length": 0, + "toindexoffset": 0 + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -13636,6 +14348,18 @@ "name": "awkward_IndexedArray_fill_count", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "base": 0, + "length": 0, + "toindexoffset": 0 + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -13690,6 +14414,22 @@ "name": "awkward_ListArray_fill", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "base": 0, + "fromstarts": [], + "fromstops": [], + "length": 0, + "tostartsoffset": 0, + "tostopsoffset": 0 + }, + "outputs": { + "tostarts": [], + "tostops": [] + } + }, { "error": false, "message": "", @@ -13868,6 +14608,18 @@ "toindex": [0, 0, 1, 1] } }, + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "length": 0, + "toindexoffset": 0 + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -13905,6 +14657,18 @@ "name": "awkward_UnionArray_validity", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "lencontents": [], + "length": 0, + "numcontents": 2, + "tags": [] + }, + "outputs": {} + }, { "error": true, "message": "tags[i] < 0", @@ -13969,86 +14733,337 @@ "error": false, "message": "", "inputs": { - "index": [0, 1, 2, 0, 1, 2, 3], - "lencontents": [3, 4], - "length": 7, - "numcontents": 2, - "tags": [0, 0, 0, 1, 1, 1, 1] + "index": [0, 1, 2, 0, 1, 2, 3], + "lencontents": [3, 4], + "length": 7, + "numcontents": 2, + "tags": [0, 0, 0, 1, 1, 1, 1] + }, + "outputs": {} + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 1, 0, 1, 2, 3], + "lencontents": [2, 4, 32, 49, 0, 0], + "length": 6, + "numcontents": 2, + "tags": [0, 0, 1, 1, 1, 1] + }, + "outputs": {} + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 0, 1, 1, 2, 3, 2, 4], + "lencontents": [5, 3, 32, 33], + "length": 8, + "numcontents": 2, + "tags": [0, 1, 1, 0, 0, 0, 1, 0] + }, + "outputs": {} + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 0, 1, 1, 2, 3, 2, 4], + "lencontents": [5, 3, 32, 625, 0, 0, 0], + "length": 8, + "numcontents": 2, + "tags": [0, 1, 1, 0, 0, 0, 1, 0] + }, + "outputs": {} + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 0, 1, 1, 2, 2, 3], + "lencontents": [3, 4, 32, 177], + "length": 7, + "numcontents": 2, + "tags": [0, 1, 1, 0, 0, 1, 1] + }, + "outputs": {} + } + ] + }, + { + "name": "awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mask": [], + "valid_when": false, + "shifts": [] + }, + "outputs": { + "nextshifts": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 7, + "mask": [0, 0, 0, 1, 1, 0, 0], + "valid_when": false, + "shifts": [0, 1, 1, 0, 1, 1, 0] + }, + "outputs": { + "nextshifts": [0, 1, 1, 3, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "mask": [0], + "valid_when": false, + "shifts": [0] + }, + "outputs": { + "nextshifts": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "mask": [0], + "valid_when": false, + "shifts": [1] + }, + "outputs": { + "nextshifts": [1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "mask": [0], + "valid_when": true, + "shifts": [1] + }, + "outputs": { + "nextshifts": [123] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 7, + "mask": [0, 0, 0, 1, 1, 0, 0], + "valid_when": true, + "shifts": [0, 1, 1, 0, 1, 1, 0] + }, + "outputs": { + "nextshifts": [3, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 5, + "mask": [0, 1, 0, 1, 1], + "valid_when": true, + "shifts": [0, 0, 1, 0, 0] + }, + "outputs": { + "nextshifts": [1, 2, 2] + } + } + ] + }, + { + "name": "awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "mask": [], + "valid_when": false + }, + "outputs": { + "nextshifts": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 7, + "mask": [0, 0, 0, 1, 1, 0, 0], + "valid_when": false + }, + "outputs": { + "nextshifts": [0, 0, 0, 2, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "mask": [0], + "valid_when": false + }, + "outputs": { + "nextshifts": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "mask": [0], + "valid_when": true + }, + "outputs": { + "nextshifts": [123] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 7, + "mask": [0, 0, 0, 1, 1, 0, 0], + "valid_when": true + }, + "outputs": { + "nextshifts": [3, 3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 5, + "mask": [0, 1, 0, 1, 1], + "valid_when": true + }, + "outputs": { + "nextshifts": [1, 2, 2] + } + } + ] + }, + { + "name": "awkward_ByteMaskedArray_reduce_next_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "mask": [], + "parents": [], + "length": 0, + "validwhen": false }, - "outputs": {} + "outputs": { + "nextcarry": [], + "nextparents": [], + "outindex": [] + } }, { "error": false, "message": "", "inputs": { - "index": [0, 1, 0, 1, 2, 3], - "lencontents": [2, 4, 32, 49, 0, 0], - "length": 6, - "numcontents": 2, - "tags": [0, 0, 1, 1, 1, 1] + "mask": [0, 0, 0, 1, 1, 0, 0], + "parents": [0, 0, 1, 1, 2, 2, 2], + "length": 7, + "validwhen": false }, - "outputs": {} + "outputs": { + "nextcarry": [0, 1, 2, 5, 6], + "nextparents": [0, 0, 1, 2, 2], + "outindex": [0, 1, 2, -1, -1, 3, 4] + } }, { "error": false, "message": "", "inputs": { - "index": [0, 0, 1, 1, 2, 3, 2, 4], - "lencontents": [5, 3, 32, 33], - "length": 8, - "numcontents": 2, - "tags": [0, 1, 1, 0, 0, 0, 1, 0] + "mask": [0], + "parents": [2], + "length": 1, + "validwhen": false }, - "outputs": {} + "outputs": { + "nextcarry": [0], + "nextparents": [2], + "outindex": [0] + } }, { "error": false, "message": "", "inputs": { - "index": [0, 0, 1, 1, 2, 3, 2, 4], - "lencontents": [5, 3, 32, 625, 0, 0, 0], - "length": 8, - "numcontents": 2, - "tags": [0, 1, 1, 0, 0, 0, 1, 0] + "mask": [1], + "parents": [1], + "length": 1, + "validwhen": false }, - "outputs": {} + "outputs": { + "nextcarry": [123], + "nextparents": [123], + "outindex": [-1] + } }, { "error": false, "message": "", "inputs": { - "index": [0, 0, 1, 1, 2, 2, 3], - "lencontents": [3, 4, 32, 177], - "length": 7, - "numcontents": 2, - "tags": [0, 1, 1, 0, 0, 1, 1] + "mask": [0, 1, 0, 1, 1], + "parents": [0, 0, 1, 1, 1], + "length": 5, + "validwhen": true }, - "outputs": {} + "outputs": { + "nextcarry": [1, 3, 4], + "nextparents": [0, 1, 1], + "outindex": [-1, 0, -1, 1, 2] + } } ] }, { - "name": "awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64", - "status": false, + "name": "awkward_IndexedArray_index_of_nulls", + "status": true, "tests": [ { "error": false, "message": "", "inputs": { - "length": 7, - "mask": [0, 0, 0, 1, 1, 0, 0], - "valid_when": false + "fromindex": [], + "lenindex": 0, + "parents": [], + "starts": [] }, "outputs": { - "nextshifts": [0, 0, 0, 2, 2] + "toindex": [] } - } - ] - }, - { - "name": "awkward_IndexedArray_index_of_nulls", - "status": true, - "tests": [ + }, { "error": false, "message": "", @@ -14273,8 +15288,19 @@ }, { "name": "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "length": 0 + }, + "outputs": { + "nextshifts": [] + } + }, { "error": false, "message": "", @@ -14389,8 +15415,20 @@ }, { "name": "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "length": 0, + "shifts": [] + }, + "outputs": { + "nextshifts": [] + } + }, { "error": false, "message": "", @@ -15279,6 +16317,19 @@ "name": "awkward_ListArray_getitem_next_at", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "at": -2, + "fromstarts": [], + "fromstops": [], + "lenstarts": 0 + }, + "outputs": { + "tocarry": [] + } + }, { "error": true, "message": "index out of range", @@ -15974,6 +17025,17 @@ "name": "awkward_ListArray_getitem_next_range_counts", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [], + "lenstarts": 0 + }, + "outputs": { + "total": [0] + } + }, { "error": false, "message": "", @@ -16151,6 +17213,51 @@ "name": "awkward_ListArray_rpad_axis1", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [], + "fromstops": [], + "length": 0, + "target": 4 + }, + "outputs": { + "toindex": [], + "tostarts": [], + "tostops": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [], + "fromstops": [], + "length": 0, + "target": 0 + }, + "outputs": { + "toindex": [], + "tostarts": [], + "tostops": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [0, 3, 4, 5, 8], + "fromstops": [3, 3, 6, 8, 9], + "length": 5, + "target": 0 + }, + "outputs": { + "toindex": [0, 1, 2, 4, 5, 5, 6, 7, 8, 123, 123], + "tostarts": [0, 3, 3, 5, 8], + "tostops": [3, 3, 5, 8, 9] + } + }, { "error": false, "message": "", @@ -17830,6 +18937,18 @@ "toptr": [0, 1, 3] } }, + { + "error": false, + "message": "", + "inputs": { + "tooffset": 0, + "fromptr": [], + "length": 0 + }, + "outputs": { + "toptr": [] + } + }, { "error": false, "message": "", @@ -18298,6 +19417,17 @@ "name": "awkward_UnionArray_fillna", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromindex": [], + "length": 0 + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -18447,6 +19577,19 @@ "name": "awkward_UnionArray_filltags", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "base": 0, + "fromtags": [], + "length": 0, + "totagsoffset": 0 + }, + "outputs": { + "totags": [] + } + }, { "error": false, "message": "", @@ -18466,6 +19609,18 @@ "name": "awkward_UnionArray_filltags_const", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "base": 0, + "length": 0, + "totagsoffset": 0 + }, + "outputs": { + "totags": [] + } + }, { "error": false, "message": "", @@ -18518,7 +19673,7 @@ }, { "name": "awkward_UnionArray_project", - "status": false, + "status": true, "tests": [ { "error": false, @@ -18871,6 +20026,19 @@ "toindex": [0, 1, -1, -1, -1, 5, -1, 7, 8, 9, -1, -1, -1, 13, -1, -1] } }, + { + "error": false, + "message": "", + "inputs": { + "bitmasklength": 0, + "frombitmask": [], + "lsb_order": false, + "validwhen": false + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", @@ -18969,7 +20137,7 @@ }, { "name": "awkward_Content_getitem_next_missing_jagged_getmaskstartstop", - "status": false, + "status": true, "tests": [ { "error": false, @@ -18985,6 +20153,20 @@ "stops_out": [1, 1] } }, + { + "error": false, + "message": "", + "inputs": { + "index_in": [], + "length": 0, + "offsets_in": [] + }, + "outputs": { + "mask_out": [], + "starts_out": [], + "stops_out": [] + } + }, { "error": false, "message": "", @@ -22205,6 +23387,25 @@ "name": "awkward_UnionArray_simplify", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "base": 0, + "innerindex": [], + "innertags": [], + "innerwhich": 0, + "length": 0, + "outerindex": [], + "outertags": [], + "outerwhich": 1, + "towhich": 1 + }, + "outputs": { + "toindex": [], + "totags": [] + } + }, { "error": false, "message": "", @@ -22249,6 +23450,17 @@ "name": "awkward_sorting_ranges_length", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "parents": [], + "parentslength": 0 + }, + "outputs": { + "tolength": [2] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu index c685b12913..ecdc304e94 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu @@ -44,6 +44,6 @@ awkward_ByteMaskedArray_numnull_b(T* numnull, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *numnull = scan_in_array[length - 1]; + *numnull = length > 0 ? scan_in_array[length - 1] : 0; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu index 40fcb25548..e63598235e 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu @@ -3,7 +3,7 @@ // BEGIN PYTHON // def f(grid, block, args): // (index_in, offsets_in, mask_out, starts_out, stops_out, length, invocation_index, err_code) = args -// scan_in_array = cupy.empty(length, dtype=cupy.int64) +// scan_in_array = cupy.empty(length + 1, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a", index_in.dtype, offsets_in.dtype, mask_out.dtype, starts_out.dtype, stops_out.dtype]))(grid, block, (index_in, offsets_in, mask_out, starts_out, stops_out, length, scan_in_array, invocation_index, err_code)) // scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b", index_in.dtype, offsets_in.dtype, mask_out.dtype, starts_out.dtype, stops_out.dtype]))(grid, block, (index_in, offsets_in, mask_out, starts_out, stops_out, length, scan_in_array, invocation_index, err_code)) @@ -25,12 +25,12 @@ awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a( uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id < length) { - if (index_in[thread_id] >= 0) { - scan_in_array[thread_id] = 1; + scan_in_array[0] = 0; + if (index_in[thread_id] < 0) { + scan_in_array[thread_id + 1] = 0; } else { - scan_in_array[thread_id] = 0; + scan_in_array[thread_id + 1] = 1; } } } @@ -52,15 +52,13 @@ awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b( int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { - int64_t pre_in = scan_in_array[thread_id] - 1; - starts_out[thread_id] = offsets_in[pre_in]; - + starts_out[thread_id] = offsets_in[scan_in_array[thread_id]]; if (index_in[thread_id] < 0) { mask_out[thread_id] = -1; - stops_out[thread_id] = offsets_in[pre_in]; + stops_out[thread_id] = offsets_in[scan_in_array[thread_id + 1]]; } else { mask_out[thread_id] = thread_id; - stops_out[thread_id] = offsets_in[pre_in + 1]; + stops_out[thread_id] = offsets_in[scan_in_array[thread_id + 1]]; } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu index 210f90b557..97e9d0d0e4 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu @@ -42,6 +42,6 @@ awkward_IndexedArray_numnull_b(T* numnull, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *numnull = scan_in_array[lenindex - 1]; + *numnull = lenindex > 0 ? scan_in_array[lenindex - 1] : 0; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu index e30a7751f0..a0dc7a320b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -44,10 +44,8 @@ awkward_IndexedArray_numnull_parents_b(T* numnull, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { + *tolength = lenindex > 0 ? scan_in_array[lenindex - 1] : 0; int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if(thread_id == 0) { - *tolength = scan_in_array[lenindex - 1]; - } if (thread_id < lenindex) { if (fromindex[thread_id] < 0) { numnull[thread_id] = 1; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu deleted file mode 100644 index 85306498d0..0000000000 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu +++ /dev/null @@ -1,15 +0,0 @@ -// BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE - -template -__global__ void -awkward_IndexedArray_numnull_unique_64(T* toindex, - int64_t lenindex, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id <= lenindex) { - toindex[thread_id] = (thread_id < lenindex ? thread_id : -1); - } - } -} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu index 3675674da2..bd7c3e68e9 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu @@ -24,7 +24,7 @@ awkward_ListArray_getitem_jagged_carrylen_a(T* carrylen, int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < sliceouterlen) { - scan_in_array[thread_id] = (T)(slicestops[thread_id] - slicestarts[thread_id]); + scan_in_array[thread_id] = (int64_t)(slicestops[thread_id] - slicestarts[thread_id]); } } } @@ -39,6 +39,6 @@ awkward_ListArray_getitem_jagged_carrylen_b(T* carrylen, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *carrylen = (T)scan_in_array[sliceouterlen - 1]; + *carrylen = sliceouterlen > 0 ? scan_in_array[sliceouterlen - 1] : 0; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu index e524ee546d..e6b1e1ec7b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu @@ -37,6 +37,6 @@ awkward_ListArray_getitem_next_range_counts_b(T* total, uint64_t invocation_total, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *total = scan_in_array[lenstarts - 1]; + *total = lenstarts > 0 ? scan_in_array[lenstarts - 1] : 0; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu index 20b8835907..17b59c891f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu @@ -42,6 +42,6 @@ awkward_ListArray_rpad_and_clip_length_axis1_b(T* tomin, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *tomin = scan_in_array[lenstarts - 1]; + *tomin = lenstarts > 0 ? scan_in_array[lenstarts - 1] : 0; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu index 9984fc787e..d1d668dad1 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu @@ -15,6 +15,5 @@ awkward_RegularArray_getitem_next_range_spreadadvanced(T* toadvanced, if (thread_id < length) { toadvanced[(thread_id * nextsize) + thready_id] = fromadvanced[thread_id]; } - toadvanced[(thread_id * nextsize) + thready_id] = fromadvanced[thread_id]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu index a0a9d7bde6..60f30f854a 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu @@ -44,9 +44,8 @@ awkward_UnionArray_project_b(T* lenout, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - lenout[0] = scan_in_array[length - 1]; + *lenout = length > 0 ? scan_in_array[length - 1] : 0; int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id < length) { if (fromtags[thread_id] == which) { tocarry[scan_in_array[thread_id] - 1] = fromindex[thread_id]; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu index f69c754079..a0fb302d0d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu @@ -3,7 +3,7 @@ // BEGIN PYTHON // def f(grid, block, args): // (tolength, parents, parentslength, invocation_index, err_code) = args -// scan_in_array = cupy.empty(parentslength, dtype=cupy.int64) +// scan_in_array = cupy.empty(parentslength + 1, dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_sorting_ranges_length_a', tolength.dtype, parents.dtype]))(grid, block, (tolength, parents, parentslength, scan_in_array, invocation_index, err_code)) // scan_in_array = cupy.cumsum(scan_in_array) // cuda_kernel_templates.get_function(fetch_specialization(['awkward_sorting_ranges_length_b', tolength.dtype, parents.dtype]))(grid, block, (tolength, parents, parentslength, scan_in_array, invocation_index, err_code)) @@ -44,6 +44,6 @@ awkward_sorting_ranges_length_b(T* tolength, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *tolength = scan_in_array[parentslength - 1]; + *tolength = parentslength > 0 ? scan_in_array[parentslength - 1] : scan_in_array[0]; } } From 6183703c7877b796e198faf29c0e538c474fd342 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 09:44:24 +0100 Subject: [PATCH 10/18] fix: failing tests-spec --- dev/generate-kernel-signatures.py | 2 +- dev/generate-tests.py | 2 +- kernel-test-data.json | 10 +++++----- .../awkward_ListArray_getitem_next_range_counts.cu | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 6343bfe570..6bd43476cf 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -22,7 +22,7 @@ "awkward_ByteMaskedArray_numnull", "awkward_IndexedArray_numnull", "awkward_IndexedArray_numnull_parents", - "awkward_IndexedArray_numnull_unique", + "awkward_IndexedArray_numnull_unique_64", "awkward_NumpyArray_fill", "awkward_ListArray_fill", "awkward_IndexedArray_fill", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index ad14a157c7..c3ac69fdf8 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -686,7 +686,7 @@ def gencpuunittests(specdict): "awkward_ByteMaskedArray_numnull", "awkward_IndexedArray_numnull", "awkward_IndexedArray_numnull_parents", - "awkward_IndexedArray_numnull_unique", + "awkward_IndexedArray_numnull_unique_64", "awkward_NumpyArray_fill", "awkward_ListArray_fill", "awkward_IndexedArray_fill", diff --git a/kernel-test-data.json b/kernel-test-data.json index 610b424d41..9b6b0e3059 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -900,7 +900,7 @@ "message": "", "inputs": { "length": 0, - "target": 2 + "target": 0 }, "outputs": { "toindex": [] @@ -2235,7 +2235,7 @@ ] }, { - "name": "awkward_IndexedArray_numnull_unique", + "name": "awkward_IndexedArray_numnull_unique_64", "status": true, "tests": [ { @@ -10428,7 +10428,7 @@ "error": false, "message": "", "inputs": { - "length": 3, + "length": 2, "mymask": [1, 0], "theirmask": [0, 1], "validwhen": false @@ -10441,7 +10441,7 @@ "error": false, "message": "", "inputs": { - "length": 3, + "length": 2, "mymask": [0, 0], "theirmask": [0, 1], "validwhen": false @@ -10454,7 +10454,7 @@ "error": false, "message": "", "inputs": { - "length": 3, + "length": 2, "mymask": [1, 0], "theirmask": [0, 0], "validwhen": false diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu index e6b1e1ec7b..57ed4a05c3 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu @@ -23,7 +23,7 @@ awkward_ListArray_getitem_next_range_counts_a(T* total, int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenstarts) { - scan_in_array[thread_id] = (T)(fromoffsets[thread_id + 1] - fromoffsets[thread_id]); + scan_in_array[thread_id] = (int64_t)fromoffsets[thread_id + 1] - fromoffsets[thread_id]; } } } From 63069fd6e86d9bac4f7e2e76b86f1f9835e79cc0 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 09:52:23 +0100 Subject: [PATCH 11/18] fix: add missing src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu --- .../awkward_IndexedArray_numnull_unique_64.cu | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu new file mode 100644 index 0000000000..85306498d0 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu @@ -0,0 +1,15 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +template +__global__ void +awkward_IndexedArray_numnull_unique_64(T* toindex, + int64_t lenindex, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id <= lenindex) { + toindex[thread_id] = (thread_id < lenindex ? thread_id : -1); + } + } +} From 293f0b588170f79ebe24d24618275f60b194e332 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 10:51:39 +0100 Subject: [PATCH 12/18] fix: awkward_IndexedArray_numnull_parents.cu --- ...em_next_missing_jagged_getmaskstartstop.cu | 38 +++++++++---------- .../awkward_IndexedArray_numnull_parents.cu | 19 +++------- ..._next_nonlocal_nextshifts_fromshifts_64.cu | 34 ++++++++--------- ...egularArray_reduce_nonlocal_preparenext.cu | 30 +++++++-------- 4 files changed, 55 insertions(+), 66 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu index e63598235e..f21e3cbd5f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu @@ -13,16 +13,15 @@ template __global__ void -awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a( - T* index_in, - C* offsets_in, - U* mask_out, - V* starts_out, - W* stops_out, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a(T* index_in, + C* offsets_in, + U* mask_out, + V* starts_out, + W* stops_out, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { @@ -38,16 +37,15 @@ awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a( template __global__ void -awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b( - T* index_in, - C* offsets_in, - U* mask_out, - V* starts_out, - W* stops_out, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b(T* index_in, + C* offsets_in, + U* mask_out, + V* starts_out, + W* stops_out, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu index a0dc7a320b..5cc314be99 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -14,8 +14,8 @@ template __global__ void awkward_IndexedArray_numnull_parents_a(T* numnull, - U* tolength, - const C* fromindex, + C* tolength, + const U* fromindex, int64_t lenindex, int64_t* scan_in_array, uint64_t invocation_index, @@ -25,9 +25,11 @@ awkward_IndexedArray_numnull_parents_a(T* numnull, if (thread_id < lenindex) { if (fromindex[thread_id] < 0) { + numnull[thread_id] = 1; scan_in_array[thread_id] = 1; } else { + numnull[thread_id] = 0; scan_in_array[thread_id] = 0; } } @@ -37,22 +39,13 @@ awkward_IndexedArray_numnull_parents_a(T* numnull, template __global__ void awkward_IndexedArray_numnull_parents_b(T* numnull, - U* tolength, - const C* fromindex, + C* tolength, + const U* fromindex, int64_t lenindex, int64_t* scan_in_array, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *tolength = lenindex > 0 ? scan_in_array[lenindex - 1] : 0; - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id < lenindex) { - if (fromindex[thread_id] < 0) { - numnull[thread_id] = 1; - } - else { - numnull[thread_id] = 0; - } - } } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu index a61abd0e7d..6d8b848a90 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu @@ -15,15 +15,14 @@ template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a( - T* nextshifts, - const C* index, - int64_t length, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a(T* nextshifts, + const C* index, + int64_t length, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -41,15 +40,14 @@ awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a( template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b( - T* nextshifts, - const C* index, - int64_t length, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b(T* nextshifts, + const C* index, + int64_t length, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu index 5daea340d2..fb55380011 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu @@ -14,14 +14,14 @@ template __global__ void awkward_RegularArray_reduce_nonlocal_preparenext_a(T* nextcarry, - C* nextparents, - const U* parents, - int64_t size, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { - if (err_code[0] == NO_ERROR) { + C* nextparents, + const U* parents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { +if (err_code[0] == NO_ERROR) { int64_t thready_id = blockIdx.x * blockDim.x + threadIdx.x; int64_t len = length * size; if (thready_id < len) { @@ -33,13 +33,13 @@ awkward_RegularArray_reduce_nonlocal_preparenext_a(T* nextcarry, template __global__ void awkward_RegularArray_reduce_nonlocal_preparenext_b(T* nextcarry, - C* nextparents, - const U* parents, - int64_t size, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { + C* nextparents, + const U* parents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) / length; int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) % length; From ad1bd14bc414476438cd12a6ecfc1ebf4c082558 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 13:29:57 +0100 Subject: [PATCH 13/18] feat: add 2 kernels that use a temp array --- dev/generate-kernel-signatures.py | 2 + dev/generate-tests.py | 2 + kernel-test-data.json | 27 ++++++++- src/awkward/_connect/cuda/__init__.py | 2 + ...fsetArray_reduce_nonlocal_nextstarts_64.cu | 50 ++++++++++++++++ ...ularArray_getitem_next_array_regularize.cu | 60 +++++++++++++++++++ 6 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 6bd43476cf..53e141726d 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -47,6 +47,7 @@ "awkward_RegularArray_getitem_next_range", "awkward_RegularArray_getitem_next_range_spreadadvanced", "awkward_RegularArray_getitem_next_array", + "awkward_RegularArray_getitem_next_array_regularize", "awkward_RegularArray_reduce_local_nextparents", "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", @@ -58,6 +59,7 @@ "awkward_ListArray_getitem_next_at", "awkward_ListArray_getitem_next_range_counts", "awkward_ListArray_rpad_and_clip_length_axis1", + "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index c3ac69fdf8..b8d3eca3e2 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -711,6 +711,7 @@ def gencpuunittests(specdict): "awkward_RegularArray_getitem_next_range", "awkward_RegularArray_getitem_next_range_spreadadvanced", "awkward_RegularArray_getitem_next_array", + "awkward_RegularArray_getitem_next_array_regularize", "awkward_RegularArray_reduce_local_nextparents", "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", @@ -722,6 +723,7 @@ def gencpuunittests(specdict): "awkward_ListArray_getitem_next_at", "awkward_ListArray_getitem_next_range_counts", "awkward_ListArray_rpad_and_clip_length_axis1", + "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/kernel-test-data.json b/kernel-test-data.json index 9b6b0e3059..7e1dc7de2b 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -7398,8 +7398,20 @@ }, { "name": "awkward_RegularArray_getitem_next_array_regularize", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromarray": [], + "lenarray": 0, + "size": 2 + }, + "outputs": { + "toarray": [] + } + }, { "error": false, "message": "", @@ -18249,8 +18261,19 @@ }, { "name": "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "nextlen": 0, + "nextparents": [] + }, + "outputs": { + "nextstarts": [] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 40a5702889..42481e7e2c 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -93,7 +93,9 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_compact_offsets", "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_rpad_and_clip_length_axis1", + "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_MaskedArray_getitem_next_jagged_project", + "awkward_RegularArray_getitem_next_array_regularize", "awkward_RegularArray_reduce_local_nextparents", "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_UnionArray_project", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu new file mode 100644 index 0000000000..68e9453b29 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu @@ -0,0 +1,50 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (nextstarts, nextparents, nextlen, invocation_index, err_code) = args +// scan_in_array = cupy.empty(nextlen, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_a", nextstarts.dtype, nextparents.dtype]))(grid, block, (nextstarts, nextparents, nextlen, scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_b", nextstarts.dtype, nextparents.dtype]))(grid, block, (nextstarts, nextparents, nextlen, scan_in_array, invocation_index, err_code)) +// out["awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_a", {dtype_specializations}] = None +// out["awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_a(T* nextstarts, + const C* nextparents, + int64_t nextlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < nextlen) { + if (thread_id == 0) { + scan_in_array[0] = -1; + } + scan_in_array[thread_id + 1] = nextparents[thread_id]; + } + } +} + +template +__global__ void +awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_b(T* nextstarts, + const C* nextparents, + int64_t nextlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < nextlen) { + if (nextparents[thread_id] != scan_in_array[thread_id]) { + nextstarts[nextparents[thread_id]] = thread_id; + } + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu new file mode 100644 index 0000000000..7242c2b4fa --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu @@ -0,0 +1,60 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (toarray, fromarray, lenarray, size, invocation_index, err_code) = args +// scan_in_array = cupy.empty(lenarray, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_getitem_next_array_regularize_a', toarray.dtype, fromarray.dtype]))(grid, block, (toarray, fromarray, lenarray, size, scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(['awkward_RegularArray_getitem_next_array_regularize_b', toarray.dtype, fromarray.dtype]))(grid, block, (toarray, fromarray, lenarray, size, scan_in_array, invocation_index, err_code)) +// out["awkward_RegularArray_getitem_next_array_regularize_a", {dtype_specializations}] = None +// out["awkward_RegularArray_getitem_next_array_regularize_b", {dtype_specializations}] = None +// END PYTHON + +enum class REGULARARRAY_GETITEM_NEXT_ARRAY_REGULARIZE_ERRORS { + IND_OUT_OF_RANGE // message: "index out of range" +}; + +template +__global__ void +awkward_RegularArray_getitem_next_array_regularize_a(T* toarray, + const C* fromarray, + int64_t lenarray, + int64_t size, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenarray) { + scan_in_array[thread_id] = fromarray[thread_id]; + if (scan_in_array[thread_id] < 0) { + scan_in_array[thread_id] = fromarray[thread_id] + size; + } + if (!(0 <= scan_in_array[thread_id] && scan_in_array[thread_id] < size)) { + RAISE_ERROR(REGULARARRAY_GETITEM_NEXT_ARRAY_REGULARIZE_ERRORS::IND_OUT_OF_RANGE) + } + } + } +} + +template +__global__ void +awkward_RegularArray_getitem_next_array_regularize_b(T* toarray, + const C* fromarray, + int64_t lenarray, + int64_t size, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < lenarray) { + toarray[thread_id] = scan_in_array[thread_id]; + if (!(0 <= toarray[thread_id] && toarray[thread_id] < size)) { + RAISE_ERROR(REGULARARRAY_GETITEM_NEXT_ARRAY_REGULARIZE_ERRORS::IND_OUT_OF_RANGE) + } + } + } +} From 7b911772b9bb768d87c4bed47efda0400451cff4 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 13:59:36 +0100 Subject: [PATCH 14/18] fix: use cupy.min instead of atomicMin() in awkward_ListArray_min_range --- kernel-test-data.json | 12 +++++ src/awkward/_connect/cuda/__init__.py | 1 + .../awkward_ListArray_min_range.cu | 52 +++++++++++++++---- .../cuda/cuda_kernels/awkward_reduce_sum.cu | 24 ++++----- 4 files changed, 66 insertions(+), 23 deletions(-) diff --git a/kernel-test-data.json b/kernel-test-data.json index 7e1dc7de2b..b167335346 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -12784,6 +12784,18 @@ "name": "awkward_ListArray_min_range", "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [], + "fromstops": [], + "lenstarts": 0 + }, + "outputs": { + "tomin": [0] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 42481e7e2c..df44325b91 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -92,6 +92,7 @@ def fetch_template_specializations(kernel_dict): "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "awkward_ListArray_compact_offsets", "awkward_ListArray_getitem_jagged_carrylen", + "awkward_ListArray_min_range", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_MaskedArray_getitem_next_jagged_project", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu index a49f52173c..f4a332a722 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu @@ -1,21 +1,51 @@ // BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +// BEGIN PYTHON +// def f(grid, block, args): +// (tomin, fromstarts, fromstops, lenstarts, invocation_index, err_code) = args +// scan_in_array = cupy.empty(lenstarts, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_min_range_a", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, lenstarts, scan_in_array, invocation_index, err_code)) +// if lenstarts > 0: +// scan_in_array[0] = cupy.min(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_min_range_b", tomin.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tomin, fromstarts, fromstops, lenstarts, scan_in_array, invocation_index, err_code)) +// out["awkward_ListArray_min_range_a", {dtype_specializations}] = None +// out["awkward_ListArray_min_range_b", {dtype_specializations}] = None +// END PYTHON + template __global__ void -awkward_ListArray_min_range(T* tomin, - const C* fromstarts, - const U* fromstops, - int64_t lenstarts, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_min_range_a(T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - int64_t shorter = fromstops[0] - fromstarts[0]; - if (thread_id >=1 && thread_id < lenstarts) { - int64_t rangeval = fromstops[thread_id] - fromstarts[thread_id]; - shorter = (shorter < rangeval) ? shorter : rangeval; - atomicMin(tomin, shorter); + if (thread_id < lenstarts) { + if (thread_id == 0) { + scan_in_array[thread_id] = fromstops[0] - fromstarts[0]; + } + else { + int64_t rangeval = fromstops[thread_id] - fromstarts[thread_id]; + scan_in_array[thread_id] = rangeval; + } } } } + +template +__global__ void +awkward_ListArray_min_range_b(T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *tomin = lenstarts > 0 ? scan_in_array[0] : 0; + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu index abbee36a18..4da0cea790 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu @@ -3,10 +3,11 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// scan_in_array = cupy.array(toptr, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, scan_in_array, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, scan_in_array, invocation_index, err_code)) // out["awkward_reduce_sum_a", {dtype_specializations}] = None // out["awkward_reduce_sum_b", {dtype_specializations}] = None // out["awkward_reduce_sum_c", {dtype_specializations}] = None @@ -19,14 +20,14 @@ awkward_reduce_sum_a(T* toptr, const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + int64_t* scan_in_array, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - atomicAdd_toptr[thread_id] = 0; + scan_in_array[thread_id] = 0; } } } @@ -38,15 +39,14 @@ awkward_reduce_sum_b(T* toptr, const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + int64_t* scan_in_array, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenparents) { - atomicAdd(atomicAdd_toptr + parents[thread_id], - (uint64_t)fromptr[thread_id]); + scan_in_array[thread_id] = (T)(parents[thread_id] + fromptr[thread_id]); } } } @@ -58,14 +58,14 @@ awkward_reduce_sum_c(T* toptr, const U* parents, int64_t lenparents, int64_t outlength, - uint64_t* atomicAdd_toptr, + int64_t* scan_in_array, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id < outlength) { - toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; + if (thread_id < lenparents) { + toptr[parents[thread_id]] = scan_in_array[thread_id]; } } } From 2dea9b154739856f37bca04e5139c19ea367d03b Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 14:06:46 +0100 Subject: [PATCH 15/18] fix: lenstarts = 0 case in awkward_ListArray_min_range --- kernel-specification.yml | 11 ++++++----- kernel-test-data.json | 2 +- .../cuda/cuda_kernels/awkward_ListArray_min_range.cu | 4 +++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel-specification.yml b/kernel-specification.yml index cfa38f2e82..4fbc49edb8 100644 --- a/kernel-specification.yml +++ b/kernel-specification.yml @@ -1960,11 +1960,12 @@ kernels: description: null definition: | def awkward_ListArray_min_range(tomin, fromstarts, fromstops, lenstarts): - shorter = fromstops[0] - fromstarts[0] - for i in range(1, lenstarts): - rangeval = fromstops[i] - fromstarts[i] - shorter = shorter if shorter < rangeval else rangeval - tomin[0] = shorter + if lenstarts > 0: + shorter = fromstops[0] - fromstarts[0] + for i in range(1, lenstarts): + rangeval = fromstops[i] - fromstarts[i] + shorter = shorter if shorter < rangeval else rangeval + tomin[0] = shorter automatic-tests: true - name: awkward_ListArray_rpad_and_clip_length_axis1 diff --git a/kernel-test-data.json b/kernel-test-data.json index b167335346..5c2987457a 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -12793,7 +12793,7 @@ "lenstarts": 0 }, "outputs": { - "tomin": [0] + "tomin": [] } }, { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu index f4a332a722..ef85350eae 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu @@ -46,6 +46,8 @@ awkward_ListArray_min_range_b(T* tomin, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - *tomin = lenstarts > 0 ? scan_in_array[0] : 0; + if (lenstarts > 0) { + *tomin = scan_in_array[0]; + } } } From 184e01428722412b43ac68a0bfbf5ba2c5b2c4c1 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 15:13:29 +0100 Subject: [PATCH 16/18] fix: awkward_ListArray_getitem_jagged_expand --- kernel-test-data.json | 2 +- ...awkward_ListArray_getitem_jagged_expand.cu | 28 ++++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/kernel-test-data.json b/kernel-test-data.json index 5c2987457a..052edda1d4 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -12578,7 +12578,7 @@ }, { "name": "awkward_ListArray_getitem_jagged_expand", - "status": false, + "status": true, "tests": [ { "error": true, diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu index 030a04b73e..f371df21f8 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu @@ -18,20 +18,22 @@ awkward_ListArray_getitem_jagged_expand(T* multistarts, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { - int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) % length; + int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / jaggedsize; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % jaggedsize; - W start = fromstarts[thread_id]; - X stop = fromstops[thread_id]; - if (stop < start) { - RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_EXPAND_ERRORS::STOPS_LT_START) + if (thread_id < length && thready_id < jaggedsize) { + W start = fromstarts[thread_id]; + X stop = fromstops[thread_id]; + if (stop < start) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_EXPAND_ERRORS::STOPS_LT_START) + } + if ((stop - start) != jaggedsize) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_EXPAND_ERRORS::FIT_ERR) + } + multistarts[(thread_id * jaggedsize) + thready_id] = + singleoffsets[thready_id]; + multistops[(thread_id * jaggedsize) + thready_id] = + singleoffsets[(thready_id + 1)]; + tocarry[(thread_id * jaggedsize) + thready_id] = (start + thready_id); } - if ((stop - start) != jaggedsize) { - RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_EXPAND_ERRORS::FIT_ERR) - } - multistarts[(thread_id * jaggedsize) + thready_id] = - singleoffsets[thready_id]; - multistops[(thread_id * jaggedsize) + thready_id] = - singleoffsets[(thready_id + 1)]; - tocarry[(thread_id * jaggedsize) + thready_id] = (start + thready_id); } } From cb037d0a9c701038a35716099ee9f4b8758cbf56 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 15:31:19 +0100 Subject: [PATCH 17/18] refactor: remove changes in awkward_rduce_sum --- .../cuda/cuda_kernels/awkward_reduce_sum.cu | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu index 4da0cea790..abbee36a18 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu @@ -3,11 +3,10 @@ // BEGIN PYTHON // def f(grid, block, args): // (toptr, fromptr, parents, lenparents, outlength, invocation_index, err_code) = args -// scan_in_array = cupy.array(toptr, dtype=cupy.int64) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, scan_in_array, invocation_index, err_code)) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, scan_in_array, invocation_index, err_code)) -// scan_in_array = cupy.cumsum(scan_in_array) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, scan_in_array, invocation_index, err_code)) +// atomicAdd_toptr = cupy.array(toptr, dtype=cupy.uint64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_a", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_b", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_sum_c", toptr.dtype, fromptr.dtype, parents.dtype]))(grid, block, (toptr, fromptr, parents, lenparents, outlength, atomicAdd_toptr, invocation_index, err_code)) // out["awkward_reduce_sum_a", {dtype_specializations}] = None // out["awkward_reduce_sum_b", {dtype_specializations}] = None // out["awkward_reduce_sum_c", {dtype_specializations}] = None @@ -20,14 +19,14 @@ awkward_reduce_sum_a(T* toptr, const U* parents, int64_t lenparents, int64_t outlength, - int64_t* scan_in_array, + uint64_t* atomicAdd_toptr, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - scan_in_array[thread_id] = 0; + atomicAdd_toptr[thread_id] = 0; } } } @@ -39,14 +38,15 @@ awkward_reduce_sum_b(T* toptr, const U* parents, int64_t lenparents, int64_t outlength, - int64_t* scan_in_array, + uint64_t* atomicAdd_toptr, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenparents) { - scan_in_array[thread_id] = (T)(parents[thread_id] + fromptr[thread_id]); + atomicAdd(atomicAdd_toptr + parents[thread_id], + (uint64_t)fromptr[thread_id]); } } } @@ -58,14 +58,14 @@ awkward_reduce_sum_c(T* toptr, const U* parents, int64_t lenparents, int64_t outlength, - int64_t* scan_in_array, + uint64_t* atomicAdd_toptr, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_id < lenparents) { - toptr[parents[thread_id]] = scan_in_array[thread_id]; + if (thread_id < outlength) { + toptr[thread_id] = (T)atomicAdd_toptr[thread_id]; } } } From e182174a9130f41ce457b3968e3b8c8ed9f4269b Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 2 Feb 2024 17:49:51 +0100 Subject: [PATCH 18/18] fix: formatting --- ...kward_BitMaskedArray_to_ByteMaskedArray.cu | 17 ++++--- ...rd_BitMaskedArray_to_IndexedOptionArray.cu | 17 ++++--- ...kward_ByteMaskedArray_getitem_nextcarry.cu | 30 ++++++----- ...eMaskedArray_getitem_nextcarry_outindex.cu | 34 +++++++------ .../awkward_ByteMaskedArray_numnull.cu | 30 ++++++----- .../awkward_ByteMaskedArray_overlay_mask.cu | 15 +++--- .../awkward_ByteMaskedArray_reduce_next_64.cu | 42 +++++++-------- ...rray_reduce_next_nonlocal_nextshifts_64.cu | 34 +++++++------ ..._next_nonlocal_nextshifts_fromshifts_64.cu | 38 +++++++------- ...rd_ByteMaskedArray_toIndexedOptionArray.cu | 13 ++--- ...em_next_missing_jagged_getmaskstartstop.cu | 38 +++++++------- .../awkward_Index_nones_as_index.cu | 26 +++++----- .../cuda_kernels/awkward_IndexedArray_fill.cu | 15 +++--- .../awkward_IndexedArray_fill_count.cu | 13 ++--- .../awkward_IndexedArray_flatten_nextcarry.cu | 30 ++++++----- .../awkward_IndexedArray_getitem_nextcarry.cu | 30 ++++++----- ...IndexedArray_getitem_nextcarry_outindex.cu | 34 +++++++------ .../awkward_IndexedArray_index_of_nulls.cu | 34 +++++++------ .../awkward_IndexedArray_numnull.cu | 26 +++++----- .../awkward_IndexedArray_numnull_parents.cu | 30 ++++++----- .../awkward_IndexedArray_numnull_unique_64.cu | 9 ++-- .../awkward_IndexedArray_overlay_mask.cu | 13 ++--- .../awkward_IndexedArray_reduce_next_64.cu | 38 +++++++------- ...IndexedArray_reduce_next_fix_offsets_64.cu | 13 ++--- ...rray_reduce_next_nonlocal_nextshifts_64.cu | 30 ++++++----- ..._next_nonlocal_nextshifts_fromshifts_64.cu | 34 +++++++------ .../awkward_IndexedArray_simplify.cu | 15 +++--- .../awkward_IndexedArray_validity.cu | 13 ++--- ...xedOptionArray_rpad_and_clip_mask_axis1.cu | 26 +++++----- .../awkward_ListArray_compact_offsets.cu | 13 ++--- .../cuda_kernels/awkward_ListArray_fill.cu | 21 ++++---- ...kward_ListArray_getitem_jagged_carrylen.cu | 30 ++++++----- ...awkward_ListArray_getitem_jagged_expand.cu | 21 ++++---- .../awkward_ListArray_getitem_next_array.cu | 21 ++++---- ...d_ListArray_getitem_next_array_advanced.cu | 23 +++++---- .../awkward_ListArray_getitem_next_at.cu | 15 +++--- ...ard_ListArray_getitem_next_range_counts.cu | 26 +++++----- .../awkward_ListArray_min_range.cu | 30 ++++++----- ...rd_ListArray_rpad_and_clip_length_axis1.cu | 34 +++++++------ .../awkward_ListArray_validity.cu | 19 +++---- ...awkward_ListOffsetArray_flatten_offsets.cu | 15 +++--- ...fsetArray_reduce_nonlocal_nextstarts_64.cu | 26 +++++----- ...ard_ListOffsetArray_rpad_and_clip_axis1.cu | 13 ++--- .../awkward_ListOffsetArray_rpad_axis1.cu | 13 ++--- ...MaskedArray_getitem_next_jagged_project.cu | 38 +++++++------- .../cuda_kernels/awkward_NumpyArray_fill.cu | 13 ++--- ...ward_NumpyArray_reduce_adjust_starts_64.cu | 13 ++--- ...mpyArray_reduce_adjust_starts_shifts_64.cu | 15 +++--- ...mpyArray_reduce_mask_ByteMaskedArray_64.cu | 13 ++--- .../awkward_RegularArray_getitem_carry.cu | 13 ++--- ...ward_RegularArray_getitem_jagged_expand.cu | 15 +++--- ...awkward_RegularArray_getitem_next_array.cu | 17 ++++--- ...egularArray_getitem_next_array_advanced.cu | 19 +++---- ...ularArray_getitem_next_array_regularize.cu | 30 ++++++----- .../awkward_RegularArray_getitem_next_at.cu | 13 ++--- ...awkward_RegularArray_getitem_next_range.cu | 17 ++++--- ...Array_getitem_next_range_spreadadvanced.cu | 13 ++--- .../awkward_RegularArray_localindex.cu | 11 ++-- ...d_RegularArray_reduce_local_nextparents.cu | 26 +++++----- ...egularArray_reduce_nonlocal_preparenext.cu | 34 +++++++------ ...wkward_RegularArray_rpad_and_clip_axis1.cu | 13 ++--- .../awkward_UnionArray_fillindex.cu | 13 ++--- .../awkward_UnionArray_fillindex_count.cu | 11 ++-- .../cuda_kernels/awkward_UnionArray_fillna.cu | 11 ++-- .../awkward_UnionArray_filltags.cu | 15 +++--- .../awkward_UnionArray_filltags_const.cu | 13 ++--- .../awkward_UnionArray_project.cu | 38 +++++++------- .../awkward_UnionArray_simplify.cu | 27 +++++----- .../awkward_UnionArray_simplify_one.cu | 21 ++++---- .../awkward_UnionArray_validity.cu | 15 +++--- .../awkward_index_rpad_and_clip_axis0.cu | 11 ++-- .../awkward_index_rpad_and_clip_axis1.cu | 13 ++--- .../cuda/cuda_kernels/awkward_localindex.cu | 9 ++-- .../cuda_kernels/awkward_missing_repeat.cu | 15 +++--- .../cuda_kernels/awkward_reduce_argmax.cu | 30 ++++++----- .../cuda_kernels/awkward_reduce_argmin.cu | 30 ++++++----- .../cuda_kernels/awkward_reduce_count_64.cu | 17 ++++--- .../awkward_reduce_countnonzero.cu | 34 +++++++------ .../cuda/cuda_kernels/awkward_reduce_max.cu | 34 +++++++------ .../cuda/cuda_kernels/awkward_reduce_min.cu | 34 +++++++------ .../cuda_kernels/awkward_reduce_prod_bool.cu | 34 +++++++------ .../cuda/cuda_kernels/awkward_reduce_sum.cu | 51 ++++++++++--------- .../cuda_kernels/awkward_reduce_sum_bool.cu | 34 +++++++------ .../awkward_reduce_sum_int32_bool_64.cu | 34 +++++++------ .../awkward_reduce_sum_int64_bool_64.cu | 34 +++++++------ .../awkward_sorting_ranges_length.cu | 26 +++++----- 86 files changed, 1050 insertions(+), 924 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_ByteMaskedArray.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_ByteMaskedArray.cu index 6599717f47..77525faa9d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_ByteMaskedArray.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_ByteMaskedArray.cu @@ -2,15 +2,16 @@ template __global__ void -awkward_BitMaskedArray_to_ByteMaskedArray(T* tobytemask, - const C* frombitmask, - int64_t bitmasklength, - bool validwhen, - bool lsb_order, - uint64_t invocation_index, - uint64_t* err_code) { - uint64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; +awkward_BitMaskedArray_to_ByteMaskedArray( + T* tobytemask, + const C* frombitmask, + int64_t bitmasklength, + bool validwhen, + bool lsb_order, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < bitmasklength) { if (lsb_order) { uint8_t byte = frombitmask[thread_id]; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_IndexedOptionArray.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_IndexedOptionArray.cu index 5566c6a8a5..4d35527ab0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_IndexedOptionArray.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_BitMaskedArray_to_IndexedOptionArray.cu @@ -1,14 +1,15 @@ // BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE -template +template __global__ void -awkward_BitMaskedArray_to_IndexedOptionArray(C* toindex, - const T* frombitmask, - int64_t bitmasklength, - bool validwhen, - bool lsb_order, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_BitMaskedArray_to_IndexedOptionArray( + T* toindex, + const C* frombitmask, + int64_t bitmasklength, + bool validwhen, + bool lsb_order, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (lsb_order) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu index 7e5a0eef7f..d97cfe9de5 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry.cu @@ -13,13 +13,14 @@ template __global__ void -awkward_ByteMaskedArray_getitem_nextcarry_a(T* tocarry, - const C* mask, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_getitem_nextcarry_a( + T* tocarry, + const C* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -35,13 +36,14 @@ awkward_ByteMaskedArray_getitem_nextcarry_a(T* tocarry, template __global__ void -awkward_ByteMaskedArray_getitem_nextcarry_b(T* tocarry, - const C* mask, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_getitem_nextcarry_b( + T* tocarry, + const C* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu index c69ce2d14d..8f416fcc69 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_getitem_nextcarry_outindex.cu @@ -13,14 +13,15 @@ template __global__ void -awkward_ByteMaskedArray_getitem_nextcarry_outindex_a(T* tocarry, - C* outindex, - const U* mask, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_getitem_nextcarry_outindex_a( + T* tocarry, + C* outindex, + const U* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -36,14 +37,15 @@ awkward_ByteMaskedArray_getitem_nextcarry_outindex_a(T* tocarry, template __global__ void -awkward_ByteMaskedArray_getitem_nextcarry_outindex_b(T* tocarry, - C* outindex, - const U* mask, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_getitem_nextcarry_outindex_b( + T* tocarry, + C* outindex, + const U* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu index ecdc304e94..c16cd68d41 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_numnull.cu @@ -13,13 +13,14 @@ template __global__ void -awkward_ByteMaskedArray_numnull_a(T* numnull, - const C* mask, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_numnull_a( + T* numnull, + const C* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -36,13 +37,14 @@ awkward_ByteMaskedArray_numnull_a(T* numnull, template __global__ void -awkward_ByteMaskedArray_numnull_b(T* numnull, - const C* mask, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_numnull_b( + T* numnull, + const C* mask, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *numnull = length > 0 ? scan_in_array[length - 1] : 0; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_overlay_mask.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_overlay_mask.cu index d90019a267..e3175808fe 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_overlay_mask.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_overlay_mask.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_ByteMaskedArray_overlay_mask(T* tomask, - const C* theirmask, - const U* mymask, - int64_t length, - bool validwhen, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_overlay_mask( + T* tomask, + const C* theirmask, + const U* mymask, + int64_t length, + bool validwhen, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu index b842e18683..451afd7612 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_64.cu @@ -13,16 +13,17 @@ template __global__ void -awkward_ByteMaskedArray_reduce_next_64_a(T* nextcarry, - C* nextparents, - U* outindex, - const V* mask, - const W* parents, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t* invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_64_a( + T* nextcarry, + C* nextparents, + U* outindex, + const V* mask, + const W* parents, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t* invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -38,16 +39,17 @@ awkward_ByteMaskedArray_reduce_next_64_a(T* nextcarry, template __global__ void -awkward_ByteMaskedArray_reduce_next_64_b(T* nextcarry, - C* nextparents, - U* outindex, - const V* mask, - const W* parents, - int64_t length, - bool validwhen, - int64_t* scan_in_array, - uint64_t* invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_64_b( + T* nextcarry, + C* nextparents, + U* outindex, + const V* mask, + const W* parents, + int64_t length, + bool validwhen, + int64_t* scan_in_array, + uint64_t* invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu index 9481e4a661..5506d7ef64 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64.cu @@ -15,14 +15,15 @@ template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a(T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a( + T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -40,14 +41,15 @@ awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a(T* nextshifts, template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b(T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b( + T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu index 6fb1ff705b..21fee4ef44 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu @@ -15,15 +15,16 @@ template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a(T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a( + T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -41,15 +42,16 @@ awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a(T* nexts template __global__ void -awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b(T* nextshifts, - const C* mask, - int64_t length, - bool valid_when, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b( + T* nextshifts, + const C* mask, + int64_t length, + bool valid_when, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_toIndexedOptionArray.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_toIndexedOptionArray.cu index 1339e73b13..a104214b17 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_toIndexedOptionArray.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ByteMaskedArray_toIndexedOptionArray.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_ByteMaskedArray_toIndexedOptionArray(T* toindex, - const C* mask, - int64_t length, - bool validwhen, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ByteMaskedArray_toIndexedOptionArray( + T* toindex, + const C* mask, + int64_t length, + bool validwhen, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu index f21e3cbd5f..e63598235e 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Content_getitem_next_missing_jagged_getmaskstartstop.cu @@ -13,15 +13,16 @@ template __global__ void -awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a(T* index_in, - C* offsets_in, - U* mask_out, - V* starts_out, - W* stops_out, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a( + T* index_in, + C* offsets_in, + U* mask_out, + V* starts_out, + W* stops_out, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { @@ -37,15 +38,16 @@ awkward_Content_getitem_next_missing_jagged_getmaskstartstop_a(T* index_in, template __global__ void -awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b(T* index_in, - C* offsets_in, - U* mask_out, - V* starts_out, - W* stops_out, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_Content_getitem_next_missing_jagged_getmaskstartstop_b( + T* index_in, + C* offsets_in, + U* mask_out, + V* starts_out, + W* stops_out, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu index ecea88ae39..b8c0f6a024 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_Index_nones_as_index.cu @@ -15,12 +15,13 @@ template __global__ void -awkward_Index_nones_as_index_a(T* toindex, - int64_t length, - int64_t* scan_in_array, - int64_t* scan_in_array_n_non_null, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_Index_nones_as_index_a( + T* toindex, + int64_t length, + int64_t* scan_in_array, + int64_t* scan_in_array_n_non_null, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { @@ -38,12 +39,13 @@ awkward_Index_nones_as_index_a(T* toindex, template __global__ void -awkward_Index_nones_as_index_b(T* toindex, - int64_t length, - int64_t* scan_in_array, - int64_t* scan_in_array_n_non_null, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_Index_nones_as_index_b( + T* toindex, + int64_t length, + int64_t* scan_in_array, + int64_t* scan_in_array_n_non_null, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t n_non_null = scan_in_array[length - 1]; int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill.cu index 98cc1ab21a..8608d48930 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_IndexedArray_fill(T* toindex, - int64_t toindexoffset, - const C* fromindex, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_fill( + T* toindex, + int64_t toindexoffset, + const C* fromindex, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill_count.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill_count.cu index 6583a04fcf..3ab72241fe 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill_count.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_fill_count.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_IndexedArray_fill_count(T* toindex, - int64_t toindexoffset, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_fill_count( + T* toindex, + int64_t toindexoffset, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu index 459022618c..8a0d09deb9 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_flatten_nextcarry.cu @@ -17,13 +17,14 @@ enum class INDEXEDARRAY_FLATTEN_NEXTCARRY_ERRORS { template __global__ void -awkward_IndexedArray_flatten_nextcarry_a(T* tocarry, - const C* fromindex, - int64_t lenindex, - int64_t lencontent, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_flatten_nextcarry_a( + T* tocarry, + const C* fromindex, + int64_t lenindex, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenindex) { @@ -41,13 +42,14 @@ awkward_IndexedArray_flatten_nextcarry_a(T* tocarry, template __global__ void -awkward_IndexedArray_flatten_nextcarry_b(T* tocarry, - const C* fromindex, - int64_t lenindex, - int64_t lencontent, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_flatten_nextcarry_b( + T* tocarry, + const C* fromindex, + int64_t lenindex, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu index a7ebe048d9..081b597e91 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry.cu @@ -17,13 +17,14 @@ enum class INDEXEDARRAY_GETITEM_NEXTCARRY_ERRORS { template __global__ void -awkward_IndexedArray_getitem_nextcarry_a(T* tocarry, - const C* fromindex, - int64_t lenindex, - int64_t lencontent, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_getitem_nextcarry_a( + T* tocarry, + const C* fromindex, + int64_t lenindex, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenindex) { @@ -41,13 +42,14 @@ awkward_IndexedArray_getitem_nextcarry_a(T* tocarry, template __global__ void -awkward_IndexedArray_getitem_nextcarry_b(T* tocarry, - const C* fromindex, - int64_t lenindex, - int64_t lencontent, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_getitem_nextcarry_b( + T* tocarry, + const C* fromindex, + int64_t lenindex, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu index 825efa2666..7ef0bf9cdd 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_getitem_nextcarry_outindex.cu @@ -17,14 +17,15 @@ enum class INDEXEDARRAY_GETITEM_NEXTCARRY_OUTINDEX_ERRORS { template __global__ void -awkward_IndexedArray_getitem_nextcarry_outindex_a(T* tocarry, - C* toindex, - const U* fromindex, - int64_t lenindex, - int64_t lencontent, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_getitem_nextcarry_outindex_a( + T* tocarry, + C* toindex, + const U* fromindex, + int64_t lenindex, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenindex) { @@ -43,14 +44,15 @@ awkward_IndexedArray_getitem_nextcarry_outindex_a(T* tocarry, template __global__ void -awkward_IndexedArray_getitem_nextcarry_outindex_b(T* tocarry, - C* toindex, - const U* fromindex, - int64_t lenindex, - int64_t lencontent, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_getitem_nextcarry_outindex_b( + T* tocarry, + C* toindex, + const U* fromindex, + int64_t lenindex, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu index 311d62a3df..aaaae57f13 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_index_of_nulls.cu @@ -13,14 +13,15 @@ template __global__ void -awkward_IndexedArray_index_of_nulls_a(T* toindex, - const C* fromindex, - int64_t lenindex, - const U* parents, - const V* starts, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_index_of_nulls_a( + T* toindex, + const C* fromindex, + int64_t lenindex, + const U* parents, + const V* starts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -36,14 +37,15 @@ awkward_IndexedArray_index_of_nulls_a(T* toindex, template __global__ void -awkward_IndexedArray_index_of_nulls_b(T* toindex, - const C* fromindex, - int64_t lenindex, - const U* parents, - const V* starts, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_index_of_nulls_b( + T* toindex, + const C* fromindex, + int64_t lenindex, + const U* parents, + const V* starts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu index 97e9d0d0e4..b340562e55 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull.cu @@ -13,12 +13,13 @@ template __global__ void -awkward_IndexedArray_numnull_a(T* numnull, - const C* fromindex, - int64_t lenindex, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_numnull_a( + T* numnull, + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -35,12 +36,13 @@ awkward_IndexedArray_numnull_a(T* numnull, template __global__ void -awkward_IndexedArray_numnull_b(T* numnull, - const C* fromindex, - int64_t lenindex, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_numnull_b( + T* numnull, + const C* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *numnull = lenindex > 0 ? scan_in_array[lenindex - 1] : 0; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu index 5cc314be99..2948351950 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_parents.cu @@ -13,13 +13,14 @@ template __global__ void -awkward_IndexedArray_numnull_parents_a(T* numnull, - C* tolength, - const U* fromindex, - int64_t lenindex, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_numnull_parents_a( + T* numnull, + C* tolength, + const U* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -38,13 +39,14 @@ awkward_IndexedArray_numnull_parents_a(T* numnull, template __global__ void -awkward_IndexedArray_numnull_parents_b(T* numnull, - C* tolength, - const U* fromindex, - int64_t lenindex, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_numnull_parents_b( + T* numnull, + C* tolength, + const U* fromindex, + int64_t lenindex, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *tolength = lenindex > 0 ? scan_in_array[lenindex - 1] : 0; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu index 85306498d0..cdcafd636f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu @@ -2,10 +2,11 @@ template __global__ void -awkward_IndexedArray_numnull_unique_64(T* toindex, - int64_t lenindex, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_numnull_unique_64( + T* toindex, + int64_t lenindex, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id <= lenindex) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_overlay_mask.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_overlay_mask.cu index 9d19d37e5e..b9decedf61 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_overlay_mask.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_overlay_mask.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_IndexedArray_overlay_mask(T* toindex, - const C* mask, - const U* fromindex, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_overlay_mask( + T* toindex, + const C* mask, + const U* fromindex, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu index 230c5d8a94..33baa5d585 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_64.cu @@ -13,15 +13,16 @@ template __global__ void -awkward_IndexedArray_reduce_next_64_a(T* nextcarry, - C* nextparents, - U* outindex, - const V* index, - const W* parents, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_64_a( + T* nextcarry, + C* nextparents, + U* outindex, + const V* index, + const W* parents, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -37,15 +38,16 @@ awkward_IndexedArray_reduce_next_64_a(T* nextcarry, template __global__ void -awkward_IndexedArray_reduce_next_64_b(T* nextcarry, - C* nextparents, - U* outindex, - const V* index, - const W* parents, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_64_b( + T* nextcarry, + C* nextparents, + U* outindex, + const V* index, + const W* parents, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_fix_offsets_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_fix_offsets_64.cu index 0bb661fc73..5fce2fa869 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_fix_offsets_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_fix_offsets_64.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_IndexedArray_reduce_next_fix_offsets_64(T* outoffsets, - const C* starts, - int64_t startslength, - int64_t outindexlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_fix_offsets_64( + T* outoffsets, + const C* starts, + int64_t startslength, + int64_t outindexlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < startslength) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu index 71de7aab0f..c04c239b61 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64.cu @@ -15,13 +15,14 @@ template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a(T* nextshifts, - const C* index, - int64_t length, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a( + T* nextshifts, + const C* index, + int64_t length, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -39,13 +40,14 @@ awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_a(T* nextshifts, template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_b(T* nextshifts, - const C* index, - int64_t length, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64_b( + T* nextshifts, + const C* index, + int64_t length, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu index 6d8b848a90..a61abd0e7d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64.cu @@ -15,14 +15,15 @@ template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a(T* nextshifts, - const C* index, - int64_t length, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a( + T* nextshifts, + const C* index, + int64_t length, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -40,14 +41,15 @@ awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_a(T* nextshif template __global__ void -awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b(T* nextshifts, - const C* index, - int64_t length, - const U* shifts, - int64_t* scan_in_array_k, - int64_t* scan_in_array_nullsum, - uint64_t invocation_code, - uint64_t* err_code) { +awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64_b( + T* nextshifts, + const C* index, + int64_t length, + const U* shifts, + int64_t* scan_in_array_k, + int64_t* scan_in_array_nullsum, + uint64_t invocation_code, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_simplify.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_simplify.cu index 92382a33cc..8191338414 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_simplify.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_simplify.cu @@ -6,13 +6,14 @@ enum class INDEXEDARRAY_SIMPLIFY_ERRORS { template __global__ void -awkward_IndexedArray_simplify(T* toindex, - const C* outerindex, - int64_t outerlength, - const U* innerindex, - int64_t innerlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_simplify( + T* toindex, + const C* outerindex, + int64_t outerlength, + const U* innerindex, + int64_t innerlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outerlength) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_validity.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_validity.cu index e0f33e9a46..5506fbd498 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_validity.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_validity.cu @@ -7,12 +7,13 @@ enum class INDEXEDARRAY_VALIDITY_ERRORS { template __global__ void -awkward_IndexedArray_validity(const T* index, - int64_t length, - int64_t lencontent, - bool isoption, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedArray_validity( + const T* index, + int64_t length, + int64_t lencontent, + bool isoption, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu index 854eb3ccbb..95fe4a8936 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedOptionArray_rpad_and_clip_mask_axis1.cu @@ -13,12 +13,13 @@ template __global__ void -awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_a(T* toindex, - const C* frommask, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_a( + T* toindex, + const C* frommask, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -34,12 +35,13 @@ awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_a(T* toindex, template __global__ void -awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_b(T* toindex, - const C* frommask, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_IndexedOptionArray_rpad_and_clip_mask_axis1_b( + T* toindex, + const C* frommask, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu index d2f89e63eb..feec5ecd4c 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu @@ -14,12 +14,13 @@ enum class LISTARRAY_COMPACT_OFFSETS_ERRORS { template __global__ void -awkward_ListArray_compact_offsets_a(T* tooffsets, - const C* fromstarts, - const U* fromstops, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_compact_offsets_a( + T* tooffsets, + const C* fromstarts, + const U* fromstops, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; tooffsets[0] = 0; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_fill.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_fill.cu index 80e110fee0..849b766054 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_fill.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_fill.cu @@ -2,16 +2,17 @@ template __global__ void -awkward_ListArray_fill(T* tostarts, - int64_t tostartsoffset, - C* tostops, - int64_t tostopsoffset, - const U* fromstarts, - const V* fromstops, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_fill( + T* tostarts, + int64_t tostartsoffset, + C* tostops, + int64_t tostopsoffset, + const U* fromstarts, + const V* fromstops, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu index bd7c3e68e9..a6e01f3f82 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_carrylen.cu @@ -13,13 +13,14 @@ template __global__ void -awkward_ListArray_getitem_jagged_carrylen_a(T* carrylen, - const C* slicestarts, - const U* slicestops, - int64_t sliceouterlen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_getitem_jagged_carrylen_a( + T* carrylen, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -31,13 +32,14 @@ awkward_ListArray_getitem_jagged_carrylen_a(T* carrylen, template __global__ void -awkward_ListArray_getitem_jagged_carrylen_b(T* carrylen, - const C* slicestarts, - const U* slicestops, - int64_t sliceouterlen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_getitem_jagged_carrylen_b( + T* carrylen, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *carrylen = sliceouterlen > 0 ? scan_in_array[sliceouterlen - 1] : 0; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu index f371df21f8..cf60c17dc1 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_expand.cu @@ -7,16 +7,17 @@ enum class LISTARRAY_GETITEM_JAGGED_EXPAND_ERRORS { template __global__ void -awkward_ListArray_getitem_jagged_expand(T* multistarts, - C* multistops, - const U* singleoffsets, - V* tocarry, - const W* fromstarts, - const X* fromstops, - int64_t jaggedsize, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_getitem_jagged_expand( + T* multistarts, + C* multistops, + const U* singleoffsets, + V* tocarry, + const W* fromstarts, + const X* fromstops, + int64_t jaggedsize, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / jaggedsize; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % jaggedsize; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array.cu index 0a94139d32..42c3573983 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array.cu @@ -8,16 +8,17 @@ enum class LISTARRAY_GETITEM_NEXT_ARRAY_ERRORS { template __global__ void -awkward_ListArray_getitem_next_array(T* tocarry, - C* toadvanced, - const U* fromstarts, - const V* fromstops, - const W* fromarray, - int64_t lenstarts, - int64_t lenarray, - int64_t lencontent, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_getitem_next_array( + T* tocarry, + C* toadvanced, + const U* fromstarts, + const V* fromstops, + const W* fromarray, + int64_t lenstarts, + int64_t lenarray, + int64_t lencontent, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / lenarray; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % lenarray; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array_advanced.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array_advanced.cu index a88e0d7df4..349a63c0f4 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array_advanced.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_array_advanced.cu @@ -8,17 +8,18 @@ enum class LISTARRAY_GETITEM_NEXT_ARRAY_ADVANCED_ERRORS { template __global__ void -awkward_ListArray_getitem_next_array_advanced(T* tocarry, - C* toadvanced, - const U* fromstarts, - const V* fromstops, - const W* fromarray, - const X* fromadvanced, - int64_t lenstarts, - int64_t lenarray, - int64_t lencontent, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_getitem_next_array_advanced( + T* tocarry, + C* toadvanced, + const U* fromstarts, + const V* fromstops, + const W* fromarray, + const X* fromadvanced, + int64_t lenstarts, + int64_t lenarray, + int64_t lencontent, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_at.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_at.cu index a04dd50aed..421f0d15c1 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_at.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_at.cu @@ -6,13 +6,14 @@ enum class LISTARRAY_GETITEM_NEXT_AT_ERRORS { template __global__ void -awkward_ListArray_getitem_next_at(T* tocarry, - const C* fromstarts, - const U* fromstops, - int64_t lenstarts, - int64_t at, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_getitem_next_at( + T* tocarry, + const C* fromstarts, + const U* fromstops, + int64_t lenstarts, + int64_t at, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu index 57ed4a05c3..f05144decf 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_counts.cu @@ -13,12 +13,13 @@ template __global__ void -awkward_ListArray_getitem_next_range_counts_a(T* total, - const C* fromoffsets, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_total, - uint64_t* err_code) { +awkward_ListArray_getitem_next_range_counts_a( + T* total, + const C* fromoffsets, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_total, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -30,12 +31,13 @@ awkward_ListArray_getitem_next_range_counts_a(T* total, template __global__ void -awkward_ListArray_getitem_next_range_counts_b(T* total, - const C* fromoffsets, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_total, - uint64_t* err_code) { +awkward_ListArray_getitem_next_range_counts_b( + T* total, + const C* fromoffsets, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_total, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *total = lenstarts > 0 ? scan_in_array[lenstarts - 1] : 0; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu index ef85350eae..f4fb8268b0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_min_range.cu @@ -14,13 +14,14 @@ template __global__ void -awkward_ListArray_min_range_a(T* tomin, - const C* fromstarts, - const U* fromstops, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_min_range_a( + T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -38,13 +39,14 @@ awkward_ListArray_min_range_a(T* tomin, template __global__ void -awkward_ListArray_min_range_b(T* tomin, - const C* fromstarts, - const U* fromstops, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_min_range_b( + T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { if (lenstarts > 0) { *tomin = scan_in_array[0]; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu index 17b59c891f..37be0aa11b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_rpad_and_clip_length_axis1.cu @@ -13,14 +13,15 @@ template __global__ void -awkward_ListArray_rpad_and_clip_length_axis1_a(T* tomin, - const C* fromstarts, - const U* fromstops, - int64_t target, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_rpad_and_clip_length_axis1_a( + T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t target, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_ListArray_rpad_and_clip_length_axis1_a(T* tomin, template __global__ void -awkward_ListArray_rpad_and_clip_length_axis1_b(T* tomin, - const C* fromstarts, - const U* fromstops, - int64_t target, - int64_t lenstarts, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_rpad_and_clip_length_axis1_b( + T* tomin, + const C* fromstarts, + const U* fromstops, + int64_t target, + int64_t lenstarts, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *tomin = lenstarts > 0 ? scan_in_array[lenstarts - 1] : 0; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_validity.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_validity.cu index befc592c13..c37de8100b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_validity.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_validity.cu @@ -6,19 +6,20 @@ enum class LISTARRAY_VALIDITY_ERRORS { ERROR_STOP_CONTENT // message: "stop[i] > len(content)" }; -template +template __global__ void -awkward_ListArray_validity(const C* starts, - const T* stops, - int64_t length, - int64_t lencontent, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListArray_validity( + const T* starts, + const C* stops, + int64_t length, + int64_t lencontent, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { - C start = starts[thread_id]; - T stop = stops[thread_id]; + T start = starts[thread_id]; + C stop = stops[thread_id]; if (start != stop) { if (start > stop) { RAISE_ERROR(LISTARRAY_VALIDITY_ERRORS::ERROR_START_STOP) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_flatten_offsets.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_flatten_offsets.cu index 87e09eb853..5bf7bab47e 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_flatten_offsets.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_flatten_offsets.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_ListOffsetArray_flatten_offsets(T* tooffsets, - const C* outeroffsets, - int64_t outeroffsetslen, - const U* inneroffsets, - int64_t inneroffsetslen, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListOffsetArray_flatten_offsets( + T* tooffsets, + const C* outeroffsets, + int64_t outeroffsetslen, + const U* inneroffsets, + int64_t inneroffsetslen, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outeroffsetslen) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu index 68e9453b29..d7aa492421 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64.cu @@ -12,12 +12,13 @@ template __global__ void -awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_a(T* nextstarts, - const C* nextparents, - int64_t nextlen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_a( + T* nextstarts, + const C* nextparents, + int64_t nextlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -32,12 +33,13 @@ awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_a(T* nextstarts, template __global__ void -awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_b(T* nextstarts, - const C* nextparents, - int64_t nextlen, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64_b( + T* nextstarts, + const C* nextparents, + int64_t nextlen, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_and_clip_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_and_clip_axis1.cu index c3032e77d8..44249232e9 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_and_clip_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_and_clip_axis1.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_ListOffsetArray_rpad_and_clip_axis1(T* toindex, - const C* fromoffsets, - int64_t length, - int64_t target, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListOffsetArray_rpad_and_clip_axis1( + T* toindex, + const C* fromoffsets, + int64_t length, + int64_t target, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / target; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % target; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_axis1.cu index 449f6b84ea..a21f746f5c 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_axis1.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_ListOffsetArray_rpad_axis1(T* toindex, - const C* fromoffsets, - int64_t fromlength, - int64_t target, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_ListOffsetArray_rpad_axis1( + T* toindex, + const C* fromoffsets, + int64_t fromlength, + int64_t target, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / target; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % target; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu index dfc34956a8..af0ab2faea 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_MaskedArray_getitem_next_jagged_project.cu @@ -13,15 +13,16 @@ template __global__ void -awkward_MaskedArray_getitem_next_jagged_project_a(T* index, - C* starts_in, - U* stops_in, - V* starts_out, - W* stops_out, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_MaskedArray_getitem_next_jagged_project_a( + T* index, + C* starts_in, + U* stops_in, + V* starts_out, + W* stops_out, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -37,15 +38,16 @@ awkward_MaskedArray_getitem_next_jagged_project_a(T* index, template __global__ void -awkward_MaskedArray_getitem_next_jagged_project_b(T* index, - C* starts_in, - U* stops_in, - V* starts_out, - W* stops_out, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_MaskedArray_getitem_next_jagged_project_b( + T* index, + C* starts_in, + U* stops_in, + V* starts_out, + W* stops_out, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_fill.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_fill.cu index ca5a6e6f18..e0ef17c78f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_fill.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_fill.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_NumpyArray_fill(T* toptr, - int64_t tooffset, - const C* fromptr, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_NumpyArray_fill( + T* toptr, + int64_t tooffset, + const C* fromptr, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_64.cu index 59ac8caf21..75595ff90f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_64.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_NumpyArray_reduce_adjust_starts_64(T* toptr, - int64_t outlength, - const C* parents, - const U* starts, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_NumpyArray_reduce_adjust_starts_64( + T* toptr, + int64_t outlength, + const C* parents, + const U* starts, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_shifts_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_shifts_64.cu index 526dbde17c..eead767e76 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_shifts_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_adjust_starts_shifts_64.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_NumpyArray_reduce_adjust_starts_shifts_64(T* toptr, - int64_t outlength, - const C* parents, - const U* starts, - const V* shifts, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_NumpyArray_reduce_adjust_starts_shifts_64( + T* toptr, + int64_t outlength, + const C* parents, + const U* starts, + const V* shifts, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_mask_ByteMaskedArray_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_mask_ByteMaskedArray_64.cu index 1d8400ea67..17282b7176 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_mask_ByteMaskedArray_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_reduce_mask_ByteMaskedArray_64.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_NumpyArray_reduce_mask_ByteMaskedArray_64(T* toptr, - const C* parents, - int64_t lenparents, - int64_t outlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_NumpyArray_reduce_mask_ByteMaskedArray_64( + T* toptr, + const C* parents, + int64_t lenparents, + int64_t outlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_carry.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_carry.cu index 4a1c41ad4f..c5f993afd1 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_carry.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_carry.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_RegularArray_getitem_carry(T* tocarry, - const C* fromcarry, - int64_t lencarry, - int64_t size, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_carry( + T* tocarry, + const C* fromcarry, + int64_t lencarry, + int64_t size, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / size; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % size; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_jagged_expand.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_jagged_expand.cu index 71a0c1f1c2..8c232aca7f 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_jagged_expand.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_jagged_expand.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_RegularArray_getitem_jagged_expand(T* multistarts, - C* multistops, - const U* singleoffsets, - int64_t regularsize, - int64_t regularlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_jagged_expand( + T* multistarts, + C* multistops, + const U* singleoffsets, + int64_t regularsize, + int64_t regularlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / regularsize; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % regularsize; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array.cu index d7d188d8c4..ba4b229917 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array.cu @@ -2,14 +2,15 @@ template __global__ void -awkward_RegularArray_getitem_next_array(T* tocarry, - C* toadvanced, - const U* fromarray, - int64_t length, - int64_t lenarray, - int64_t size, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_array( + T* tocarry, + C* toadvanced, + const U* fromarray, + int64_t length, + int64_t lenarray, + int64_t size, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / lenarray; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % lenarray; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_advanced.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_advanced.cu index fdbe2c8206..79382736d2 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_advanced.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_advanced.cu @@ -2,15 +2,16 @@ template __global__ void -awkward_RegularArray_getitem_next_array_advanced(T* tocarry, - C* toadvanced, - const U* fromadvanced, - const V* fromarray, - int64_t length, - int64_t lenarray, - int64_t size, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_array_advanced( + T* tocarry, + C* toadvanced, + const U* fromadvanced, + const V* fromarray, + int64_t length, + int64_t lenarray, + int64_t size, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu index 7242c2b4fa..c5b9d50cc1 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_array_regularize.cu @@ -16,13 +16,14 @@ enum class REGULARARRAY_GETITEM_NEXT_ARRAY_REGULARIZE_ERRORS { template __global__ void -awkward_RegularArray_getitem_next_array_regularize_a(T* toarray, - const C* fromarray, - int64_t lenarray, - int64_t size, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_array_regularize_a( + T* toarray, + const C* fromarray, + int64_t lenarray, + int64_t size, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -40,13 +41,14 @@ awkward_RegularArray_getitem_next_array_regularize_a(T* toarray, template __global__ void -awkward_RegularArray_getitem_next_array_regularize_b(T* toarray, - const C* fromarray, - int64_t lenarray, - int64_t size, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_array_regularize_b( + T* toarray, + const C* fromarray, + int64_t lenarray, + int64_t size, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_at.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_at.cu index be74e01221..8f1282974d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_at.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_at.cu @@ -6,12 +6,13 @@ enum class REGULARARRAY_GETITEM_NEXT_AT_ERRORS { template __global__ void -awkward_RegularArray_getitem_next_at(T* tocarry, - int64_t at, - int64_t length, - int64_t size, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_at( + T* tocarry, + int64_t at, + int64_t length, + int64_t size, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; int64_t regular_at = at; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range.cu index 7af734b0a1..ff25c86a11 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range.cu @@ -2,14 +2,15 @@ template __global__ void -awkward_RegularArray_getitem_next_range(T* tocarry, - int64_t regular_start, - int64_t step, - int64_t length, - int64_t size, - int64_t nextsize, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_range( + T* tocarry, + int64_t regular_start, + int64_t step, + int64_t length, + int64_t size, + int64_t nextsize, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / nextsize; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % nextsize; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu index d1d668dad1..814b31aa9a 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_getitem_next_range_spreadadvanced.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_RegularArray_getitem_next_range_spreadadvanced(T* toadvanced, - const C* fromadvanced, - int64_t length, - int64_t nextsize, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_getitem_next_range_spreadadvanced( + T* toadvanced, + const C* fromadvanced, + int64_t length, + int64_t nextsize, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / nextsize; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % nextsize; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_localindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_localindex.cu index dce60cd4ee..597098e2a2 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_localindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_localindex.cu @@ -2,11 +2,12 @@ template __global__ void -awkward_RegularArray_localindex(T* toindex, - int64_t size, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_localindex( + T* toindex, + int64_t size, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / size; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % size; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu index 4dffc06d42..b55ee1d7f6 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_local_nextparents.cu @@ -13,12 +13,13 @@ template __global__ void -awkward_RegularArray_reduce_local_nextparents_a(T* nextparents, - int64_t size, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_reduce_local_nextparents_a( + T* nextparents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; int64_t len = length * size; @@ -30,12 +31,13 @@ awkward_RegularArray_reduce_local_nextparents_a(T* nextparents, template __global__ void -awkward_RegularArray_reduce_local_nextparents_b(T* nextparents, - int64_t size, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_reduce_local_nextparents_b( + T* nextparents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / size; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % size; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu index fb55380011..e8d063fae7 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_reduce_nonlocal_preparenext.cu @@ -13,14 +13,15 @@ template __global__ void -awkward_RegularArray_reduce_nonlocal_preparenext_a(T* nextcarry, - C* nextparents, - const U* parents, - int64_t size, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_reduce_nonlocal_preparenext_a( + T* nextcarry, + C* nextparents, + const U* parents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thready_id = blockIdx.x * blockDim.x + threadIdx.x; int64_t len = length * size; @@ -32,14 +33,15 @@ if (err_code[0] == NO_ERROR) { template __global__ void -awkward_RegularArray_reduce_nonlocal_preparenext_b(T* nextcarry, - C* nextparents, - const U* parents, - int64_t size, - int64_t length, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_reduce_nonlocal_preparenext_b( + T* nextcarry, + C* nextparents, + const U* parents, + int64_t size, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) / length; int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) % length; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_rpad_and_clip_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_rpad_and_clip_axis1.cu index ada6cfd004..fd9d23ab41 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_rpad_and_clip_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_RegularArray_rpad_and_clip_axis1.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_RegularArray_rpad_and_clip_axis1(T* toindex, - int64_t target, - int64_t size, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_RegularArray_rpad_and_clip_axis1( + T* toindex, + int64_t target, + int64_t size, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / target; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % target; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex.cu index 1b3e2d0444..6f76000d3d 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_UnionArray_fillindex(T* toindex, - int64_t toindexoffset, - const C* fromindex, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_fillindex( + T* toindex, + int64_t toindexoffset, + const C* fromindex, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex_count.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex_count.cu index ff9676b49b..88d9559305 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex_count.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillindex_count.cu @@ -2,11 +2,12 @@ template __global__ void -awkward_UnionArray_fillindex_count(T* toindex, - int64_t toindexoffset, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_fillindex_count( + T* toindex, + int64_t toindexoffset, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillna.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillna.cu index fe8b2643ea..d44e103852 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillna.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_fillna.cu @@ -2,11 +2,12 @@ template __global__ void -awkward_UnionArray_fillna(T* toindex, - const C* fromindex, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_fillna( + T* toindex, + const C* fromindex, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags.cu index 314c2dc96a..c826e3dcb3 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_UnionArray_filltags(T* totags, - int64_t totagsoffset, - const C* fromtags, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_filltags( + T* totags, + int64_t totagsoffset, + const C* fromtags, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags_const.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags_const.cu index a4f62bda6b..bf8935abef 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags_const.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_filltags_const.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_UnionArray_filltags_const(T* totags, - int64_t totagsoffset, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_filltags_const( + T* totags, + int64_t totagsoffset, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu index 60f30f854a..b758010d14 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu @@ -11,15 +11,16 @@ template __global__ void -awkward_UnionArray_project_a(T* lenout, - C* tocarry, - const U* fromtags, - const V* fromindex, - int64_t length, - int64_t which, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_project_a( + T* lenout, + C* tocarry, + const U* fromtags, + const V* fromindex, + int64_t length, + int64_t which, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { @@ -34,15 +35,16 @@ awkward_UnionArray_project_a(T* lenout, template __global__ void -awkward_UnionArray_project_b(T* lenout, - C* tocarry, - const U* fromtags, - const V* fromindex, - int64_t length, - int64_t which, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_project_b( + T* lenout, + C* tocarry, + const U* fromtags, + const V* fromindex, + int64_t length, + int64_t which, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *lenout = length > 0 ? scan_in_array[length - 1] : 0; int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify.cu index bcba812aba..a1bf597f06 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify.cu @@ -2,19 +2,20 @@ template __global__ void -awkward_UnionArray_simplify(T* totags, - C* toindex, - const U* outertags, - const V* outerindex, - const W* innertags, - const X* innerindex, - int64_t towhich, - int64_t innerwhich, - int64_t outerwhich, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_simplify( + T* totags, + C* toindex, + const U* outertags, + const V* outerindex, + const W* innertags, + const X* innerindex, + int64_t towhich, + int64_t innerwhich, + int64_t outerwhich, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify_one.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify_one.cu index 7d78b98c4c..936975a787 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify_one.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_simplify_one.cu @@ -2,16 +2,17 @@ template __global__ void -awkward_UnionArray_simplify_one(T* totags, - C* toindex, - const U* fromtags, - const V* fromindex, - int64_t towhich, - int64_t fromwhich, - int64_t length, - int64_t base, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_simplify_one( + T* totags, + C* toindex, + const U* fromtags, + const V* fromindex, + int64_t towhich, + int64_t fromwhich, + int64_t length, + int64_t base, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_validity.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_validity.cu index d0a3899109..e24e5a3cf9 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_validity.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_validity.cu @@ -9,13 +9,14 @@ enum class UNIONARRAY_VALIDITY_ERRORS { template __global__ void -awkward_UnionArray_validity(const T* tags, - const C* index, - int64_t length, - int64_t numcontents, - const U* lencontents, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_UnionArray_validity( + const T* tags, + const C* index, + int64_t length, + int64_t numcontents, + const U* lencontents, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis0.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis0.cu index bb2eb7db61..b7bdacfc99 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis0.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis0.cu @@ -2,11 +2,12 @@ template __global__ void -awkward_index_rpad_and_clip_axis0(T* toindex, - int64_t target, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_index_rpad_and_clip_axis0( + T* toindex, + int64_t target, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis1.cu index 5237e63204..cd5d1f6f5b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis1.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_index_rpad_and_clip_axis1.cu @@ -2,12 +2,13 @@ template __global__ void -awkward_index_rpad_and_clip_axis1(T* tostarts, - C* tostops, - int64_t target, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_index_rpad_and_clip_axis1( + T* tostarts, + C* tostops, + int64_t target, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_localindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_localindex.cu index 5e13416426..afdec75eff 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_localindex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_localindex.cu @@ -2,10 +2,11 @@ template __global__ void -awkward_localindex(T* toindex, - int64_t length, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_localindex( + T* toindex, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < length) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_missing_repeat.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_missing_repeat.cu index e16b0d700f..efd631085a 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_missing_repeat.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_missing_repeat.cu @@ -2,13 +2,14 @@ template __global__ void -awkward_missing_repeat(T* outindex, - const C* index, - int64_t indexlength, - int64_t repetitions, - int64_t regularsize, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_missing_repeat( + T* outindex, + const C* index, + int64_t indexlength, + int64_t repetitions, + int64_t regularsize, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / indexlength; int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % indexlength; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu index ca88685e0e..7cd5da0f33 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmax.cu @@ -11,13 +11,14 @@ template __global__ void -awkward_reduce_argmax_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_argmax_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { @@ -28,13 +29,14 @@ awkward_reduce_argmax_a(T* toptr, template __global__ void -awkward_reduce_argmax_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_argmax_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu index 7ef169e498..282ebd11cc 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_argmin.cu @@ -11,13 +11,14 @@ template __global__ void -awkward_reduce_argmin_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_argmin_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { @@ -28,13 +29,14 @@ awkward_reduce_argmin_a(T* toptr, template __global__ void -awkward_reduce_argmin_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_argmin_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu index afab5efeaa..0870da2ff7 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_count_64_a(T* toptr, - const bool* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_count_64_a( + T* toptr, + const bool* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu index b6fdd6f92d..6b07dfa208 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_countnonzero.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_countnonzero_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_countnonzero_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_reduce_countnonzero_a(T* toptr, template __global__ void -awkward_reduce_countnonzero_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_countnonzero_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu index 14b2fd1351..3c20b653ac 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu @@ -11,14 +11,15 @@ template __global__ void -awkward_reduce_max_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - T identity, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_max_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T identity, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { @@ -29,14 +30,15 @@ awkward_reduce_max_a(T* toptr, template __global__ void -awkward_reduce_max_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - T identity, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_max_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T identity, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu index fc08a13175..ae0e2dcb61 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu @@ -11,14 +11,15 @@ template __global__ void -awkward_reduce_min_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - T identity, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_min_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T identity, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { @@ -29,14 +30,15 @@ awkward_reduce_min_a(T* toptr, template __global__ void -awkward_reduce_min_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - T identity, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_min_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + T identity, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu index d688b424e4..74843af6c0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_bool.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_prod_bool_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_prod_bool_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_reduce_prod_bool_a(T* toptr, template __global__ void -awkward_reduce_prod_bool_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_prod_bool_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu index abbee36a18..13c5a31dbf 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_sum_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_reduce_sum_a(T* toptr, template __global__ void -awkward_reduce_sum_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -53,14 +55,15 @@ awkward_reduce_sum_b(T* toptr, template __global__ void -awkward_reduce_sum_c(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_c( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu index 86fdc77fb0..0e062a6c78 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_bool.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_sum_bool_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_bool_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_reduce_sum_bool_a(T* toptr, template __global__ void -awkward_reduce_sum_bool_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_bool_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu index dd10241e90..8bdb3fccc2 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int32_bool_64.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_sum_int32_bool_64_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_int32_bool_64_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_reduce_sum_int32_bool_64_a(T* toptr, template __global__ void -awkward_reduce_sum_int32_bool_64_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_int32_bool_64_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu index 5a515169e0..041558a663 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_sum_int64_bool_64.cu @@ -14,14 +14,15 @@ template __global__ void -awkward_reduce_sum_int64_bool_64_a(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_int64_bool_64_a( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -33,14 +34,15 @@ awkward_reduce_sum_int64_bool_64_a(T* toptr, template __global__ void -awkward_reduce_sum_int64_bool_64_b(T* toptr, - const C* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_sum_int64_bool_64_b( + T* toptr, + const C* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu index a0fb302d0d..d0fdb4ddbc 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_sorting_ranges_length.cu @@ -13,12 +13,13 @@ template __global__ void -awkward_sorting_ranges_length_a(T* tolength, - const C* parents, - int64_t parentslength, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_sorting_ranges_length_a( + T* tolength, + const C* parents, + int64_t parentslength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id == 0 ) { @@ -37,12 +38,13 @@ awkward_sorting_ranges_length_a(T* tolength, template __global__ void -awkward_sorting_ranges_length_b(T* tolength, - const C* parents, - int64_t parentslength, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_sorting_ranges_length_b( + T* tolength, + const C* parents, + int64_t parentslength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { *tolength = parentslength > 0 ? scan_in_array[parentslength - 1] : scan_in_array[0]; }