Skip to content

Commit dd4753b

Browse files
authored
feat: add CUDA kernels that calculate length/sum (#2992)
* feat: add cumulative sum CUDA kernels * feat: add CUDA kernels (need to be fixed) * feat: add more kernels with cumulative sum * added exclusive_scan function and add new CUDA kernels * test: remove XFAIL for awkward_ByteMaskedArray_numnull * feat: add Python kernel definition for awkward_sorting_ranges_length * feat: use `cupy.cumsum` * test: remove XFAIL * fix: check all kernels for length = 0 * fix: failing tests-spec * fix: add missing src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu * fix: awkward_IndexedArray_numnull_parents.cu * feat: add 2 kernels that use a temp array * fix: use cupy.min instead of atomicMin() in awkward_ListArray_min_range * fix: lenstarts = 0 case in awkward_ListArray_min_range * fix: awkward_ListArray_getitem_jagged_expand * refactor: remove changes in awkward_reduce_sum * fix: formatting
1 parent 9096a7c commit dd4753b

File tree

94 files changed

+3373
-919
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

94 files changed

+3373
-919
lines changed

dev/generate-kernel-signatures.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,16 @@
1212

1313

1414
cuda_kernels_impl = [
15+
"awkward_Index_nones_as_index",
1516
"awkward_ListArray_min_range",
1617
"awkward_ListArray_validity",
1718
"awkward_BitMaskedArray_to_ByteMaskedArray",
1819
"awkward_ListArray_compact_offsets",
1920
"awkward_ListOffsetArray_flatten_offsets",
2021
"awkward_IndexedArray_overlay_mask",
22+
"awkward_ByteMaskedArray_numnull",
23+
"awkward_IndexedArray_numnull",
24+
"awkward_IndexedArray_numnull_parents",
2125
"awkward_IndexedArray_numnull_unique_64",
2226
"awkward_NumpyArray_fill",
2327
"awkward_ListArray_fill",
@@ -43,12 +47,19 @@
4347
"awkward_RegularArray_getitem_next_range",
4448
"awkward_RegularArray_getitem_next_range_spreadadvanced",
4549
"awkward_RegularArray_getitem_next_array",
50+
"awkward_RegularArray_getitem_next_array_regularize",
51+
"awkward_RegularArray_reduce_local_nextparents",
52+
"awkward_RegularArray_reduce_nonlocal_preparenext",
4653
"awkward_missing_repeat",
4754
"awkward_RegularArray_getitem_jagged_expand",
4855
"awkward_ListArray_getitem_jagged_expand",
56+
"awkward_ListArray_getitem_jagged_carrylen",
4957
"awkward_ListArray_getitem_next_array_advanced",
5058
"awkward_ListArray_getitem_next_array",
5159
"awkward_ListArray_getitem_next_at",
60+
"awkward_ListArray_getitem_next_range_counts",
61+
"awkward_ListArray_rpad_and_clip_length_axis1",
62+
"awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64",
5263
"awkward_NumpyArray_reduce_adjust_starts_64",
5364
"awkward_NumpyArray_reduce_adjust_starts_shifts_64",
5465
"awkward_RegularArray_getitem_next_at",
@@ -86,6 +97,7 @@
8697
"awkward_reduce_sum_bool",
8798
"awkward_reduce_prod_bool",
8899
"awkward_reduce_countnonzero",
100+
"awkward_sorting_ranges_length",
89101
]
90102

91103

dev/generate-tests.py

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ def __init__(self, name, typename, direction, role="default"):
3535
self.role = role
3636

3737

38+
no_role_kernels = [
39+
"awkward_NumpyArray_sort_asstrings_uint8",
40+
"awkward_argsort",
41+
"awkward_sort",
42+
]
43+
44+
3845
class Specification:
3946
def __init__(self, templatized_kernel_name, spec, testdata, blacklisted):
4047
self.templatized_kernel_name = templatized_kernel_name
@@ -51,6 +58,8 @@ def __init__(self, templatized_kernel_name, spec, testdata, blacklisted):
5158
)
5259
if blacklisted:
5360
self.tests = []
61+
elif templatized_kernel_name in no_role_kernels:
62+
self.tests = []
5463
else:
5564
self.tests = self.gettests(testdata)
5665

@@ -185,6 +194,7 @@ def gettests(self, testdata):
185194

186195
def readspec():
187196
specdict = {}
197+
specdict_unit = {}
188198
with open(os.path.join(CURRENT_DIR, "..", "kernel-specification.yml")) as f:
189199
loadfile = yaml.load(f, Loader=yaml.CSafeLoader)
190200

@@ -193,6 +203,13 @@ def readspec():
193203
data = json.load(f)["tests"]
194204

195205
for spec in indspec:
206+
for childfunc in spec["specializations"]:
207+
specdict_unit[childfunc["name"]] = Specification(
208+
spec["name"],
209+
childfunc,
210+
data,
211+
not spec["automatic-tests"],
212+
)
196213
if "def " in spec["definition"]:
197214
for childfunc in spec["specializations"]:
198215
specdict[childfunc["name"]] = Specification(
@@ -201,7 +218,7 @@ def readspec():
201218
data,
202219
not spec["automatic-tests"],
203220
)
204-
return specdict
221+
return specdict, specdict_unit
205222

206223

207224
def getdtypes(args):
@@ -215,6 +232,8 @@ def getdtypes(args):
215232
typename = typename + "_"
216233
if count == 1:
217234
dtypes.append("cupy." + typename)
235+
elif count == 2:
236+
dtypes.append("cupy." + typename)
218237
return dtypes
219238

220239

@@ -239,7 +258,12 @@ def checkintrange(test_args, error, args):
239258
if "int" in typename or "uint" in typename:
240259
dtype = gettypename(typename)
241260
min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
242-
if "List" in typename:
261+
if "List[List" in typename:
262+
for row in val:
263+
for data in row:
264+
if not (min_val <= data <= max_val):
265+
flag = False
266+
elif "List" in typename:
243267
for data in val:
244268
if not (min_val <= data <= max_val):
245269
flag = False
@@ -652,12 +676,16 @@ def gencpuunittests(specdict):
652676

653677

654678
cuda_kernels_tests = [
679+
"awkward_Index_nones_as_index",
655680
"awkward_ListArray_min_range",
656681
"awkward_ListArray_validity",
657682
"awkward_BitMaskedArray_to_ByteMaskedArray",
658683
"awkward_ListArray_compact_offsets",
659684
"awkward_ListOffsetArray_flatten_offsets",
660685
"awkward_IndexedArray_overlay_mask",
686+
"awkward_ByteMaskedArray_numnull",
687+
"awkward_IndexedArray_numnull",
688+
"awkward_IndexedArray_numnull_parents",
661689
"awkward_IndexedArray_numnull_unique_64",
662690
"awkward_NumpyArray_fill",
663691
"awkward_ListArray_fill",
@@ -683,12 +711,19 @@ def gencpuunittests(specdict):
683711
"awkward_RegularArray_getitem_next_range",
684712
"awkward_RegularArray_getitem_next_range_spreadadvanced",
685713
"awkward_RegularArray_getitem_next_array",
714+
"awkward_RegularArray_getitem_next_array_regularize",
715+
"awkward_RegularArray_reduce_local_nextparents",
716+
"awkward_RegularArray_reduce_nonlocal_preparenext",
686717
"awkward_missing_repeat",
687718
"awkward_RegularArray_getitem_jagged_expand",
688719
"awkward_ListArray_getitem_jagged_expand",
720+
"awkward_ListArray_getitem_jagged_carrylen",
689721
"awkward_ListArray_getitem_next_array_advanced",
690722
"awkward_ListArray_getitem_next_array",
691723
"awkward_ListArray_getitem_next_at",
724+
"awkward_ListArray_getitem_next_range_counts",
725+
"awkward_ListArray_rpad_and_clip_length_axis1",
726+
"awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64",
692727
"awkward_NumpyArray_reduce_adjust_starts_64",
693728
"awkward_NumpyArray_reduce_adjust_starts_shifts_64",
694729
"awkward_RegularArray_getitem_next_at",
@@ -726,6 +761,7 @@ def gencpuunittests(specdict):
726761
"awkward_reduce_sum_bool",
727762
"awkward_reduce_prod_bool",
728763
"awkward_reduce_countnonzero",
764+
"awkward_sorting_ranges_length",
729765
]
730766

731767

@@ -966,8 +1002,12 @@ def gencudaunittests(specdict):
9661002
)
9671003
)
9681004
elif count == 2:
969-
raise NotImplementedError
970-
1005+
f.write(
1006+
" " * 4
1007+
+ "{} = cupy.array({}, dtype=cupy.{})\n".format(
1008+
arg, val, typename
1009+
)
1010+
)
9711011
cuda_string = (
9721012
"funcC = cupy_backend['"
9731013
+ spec.templatized_kernel_name
@@ -1068,10 +1108,10 @@ def evalkernels():
10681108
if __name__ == "__main__":
10691109
genpykernels()
10701110
evalkernels()
1071-
specdict = readspec()
1111+
specdict, specdict_unit = readspec()
10721112
genspectests(specdict)
10731113
gencpukerneltests(specdict)
1074-
gencpuunittests(specdict)
1114+
gencpuunittests(specdict_unit)
10751115
genunittests()
10761116
gencudakerneltests(specdict)
1077-
gencudaunittests(specdict)
1117+
gencudaunittests(specdict_unit)

kernel-specification.yml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1960,11 +1960,12 @@ kernels:
19601960
description: null
19611961
definition: |
19621962
def awkward_ListArray_min_range(tomin, fromstarts, fromstops, lenstarts):
1963-
shorter = fromstops[0] - fromstarts[0]
1964-
for i in range(1, lenstarts):
1965-
rangeval = fromstops[i] - fromstarts[i]
1966-
shorter = shorter if shorter < rangeval else rangeval
1967-
tomin[0] = shorter
1963+
if lenstarts > 0:
1964+
shorter = fromstops[0] - fromstarts[0]
1965+
for i in range(1, lenstarts):
1966+
rangeval = fromstops[i] - fromstarts[i]
1967+
shorter = shorter if shorter < rangeval else rangeval
1968+
tomin[0] = shorter
19681969
automatic-tests: true
19691970

19701971
- name: awkward_ListArray_rpad_and_clip_length_axis1
@@ -5917,5 +5918,9 @@ kernels:
59175918
- {name: parentslength, type: "int64_t", dir: in, role: default}
59185919
description: null
59195920
definition: |
5920-
Insert Python definition here
5921+
def awkward_sorting_ranges_length(tolength, parents, parentslength):
5922+
tolength[0] = 2
5923+
for i in range(1, parentslength):
5924+
if parents[i - 1] != parents[i]:
5925+
tolength[0] = tolength[0] + 1
59215926
automatic-tests: false

0 commit comments

Comments
 (0)