Skip to content

Commit b82f284

Browse files
committed
code cleanup
1 parent e10689a commit b82f284

File tree

3 files changed

+81
-38
lines changed

3 files changed

+81
-38
lines changed

doc/articles/nonzero_1d.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def __call__(self):
4848

4949

5050
#-------------------------------------------------------------------------------
51-
NUMBER = 400
51+
NUMBER = 100
5252

5353
def seconds_to_display(seconds: float) -> str:
5454
seconds /= NUMBER

src/_arraykit.c

Lines changed: 69 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3535,7 +3535,7 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) {
35353535
//------------------------------------------------------------------------------
35363536
// general utility
35373537

3538-
#define NONZERO_APPEND_INDEX { \
3538+
#define NONZERO_APPEND_INDEX_RELATIVE { \
35393539
if (AK_UNLIKELY(count == capacity)) { \
35403540
capacity <<= 1; \
35413541
indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
@@ -3546,6 +3546,17 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) {
35463546
indices[count++] = p - p_start; \
35473547
} \
35483548

3549+
#define NONZERO_APPEND_INDEX_ABSOLUTE { \
3550+
if (AK_UNLIKELY(count == capacity)) { \
3551+
capacity <<= 1; \
3552+
indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
3553+
if (indices == NULL) { \
3554+
return NULL; \
3555+
} \
3556+
} \
3557+
indices[count++] = i; \
3558+
} \
3559+
35493560
// Given a Boolean 1D array (contiguous or non-contiguous), return the index positions of the True values in an int64 array.
35503561
// Through experimentation it has been verified that doing full-size allocation of memory does not permit outperforming NumPy at 10_000_000 scale; but doing fewer optimizations does help. Using bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number.
35513562
static inline PyObject*
@@ -3567,44 +3578,71 @@ AK_nonzero_1d(PyArrayObject* array) {
35673578
Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8;
35683579
npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity);
35693580

3570-
// array is contiguous, 1d, boolean
35713581
NPY_BEGIN_THREADS_DEF;
35723582
NPY_BEGIN_THREADS;
35733583

3574-
npy_bool* p_start = (npy_bool*)PyArray_DATA(array);
3575-
npy_bool* p = p_start;
3576-
npy_bool* p_end = p + count_max;
3577-
npy_bool* p_end_roll = p_end - size_div.rem;
3584+
if (PyArray_IS_C_CONTIGUOUS(array)) {
3585+
npy_bool* p_start = (npy_bool*)PyArray_DATA(array);
3586+
npy_bool* p = p_start;
3587+
npy_bool* p_end = p + count_max;
3588+
npy_bool* p_end_roll = p_end - size_div.rem;
35783589

3579-
while (p < p_end_roll) {
3580-
if (*(npy_uint64*)p == 0) {
3581-
p += 8; // no true within this 8 byte roll region
3582-
continue;
3590+
while (p < p_end_roll) {
3591+
if (*(npy_uint64*)p == 0) {
3592+
p += 8; // no true within this 8 byte roll region
3593+
continue;
3594+
}
3595+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3596+
p++;
3597+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3598+
p++;
3599+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3600+
p++;
3601+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3602+
p++;
3603+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3604+
p++;
3605+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3606+
p++;
3607+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3608+
p++;
3609+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3610+
p++;
3611+
}
3612+
while (p < p_end) {
3613+
if (*p) {NONZERO_APPEND_INDEX_RELATIVE;}
3614+
p++;
35833615
}
3584-
if (*p) {NONZERO_APPEND_INDEX;}
3585-
p++;
3586-
if (*p) {NONZERO_APPEND_INDEX;}
3587-
p++;
3588-
if (*p) {NONZERO_APPEND_INDEX;}
3589-
p++;
3590-
if (*p) {NONZERO_APPEND_INDEX;}
3591-
p++;
3592-
if (*p) {NONZERO_APPEND_INDEX;}
3593-
p++;
3594-
if (*p) {NONZERO_APPEND_INDEX;}
3595-
p++;
3596-
if (*p) {NONZERO_APPEND_INDEX;}
3597-
p++;
3598-
if (*p) {NONZERO_APPEND_INDEX;}
3599-
p++;
36003616
}
3601-
while (p < p_end) {
3602-
if (*p) {NONZERO_APPEND_INDEX;}
3603-
p++;
3617+
else {
3618+
npy_intp i = 0; // position within Boolean array
3619+
npy_intp i_end = count_max;
3620+
npy_intp i_end_roll = count_max - size_div.rem;
3621+
while (i < i_end_roll) {
3622+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3623+
i++;
3624+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3625+
i++;
3626+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3627+
i++;
3628+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3629+
i++;
3630+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3631+
i++;
3632+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3633+
i++;
3634+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3635+
i++;
3636+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3637+
i++;
3638+
}
3639+
while (i < i_end) {
3640+
if (*(npy_bool*)PyArray_GETPTR1(array, i)) {NONZERO_APPEND_INDEX_ABSOLUTE;}
3641+
i++;
3642+
}
36043643
}
36053644
NPY_END_THREADS;
36063645

3607-
36083646
npy_intp dims = {count};
36093647
final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices);
36103648
if (!final) {
@@ -3616,7 +3654,7 @@ AK_nonzero_1d(PyArrayObject* array) {
36163654
PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
36173655
return final;
36183656
}
3619-
#undef NONZERO_APPEND_INDEX
3657+
#undef NONZERO_APPEND_INDEX_RELATIVE
36203658

36213659
static PyObject*
36223660
nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
@@ -3630,10 +3668,6 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
36303668
PyErr_SetString(PyExc_ValueError, "Array must be of type bool");
36313669
return NULL;
36323670
}
3633-
if (!PyArray_IS_C_CONTIGUOUS(array)) {
3634-
PyErr_SetString(PyExc_ValueError, "Array must be contiguous");
3635-
return NULL;
3636-
}
36373671
return AK_nonzero_1d(array);
36383672
}
36393673

test/test_nonzero_1d.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,16 @@ def test_nonzero_1d_e(self) -> None:
8686
self.assertEqual(nonzero_1d(a1).tolist(), [0, 999, 9_999_999])
8787

8888
def test_nonzero_1d_f(self) -> None:
89+
# non-contiguous
90+
a1 = np.arange(40).reshape(10, 4) % 3 == 0
91+
a2 = a1[:, 3]
92+
self.assertEqual(nonzero_1d(a2).tolist(), [0, 3, 6, 9])
93+
94+
a3 = a1[:, 1]
95+
self.assertEqual(nonzero_1d(a3).tolist(), [2, 5, 8])
96+
97+
def test_nonzero_1d_g(self) -> None:
8998
a1 = np.arange(20).reshape(4, 5) % 3 == 0
9099
a2 = a1[:, 4]
91-
post = nonzero_1d(a2)
92-
100+
# array([False, True, False, False])
101+
self.assertEqual(nonzero_1d(a2).tolist(), [1])

0 commit comments

Comments
 (0)