Skip to content
This repository was archived by the owner on Jun 27, 2025. It is now read-only.

Commit 94ab3d7

Browse files
authored
Merge pull request #24 from static-frame/23/get-lower-bit
Narrowed handling of array-optimized lookups in `get_all`, `get_any`
2 parents dee1ffb + e82f8fc commit 94ab3d7

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

arraymap.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -179,24 +179,25 @@ at_to_kat(int array_t, PyArrayObject* a) {
179179
}
180180

181181

182+
// To determine when we can use direct array lookups, this function returns 1 if we match and 0 if we do not. Given a keys array type and the kind of the lookup key, return true only for the largest KAT type of each kind.
182183
int
183184
kat_is_kind(KeysArrayType kat, char kind) {
184185
switch (kat) {
185186
case KAT_INT64:
186-
case KAT_INT32:
187-
case KAT_INT16:
188-
case KAT_INT8:
187+
// case KAT_INT32:
188+
// case KAT_INT16:
189+
// case KAT_INT8:
189190
return kind == 'i';
190191

191192
case KAT_UINT64:
192-
case KAT_UINT32:
193-
case KAT_UINT16:
194-
case KAT_UINT8:
193+
// case KAT_UINT32:
194+
// case KAT_UINT16:
195+
// case KAT_UINT8:
195196
return kind == 'u';
196197

197198
case KAT_FLOAT64:
198-
case KAT_FLOAT32:
199-
case KAT_FLOAT16:
199+
// case KAT_FLOAT32:
200+
// case KAT_FLOAT16:
200201
return kind == 'f';
201202

202203
case KAT_UNICODE:
@@ -1816,7 +1817,7 @@ get(FAMObject *self, PyObject *key, PyObject *missing) {
18161817
npy_type_dst v; \
18171818
for (; i < key_size; i++) { \
18181819
v = post_deref(*(npy_type_src*)PyArray_GETPTR1(key_array, i)); \
1819-
table_pos = lookup_func(self, v, hash_func(v), kat); \
1820+
table_pos = lookup_func(self, v, hash_func(v), kat); \
18201821
if (table_pos < 0 || (self->table[table_pos].hash == -1)) { \
18211822
Py_DECREF(array); \
18221823
if (PyErr_Occurred()) { \
@@ -1934,9 +1935,10 @@ fam_get_all(FAMObject *self, PyObject *key) {
19341935
// if key is an np array of the same kind as this FAM's keys, we can do optimized lookups; otherwise, we have to go through scalar to do full branching and coercion into lookup
19351936
int key_array_t = PyArray_TYPE(key_array);
19361937

1938+
// NOTE: we only match numeric kinds if the KAT is 64 bit; we could support, for each key_array_t, a switch for every KAT, but the size of that code is huge and the performance benefit is not massive
19371939
if (kat_is_kind(self->keys_array_type, PyArray_DESCR(key_array)->kind)) {
19381940
Py_ssize_t table_pos;
1939-
switch (key_array_t) {
1941+
switch (key_array_t) { // type of passed in array
19401942
case NPY_INT64:
19411943
GET_ALL_SCALARS(npy_int64, npy_int64, KAT_INT64, lookup_hash_int, int_to_hash, PyLong_FromLongLong,);
19421944
break;

test/test_unit.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,35 @@ def test_fam_array_get_all_l():
894894
_ = fam.get_all(np.array(["2022-01", "2023-01", "1988-01"], np.datetime64))
895895

896896

897+
def test_fam_array_get_all_m1():
898+
# NOTE: arrays smaller than 64 bits in FAMs do not get optimal array lookup performance
899+
a1 = np.array((1, 100, 300), dtype=np.int32)
900+
a1.flags.writeable = False
901+
fam = FrozenAutoMap(a1)
902+
post1 = fam.get_all(np.array([300, 100], dtype=np.int64))
903+
assert post1.tolist() == [2, 1]
904+
905+
906+
def test_fam_array_get_all_m2():
907+
a1 = np.array((1, 100, 300), dtype=np.int16)
908+
a1.flags.writeable = False
909+
fam = FrozenAutoMap(a1)
910+
post1 = fam.get_all(np.array([300, 100], dtype=np.int64))
911+
assert post1.tolist() == [2, 1]
912+
913+
914+
def test_fam_array_get_all_m3():
915+
a1 = np.array((1, 100, 30), dtype=np.int8)
916+
a1.flags.writeable = False
917+
fam = FrozenAutoMap(a1)
918+
post1 = fam.get_all(np.array([30, 100], dtype=np.int64))
919+
assert post1.tolist() == [2, 1]
920+
921+
post2 = fam.get_all(np.array([30, 100], dtype=np.int8))
922+
assert post2.tolist() == [2, 1]
923+
924+
925+
897926
# -------------------------------------------------------------------------------
898927

899928

0 commit comments

Comments
 (0)