Skip to content
This repository was archived by the owner on Jun 27, 2025. It is now read-only.

Commit d29ce54

Browse files
authored
Merge pull request #6 from static-frame/5/unicode-eq
Fix `memcmp` usage to provide units in bytes
2 parents 4747222 + 257e97a commit d29ce54

File tree

3 files changed: 23 additions and 15 deletions.

arraymap.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ typedef struct TableElement{
4242
# define LOAD 0.9
4343
# define SCAN 16
4444

45+
const static size_t UCS4_SIZE = sizeof(Py_UCS4);
4546

4647
typedef enum KeysArrayType{
4748
KAT_LIST = 0, // must be falsy
@@ -790,8 +791,8 @@ lookup_hash_unicode(
790791
Py_ssize_t table_pos = hash & mask;
791792

792793
PyArrayObject *a = (PyArrayObject *)self->keys;
793-
// REVIEW: is this a new descr reference?
794-
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / sizeof(Py_UCS4);
794+
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / UCS4_SIZE;
795+
Py_ssize_t cmp_bytes = Py_MIN(key_size, dt_size) * UCS4_SIZE;
795796

796797
Py_hash_t h = 0;
797798
Py_UCS4* p_start = NULL;
@@ -808,7 +809,7 @@ lookup_hash_unicode(
808809
}
809810
p_start = (Py_UCS4*)PyArray_GETPTR1(a, table[table_pos].keys_pos);
810811
// memcmp returns 0 on match
811-
if (!memcmp(p_start, key, Py_MIN(key_size, dt_size))) {
812+
if (!memcmp(p_start, key, cmp_bytes)) {
812813
return table_pos;
813814
}
814815
table_pos++;
@@ -833,6 +834,7 @@ lookup_hash_string(
833834

834835
PyArrayObject *a = (PyArrayObject *)self->keys;
835836
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / sizeof(char);
837+
Py_ssize_t cmp_bytes = Py_MIN(key_size, dt_size);
836838

837839
Py_hash_t h = 0;
838840
char* p_start = NULL;
@@ -849,7 +851,7 @@ lookup_hash_string(
849851
}
850852
p_start = (char*)PyArray_GETPTR1(a, table[table_pos].keys_pos);
851853
// memcmp returns 0 on match
852-
if (!memcmp(p_start, key, Py_MIN(key_size, dt_size))) {
854+
if (!memcmp(p_start, key, cmp_bytes)) {
853855
return table_pos;
854856
}
855857
table_pos++;
@@ -1110,7 +1112,7 @@ lookup_unicode(FAMObject *self, PyObject* key) {
11101112
return -1;
11111113
}
11121114
PyArrayObject *a = (PyArrayObject *)self->keys;
1113-
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / sizeof(Py_UCS4);
1115+
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / UCS4_SIZE;
11141116
// if the key_size is greater than the dtype size of the array, we know there cannot be a match
11151117
Py_ssize_t k_size = PyUnicode_GetLength(key);
11161118
if (k_size > dt_size) {
@@ -1435,8 +1437,8 @@ copy_to_new(PyTypeObject *cls, FAMObject *self, FAMObject *new)
14351437
new->key_buffer = NULL;
14361438
if (new->keys_array_type == KAT_UNICODE) {
14371439
PyArrayObject *a = (PyArrayObject *)new->keys;
1438-
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / sizeof(Py_UCS4);
1439-
new->key_buffer = (Py_UCS4*)PyMem_Malloc((dt_size+1) * sizeof(Py_UCS4));
1440+
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / UCS4_SIZE;
1441+
new->key_buffer = (Py_UCS4*)PyMem_Malloc((dt_size+1) * UCS4_SIZE);
14401442
}
14411443

14421444
Py_ssize_t table_size_alloc = new->table_size + SCAN - 1;
@@ -1922,8 +1924,8 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
19221924
break;
19231925
case KAT_UNICODE: {
19241926
// Over allocate buffer by 1 so there is room for null at end. This buffer is only used in lookup();
1925-
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / sizeof(Py_UCS4);
1926-
fam->key_buffer = (Py_UCS4*)PyMem_Malloc((dt_size+1) * sizeof(Py_UCS4));
1927+
Py_ssize_t dt_size = PyArray_DESCR(a)->elsize / UCS4_SIZE;
1928+
fam->key_buffer = (Py_UCS4*)PyMem_Malloc((dt_size+1) * UCS4_SIZE);
19271929
INSERT_FLEXIBLE(Py_UCS4, insert_unicode, ucs4_get_end_p);
19281930
break;
19291931
}

doc/articles/npy-opt.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from typing import NamedTuple
66
from itertools import repeat
77

8-
import automap
9-
from automap import AutoMap
10-
from automap import FrozenAutoMap
8+
import arraymap
9+
from arraymap import AutoMap
10+
from arraymap import FrozenAutoMap
1111

1212
import matplotlib.pyplot as plt
1313
import numpy as np
@@ -386,7 +386,7 @@ def get_array(size: int) -> np.ndarray:
386386
def get_versions() -> str:
387387
import platform
388388

389-
return f"OS: {platform.system()} / AutoMap / NumPy: {np.__version__}\n"
389+
return f"OS: {platform.system()} / ArrayMap: {arraymap.__version__} / NumPy: {np.__version__}\n"
390390

391391

392392
CLS_FF = (
@@ -402,7 +402,7 @@ def get_versions() -> str:
402402
FF_ORDER = [f.NAME for f in sorted(CLS_FF, key=lambda ff: ff.SORT)]
403403

404404
# -------------------------------------------------------------------------------
405-
NUMBER = 50
405+
NUMBER = 2
406406

407407
from itertools import product
408408

@@ -476,7 +476,7 @@ def plot_performance(frame, suffix: str = ""):
476476
fig.text(0.05, 0.96, f"AutoMap {suffix.title()}: {NUMBER} Iterations", fontsize=10)
477477
fig.text(0.05, 0.90, get_versions(), fontsize=6)
478478

479-
fp = f"/tmp/automap-{suffix}.png"
479+
fp = f"/tmp/arraymap-{suffix}.png"
480480
plt.subplots_adjust(
481481
left=0.075,
482482
bottom=0.05,

test/test_unit.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ def test_fam_constructor_array_unicode_b():
136136
assert k in fam
137137

138138

139+
def test_fam_constructor_array_unicode_c():
140+
a1 = np.array(("z0Ct", "z0DS", "z0E9"))
141+
a1.flags.writeable = False
142+
fam = FrozenAutoMap(a1)
143+
144+
139145
def test_fam_copy_array_unicode_a():
140146
a1 = np.array(("a", "ccc", "bb"))
141147
a1.flags.writeable = False

Comments: 0 commit comments.