Skip to content

Commit 47bae2e

Browse files
committed
[Arm64] lj_new_str() crc32 optimization
1 parent 10aeff6 commit 47bae2e

File tree

3 files changed

+256
-2
lines changed

3 files changed

+256
-2
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer
5050
CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
5151
CCOPT_x64=
5252
CCOPT_arm=
53-
CCOPT_arm64=
53+
CCOPT_arm64= -march=armv8-a+crc
5454
CCOPT_ppc=
5555
CCOPT_mips=
5656
#

src/arm64/src/lj_str_hash_arm64.h

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
/*
2+
* This file defines string hash function using CRC32. It takes advantage of
3+
* Arm64 hardware support (crc32 instruction) to speedup the CRC32
4+
* computation. The hash functions try to compute CRC32 of length and up
5+
* to 128 bytes of given string.
6+
*/
7+
8+
#ifndef _LJ_STR_HASH_ARM64_H_
9+
#define _LJ_STR_HASH_ARM64_H_
10+
11+
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && defined(__GNUC__)
12+
13+
#include <stdint.h>
14+
#include <sys/types.h>
15+
#include <unistd.h>
16+
#include <time.h>
17+
#include <sys/auxv.h>
18+
#include <stdio.h>
19+
#include <arm_acle.h>
20+
21+
#include "../../lj_def.h"
22+
23+
#ifndef LJ_AINLINE
24+
#define LJ_AINLINE inline __attribute__((always_inline))
25+
#endif
26+
27+
#ifdef __MINGW32__
28+
#define random() ((long) rand())
29+
#define srandom(seed) srand(seed)
30+
#endif
31+
/* View the bytes at `str` as 64-bit words. No alignment check is performed. */
static const uint64_t* cast_uint64p(const char* str)
{
  const void* raw = str;
  return raw;
}
35+
36+
/* View the bytes at `str` as 32-bit words. No alignment check is performed. */
static const uint32_t* cast_uint32p(const char* str)
{
  const void* raw = str;
  return raw;
}
40+
41+
/*
 * Hash a very short string (lj_str_hash() routes len < 4 here; len must be
 * non-zero). The first byte, the byte at index len/2, the last byte and the
 * length are packed into one 32-bit word whose CRC32C is returned.
 *
 * The bytes are read as unsigned char: with a signed plain `char`, a byte
 * >= 0x80 would be sign-extended when OR-ed into `v` and would overwrite the
 * bytes packed before it.
 */
static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len)
{
  const unsigned char* bytes = (const unsigned char*)str;
  uint32_t v = bytes[0];

  v = (v << 8) | bytes[len >> 1];
  v = (v << 8) | bytes[len - 1];
  v = (v << 8) | len;
  return __crc32cw(0, v);
}
49+
50+
/*
 * Hash a string of 4..15 bytes (as dispatched by lj_str_hash()).
 * Two possibly overlapping words are loaded, one from the start and one
 * ending at the last byte: 64-bit words when len >= 8, 32-bit words
 * otherwise. The CRC32C is taken over the length, then the head word, then
 * the tail word.
 */
static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, size_t len)
{
  uint64_t head, tail;
  uint64_t crc = 0;

  if (len < 8) {
    head = *cast_uint32p(str);
    tail = *cast_uint32p(str + len - 4);
  } else {
    head = *cast_uint64p(str);
    tail = *cast_uint64p(str + len - 8);
  }

  crc = __crc32cw(crc, len);
  crc = __crc32cd(crc, head);
  crc = __crc32cd(crc, tail);

  return crc;
}
68+
69+
/*
 * Hash a string of 16..127 bytes (as dispatched by lj_str_hash()).
 * Two accumulators are fed in parallel: lane_a takes the first 8 bytes of
 * every 16-byte step, lane_b the second 8 bytes. lane_a is seeded with the
 * CRC32C of the length. The final 16 bytes of the string are always mixed
 * in last (they may overlap the last loop step), and the two lanes are then
 * combined with one more CRC32C step.
 */
static LJ_AINLINE uint32_t lj_str_hash_16_128(const char* str, size_t len)
{
  uint64_t lane_a = 0, lane_b = 0;
  uint32_t off = 0;

  lane_a = __crc32cw(lane_a, len);

  while (off < len - 16) {
    /* Each step's CRC is added onto the running value, not assigned. */
    lane_a += __crc32cd(lane_a, *cast_uint64p(str + off));
    lane_b += __crc32cd(lane_b, *cast_uint64p(str + off + 8));
    off += 16;
  }

  lane_a = __crc32cd(lane_a, *cast_uint64p(str + len - 16));
  lane_b = __crc32cd(lane_b, *cast_uint64p(str + len - 8));

  return __crc32cw(lane_a, lane_b);
}
86+
87+
/* **************************************************************************
88+
*
89+
* Following is code about hashing string with length >= 128
90+
*
91+
* **************************************************************************
92+
*/
93+
94+
/* Pre-computed random offsets, two per chunk-size order (first index);
 * populated by arm64_init_random(), read by get_random_pos_unsafe(). */
static uint32_t random_pos[32][2];
95+
static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,
96+
4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
97+
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,
98+
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
99+
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 };
100+
101+
/* return floor(log2(n)) */
102+
static LJ_AINLINE uint32_t log2_floor(uint32_t n)
103+
{
104+
if (n <= 127) {
105+
return log2_tab[n];
106+
}
107+
108+
if ((n >> 8) <= 127) {
109+
return log2_tab[n >> 8] + 8;
110+
}
111+
112+
if ((n >> 16) <= 127) {
113+
return log2_tab[n >> 16] + 16;
114+
}
115+
116+
if ((n >> 24) <= 127) {
117+
return log2_tab[n >> 24] + 24;
118+
}
119+
120+
return 31;
121+
}
122+
123+
#define POW2_MASK(n) ((1L << (n)) - 1)
124+
/* This function is to populate `random_pos` such that random_pos[i][*]
125+
* contains random value in the range of [2**i, 2**(i+1)).
126+
*/
127+
static void arm64_init_random(void)
128+
{
129+
int i, seed, rml;
130+
131+
/* Calculate the ceil(log2(RAND_MAX)) */
132+
rml = log2_floor(RAND_MAX);
133+
if (RAND_MAX & (RAND_MAX - 1)) {
134+
rml += 1;
135+
}
136+
137+
/* Init seed */
138+
seed = 0;
139+
seed = __crc32cw(seed, getpid());
140+
seed = __crc32cw(seed, time(NULL));
141+
srandom(seed);
142+
143+
/* Now start to populate the random_pos[][]. */
144+
for (i = 0; i < 3; i++) {
145+
/* No need to provide random value for chunk smaller than 8 bytes */
146+
random_pos[i][0] = random_pos[i][1] = 0;
147+
}
148+
149+
for (; i < rml; i++) {
150+
random_pos[i][0] = random() & POW2_MASK(i+1);
151+
random_pos[i][1] = random() & POW2_MASK(i+1);
152+
}
153+
154+
for (; i < 31; i++) {
155+
int j;
156+
for (j = 0; j < 2; j++) {
157+
uint32_t v, scale;
158+
scale = random_pos[i - rml][0];
159+
if (scale == 0) {
160+
scale = 1;
161+
}
162+
v = (random() * scale) & POW2_MASK(i+1);
163+
random_pos[i][j] = v;
164+
}
165+
}
166+
}
167+
#undef POW2_MASK
168+
169+
/*
 * Load-time hook: the GCC `constructor` attribute makes this run
 * automatically so that random_pos[][] is filled before first use.
 * NOTE(review): the "x64_" prefix looks carried over from the x64 variant
 * of this file; the name is kept because it is an external symbol.
 */
__attribute__((constructor)) void x64_init_random_constructor()
{
  arm64_init_random();
}
173+
174+
/* Return a pre-computed random number in the range of [1**chunk_sz_order,
175+
* 1**(chunk_sz_order+1)). It is "unsafe" in the sense that the return value
176+
* may be greater than chunk-size; it is up to the caller to make sure
177+
* "chunk-base + return-value-of-this-func" has valid virtual address.
178+
*/
179+
static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
180+
uint32_t idx)
181+
{
182+
uint32_t pos = random_pos[chunk_sz_order][idx & 1];
183+
return pos;
184+
}
185+
186+
/*
 * Hash a long string (lj_str_hash() routes len >= 128 here) by sampling it
 * instead of reading every byte.
 *
 * The string is divided into 16 equally sized chunks. Two pre-computed
 * random offsets (pos1, pos2) are chosen from the chunk-size order. 8-byte
 * words are sampled at those offsets and folded into two CRC32C
 * accumulators: h1 (seeded with the length) and h2. The first and last
 * 8 bytes of the string are always included. The result is the CRC32C of h2
 * folded into h1.
 */
static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
                                                  uint32_t len)
{
  uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
  uint32_t h1, h2;
  /* Holds full 64-bit samples; a 32-bit temporary would drop the upper
   * four bytes of every word before it reaches __crc32cd(). */
  uint64_t v;
  const char* chunk_ptr;

  chunk_num = 16;
  chunk_sz = len / chunk_num;
  chunk_sz_log2 = log2_floor(chunk_sz);

  pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
  pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);

  h1 = 0;
  h1 = __crc32cw(h1, len);
  h2 = 0;

  /* chunk_num/2 - 1 (= 7) iterations; chunk_ptr advances by ONE chunk each
   * time. Each iteration samples 8 bytes at pos1 from the current chunk
   * base (into h1) and 8 bytes at pos2 from the next chunk base (into h2). */
  for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
       chunk_ptr += chunk_sz, i++) {

    v = *cast_uint64p(chunk_ptr + pos1);
    h1 = __crc32cd(h1, v);

    v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
    h2 = __crc32cd(h2, v);
  }

  /* Final pair: one forward sample at pos1, and one sample taken backwards
   * from the end of the chunk at chunk_ptr. */
  v = *cast_uint64p(chunk_ptr + pos1);
  h1 = __crc32cd(h1, v);

  v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
  h2 = __crc32cd(h2, v);

  /* Always mix in the first and the last 8 bytes of the string. */
  h1 = __crc32cd(h1, *cast_uint64p(str));
  h2 = __crc32cd(h2, *cast_uint64p(str + len - 8));

  h1 = __crc32cw(h1, h2);
  return h1;
}
229+
230+
231+
/* NOTE: the "len" should not be zero */
232+
static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len)
233+
{
234+
if (len < 128) {
235+
if (len >= 16) {
236+
return lj_str_hash_16_128(str, len);
237+
}
238+
239+
if ((len >= 4) && (len < 16)) {
240+
return lj_str_hash_4_16(str, len);
241+
}
242+
243+
return lj_str_hash_1_4(str, len);
244+
}
245+
return lj_str_hash_128_above(str, len);
246+
}
247+
#define LJ_ARCH_STR_HASH lj_str_hash
248+
#else
249+
#undef LJ_ARCH_STR_HASH
250+
#endif
251+
#endif /*_LJ_STR_HASH_ARM64_H_*/

src/lj_str.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,11 @@ MSize
162162
lj_str_indep_hash(GCstr *str) {
163163
return lj_str_original_hash(strdata(str), str->len);
164164
}
165-
165+
#if defined(__aarch64__)
166+
#include "arm64/src/lj_str_hash_arm64.h"
167+
#else
166168
#include "x64/src/lj_str_hash_x64.h"
169+
#endif
167170

168171
#if defined(LJ_ARCH_STR_HASH)
169172
#define LJ_STR_HASH LJ_ARCH_STR_HASH

0 commit comments

Comments
 (0)