Skip to content

Commit 8d99540

Browse files
author
Zoltan Herczeg
committed
JIT compiler update
1 parent 8d56880 commit 8d99540

16 files changed

+1850
-350
lines changed

src/sljit/sljitLir.c

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -993,14 +993,14 @@ static const char* op0_names[] = {
993993
static const char* op1_names[] = {
994994
"", ".u8", ".s8", ".u16",
995995
".s16", ".u32", ".s32", "32",
996-
".p", "not", "clz",
996+
".p", "not", "clz", "ctz"
997997
};
998998

999999
static const char* op2_names[] = {
10001000
"add", "addc", "sub", "subc",
10011001
"mul", "and", "or", "xor",
10021002
"shl", "mshl", "lshr", "mlshr",
1003-
"ashr", "mashr"
1003+
"ashr", "mashr", "rotl", "rotr"
10041004
};
10051005

10061006
static const char* op_src_names[] = {
@@ -1326,7 +1326,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
13261326
}
13271327

13281328
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1329-
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ);
1329+
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CTZ);
13301330

13311331
switch (GET_OPCODE(op)) {
13321332
case SLJIT_NOT:
@@ -1387,7 +1387,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
13871387
}
13881388

13891389
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1390-
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_MASHR);
1390+
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ROTR);
13911391

13921392
switch (GET_OPCODE(op)) {
13931393
case SLJIT_AND:
@@ -1423,6 +1423,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
14231423
CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
14241424
CHECK_ARGUMENT((op & SLJIT_32) == (compiler->last_flags & SLJIT_32));
14251425
break;
1426+
case SLJIT_ROTL:
1427+
case SLJIT_ROTR:
1428+
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
1429+
break;
14261430
default:
14271431
SLJIT_UNREACHABLE();
14281432
break;
@@ -1456,6 +1460,35 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
14561460
CHECK_RETURN_OK;
14571461
}
14581462

1463+
/* Argument checker and verbose printer for sljit_emit_shift_into.
   When SLJIT_ARGUMENT_CHECKS is enabled it verifies that:
     - the opcode is one of SLJIT_SHL, SLJIT_LSHR, SLJIT_MSHL, SLJIT_MLSHR
     - op carries no bits other than the opcode byte, SLJIT_32 and
       SLJIT_SHIFT_INTO_NON_ZERO
     - src_dst passes FUNCTION_CHECK_IS_REG
     - src1 / src1w and src2 / src2w pass FUNCTION_CHECK_SRC
   When SLJIT_VERBOSE is enabled and compiler->verbose is set, a one line
   textual form of the operation is written to compiler->verbose.
   Finishes with CHECK_RETURN_OK. */
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
1464+
sljit_s32 src_dst,
1465+
sljit_s32 src1, sljit_sw src1w,
1466+
sljit_s32 src2, sljit_sw src2w)
1467+
{
1468+
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1469+
CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_LSHR
1470+
|| GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR);
1471+
/* No flag bits are allowed apart from SLJIT_32 and SLJIT_SHIFT_INTO_NON_ZERO. */
CHECK_ARGUMENT((op & ~(0xff | SLJIT_32 | SLJIT_SHIFT_INTO_NON_ZERO)) == 0);
1472+
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_dst));
1473+
FUNCTION_CHECK_SRC(src1, src1w);
1474+
FUNCTION_CHECK_SRC(src2, src2w);
1475+
#endif
1476+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
1477+
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
1478+
/* Prints <opcode name>[32].into[.nz]; the name is looked up in op2_names. */
fprintf(compiler->verbose, " %s%s.into%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32",
1479+
(op & SLJIT_SHIFT_INTO_NON_ZERO) ? ".nz" : "");
1480+
1481+
/* Operands are printed in the order: src_dst, src1, src2. */
sljit_verbose_reg(compiler, src_dst);
1482+
fprintf(compiler->verbose, ", ");
1483+
sljit_verbose_param(compiler, src1, src1w);
1484+
fprintf(compiler->verbose, ", ");
1485+
sljit_verbose_param(compiler, src2, src2w);
1486+
fprintf(compiler->verbose, "\n");
1487+
}
1488+
#endif
1489+
CHECK_RETURN_OK;
1490+
}
1491+
14591492
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
14601493
sljit_s32 src, sljit_sw srcw)
14611494
{
@@ -2809,6 +2842,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
28092842
return SLJIT_ERR_UNSUPPORTED;
28102843
}
28112844

2845+
/* Placeholder variant of sljit_emit_shift_into: it emits nothing.
   Every argument is marked as unused, SLJIT_UNREACHABLE() is invoked,
   and SLJIT_ERR_UNSUPPORTED is returned.
   NOTE(review): presumably compiled only for the unsupported
   configuration, like the neighbouring stubs - confirm against the
   enclosing #if, which is not visible in this hunk. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2846+
sljit_s32 src_dst,
2847+
sljit_s32 src1, sljit_sw src1w,
2848+
sljit_s32 src2, sljit_sw src2w)
2849+
{
2850+
SLJIT_UNUSED_ARG(compiler);
2851+
SLJIT_UNUSED_ARG(op);
2852+
SLJIT_UNUSED_ARG(src_dst);
2853+
SLJIT_UNUSED_ARG(src1);
2854+
SLJIT_UNUSED_ARG(src1w);
2855+
SLJIT_UNUSED_ARG(src2);
2856+
SLJIT_UNUSED_ARG(src2w);
2857+
SLJIT_UNREACHABLE();
2858+
return SLJIT_ERR_UNSUPPORTED;
2859+
}
2860+
28122861
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
28132862
sljit_s32 src, sljit_sw srcw)
28142863
{

src/sljit/sljitLir.h

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,8 @@ static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *
609609
static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
610610

611611
/* Returns with non-zero if the feature or limitation type passed as its
612-
argument is present on the current CPU.
612+
argument is present on the current CPU. The return value is one, if a
613+
feature is fully supported, and it is two, if partially supported.
613614
614615
Some features (e.g. floating point operations) require hardware (CPU)
615616
support while others (e.g. move with update) are emulated if not available.
@@ -625,10 +626,14 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
625626
#define SLJIT_HAS_ZERO_REGISTER 2
626627
/* [Emulated] Count leading zero is supported. */
627628
#define SLJIT_HAS_CLZ 3
629+
/* [Emulated] Count trailing zero is supported. */
630+
#define SLJIT_HAS_CTZ 4
631+
/* [Emulated] Rotate left/right is supported. */
632+
#define SLJIT_HAS_ROT 5
628633
/* [Emulated] Conditional move is supported. */
629-
#define SLJIT_HAS_CMOV 4
634+
#define SLJIT_HAS_CMOV 6
630635
/* [Emulated] Prefetch instruction is available (emulated as a nop). */
631-
#define SLJIT_HAS_PREFETCH 5
636+
#define SLJIT_HAS_PREFETCH 7
632637

633638
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
634639
/* [Not emulated] SSE2 support is available on x86. */
@@ -1061,6 +1066,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
10611066
Note: immediate source argument is not supported */
10621067
#define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
10631068
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32)
1069+
/* Count trailing zeroes
1070+
Flags: - (may destroy flags)
1071+
Note: immediate source argument is not supported */
1072+
#define SLJIT_CTZ (SLJIT_OP1_BASE + 11)
1073+
#define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32)
10641074

10651075
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
10661076
sljit_s32 dst, sljit_sw dstw,
@@ -1132,6 +1142,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
11321142
always masked by the length of the shift operation. */
11331143
#define SLJIT_MASHR (SLJIT_OP2_BASE + 13)
11341144
#define SLJIT_MASHR32 (SLJIT_MASHR | SLJIT_32)
1145+
/* Flags: - (may destroy flags)
1146+
Let bit_length be the length of the rotate operation: 32 or 64.
1147+
The second operand is always masked by (bit_length - 1). */
1148+
#define SLJIT_ROTL (SLJIT_OP2_BASE + 14)
1149+
#define SLJIT_ROTL32 (SLJIT_ROTL | SLJIT_32)
1150+
/* Flags: - (may destroy flags)
1151+
Let bit_length be the length of the rotate operation: 32 or 64.
1152+
The second operand is always masked by (bit_length - 1). */
1153+
#define SLJIT_ROTR (SLJIT_OP2_BASE + 15)
1154+
#define SLJIT_ROTR32 (SLJIT_ROTR | SLJIT_32)
11351155

11361156
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
11371157
sljit_s32 dst, sljit_sw dstw,
@@ -1145,6 +1165,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
11451165
sljit_s32 src1, sljit_sw src1w,
11461166
sljit_s32 src2, sljit_sw src2w);
11471167

1168+
/* Emit a left or right shift operation, where the bits shifted
1169+
in come from a separate source operand. All operands are
1170+
interpreted as unsigned integers.
1171+
1172+
In the following, the value_mask variable is 31 for 32 bit
1173+
operations and word_size - 1 otherwise.
1174+
1175+
op must be one of the following operations:
1176+
SLJIT_SHL or SLJIT_SHL32:
1177+
src_dst <<= src2
1178+
src_dst |= ((src1 >> 1) >> (src2 ^ value_mask))
1179+
SLJIT_MSHL or SLJIT_MSHL32:
1180+
src2 &= value_mask
1181+
perform the SLJIT_SHL or SLJIT_SHL32 operation
1182+
SLJIT_LSHR or SLJIT_LSHR32:
1183+
src_dst >>= src2
1184+
src_dst |= ((src1 << 1) << (src2 ^ value_mask))
1185+
SLJIT_MLSHR or SLJIT_MLSHR32:
1186+
src2 &= value_mask
1187+
perform the SLJIT_LSHR or SLJIT_LSHR32 operation
1188+
1189+
op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO
1190+
1191+
src_dst must be a register whose content is updated after
1192+
the operation is completed
1193+
src1 / src1w contains the bits which are shifted into src_dst
1194+
src2 / src2w contains the shift amount
1195+
1196+
Note: a rotate operation can be performed if src_dst and
1197+
src1 are set to the same register
1198+
1199+
Flags: - (may destroy flags) */
1200+
1201+
/* The src2 contains a non-zero value. Improves the generated
1202+
code on certain architectures, which provides a small
1203+
performance improvement. */
1204+
#define SLJIT_SHIFT_INTO_NON_ZERO 0x200
1205+
1206+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
1207+
sljit_s32 src_dst,
1208+
sljit_s32 src1, sljit_sw src1w,
1209+
sljit_s32 src2, sljit_sw src2w);
1210+
11481211
/* Starting index of opcodes for sljit_emit_op_src. */
11491212
#define SLJIT_OP_SRC_BASE 128
11501213

src/sljit/sljitNativeARM_32.c

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
109109
#define ORR 0xe1800000
110110
#define PUSH 0xe92d0000
111111
#define POP 0xe8bd0000
112+
#define RBIT 0xe6ff0f30
112113
#define RSB 0xe0600000
113114
#define RSC 0xe0e00000
114115
#define SBC 0xe0c00000
@@ -959,12 +960,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
959960
#endif
960961

961962
case SLJIT_HAS_CLZ:
963+
case SLJIT_HAS_ROT:
962964
case SLJIT_HAS_CMOV:
963965
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
966+
case SLJIT_HAS_CTZ:
964967
case SLJIT_HAS_PREFETCH:
965968
#endif
966969
return 1;
967970

971+
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
972+
case SLJIT_HAS_CTZ:
973+
return 2;
974+
#endif
975+
968976
default:
969977
return 0;
970978
}
@@ -1478,11 +1486,24 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
14781486
return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));
14791487

14801488
case SLJIT_CLZ:
1481-
SLJIT_ASSERT(!(flags & INV_IMM));
1482-
SLJIT_ASSERT(!(src2 & SRC2_IMM));
1489+
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
14831490
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
14841491
return SLJIT_SUCCESS;
14851492

1493+
case SLJIT_CTZ:
1494+
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1495+
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1496+
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
1497+
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
1498+
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
1499+
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
1500+
FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
1501+
return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
1502+
#else /* !SLJIT_CONFIG_ARM_V5 */
1503+
FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
1504+
return push_inst(compiler, CLZ | RD(dst) | RM(dst));
1505+
#endif /* SLJIT_CONFIG_ARM_V5 */
1506+
14861507
case SLJIT_ADD:
14871508
SLJIT_ASSERT(!(flags & INV_IMM));
14881509

@@ -1553,6 +1574,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
15531574
is_masked = GET_OPCODE(op) == SLJIT_MASHR;
15541575
break;
15551576

1577+
case SLJIT_ROTL:
1578+
if (compiler->shift_imm == 0x20) {
1579+
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
1580+
src2 = TMP_REG2;
1581+
} else
1582+
compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
1583+
/* fallthrough */
1584+
1585+
case SLJIT_ROTR:
1586+
shift_type = 3;
1587+
is_masked = 0;
1588+
break;
1589+
15561590
default:
15571591
SLJIT_UNREACHABLE();
15581592
return SLJIT_SUCCESS;
@@ -2125,6 +2159,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
21252159
return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
21262160

21272161
case SLJIT_CLZ:
2162+
case SLJIT_CTZ:
21282163
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
21292164
}
21302165

@@ -2165,6 +2200,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
21652200
case SLJIT_MLSHR:
21662201
case SLJIT_ASHR:
21672202
case SLJIT_MASHR:
2203+
case SLJIT_ROTL:
2204+
case SLJIT_ROTR:
21682205
if (src2 & SLJIT_IMM) {
21692206
compiler->shift_imm = src2w & 0x1f;
21702207
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
@@ -2188,6 +2225,63 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
21882225
return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
21892226
}
21902227

2228+
/* 32 bit ARM implementation of sljit_emit_shift_into.

   Shifts src_dst left (SLJIT_SHL / SLJIT_MSHL) or right (SLJIT_LSHR /
   SLJIT_MLSHR) by src2, and ORs in the bits of src1 shifted in the
   opposite direction, so the vacated bits of src_dst are filled from src1.

   compiler        compiler instance receiving the instructions
   op              shift opcode, optionally combined with flag bits; only
                   GET_OPCODE(op) is used after the argument check
   src_dst         register which is both shifted and updated
   src1 / src1w    operand providing the bits shifted in
   src2 / src2w    shift amount

   Returns SLJIT_SUCCESS or the error code produced by a failed emit step.
   TMP_REG1 and TMP_REG2 may be overwritten. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2229+
sljit_s32 src_dst,
2230+
sljit_s32 src1, sljit_sw src1w,
2231+
sljit_s32 src2, sljit_sw src2w)
2232+
{
2233+
sljit_s32 is_left;
2234+
2235+
CHECK_ERROR();
2236+
CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
2237+
2238+
op = GET_OPCODE(op);
2239+
is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);
2240+
2241+
/* Shifting a register into itself is a rotate, so it is delegated to
   sljit_emit_op2 with SLJIT_ROTL / SLJIT_ROTR. */
if (src_dst == src1) {
2242+
SLJIT_SKIP_CHECKS(compiler);
2243+
return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w);
2244+
}
2245+
2246+
ADJUST_LOCAL_OFFSET(src1, src1w);
2247+
ADJUST_LOCAL_OFFSET(src2, src2w);
2248+
2249+
/* Shift type of ROR is 3. */
2250+
/* An immediate shift amount is reduced to 0..31; a zero shift leaves
   src_dst unchanged, so nothing is emitted. A shift amount stored in
   memory is loaded into TMP_REG2. */
if (src2 & SLJIT_IMM) {
2251+
src2w &= 0x1f;
2252+
2253+
if (src2w == 0)
2254+
return SLJIT_SUCCESS;
2255+
} else if (src2 & SLJIT_MEM) {
2256+
FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src2, src2w, TMP_REG2));
2257+
src2 = TMP_REG2;
2258+
}
2259+
2260+
/* A src1 operand which is not a register is placed into TMP_REG1. */
if (src1 & SLJIT_MEM) {
2261+
FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
2262+
src1 = TMP_REG1;
2263+
} else if (src1 & SLJIT_IMM) {
2264+
FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
2265+
src1 = TMP_REG1;
2266+
}
2267+
2268+
/* Immediate shift amount (1..31 here): two instructions.
   The value at bit 5 selects the shift type (0: LSL, 1: LSR) and the
   immediate amount is stored from bit 7. The second instruction shifts
   src1 in the opposite direction by (src2w ^ 0x1f) + 1, which equals
   32 - src2w, and ORs it into src_dst. */
if (src2 & SLJIT_IMM) {
2269+
FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM(src_dst) | ((sljit_uw)(is_left ? 0 : 1) << 5) | ((sljit_uw)src2w << 7)));
2270+
src2w = (src2w ^ 0x1f) + 1;
2271+
return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | ((sljit_uw)src2w << 7));
2272+
}
2273+
2274+
/* The masked variants reduce the shift amount to 0..31 in TMP_REG2. */
if (op == SLJIT_MSHL || op == SLJIT_MLSHR) {
2275+
FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
2276+
src2 = TMP_REG2;
2277+
}
2278+
2279+
/* Shift amount in a register: four instructions.
     src_dst  = src_dst shifted by src2
     TMP_REG1 = src1 shifted by one in the opposite direction
     TMP_REG2 = src2 ^ 0x1f
     src_dst |= TMP_REG1 shifted by TMP_REG2 in the opposite direction
   The 0x10 bit selects the register specified shift form of the operand;
   RM8() presumably encodes the register holding the shift amount -
   TODO confirm against its definition, which is not visible here. */
FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM8(src2) | ((sljit_uw)(is_left ? 0 : 1) << 5) | 0x10 | RM(src_dst)));
2280+
FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | (1 << 7)));
2281+
FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
2282+
return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(TMP_REG1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | 0x10 | RM8(TMP_REG2));
2283+
}
2284+
21912285
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
21922286
sljit_s32 src, sljit_sw srcw)
21932287
{

0 commit comments

Comments
 (0)