Skip to content

Commit 093d4a6

Browse files
kaivalnp authored and Greg Miller committed
Speed up NumericUtils#{subtract,add} (#15303)
1 parent 3340a54 commit 093d4a6

File tree

3 files changed

+182
-7
lines changed

3 files changed

+182
-7
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ Optimizations
9292

9393
* GITHUB#15397: NumericComparator: immediately check whether a segment is competitive with the recorded bottom (Martijn van Groningen)
9494

95+
* GITHUB#15303: Speed up NumericUtils#{add,subtract} by operating on integers instead of bytes. (Kaival Parikh)
96+
9597
Bug Fixes
9698
---------------------
9799
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.benchmark.jmh;
18+
19+
import java.math.BigInteger;
20+
import java.util.Arrays;
21+
import java.util.concurrent.ThreadLocalRandom;
22+
import java.util.concurrent.TimeUnit;
23+
import org.apache.lucene.util.NumericUtils;
24+
import org.openjdk.jmh.annotations.Benchmark;
25+
import org.openjdk.jmh.annotations.BenchmarkMode;
26+
import org.openjdk.jmh.annotations.Fork;
27+
import org.openjdk.jmh.annotations.Level;
28+
import org.openjdk.jmh.annotations.Measurement;
29+
import org.openjdk.jmh.annotations.Mode;
30+
import org.openjdk.jmh.annotations.OutputTimeUnit;
31+
import org.openjdk.jmh.annotations.Param;
32+
import org.openjdk.jmh.annotations.Scope;
33+
import org.openjdk.jmh.annotations.Setup;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Warmup;
36+
37+
@BenchmarkMode(Mode.Throughput)
38+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
39+
@State(Scope.Benchmark)
40+
// first iteration is complete garbage, so make sure we really warmup
41+
@Warmup(iterations = 4, time = 1)
42+
// real iterations. not useful to spend tons of time here, better to fork more
43+
@Measurement(iterations = 5, time = 1)
44+
// engage some noise reduction
45+
@Fork(
46+
value = 3,
47+
jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
48+
public class NumericUtilsBenchmark {
49+
@Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
50+
int size;
51+
52+
private byte[] subA;
53+
private byte[] subB;
54+
private byte[] subResult;
55+
private byte[] subExpected;
56+
57+
private byte[] addA;
58+
private byte[] addB;
59+
private byte[] addResult;
60+
private byte[] addExpected;
61+
62+
@Setup(Level.Iteration)
63+
public void subInit() {
64+
ThreadLocalRandom random = ThreadLocalRandom.current();
65+
66+
subA = new byte[size];
67+
subB = new byte[size];
68+
subResult = new byte[size];
69+
subExpected = new byte[size];
70+
71+
random.nextBytes(subA);
72+
random.nextBytes(subB);
73+
74+
// Treat as unsigned integers
75+
BigInteger aBig = new BigInteger(1, subA);
76+
BigInteger bBig = new BigInteger(1, subB);
77+
78+
// Swap a <-> b if a < b
79+
if (aBig.compareTo(bBig) < 0) {
80+
byte[] temp = subA;
81+
subA = subB;
82+
subB = temp;
83+
84+
BigInteger tempBig = aBig;
85+
aBig = bBig;
86+
bBig = tempBig;
87+
}
88+
89+
byte[] temp = aBig.subtract(bBig).toByteArray();
90+
if (temp.length == size + 1) { // BigInteger pads with extra 0 if MSB is 1
91+
assert temp[0] == 0;
92+
System.arraycopy(temp, 1, subExpected, 0, size);
93+
} else {
94+
System.arraycopy(temp, 0, subExpected, size - temp.length, temp.length);
95+
}
96+
}
97+
98+
@Setup(Level.Iteration)
99+
public void addInit() {
100+
ThreadLocalRandom random = ThreadLocalRandom.current();
101+
102+
addA = new byte[size];
103+
addB = new byte[size];
104+
addResult = new byte[size];
105+
addExpected = new byte[size];
106+
107+
random.nextBytes(addA);
108+
random.nextBytes(addB);
109+
110+
// Treat as unsigned integers
111+
BigInteger aBig = new BigInteger(1, addA);
112+
BigInteger bBig = new BigInteger(1, addB);
113+
114+
byte[] temp = aBig.add(bBig).toByteArray();
115+
if (temp.length == size + 1) { // BigInteger pads with extra 0 if MSB is 1
116+
if (temp[0] != 0) { // overflow
117+
addInit(); // re-init
118+
return;
119+
}
120+
System.arraycopy(temp, 1, addExpected, 0, size);
121+
} else {
122+
System.arraycopy(temp, 0, addExpected, size - temp.length, temp.length);
123+
}
124+
}
125+
126+
@Benchmark
127+
public void subtract() {
128+
NumericUtils.subtract(size, 0, subA, subB, subResult);
129+
assert Arrays.equals(subExpected, subResult);
130+
}
131+
132+
@Benchmark
133+
public void add() {
134+
NumericUtils.add(size, 0, addA, addB, addResult);
135+
assert Arrays.equals(addExpected, addResult);
136+
}
137+
}

lucene/core/src/java/org/apache/lucene/util/NumericUtils.java

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,35 @@ public static int sortableFloatBits(int bits) {
9393
public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
9494
int start = dim * bytesPerDim;
9595
int end = start + bytesPerDim;
96+
9697
int borrow = 0;
97-
for (int i = end - 1; i >= start; i--) {
98-
int diff = (a[i] & 0xff) - (b[i] & 0xff) - borrow;
98+
int i;
99+
100+
int limit = start + (bytesPerDim & ~3);
101+
for (i = end - 1; i >= limit; i--) {
102+
int diff = Byte.toUnsignedInt(a[i]) - Byte.toUnsignedInt(b[i]) - borrow;
99103
if (diff < 0) {
100-
diff += 256;
101104
borrow = 1;
102105
} else {
103106
borrow = 0;
104107
}
105108
result[i - start] = (byte) diff;
106109
}
110+
111+
for (i -= 3; i >= start; i -= 4) {
112+
int aInt = (int) BitUtil.VH_BE_INT.get(a, i);
113+
int bInt = (int) BitUtil.VH_BE_INT.get(b, i);
114+
115+
long diff = Integer.toUnsignedLong(aInt) - Integer.toUnsignedLong(bInt) - borrow;
116+
if (diff < 0) {
117+
borrow = 1;
118+
} else {
119+
borrow = 0;
120+
}
121+
122+
BitUtil.VH_BE_INT.set(result, i - start, (int) diff);
123+
}
124+
107125
if (borrow != 0) {
108126
throw new IllegalArgumentException("a < b");
109127
}
@@ -116,17 +134,35 @@ public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[]
116134
public static void add(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
117135
int start = dim * bytesPerDim;
118136
int end = start + bytesPerDim;
137+
119138
int carry = 0;
120-
for (int i = end - 1; i >= start; i--) {
121-
int digitSum = (a[i] & 0xff) + (b[i] & 0xff) + carry;
122-
if (digitSum > 255) {
123-
digitSum -= 256;
139+
int i;
140+
141+
int limit = start + (bytesPerDim & ~3);
142+
for (i = end - 1; i >= limit; i--) {
143+
int digitSum = Byte.toUnsignedInt(a[i]) + Byte.toUnsignedInt(b[i]) + carry;
144+
if (digitSum >= 256) {
124145
carry = 1;
125146
} else {
126147
carry = 0;
127148
}
128149
result[i - start] = (byte) digitSum;
129150
}
151+
152+
for (i -= 3; i >= start; i -= 4) {
153+
int aInt = (int) BitUtil.VH_BE_INT.get(a, i);
154+
int bInt = (int) BitUtil.VH_BE_INT.get(b, i);
155+
156+
long digitSum = Integer.toUnsignedLong(aInt) + Integer.toUnsignedLong(bInt) + carry;
157+
if (digitSum >= 0x100000000L) {
158+
carry = 1;
159+
} else {
160+
carry = 0;
161+
}
162+
163+
BitUtil.VH_BE_INT.set(result, i - start, (int) digitSum);
164+
}
165+
130166
if (carry != 0) {
131167
throw new IllegalArgumentException("a + b overflows bytesPerDim=" + bytesPerDim);
132168
}

0 commit comments

Comments
 (0)