Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ Improvements
SIGSEGV with the old unsafe "mmap hack", which has been replaced by MemorySegments.
(Uwe Schindler, Robert Muir)

* GITHUB#15038: BitSet is now a sealed abstract class permitting only FixedBitSet and SparseFixedBitSet.

Optimizations
---------------------
* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)
Expand Down
6 changes: 5 additions & 1 deletion lucene/core/src/java/org/apache/lucene/util/BitSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@
/**
* Base implementation for a bit set.
*
* <p>We have explicitly sealed to keep call sites (e.g. nextSetBit/prevSetBit) at most bimorphic
* for better JIT performance.
*
* @lucene.internal
*/
public abstract class BitSet implements Bits, Accountable {
public abstract sealed class BitSet implements Bits, Accountable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a small comment explaining that we're doing this to help call sites of methods on the BitSet class be bimorphic at most, to help with performance?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added small comment explaining this.

permits FixedBitSet, SparseFixedBitSet {

/**
* Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}. NOTE: this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
*
* @lucene.internal
*/
public class SparseFixedBitSet extends BitSet {
public final class SparseFixedBitSet extends BitSet {

private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ public FixedBitSet copyOf(BitSet bs, int length) throws IOException {
return set;
}

@Override
protected FixedBitSet fromJavaUtilBitSet(java.util.BitSet set, int numBits) {
FixedBitSet fbs = new FixedBitSet(numBits);
for (int i = set.nextSetBit(0); i >= 0; i = set.nextSetBit(i + 1)) {
fbs.set(i);
}
return fbs;
}

@SuppressWarnings("NarrowCalculation")
public void testApproximateCardinality() {
// The approximate cardinality works in such a way that it should be pretty accurate on a bitset
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,16 @@ public SparseFixedBitSet copyOf(BitSet bs, int length) throws IOException {
}

@Override
protected void assertEquals(BitSet set1, SparseFixedBitSet set2, int maxDoc) {
protected SparseFixedBitSet fromJavaUtilBitSet(java.util.BitSet set, int numBits) {
SparseFixedBitSet sfbs = new SparseFixedBitSet(numBits);
for (int i = set.nextSetBit(0); i >= 0; i = set.nextSetBit(i + 1)) {
sfbs.set(i);
}
return sfbs;
}

@Override
protected void assertEquals(java.util.BitSet set1, SparseFixedBitSet set2, int maxDoc) {
super.assertEquals(set1, set2, maxDoc);
// check invariants of the sparse set
int nonZeroLongCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,10 @@
*/
package org.apache.lucene.tests.util;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Random;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
Expand Down Expand Up @@ -65,7 +61,9 @@ static java.util.BitSet randomSet(int numBits, float percentSet) {
return randomSet(numBits, (int) (percentSet * numBits));
}

protected void assertEquals(BitSet set1, T set2, int maxDoc) {
protected abstract T fromJavaUtilBitSet(java.util.BitSet set, int numBits);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I worry that copying from a java.util.BitSet defeats the purpose of the test, since most tests would be comparing two same implementations of BitSet instead of JavaUtilBitSet vs. a BitSet implementation. Can we try to refactor tests to work directly against a java.util.BitSet without copying to a BitSet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tried refactoring it using java.util.Bitset.
Had to hack through two issues.

  1. java.util.BitSet.nextSetBit() returns -1 when there are no more set bits while lucene Bitset return DocIdSetIterator.NO_MORE_DOCS i.e. 2147483647.
  2. java.util.BitSet.clear() throws error if from > to while lucene does nothing.


protected void assertEquals(java.util.BitSet set1, T set2, int maxDoc) {
for (int i = 0; i < maxDoc; ++i) {
assertEquals("Different at " + i, set1.get(i), set2.get(i));
}
Expand All @@ -75,20 +73,20 @@ protected void assertEquals(BitSet set1, T set2, int maxDoc) {
public void testCardinality() throws IOException {
final int numBits = 1 + random().nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
assertEquals(set1.cardinality(), set2.cardinality());
java.util.BitSet jdkSet = randomSet(numBits, percentSet);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
assertEquals(jdkSet.cardinality(), luceneSet.cardinality());
}
}

/** Test {@link BitSet#prevSetBit(int)}. */
public void testPrevSetBit() throws IOException {
final int numBits = 1 + random().nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, percentSet);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
for (int i = 0; i < numBits; ++i) {
assertEquals(Integer.toString(i), set1.prevSetBit(i), set2.prevSetBit(i));
assertEquals(Integer.toString(i), jdkSet.previousSetBit(i), luceneSet.prevSetBit(i));
}
}
}
Expand All @@ -97,27 +95,10 @@ public void testPrevSetBit() throws IOException {
public void testNextSetBit() throws IOException {
final int numBits = 1 + random().nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, percentSet);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
for (int i = 0; i < numBits; ++i) {
assertEquals(set1.nextSetBit(i), set2.nextSetBit(i));
}
}
}

/** Test {@link BitSet#nextSetBit(int, int)}. */
public void testNextSetBitInRange() throws IOException {
Random random = random();
final int numBits = 1 + random().nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
for (int start = 0; start < numBits; ++start) {
int end = RandomNumbers.randomIntBetween(random, start + 1, numBits);
assertEquals(
"start=" + start + ", end=" + end + ", numBits=" + numBits,
set1.nextSetBit(start, end),
set2.nextSetBit(start, end));
assertEquals(normalizeJdkNextSetBit(jdkSet.nextSetBit(i)), luceneSet.nextSetBit(i));
}
}
}
Expand All @@ -126,47 +107,48 @@ public void testNextSetBitInRange() throws IOException {
public void testSet() throws IOException {
Random random = random();
final int numBits = 1 + random.nextInt(100000);
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, 0);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
final int iters = 10000 + random.nextInt(10000);
for (int i = 0; i < iters; ++i) {
final int index = random.nextInt(numBits);
set1.set(index);
set2.set(index);
jdkSet.set(index);
luceneSet.set(index);
}
assertEquals(set1, set2, numBits);
assertEquals(jdkSet, luceneSet, numBits);
}

/** Test the {@link BitSet#getAndSet} method. */
public void testGetAndSet() throws IOException {
Random random = random();
final int numBits = 1 + random.nextInt(100000);
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, 0);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
final int iters = 10000 + random.nextInt(10000);
for (int i = 0; i < iters; ++i) {
final int index = random.nextInt(numBits);
boolean v1 = set1.getAndSet(index);
boolean v2 = set2.getAndSet(index);
boolean v1 = jdkSet.get(index);
jdkSet.set(index); // emulate getAndSet
boolean v2 = luceneSet.getAndSet(index);
assertEquals(v1, v2);
}
assertEquals(set1, set2, numBits);
assertEquals(jdkSet, luceneSet, numBits);
}

/** Test the {@link BitSet#clear(int)} method. */
public void testClear() throws IOException {
Random random = random();
final int numBits = 1 + random.nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, percentSet);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
final int iters = 1 + random.nextInt(numBits * 2);
for (int i = 0; i < iters; ++i) {
final int index = random.nextInt(numBits);
set1.clear(index);
set2.clear(index);
jdkSet.clear(index);
luceneSet.clear(index);
}
assertEquals(set1, set2, numBits);
assertEquals(jdkSet, luceneSet, numBits);
}
}

Expand All @@ -175,15 +157,19 @@ public void testClearRange() throws IOException {
Random random = random();
final int numBits = 1 + random.nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, percentSet);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
final int iters = atLeast(random, 10);
for (int i = 0; i < iters; ++i) {
final int from = random.nextInt(numBits);
final int to = random.nextInt(numBits + 1);
set1.clear(from, to);
set2.clear(from, to);
assertEquals(set1, set2, numBits);
if (from > to) {
// JDK would throw, Lucene no-ops: so skip
continue;
}
jdkSet.clear(from, to);
luceneSet.clear(from, to);
assertEquals(jdkSet, luceneSet, numBits);
}
}
}
Expand All @@ -193,17 +179,21 @@ public void testClearAll() throws IOException {
Random random = random();
final int numBits = 1 + random.nextInt(100000);
for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, percentSet);
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);
final int iters = atLeast(random, 10);
for (int i = 0; i < iters; ++i) {
set1.clear();
set2.clear();
assertEquals(set1, set2, numBits);
jdkSet.clear();
luceneSet.clear();
assertEquals(jdkSet, luceneSet, numBits);
}
}
}

private static int normalizeJdkNextSetBit(int bit) {
return bit == -1 ? DocIdSetIterator.NO_MORE_DOCS : bit;
}

private DocIdSet randomCopy(BitSet set, int numBits) throws IOException {
switch (random().nextInt(5)) {
case 0:
Expand Down Expand Up @@ -234,22 +224,31 @@ private DocIdSet randomCopy(BitSet set, int numBits) throws IOException {

private void testOr(float load) throws IOException {
final int numBits = 1 + random().nextInt(100000);
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); // empty
T set2 = copyOf(set1, numBits);
java.util.BitSet jdkSet = randomSet(numBits, 0); // empty
T luceneSet = fromJavaUtilBitSet(jdkSet, numBits);

final int iterations = atLeast(10);
for (int iter = 0; iter < iterations; ++iter) {
DocIdSet otherSet =
randomCopy(new JavaUtilBitSet(randomSet(numBits, load), numBits), numBits);
randomCopy(fromJavaUtilBitSet(randomSet(numBits, load), numBits), numBits);
DocIdSetIterator otherIterator = otherSet.iterator();
if (otherIterator != null) {
set1.or(otherIterator);
set2.or(otherSet.iterator());
assertEquals(set1, set2, numBits);
jdkSet.or(toJavaUtil(otherIterator, numBits));
luceneSet.or(otherSet.iterator());
assertEquals(jdkSet, luceneSet, numBits);
}
}
}

/** Helper: consume a DocIdSetIterator into a java.util.BitSet. */
private static java.util.BitSet toJavaUtil(DocIdSetIterator it, int numBits) throws IOException {
java.util.BitSet bs = new java.util.BitSet(numBits);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
bs.set(doc);
}
return bs;
}

/** Test {@link BitSet#or(DocIdSetIterator)} on sparse sets. */
public void testOrSparse() throws IOException {
testOr(0.001f);
Expand All @@ -264,89 +263,4 @@ public void testOrDense() throws IOException {
public void testOrRandom() throws IOException {
testOr(random().nextFloat());
}

private static class JavaUtilBitSet extends BitSet {

private final java.util.BitSet bitSet;
private final int numBits;

JavaUtilBitSet(java.util.BitSet bitSet, int numBits) {
this.bitSet = bitSet;
this.numBits = numBits;
}

@Override
public void clear() {
bitSet.clear();
}

@Override
public void clear(int index) {
bitSet.clear(index);
}

@Override
public boolean get(int index) {
return bitSet.get(index);
}

@Override
public boolean getAndSet(int index) {
boolean v = get(index);
set(index);
return v;
}

@Override
public int length() {
return numBits;
}

@Override
public long ramBytesUsed() {
return -1;
}

@Override
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}

@Override
public void set(int i) {
bitSet.set(i);
}

@Override
public void clear(int startIndex, int endIndex) {
if (startIndex >= endIndex) {
return;
}
bitSet.clear(startIndex, endIndex);
}

@Override
public int cardinality() {
return bitSet.cardinality();
}

@Override
public int approximateCardinality() {
return bitSet.cardinality();
}

@Override
public int prevSetBit(int index) {
return bitSet.previousSetBit(index);
}

@Override
public int nextSetBit(int start, int upperBound) {
int next = bitSet.nextSetBit(start);
if (next == -1 || next >= upperBound) {
next = DocIdSetIterator.NO_MORE_DOCS;
}
return next;
}
}
}
Loading