diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 28aeee6b0e4f..1f0a714c2280 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -56,6 +56,8 @@ Improvements SIGSEGV with the old unsafe "mmap hack", which has been replaced by MemorySegments. (Uwe Schindler, Robert Muir) +* GITHUB#15038: BitSet is now a sealed abstract class permitting only FixedBitSet and SparseFixedBitSet. + Optimizations --------------------- * GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina) diff --git a/lucene/core/src/java/org/apache/lucene/util/BitSet.java b/lucene/core/src/java/org/apache/lucene/util/BitSet.java index 347e05d2e523..586c34276afd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/BitSet.java @@ -22,9 +22,13 @@ /** * Base implementation for a bit set. * + *

We have explicitly sealed to keep call sites (e.g. nextSetBit/prevSetBit) at most bimorphic + * for better JIT performance. + * * @lucene.internal */ -public abstract class BitSet implements Bits, Accountable { +public abstract sealed class BitSet implements Bits, Accountable + permits FixedBitSet, SparseFixedBitSet { /** * Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}. NOTE: this diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java index 3104948cc4c6..1d3cfcb15bb3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java @@ -34,7 +34,7 @@ * * @lucene.internal */ -public class SparseFixedBitSet extends BitSet { +public final class SparseFixedBitSet extends BitSet { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java b/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java index efdebee71585..6d7d2eb1c7e8 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java @@ -38,6 +38,15 @@ public FixedBitSet copyOf(BitSet bs, int length) throws IOException { return set; } + @Override + protected FixedBitSet fromJavaUtilBitSet(java.util.BitSet set, int numBits) { + FixedBitSet fbs = new FixedBitSet(numBits); + for (int i = set.nextSetBit(0); i >= 0; i = set.nextSetBit(i + 1)) { + fbs.set(i); + } + return fbs; + } + @SuppressWarnings("NarrowCalculation") public void testApproximateCardinality() { // The approximate cardinality works in such a way that it should be pretty accurate on a bitset diff --git a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java index 4cb1b92a0a82..4458dc96bcb0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java @@ -35,7 +35,16 @@ public SparseFixedBitSet copyOf(BitSet bs, int length) throws IOException { } @Override - protected void assertEquals(BitSet set1, SparseFixedBitSet set2, int maxDoc) { + protected SparseFixedBitSet fromJavaUtilBitSet(java.util.BitSet set, int numBits) { + SparseFixedBitSet sfbs = new SparseFixedBitSet(numBits); + for (int i = set.nextSetBit(0); i >= 0; i = set.nextSetBit(i + 1)) { + sfbs.set(i); + } + return sfbs; + } + + @Override + protected void assertEquals(java.util.BitSet set1, SparseFixedBitSet set2, int maxDoc) { super.assertEquals(set1, set2, maxDoc); // check invariants of the sparse set int nonZeroLongCount = 0; diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java index 205d8861322a..408e118fd863 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java @@ -16,14 +16,10 @@ */ package org.apache.lucene.tests.util; -import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; -import java.util.Collection; -import java.util.Collections; import java.util.Random; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSetIterator; @@ -65,7 +61,9 @@ static java.util.BitSet randomSet(int numBits, float percentSet) { return randomSet(numBits, (int) (percentSet * numBits)); } - protected void assertEquals(BitSet set1, T set2, int maxDoc) { + protected abstract T fromJavaUtilBitSet(java.util.BitSet set, int numBits); + + protected void assertEquals(java.util.BitSet set1, T set2, int maxDoc) { for (int i = 0; i < maxDoc; ++i) { assertEquals("Different at " + i, set1.get(i), set2.get(i)); } @@ -75,9 +73,9 @@ protected void assertEquals(BitSet set1, T set2, int maxDoc) { public void testCardinality() throws IOException { final int numBits = 1 + random().nextInt(100000); for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); - assertEquals(set1.cardinality(), set2.cardinality()); + java.util.BitSet jdkSet = randomSet(numBits, percentSet); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); + assertEquals(jdkSet.cardinality(), luceneSet.cardinality()); } } @@ -85,10 +83,10 @@ public void testCardinality() throws IOException { public void testPrevSetBit() throws IOException { final int numBits = 1 + random().nextInt(100000); for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, percentSet); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); for (int i = 0; i < numBits; ++i) { - assertEquals(Integer.toString(i), set1.prevSetBit(i), set2.prevSetBit(i)); + assertEquals(Integer.toString(i), jdkSet.previousSetBit(i), luceneSet.prevSetBit(i)); } } } @@ -97,27 +95,10 @@ public void testPrevSetBit() throws IOException { public void testNextSetBit() throws IOException { final int numBits = 1 + random().nextInt(100000); for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, percentSet); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); for (int i = 0; i < numBits; ++i) { - assertEquals(set1.nextSetBit(i), set2.nextSetBit(i)); - } - } - } - - /** Test {@link BitSet#nextSetBit(int, int)}. */ - public void testNextSetBitInRange() throws IOException { - Random random = random(); - final int numBits = 1 + random().nextInt(100000); - for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); - for (int start = 0; start < numBits; ++start) { - int end = RandomNumbers.randomIntBetween(random, start + 1, numBits); - assertEquals( - "start=" + start + ", end=" + end + ", numBits=" + numBits, - set1.nextSetBit(start, end), - set2.nextSetBit(start, end)); + assertEquals(normalizeJdkNextSetBit(jdkSet.nextSetBit(i)), luceneSet.nextSetBit(i)); } } } @@ -126,31 +107,32 @@ public void testNextSetBitInRange() throws IOException { public void testSet() throws IOException { Random random = random(); final int numBits = 1 + random.nextInt(100000); - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, 0); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); final int iters = 10000 + random.nextInt(10000); for (int i = 0; i < iters; ++i) { final int index = random.nextInt(numBits); - set1.set(index); - set2.set(index); + jdkSet.set(index); + luceneSet.set(index); } - assertEquals(set1, set2, numBits); + assertEquals(jdkSet, luceneSet, numBits); } /** Test the {@link BitSet#getAndSet} method. */ public void testGetAndSet() throws IOException { Random random = random(); final int numBits = 1 + random.nextInt(100000); - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, 0); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); final int iters = 10000 + random.nextInt(10000); for (int i = 0; i < iters; ++i) { final int index = random.nextInt(numBits); - boolean v1 = set1.getAndSet(index); - boolean v2 = set2.getAndSet(index); + boolean v1 = jdkSet.get(index); + jdkSet.set(index); // emulate getAndSet + boolean v2 = luceneSet.getAndSet(index); assertEquals(v1, v2); } - assertEquals(set1, set2, numBits); + assertEquals(jdkSet, luceneSet, numBits); } /** Test the {@link BitSet#clear(int)} method. */ @@ -158,15 +140,15 @@ public void testClear() throws IOException { Random random = random(); final int numBits = 1 + random.nextInt(100000); for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, percentSet); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); final int iters = 1 + random.nextInt(numBits * 2); for (int i = 0; i < iters; ++i) { final int index = random.nextInt(numBits); - set1.clear(index); - set2.clear(index); + jdkSet.clear(index); + luceneSet.clear(index); } - assertEquals(set1, set2, numBits); + assertEquals(jdkSet, luceneSet, numBits); } } @@ -175,15 +157,19 @@ public void testClearRange() throws IOException { Random random = random(); final int numBits = 1 + random.nextInt(100000); for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, percentSet); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); final int iters = atLeast(random, 10); for (int i = 0; i < iters; ++i) { final int from = random.nextInt(numBits); final int to = random.nextInt(numBits + 1); - set1.clear(from, to); - set2.clear(from, to); - assertEquals(set1, set2, numBits); + if (from > to) { + // JDK would throw, Lucene no-ops: so skip + continue; + } + jdkSet.clear(from, to); + luceneSet.clear(from, to); + assertEquals(jdkSet, luceneSet, numBits); } } } @@ -193,17 +179,21 @@ public void testClearAll() throws IOException { Random random = random(); final int numBits = 1 + random.nextInt(100000); for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) { - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits); - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, percentSet); + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); final int iters = atLeast(random, 10); for (int i = 0; i < iters; ++i) { - set1.clear(); - set2.clear(); - assertEquals(set1, set2, numBits); + jdkSet.clear(); + luceneSet.clear(); + assertEquals(jdkSet, luceneSet, numBits); } } } + private static int normalizeJdkNextSetBit(int bit) { + return bit == -1 ? DocIdSetIterator.NO_MORE_DOCS : bit; + } + private DocIdSet randomCopy(BitSet set, int numBits) throws IOException { switch (random().nextInt(5)) { case 0: @@ -234,22 +224,31 @@ private DocIdSet randomCopy(BitSet set, int numBits) throws IOException { private void testOr(float load) throws IOException { final int numBits = 1 + random().nextInt(100000); - BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); // empty - T set2 = copyOf(set1, numBits); + java.util.BitSet jdkSet = randomSet(numBits, 0); // empty + T luceneSet = fromJavaUtilBitSet(jdkSet, numBits); final int iterations = atLeast(10); for (int iter = 0; iter < iterations; ++iter) { DocIdSet otherSet = - randomCopy(new JavaUtilBitSet(randomSet(numBits, load), numBits), numBits); + randomCopy(fromJavaUtilBitSet(randomSet(numBits, load), numBits), numBits); DocIdSetIterator otherIterator = otherSet.iterator(); if (otherIterator != null) { - set1.or(otherIterator); - set2.or(otherSet.iterator()); - assertEquals(set1, set2, numBits); + jdkSet.or(toJavaUtil(otherIterator, numBits)); + luceneSet.or(otherSet.iterator()); + assertEquals(jdkSet, luceneSet, numBits); } } } + /** Helper: consume a DocIdSetIterator into a java.util.BitSet. */ + private static java.util.BitSet toJavaUtil(DocIdSetIterator it, int numBits) throws IOException { + java.util.BitSet bs = new java.util.BitSet(numBits); + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + bs.set(doc); + } + return bs; + } + /** Test {@link BitSet#or(DocIdSetIterator)} on sparse sets. */ public void testOrSparse() throws IOException { testOr(0.001f); @@ -264,89 +263,4 @@ public void testOrDense() throws IOException { public void testOrRandom() throws IOException { testOr(random().nextFloat()); } - - private static class JavaUtilBitSet extends BitSet { - - private final java.util.BitSet bitSet; - private final int numBits; - - JavaUtilBitSet(java.util.BitSet bitSet, int numBits) { - this.bitSet = bitSet; - this.numBits = numBits; - } - - @Override - public void clear() { - bitSet.clear(); - } - - @Override - public void clear(int index) { - bitSet.clear(index); - } - - @Override - public boolean get(int index) { - return bitSet.get(index); - } - - @Override - public boolean getAndSet(int index) { - boolean v = get(index); - set(index); - return v; - } - - @Override - public int length() { - return numBits; - } - - @Override - public long ramBytesUsed() { - return -1; - } - - @Override - public Collection getChildResources() { - return Collections.emptyList(); - } - - @Override - public void set(int i) { - bitSet.set(i); - } - - @Override - public void clear(int startIndex, int endIndex) { - if (startIndex >= endIndex) { - return; - } - bitSet.clear(startIndex, endIndex); - } - - @Override - public int cardinality() { - return bitSet.cardinality(); - } - - @Override - public int approximateCardinality() { - return bitSet.cardinality(); - } - - @Override - public int prevSetBit(int index) { - return bitSet.previousSetBit(index); - } - - @Override - public int nextSetBit(int start, int upperBound) { - int next = bitSet.nextSetBit(start); - if (next == -1 || next >= upperBound) { - next = DocIdSetIterator.NO_MORE_DOCS; - } - return next; - } - } }