Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.AcceptDocs;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
Expand Down Expand Up @@ -88,6 +89,7 @@ public CentroidIterator getCentroidIterator(
IndexInput centroids,
float[] targetQuery,
IndexInput postingListSlice,
AcceptDocs acceptDocs,
float visitRatio
) throws IOException {
final FieldEntry fieldEntry = fields.get(fieldInfo.number);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.lucene.util.Bits;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.codec.vectors.GenericFlatVectorReaders;
import org.elasticsearch.search.vectors.ESAcceptDocs;
import org.elasticsearch.search.vectors.IVFKnnSearchStrategy;

import java.io.Closeable;
Expand Down Expand Up @@ -114,6 +115,7 @@ public abstract CentroidIterator getCentroidIterator(
IndexInput centroids,
float[] target,
IndexInput postingListSlice,
AcceptDocs acceptDocs,
float visitRatio
) throws IOException;

Expand Down Expand Up @@ -283,8 +285,17 @@ public final void search(String field, float[] target, KnnCollector knnCollector
"vector query dimension: " + target.length + " differs from field dimension: " + fieldInfo.getVectorDimension()
);
}
final ESAcceptDocs esAcceptDocs;
if (acceptDocs instanceof ESAcceptDocs) {
esAcceptDocs = (ESAcceptDocs) acceptDocs;
} else {
esAcceptDocs = null;
}
int numVectors = getReaderForField(field).getFloatVectorValues(field).size();
float percentFiltered = Math.max(0f, Math.min(1f, (float) acceptDocs.cost() / numVectors));
float percentFiltered = Math.max(
0f,
Math.min(1f, (float) (esAcceptDocs == null ? acceptDocs.cost() : esAcceptDocs.approximateCost()) / numVectors)
);
float visitRatio = DYNAMIC_VISIT_RATIO;
// Search strategy may be null if this is being called from checkIndex (e.g. from a test)
if (knnCollector.getSearchStrategy() instanceof IVFKnnSearchStrategy ivfSearchStrategy) {
Expand All @@ -311,6 +322,7 @@ public final void search(String field, float[] target, KnnCollector knnCollector
entry.centroidSlice(ivfCentroids),
target,
postListSlice,
esAcceptDocs == null ? acceptDocs : esAcceptDocs,
visitRatio
);
Bits acceptDocsBits = acceptDocs.bits();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.AcceptDocs;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
Expand Down Expand Up @@ -87,6 +88,7 @@ public CentroidIterator getCentroidIterator(
IndexInput centroids,
float[] targetQuery,
IndexInput postingListSlice,
AcceptDocs acceptDocs,
float visitRatio
) throws IOException {
final FieldEntry fieldEntry = fields.get(fieldInfo.number);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.knn.KnnCollectorManager;
import org.apache.lucene.search.knn.KnnSearchStrategy;
import org.apache.lucene.util.Bits;
import org.elasticsearch.search.profile.query.QueryProfiler;

import java.io.IOException;
Expand Down Expand Up @@ -182,20 +183,31 @@ private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight, IVFCollec
TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, IVFCollectorManager knnCollectorManager, float visitRatio)
throws IOException {
final LeafReader reader = ctx.reader();
final Bits liveDocs = reader.getLiveDocs();
final int maxDoc = reader.maxDoc();

if (filterWeight == null) {
AcceptDocs acceptDocs = AcceptDocs.fromLiveDocs(reader.getLiveDocs(), reader.maxDoc());
return approximateSearch(ctx, acceptDocs, Integer.MAX_VALUE, knnCollectorManager, visitRatio);
return approximateSearch(
ctx,
liveDocs == null ? ESAcceptDocs.ESAcceptDocsAll.INSTANCE : new ESAcceptDocs.BitsAcceptDocs(liveDocs, maxDoc),
Integer.MAX_VALUE,
knnCollectorManager,
visitRatio
);
}

Scorer scorer = filterWeight.scorer(ctx);
if (scorer == null) {
ScorerSupplier supplier = filterWeight.scorerSupplier(ctx);
if (supplier == null) {
return TopDocsCollector.EMPTY_TOPDOCS;
}

AcceptDocs acceptDocs = AcceptDocs.fromIteratorSupplier(scorer::iterator, reader.getLiveDocs(), reader.maxDoc());
final int cost = acceptDocs.cost();
return approximateSearch(ctx, acceptDocs, cost + 1, knnCollectorManager, visitRatio);
return approximateSearch(
ctx,
new ESAcceptDocs.ScorerSupplierAcceptDocs(supplier, liveDocs, maxDoc),
Integer.MAX_VALUE,
knnCollectorManager,
visitRatio
);
}

abstract TopDocs approximateSearch(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
/*
* @notice
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Modifications copyright (C) 2025 Elasticsearch B.V.
*/
package org.elasticsearch.search.vectors;

import org.apache.lucene.search.AcceptDocs;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

import java.io.IOException;
import java.util.Objects;
import java.util.Optional;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

/**
* An extension of {@link AcceptDocs} that provides additional methods to get an approximate cost
* and a BitSet representation of the accepted documents.
*/
public abstract sealed class ESAcceptDocs extends AcceptDocs {

/**
 * Returns an approximate cost of the accepted documents.
 * <p>
 * This is generally much cheaper than {@link #cost()}, as implementations may
 * not fully evaluate filters to provide this estimate and may ignore deletions.
 *
 * @return the approximate cost
 * @throws IOException if an I/O error occurs
 */
public abstract int approximateCost() throws IOException;

/**
 * Returns the accepted documents as a {@link BitSet}, if such a representation is available.
 * <p>
 * The result deliberately has three states, so callers must null-check it before using it as an Optional:
 * <ul>
 * <li>{@code null}: all documents are accepted;</li>
 * <li>an empty Optional: there are some accepted documents, but they cannot be represented as a BitSet
 * efficiently;</li>
 * <li>a present Optional: the BitSet of accepted documents.</li>
 * </ul>
 *
 * @return an Optional containing the BitSet of accepted documents, or empty if not available, or null if all documents are accepted
 * @throws IOException if an I/O error occurs
 */
public abstract Optional<BitSet> getBitSet() throws IOException;

/**
 * Collects the documents produced by {@code iterator} into a {@link BitSet}, keeping only live documents
 * when {@code liveDocs} is provided.
 *
 * @param iterator the documents matched by the filter; it is consumed unless its backing BitSet can be reused
 * @param liveDocs the live documents, or {@code null} when there are no deletions to apply
 * @param maxDoc   the size of the bit set to create
 * @return the bit set of accepted documents
 * @throws IOException if reading from the iterator fails
 */
private static BitSet createBitSet(DocIdSetIterator iterator, Bits liveDocs, int maxDoc) throws IOException {
    final boolean hasDeletions = liveDocs != null;

    // Without deletions, an iterator that is already backed by a BitSet can hand over that BitSet directly.
    if (hasDeletions == false && iterator instanceof BitSetIterator backedByBitSet) {
        return backedByBitSet.getBitSet();
    }

    final int denseThreshold = maxDoc >> 7; // same as BitSet#of
    if (iterator.cost() < denseThreshold) {
        // Few expected matches: drop deleted docs while iterating and create a sparse bitset.
        DocIdSetIterator liveOnly = iterator;
        if (hasDeletions) {
            liveOnly = new FilteredDocIdSetIterator(iterator) {
                @Override
                protected boolean match(int doc) {
                    return liveDocs.get(doc);
                }
            };
        }
        return BitSet.of(liveOnly, maxDoc);
    }

    // Many expected matches: fill a fixed bit set first, then mask it with the live docs in one pass.
    final FixedBitSet dense = new FixedBitSet(maxDoc);
    dense.or(iterator);
    if (hasDeletions) {
        liveDocs.applyMask(dense, 0);
    }
    return dense;
}

/**
 * An AcceptDocs that accepts all documents.
 * <p>
 * The class holds no per-instance state and is only reachable through {@link #INSTANCE}. Every accessor
 * returns a constant: {@code null} for the document views and {@code 0} for the costs.
 */
public static final class ESAcceptDocsAll extends ESAcceptDocs {
/** The single shared instance; the constructor is private. */
public static final ESAcceptDocsAll INSTANCE = new ESAcceptDocsAll();

private ESAcceptDocsAll() {}

/**
 * Always returns {@code 0}.
 * NOTE(review): this is 0 even though every document is accepted. A caller that divides this value by the
 * number of vectors to estimate the accepted fraction therefore gets 0 rather than 1 -- confirm that callers
 * handle this class explicitly.
 */
@Override
public int approximateCost() throws IOException {
return 0;
}

/**
 * Always returns {@code null} (not an empty Optional): in the getBitSet contract of the enclosing class,
 * {@code null} means that all documents are accepted.
 */
@Override
public Optional<BitSet> getBitSet() throws IOException {
return null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can return an empty Optional here

Copy link
Contributor

@iverase iverase Nov 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

null indicates that we accept all documents. Empty Optional indicates there are some filtered documents but cannot be represented as a BitSet efficiently. So null is ok here.

}

/** Always returns {@code null}; callers must be prepared for a missing Bits view. */
@Override
public Bits bits() throws IOException {
return null;
}

/** Always returns {@code null}; callers must be prepared for a missing iterator. */
@Override
public DocIdSetIterator iterator() throws IOException {
return null;
}

/**
 * Always returns {@code 0}.
 * NOTE(review): like {@link #approximateCost()}, this does not reflect the number of accepted documents -- confirm intended.
 */
@Override
public int cost() throws IOException {
return 0;
}
}

/**
 * An AcceptDocs that wraps a Bits instance. Generally indicates that no filter was provided, but there are deleted docs.
 * <p>
 * When the wrapped Bits is also a {@link BitSet}, its cardinality and approximate cardinality are read once
 * in the constructor and reused by {@link #cost()}, {@link #approximateCost()} and {@link #iterator()}.
 */
public static final class BitsAcceptDocs extends ESAcceptDocs {
// Accepted documents; never null once the constructor has returned.
private final Bits bits;
// The same object as `bits` when it is a BitSet, otherwise null.
private final BitSet bitSetRef;
// NOTE: despite its name, this holds bitSet.cardinality() when `bits` is a BitSet. Only for other Bits
// implementations is it the maxDoc that was passed to the constructor.
private final int maxDoc;
// bitSet.approximateCardinality() when `bits` is a BitSet, otherwise the maxDoc passed to the constructor.
private final int approximateCost;

/**
 * Wraps the given Bits.
 *
 * @param bits   the accepted documents; must not be null
 * @param maxDoc the expected length of {@code bits}
 * @throws IllegalArgumentException if {@code bits} is non-null and its length differs from {@code maxDoc}
 * @throws NullPointerException if {@code bits} is null
 */
BitsAcceptDocs(Bits bits, int maxDoc) {
if (bits != null && bits.length() != maxDoc) {
throw new IllegalArgumentException("Bits length = " + bits.length() + " != maxDoc = " + maxDoc);
}
this.bits = Objects.requireNonNull(bits);
if (bits instanceof BitSet bitSet) {
// BitSet-backed: cache the exact and the approximate number of set bits up front.
this.maxDoc = Objects.requireNonNull(bitSet).cardinality();
this.approximateCost = Objects.requireNonNull(bitSet).approximateCardinality();
this.bitSetRef = bitSet;
} else {
// Any other Bits: nothing can be counted here, so both values fall back to maxDoc.
this.maxDoc = maxDoc;
this.approximateCost = maxDoc;
this.bitSetRef = null;
}
}

/** Returns the wrapped Bits, exactly as passed to the constructor. */
@Override
public Bits bits() {
return bits;
}

/**
 * Returns a new iterator over the accepted documents on every call.
 */
@Override
public DocIdSetIterator iterator() {
if (bitSetRef != null) {
// In this branch the `maxDoc` field holds the cached cardinality, which is passed as the iterator cost.
return new BitSetIterator(bitSetRef, maxDoc);
}
// Not a BitSet: visit every doc id below maxDoc and keep those that `bits` accepts.
return new FilteredDocIdSetIterator(DocIdSetIterator.all(maxDoc)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shortcut here is because we assume that the caller has ensured thatliveDocs will not be null ?

@Override
protected boolean match(int doc) {
return bits.get(doc);
}
};
}

/**
 * Returns the value of the `maxDoc` field: the cardinality read in the constructor when the wrapped Bits
 * is a BitSet, otherwise the maxDoc passed to the constructor.
 */
@Override
public int cost() {
// We have no better estimate. This should be ok in practice since background merges should
// keep the number of deletes under control (< 20% by default).
return maxDoc;
}

/** Returns the approximate cost computed in the constructor. */
@Override
public int approximateCost() {
return approximateCost;
}

/**
 * Returns the wrapped BitSet when the Bits passed to the constructor is one, otherwise an empty Optional.
 * NOTE(review): the {@code bits == null} branch cannot be taken, because the constructor rejects a null
 * {@code bits} via Objects.requireNonNull.
 */
@Override
public Optional<BitSet> getBitSet() {
if (bits == null) {
return null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as with the other getBitSet maybe we could return an empty optional?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its a trinary expression on purpose.

  • null == all
  • Empty == not efficiently a bitset
  • Present == efficiently a bitset

}
return Optional.ofNullable(bitSetRef);
}
}

/**
 * An AcceptDocs that wraps a ScorerSupplier. Indicates that a filter was provided.
 * <p>
 * The filter is only materialized into a {@link BitSet} on demand, by {@link #bits()}, {@link #cost()} or
 * {@link #getBitSet()}. {@link #iterator()} and {@link #approximateCost()} do not trigger that
 * materialization. No synchronization is performed around the lazily initialized fields.
 */
public static final class ScorerSupplierAcceptDocs extends ESAcceptDocs {
    private final ScorerSupplier scorerSupplier;
    /** Lazily built set of accepted documents; {@code null} until first needed. */
    private BitSet acceptBitSet;
    private final Bits liveDocs;
    private final int maxDoc;
    /** Exact cardinality of {@link #acceptBitSet}, cached by {@link #cost()}; {@code -1} until computed. */
    private int cardinality = -1;

    /**
     * @param scorerSupplier supplies the scorer whose iterator enumerates the documents matching the filter
     * @param liveDocs       the live documents, or {@code null} when no deletions have to be applied
     * @param maxDoc         the size used when the filter is materialized into a bit set
     */
    ScorerSupplierAcceptDocs(ScorerSupplier scorerSupplier, Bits liveDocs, int maxDoc) {
        this.scorerSupplier = scorerSupplier;
        this.liveDocs = liveDocs;
        this.maxDoc = maxDoc;
    }

    /**
     * Materializes the filter into {@link #acceptBitSet} the first time it is needed.
     * NOTE(review): this requests a scorer from the supplier, and so does {@link #iterator()} when the bit set
     * has not been built yet -- confirm the supplier tolerates being asked for a scorer more than once.
     */
    private void createBitSetIfNecessary() throws IOException {
        if (acceptBitSet == null) {
            acceptBitSet = createBitSet(scorerSupplier.get(NO_MORE_DOCS).iterator(), liveDocs, maxDoc);
        }
    }

    /** Returns the accepted documents as random-access Bits, materializing the filter if necessary. */
    @Override
    public Bits bits() throws IOException {
        createBitSetIfNecessary();
        return acceptBitSet;
    }

    /**
     * Returns an iterator over the accepted documents. If the filter has already been materialized the
     * iterator runs over that bit set; otherwise a fresh scorer iterator is used, filtered by the live
     * documents when there are any.
     */
    @Override
    public DocIdSetIterator iterator() throws IOException {
        if (acceptBitSet != null) {
            // The bit set may have been built by bits() or getBitSet() without cost() ever being called, in
            // which case the cached cardinality is still -1 and BitSetIterator would reject it as a negative
            // cost. cost() computes and caches the cardinality when it is missing.
            return new BitSetIterator(acceptBitSet, cost());
        }
        return liveDocs == null
            ? scorerSupplier.get(NO_MORE_DOCS).iterator()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we use NO_MORE_DOCS here instead of maxDoc ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its the typical pattern. It simply indicates "Hey iterator, we have no ideas on the best skipping pattern, just assume we will iterate everything"

            : new FilteredDocIdSetIterator(scorerSupplier.get(NO_MORE_DOCS).iterator()) {
                @Override
                protected boolean match(int doc) {
                    return liveDocs.get(doc);
                }
            };
    }

    /**
     * Returns the exact number of accepted documents. The first call materializes the filter (if that has
     * not happened yet) and counts the set bits; the count is cached for later calls.
     */
    @Override
    public int cost() throws IOException {
        createBitSetIfNecessary();
        if (cardinality == -1) {
            cardinality = acceptBitSet.cardinality();
        }
        return cardinality;
    }

    /**
     * Returns an estimate of the number of accepted documents without materializing the filter: the cached
     * or approximate cardinality when the bit set already exists, otherwise the cost reported by the scorer
     * supplier.
     *
     * @throws ArithmeticException if the scorer supplier's cost does not fit into an {@code int}
     */
    @Override
    public int approximateCost() throws IOException {
        if (acceptBitSet != null) {
            return cardinality != -1 ? cardinality : acceptBitSet.approximateCardinality();
        }
        return Math.toIntExact(scorerSupplier.cost());
    }

    /** Returns the materialized bit set of accepted documents; the Optional is always present. */
    @Override
    public Optional<BitSet> getBitSet() throws IOException {
        createBitSetIfNecessary();
        return Optional.of(acceptBitSet);
    }
}
}
Loading