apache · romseygeek · Oct 6, 2025 · Sep 5, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -159,6 +159,8 @@ Optimizations
 * GITHUB#15151: Use `SimScorer#score` bulk API to compute impact scores per
   block of postings. (Adrien Grand)
 
+* GITHUB#15167: FirstPassGroupingCollector supports ignoring docs without group field. (Binlong Gao)
+
 * GITHUB#15160: Increased the size used for blocks of postings from 128 to 256.
   This gives a noticeable speedup to many queries. (Adrien Grand)
 

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeGroupSelector.java
@@ -33,7 +33,7 @@ public class DoubleRangeGroupSelector extends GroupSelector<DoubleRange> {
   private final DoubleRangeFactory rangeFactory;
 
   private Set<DoubleRange> inSecondPass;
-  private boolean includeEmpty = true;
+  private boolean includeEmpty;
   private boolean positioned;
   private DoubleRange current;
 
@@ -88,7 +88,6 @@ public DoubleRange copyValue() throws IOException {
   @Override
   public void setGroups(Collection<SearchGroup<DoubleRange>> searchGroups) {
     inSecondPass = new HashSet<>();
-    includeEmpty = false;
     for (SearchGroup<DoubleRange> group : searchGroups) {
       if (group.groupValue == null) {
         includeEmpty = true;

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
@@ -44,6 +44,7 @@
 public class FirstPassGroupingCollector<T> extends SimpleCollector {
 
   private final GroupSelector<T> groupSelector;
+  private final boolean ignoreDocsWithoutGroupField;
 
   private final FieldComparator<?>[] comparators;
   private final LeafFieldComparator[] leafComparators;
@@ -73,7 +74,28 @@ public class FirstPassGroupingCollector<T> extends SimpleCollector {
    */
   public FirstPassGroupingCollector(
       GroupSelector<T> groupSelector, Sort groupSort, int topNGroups) {
+    this(groupSelector, groupSort, topNGroups, false);
+  }
+
+  /**
+   * Create the first pass collector, optionally ignoring documents that lack the group field.
+   *
+   * @param groupSelector a GroupSelector used to define groups
+   * @param groupSort The {@link Sort} used to sort the groups. The top sorted document within each
+   *     group according to groupSort, determines how that group sorts against other groups. This
+   *     must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
+   * @param topNGroups How many top groups to keep.
+   * @param ignoreDocsWithoutGroupField if true, ignore documents that don't have the group field
+   *     instead of putting them in a null group
+   */
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  public FirstPassGroupingCollector(
+      GroupSelector<T> groupSelector,
+      Sort groupSort,
+      int topNGroups,
+      boolean ignoreDocsWithoutGroupField) {
     this.groupSelector = groupSelector;
+    this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField;
     if (topNGroups < 1) {
       throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
     }
@@ -197,12 +219,14 @@ public void collect(int doc) throws IOException {
       return;
     }
 
-    // TODO: should we add option to mean "ignore docs that
-    // don't have the group field" (instead of stuffing them
-    // under null group)?
-    groupSelector.advanceTo(doc);
+    GroupSelector.State state = groupSelector.advanceTo(doc);
     T groupValue = groupSelector.currentValue();
 
+    // Skip documents without group field if option is enabled
+    if (ignoreDocsWithoutGroupField && state == GroupSelector.State.SKIP) {
+      return;
+    }
+
     final CollectedSearchGroup<T> group = groupMap.get(groupValue);
 
     if (group == null) {

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
@@ -56,6 +56,7 @@ public class GroupingSearch {
   private boolean cacheScores;
   private boolean allGroups;
   private boolean allGroupHeads;
+  private boolean ignoreDocsWithoutGroupField;
 
   private Collection<?> matchingGroups;
   private Bits matchingGroupHeads;
@@ -138,7 +139,7 @@ protected TopGroups groupByFieldOrFunction(
     int topN = groupOffset + groupLimit;
 
     final FirstPassGroupingCollector firstPassCollector =
-        new FirstPassGroupingCollector(grouper, groupSort, topN);
+        new FirstPassGroupingCollector(grouper, groupSort, topN, ignoreDocsWithoutGroupField);
     final AllGroupsCollector allGroupsCollector =
         allGroups ? new AllGroupsCollector(grouper) : null;
     final AllGroupHeadsCollector allGroupHeadsCollector =
@@ -358,4 +359,16 @@ public GroupingSearch setAllGroupHeads(boolean allGroupHeads) {
   public Bits getAllGroupHeads() {
     return matchingGroupHeads;
   }
+
+  /**
+   * Sets whether documents that don't have the group field are ignored by the first-pass
+   * collector instead of being put in a null group. Unless set, the flag is {@code false}.
+   *
+   * @param ignoreDocsWithoutGroupField Whether to ignore documents without group field
+   * @return <code>this</code>
+   */
+  public GroupingSearch setIgnoreDocsWithoutGroupField(boolean ignoreDocsWithoutGroupField) {
+    this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField;
+    return this;
+  }
 }
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeGroupSelector.java
@@ -34,7 +34,7 @@ public class LongRangeGroupSelector extends GroupSelector<LongRange> {
   private final LongRangeFactory rangeFactory;
 
   private Set<LongRange> inSecondPass;
-  private boolean includeEmpty = true;
+  private boolean includeEmpty;
   private boolean positioned;
   private LongRange current;
 
@@ -89,7 +89,6 @@ public LongRange copyValue() throws IOException {
   @Override
   public void setGroups(Collection<SearchGroup<LongRange>> searchGroups) {
     inSecondPass = new HashSet<>();
-    includeEmpty = false;
     for (SearchGroup<LongRange> group : searchGroups) {
       if (group.groupValue == null) {
         includeEmpty = true;

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/ValueSourceGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/ValueSourceGroupSelector.java
@@ -35,6 +35,7 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
   private final Map<Object, Object> context;
 
   private Set<MutableValue> secondPassGroups;
+  private boolean includeEmpty;
 
   /**
    * Create a new ValueSourceGroupSelector
@@ -61,8 +62,12 @@ public void setScorer(Scorable scorer) throws IOException {}
   @Override
   public State advanceTo(int doc) throws IOException {
     this.filler.fillValue(doc);
+    MutableValue value = filler.getValue();
+    if (value.exists() == false) {
+      return includeEmpty ? State.ACCEPT : State.SKIP;
+    }
     if (secondPassGroups != null) {
-      if (secondPassGroups.contains(filler.getValue()) == false) {
+      if (secondPassGroups.contains(value) == false) {
         return State.SKIP;
       }
     }
@@ -83,7 +88,11 @@ public MutableValue copyValue() {
   public void setGroups(Collection<SearchGroup<MutableValue>> searchGroups) {
     secondPassGroups = new HashSet<>();
     for (SearchGroup<MutableValue> group : searchGroups) {
-      secondPassGroups.add(group.groupValue);
+      if (group.groupValue.exists() == false) {
+        includeEmpty = true;
+      } else {
+        secondPassGroups.add(group.groupValue);
+      }
     }
   }
 }
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
@@ -379,6 +379,49 @@ private void indexRandomDocs(RandomIndexWriter w) throws IOException {
     }
   }
 
+  public void testIgnoreDocsWithoutGroupField() throws IOException {
+    Shard shard = new Shard();
+
+    // Two documents that carry the group field (addGroupField called with 1 and 2)
+    Document doc = new Document();
+    doc.add(new TextField("text", "foo", Field.Store.NO));
+    addGroupField(doc, 1);
+    shard.writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new TextField("text", "foo", Field.Store.NO));
+    addGroupField(doc, 2);
+    shard.writer.addDocument(doc);
+
+    // One document that matches the query but has no group field
+    doc = new Document();
+    doc.add(new TextField("text", "foo", Field.Store.NO));
+    shard.writer.addDocument(doc);
+
+    IndexSearcher searcher = shard.getIndexSearcher();
+    Query query = new TermQuery(new Term("text", "foo"));
+
+    // Default behavior: setIgnoreDocsWithoutGroupField is left unset
+    GroupingSearch grouping1 = new GroupingSearch(getGroupSelector());
+    TopGroups<T> groups1 = grouping1.search(searcher, query, 0, 10);
+    int defaultGroupCount = groups1.groups.length;
+
+    // Same search with setIgnoreDocsWithoutGroupField(true)
+    GroupingSearch grouping2 = new GroupingSearch(getGroupSelector());
+    grouping2.setIgnoreDocsWithoutGroupField(true);
+    TopGroups<T> groups2 = grouping2.search(searcher, query, 0, 10);
+    int ignoreGroupCount = groups2.groups.length;
+    // Only an upper bound is asserted, not exact group counts
+    assertTrue(
+        "Expected ignoreGroupCount <= defaultGroupCount, got "
+            + ignoreGroupCount
+            + " vs "
+            + defaultGroupCount,
+        ignoreGroupCount <= defaultGroupCount);
+    // NOTE(review): shard.close() is skipped if anything above throws
+    shard.close();
+  }
+
   private void assertSortsBefore(GroupDocs<T> first, GroupDocs<T> second) {
     Object[] groupSortValues = second.groupSortValues();
     Object[] prevSortValues = first.groupSortValues();

diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@@ -49,6 +49,7 @@
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
@@ -193,6 +194,84 @@ public void testBasic() throws Exception {
     dir.close();
   }
 
+  public void testIgnoreDocsWithoutGroupField() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w =
+        new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    String groupField = "group";
+    // Two documents, one with group value "group1" and one with "group2"
+    Document doc = new Document();
+    addGroupField(doc, groupField, "group1");
+    // addGroupField stores the value as a SortedDocValuesField under groupField
+    doc.add(new TextField("content", "test", Field.Store.YES));
+    w.addDocument(doc);
+
+    doc = new Document();
+    addGroupField(doc, groupField, "group2");
+    doc.add(new TextField("content", "test", Field.Store.YES));
+    w.addDocument(doc);
+
+    // One document that has no group field at all
+    doc = new Document();
+    doc.add(new TextField("content", "test", Field.Store.YES));
+    w.addDocument(doc);
+
+    DirectoryReader reader = w.getReader();
+    w.close();
+
+    IndexSearcher searcher = newSearcher(reader);
+
+    // Three-argument constructor: the ignore option is off
+    FirstPassGroupingCollector<BytesRef> collector1 =
+        new FirstPassGroupingCollector<>(new TermGroupSelector(groupField), Sort.RELEVANCE, 10);
+    searcher.search(new MatchAllDocsQuery(), collector1);
+    Collection<SearchGroup<BytesRef>> groups1 = collector1.getTopGroups(0);
+
+    assertEquals(3, groups1.size()); // group1, group2 and the null group
+
+    // Four-argument constructor with ignoreDocsWithoutGroupField = true
+    FirstPassGroupingCollector<BytesRef> collector2 =
+        new FirstPassGroupingCollector<>(
+            new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true);
+    searcher.search(new MatchAllDocsQuery(), collector2);
+    Collection<SearchGroup<BytesRef>> groups2 = collector2.getTopGroups(0);
+
+    assertEquals(2, groups2.size()); // group1 and group2 only, no null group
+
+    reader.close();
+    dir.close();
+  }
+
+  public void testAllDocsWithoutGroupField() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w =
+        new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    // Five documents, none of which has the group field
+    for (int i = 0; i < 5; i++) {
+      Document doc = new Document();
+      doc.add(new TextField("content", "test", Field.Store.YES));
+      w.addDocument(doc);
+    }
+
+    DirectoryReader reader = w.getReader();
+    w.close();
+
+    IndexSearcher searcher = newSearcher(reader);
+
+    // Ignore option on while every document lacks the group field
+    FirstPassGroupingCollector<BytesRef> collector =
+        new FirstPassGroupingCollector<>(new TermGroupSelector("group"), Sort.RELEVANCE, 10, true);
+    searcher.search(new MatchAllDocsQuery(), collector);
+    Collection<SearchGroup<BytesRef>> groups = collector.getTopGroups(0);
+
+    assertNull(groups); // Expected: getTopGroups(0) returns null when no groups were found
+
+    reader.close();
+    dir.close();
+  }
+
   private void addGroupField(Document doc, String groupField, String value) {
     doc.add(new SortedDocValuesField(groupField, new BytesRef(value)));
   }