apache · romseygeek · Oct 6, 2025 · Sep 5, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -315,6 +315,8 @@ Optimizations
   and uses an optimized branchless approach. Any subclasses that have implemented the optimized method
   need to remove it as it will disappear in Lucene 11.  (Uwe Schindler)
 
+* GITHUB#15167: FirstPassGroupingCollector and GroupingSearch can now ignore documents without the group field. (Binlong Gao)
+
 Changes in Runtime Behavior
 ---------------------
 * GITHUB#14823: Decrease TieredMergePolicy's default number of segments per

diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
@@ -32,6 +32,7 @@
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.mutable.MutableValue;
 
 /**
  * FirstPassGroupingCollector is the first of two passes necessary to collect grouped hits. This
@@ -44,6 +45,7 @@
 public class FirstPassGroupingCollector<T> extends SimpleCollector {
 
   private final GroupSelector<T> groupSelector;
+  private final boolean ignoreDocsWithoutGroupField;
 
   private final FieldComparator<?>[] comparators;
   private final LeafFieldComparator[] leafComparators;
@@ -74,7 +76,28 @@ public class FirstPassGroupingCollector<T> extends SimpleCollector {
   @SuppressWarnings({"unchecked", "rawtypes"})
   public FirstPassGroupingCollector(
       GroupSelector<T> groupSelector, Sort groupSort, int topNGroups) {
+    this(groupSelector, groupSort, topNGroups, false);
+  }
+
+  /**
+   * Create the first pass collector, optionally ignoring documents without the group field.
+   *
+   * @param groupSelector a GroupSelector used to define groups
+   * @param groupSort The {@link Sort} used to sort the groups. The top sorted document within each
+   *     group according to groupSort, determines how that group sorts against other groups. This
+   *     must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
+   * @param topNGroups How many top groups to keep.
+   * @param ignoreDocsWithoutGroupField if true, ignore documents that don't have the group field
+   *     instead of putting them in a null group
+   */
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  public FirstPassGroupingCollector(
+      GroupSelector<T> groupSelector,
+      Sort groupSort,
+      int topNGroups,
+      boolean ignoreDocsWithoutGroupField) {
     this.groupSelector = groupSelector;
+    this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField;
     if (topNGroups < 1) {
       throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
     }
@@ -198,12 +221,14 @@ public void collect(int doc) throws IOException {
       return;
     }
 
-    // TODO: should we add option to mean "ignore docs that
-    // don't have the group field" (instead of stuffing them
-    // under null group)?
     groupSelector.advanceTo(doc);
     T groupValue = groupSelector.currentValue();
 
+    // Skip documents without group field if option is enabled
+    if (ignoreDocsWithoutGroupField && isNullGroupValue(groupValue)) {
+      return;
+    }
+
     final CollectedSearchGroup<T> group = groupMap.get(groupValue);
 
     if (group == null) {
@@ -363,4 +388,15 @@ protected void doSetNextReader(LeafReaderContext readerContext) throws IOExcepti
   public GroupSelector<T> getGroupSelector() {
     return groupSelector;
   }
+
+  /**
+   * Tells whether {@code groupValue} denotes a document without a value for the group field.
+   */
+  private boolean isNullGroupValue(T groupValue) {
+    // ValueSourceGroupSelector signals a missing value through MutableValue#exists()
+    if (groupValue instanceof MutableValue) {
+      return ((MutableValue) groupValue).exists() == false;
+    }
+    return groupValue == null;
+  }
 }
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
@@ -56,6 +56,7 @@ public class GroupingSearch {
   private boolean cacheScores;
   private boolean allGroups;
   private boolean allGroupHeads;
+  private boolean ignoreDocsWithoutGroupField;
 
   private Collection<?> matchingGroups;
   private Bits matchingGroupHeads;
@@ -138,7 +139,7 @@ protected TopGroups groupByFieldOrFunction(
     int topN = groupOffset + groupLimit;
 
     final FirstPassGroupingCollector firstPassCollector =
-        new FirstPassGroupingCollector(grouper, groupSort, topN);
+        new FirstPassGroupingCollector(grouper, groupSort, topN, ignoreDocsWithoutGroupField);
     final AllGroupsCollector allGroupsCollector =
         allGroups ? new AllGroupsCollector(grouper) : null;
     final AllGroupHeadsCollector allGroupHeadsCollector =
@@ -358,4 +359,16 @@ public GroupingSearch setAllGroupHeads(boolean allGroupHeads) {
   public Bits getAllGroupHeads() {
     return matchingGroupHeads;
   }
+
+  /**
+   * Sets whether documents that have no value for the group field are skipped instead of being
+   * collected into a null group. The flag is handed to the first-pass collector of the search.
+   *
+   * @param ignoreDocsWithoutGroupField true to skip documents without the group field
+   * @return <code>this</code>
+   */
+  public GroupingSearch setIgnoreDocsWithoutGroupField(boolean ignoreDocsWithoutGroupField) {
+    this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField;
+    return this;
+  }
 }
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
@@ -379,6 +379,49 @@ private void indexRandomDocs(RandomIndexWriter w) throws IOException {
     }
   }
 
+  public void testIgnoreDocsWithoutGroupField() throws IOException {
+    Shard shard = new Shard();
+    // try/finally so the shard is released even when a search or an assertion fails
+    try {
+      // Two documents carrying the group field
+      Document doc = new Document();
+      doc.add(new TextField("text", "foo", Field.Store.NO));
+      addGroupField(doc, 1);
+      shard.writer.addDocument(doc);
+
+      doc = new Document();
+      doc.add(new TextField("text", "foo", Field.Store.NO));
+      addGroupField(doc, 2);
+      shard.writer.addDocument(doc);
+
+      // One document without the group field
+      doc = new Document();
+      doc.add(new TextField("text", "foo", Field.Store.NO));
+      shard.writer.addDocument(doc);
+
+      IndexSearcher searcher = shard.getIndexSearcher();
+      Query query = new TermQuery(new Term("text", "foo"));
+
+      // Default behavior: documents without the group field are not ignored
+      GroupingSearch grouping1 = new GroupingSearch(getGroupSelector());
+      TopGroups<T> groups1 = grouping1.search(searcher, query, 0, 10);
+      int defaultGroupCount = groups1.groups.length;
+
+      // Option enabled: documents without the group field are skipped by the first pass
+      GroupingSearch grouping2 = new GroupingSearch(getGroupSelector());
+      grouping2.setIgnoreDocsWithoutGroupField(true);
+      TopGroups<T> groups2 = grouping2.search(searcher, query, 0, 10);
+      int ignoreGroupCount = groups2.groups.length;
+
+      assertTrue(
+          "Expected ignoreGroupCount <= defaultGroupCount, got "
+              + ignoreGroupCount + " vs " + defaultGroupCount,
+          ignoreGroupCount <= defaultGroupCount);
+    } finally {
+      shard.close();
+    }
+  }
+
   private void assertSortsBefore(GroupDocs<T> first, GroupDocs<T> second) {
     Object[] groupSortValues = second.groupSortValues();
     Object[] prevSortValues = first.groupSortValues();

diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@@ -49,6 +49,7 @@
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
@@ -193,6 +194,84 @@ public void testBasic() throws Exception {
     dir.close();
   }
 
+  public void testIgnoreDocsWithoutGroupField() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w =
+        new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    String groupField = "group";
+    // Add documents with group field
+    Document doc = new Document();
+    addGroupField(doc, groupField, "group1");
+    doc.add(new TextField("content", "test", Field.Store.YES));
+    w.addDocument(doc);
+
+    doc = new Document();
+    addGroupField(doc, groupField, "group2");
+    doc.add(new TextField("content", "test", Field.Store.YES));
+    w.addDocument(doc);
+
+    // Add document without group field
+    doc = new Document();
+    doc.add(new TextField("content", "test", Field.Store.YES));
+    w.addDocument(doc);
+
+    DirectoryReader reader = w.getReader();
+    w.close();
+
+    IndexSearcher searcher = newSearcher(reader);
+
+    // Test default behavior (include null group)
+    FirstPassGroupingCollector<BytesRef> collector1 =
+        new FirstPassGroupingCollector<>(new TermGroupSelector(groupField), Sort.RELEVANCE, 10);
+    searcher.search(new MatchAllDocsQuery(), collector1);
+    Collection<SearchGroup<BytesRef>> groups1 = collector1.getTopGroups(0);
+
+    assertEquals(3, groups1.size()); // Should include null group
+
+    // Test ignoring docs without group field
+    FirstPassGroupingCollector<BytesRef> collector2 =
+        new FirstPassGroupingCollector<>(
+            new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true);
+    searcher.search(new MatchAllDocsQuery(), collector2);
+    Collection<SearchGroup<BytesRef>> groups2 = collector2.getTopGroups(0);
+
+    assertEquals(2, groups2.size()); // Should exclude null group
+    groups2.forEach(group -> assertNotNull("null group must be excluded", group.groupValue));
+
+    reader.close();
+    dir.close();
+  }
+
+  public void testAllDocsWithoutGroupField() throws IOException {
+    Directory index = newDirectory();
+    RandomIndexWriter writer =
+        new RandomIndexWriter(random(), index, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    // Index only documents that lack the group field
+    int remaining = 5;
+    while (remaining-- > 0) {
+      Document fieldless = new Document();
+      fieldless.add(new TextField("content", "test", Field.Store.YES));
+      writer.addDocument(fieldless);
+    }
+
+    DirectoryReader reader = writer.getReader();
+    writer.close();
+    IndexSearcher searcher = newSearcher(reader);
+
+    // Option enabled while no document carries the field
+    TermGroupSelector selector = new TermGroupSelector("group");
+    FirstPassGroupingCollector<BytesRef> firstPass =
+        new FirstPassGroupingCollector<>(selector, Sort.RELEVANCE, 10, true);
+    searcher.search(new MatchAllDocsQuery(), firstPass);
+
+    // Expect null rather than an empty collection when no groups were found
+    assertNull(firstPass.getTopGroups(0));
+    reader.close();
+    index.close();
+  }
+
   private void addGroupField(Document doc, String groupField, String value) {
     doc.add(new SortedDocValuesField(groupField, new BytesRef(value)));
   }