2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -56,6 +56,8 @@ Improvements
SIGSEGV with the old unsafe "mmap hack", which has been replaced by MemorySegments.
(Uwe Schindler, Robert Muir)

* GITHUB#15124: Use RamUsageEstimator to calculate size for non-accountable queries. (Sagar Upadhyaya)

Optimizations
---------------------
* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)
lucene/core/src/java/org/apache/lucene/search/BooleanClause.java
@@ -65,6 +65,12 @@ public String toString() {
return "-";
}
};

private static final Occur[] VALUES = values();

public static Occur[] cachedValues() {
return VALUES;
}
}

/** Constructs a BooleanClause. */
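Note on the cachedValues() addition above: Enum.values() returns a fresh defensive copy of the constants array on every call, so callers that merely read the constants can share one cached copy. A minimal standalone sketch of the difference, using a stand-in enum rather than Lucene's Occur (the cached array is shared, so callers must treat it as read-only):

enum Color {
  RED, GREEN, BLUE;

  // values() clones the backing array on every call; cache one copy once.
  private static final Color[] VALUES = values();

  static Color[] cachedValues() {
    return VALUES;
  }
}

class CachedValuesDemo {
  public static void main(String[] args) {
    System.out.println(Color.values() == Color.values());             // false: two clones
    System.out.println(Color.cachedValues() == Color.cachedValues()); // true: shared copy
  }
}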
45 changes: 42 additions & 3 deletions lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -126,6 +126,10 @@ public BooleanQuery build() {
private final List<BooleanClause> clauses; // used for toString() and getClauses()
// WARNING: Do not let clauseSets escape from this class as it breaks immutability:
private final Map<Occur, Collection<Query>> clauseSets; // used for equals/hashCode
private final List<Query> mustClauseQueries;
private final List<Query> mustNotClauseQueries;
private final List<Query> filterClauseQueries;
private final List<Query> shouldClauseQueries;

private BooleanQuery(int minimumNumberShouldMatch, BooleanClause[] clauses) {
this.minimumNumberShouldMatch = minimumNumberShouldMatch;
@@ -137,9 +141,36 @@ private BooleanQuery(int minimumNumberShouldMatch, BooleanClause[] clauses) {
// but not for FILTER and MUST_NOT
clauseSets.put(Occur.FILTER, new HashSet<>());
clauseSets.put(Occur.MUST_NOT, new HashSet<>());
// We store the queries per clause occur in lists up front. Otherwise, repeated visit()
// calls (e.g. from the query cache) would have to iterate over clauseSets.keySet(),
// which allocates a HashMap$HashIterator on every call and causes a measurable
// performance impact.

List<Query> mustClauseQueries = new ArrayList<>();
List<Query> mustNotClauseQueries = new ArrayList<>();
List<Query> filterClauseQueries = new ArrayList<>();
List<Query> shouldClauseQueries = new ArrayList<>();
Comment on lines +144 to +152

Member:

Thank you for digging into this performance impact.

How much does this gain us in the "normal" case?

Contributor Author:

> How much does this gain us in the "normal" case?

I am not sure on this. Might have to run some existing benchmark to get an idea.

for (BooleanClause clause : clauses) {
switch (clause.occur()) {
case MUST:
mustClauseQueries.add(clause.query());
break;
case FILTER:
filterClauseQueries.add(clause.query());
break;
case SHOULD:
shouldClauseQueries.add(clause.query());
break;
case MUST_NOT:
mustNotClauseQueries.add(clause.query());
break;
}
clauseSets.get(clause.occur()).add(clause.query());
}
this.mustClauseQueries = List.copyOf(mustClauseQueries);
this.mustNotClauseQueries = List.copyOf(mustNotClauseQueries);
this.filterClauseQueries = List.copyOf(filterClauseQueries);
this.shouldClauseQueries = List.copyOf(shouldClauseQueries);
}

/** Gets the minimum number of the optional BooleanClauses which must be satisfied. */
@@ -649,15 +680,23 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
@Override
public void visit(QueryVisitor visitor) {
QueryVisitor sub = visitor.getSubVisitor(Occur.MUST, this);
for (BooleanClause.Occur occur : clauseSets.keySet()) {
for (Occur occur : Occur.cachedValues()) {
Comment on lines -652 to +683

Member:

If the cost is truly creating the iterator, maybe we can switch to something that still uses the existing map, like clauseSets.forEach((occur, clauseSet) -> clauseSet.forEach(clauseValue -> {...}));

It does seem weird to iterate the keys only to immediately use them to rehash and get the values again.

Contributor Author (@sgup432, Sep 19, 2025):

So even clauseSets.forEach() will still create an iterator internally (as can be seen in the code and profile below).

With this change, throughput in the 100%-write benchmark dropped to roughly 2M ops/s, compared to ~3M ops/s with the change as part of this PR:

Benchmark                               Mode  Cnt        Score        Error  Units
QueryCacheBenchmark.concurrentPutOnly  thrpt    9  2179508.622 ± 117501.478  ops/s

In my case, because visit() (and therefore the map iteration) is called frequently, many of these iterators are allocated across different boolean queries, which shows up in the micro benchmark. Note that I start profiling only after the boolean queries have been created.

[Profiler screenshot: allocation trace, 2025-09-19]
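To make the allocation difference under discussion concrete, here is a minimal, self-contained sketch; the enum and map stand in for Lucene's Occur and clauseSets, and the comments describe general JDK behavior, not measurements from this PR:

import java.util.EnumMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class IteratorAllocationSketch {
  enum Occur { MUST, FILTER, SHOULD, MUST_NOT }

  // One clone of the constants, made once (mirrors Occur.cachedValues()).
  private static final Occur[] CACHED = Occur.values();

  public static void main(String[] args) {
    Map<Occur, Set<String>> clauseSets = new EnumMap<>(Occur.class);
    for (Occur o : CACHED) {
      clauseSets.put(o, new HashSet<>());
    }

    int visited = 0;

    // Pattern the PR moves away from: every pass allocates a keySet()
    // iterator, then get(occur) re-fetches the value the iterator just
    // walked past.
    for (Occur occur : clauseSets.keySet()) {
      visited += clauseSets.get(occur).size();
    }

    // Pattern the PR moves toward: index over a cached array; no map
    // iterator is created (a for-each over an array compiles to plain
    // index access).
    for (Occur occur : CACHED) {
      visited += clauseSets.get(occur).size();
    }

    System.out.println(visited);
  }
}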

if (clauseSets.get(occur).size() > 0) {
if (occur == Occur.MUST) {
for (Query q : clauseSets.get(occur)) {
for (Query q : mustClauseQueries) {
q.visit(sub);
}
} else {
QueryVisitor v = sub.getSubVisitor(occur, this);
for (Query q : clauseSets.get(occur)) {
List<Query> queryList =
switch (occur) {
case FILTER -> filterClauseQueries;
case SHOULD -> shouldClauseQueries;
case MUST_NOT -> mustNotClauseQueries;
case MUST -> mustClauseQueries;
};
for (Query q : queryList) {
q.visit(v);
}
}
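A hypothetical variation on the four dedicated fields, sketched here as an illustration rather than what the PR implements: a single EnumMap<Occur, List<Query>> would also keep the visit() lookup path free of map iterators (EnumMap.get is an array lookup), trading the switch expression for one extra indirection. The Query and visitor types below are stand-ins, not Lucene's:

import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;

class ClauseQueriesByOccur {
  enum Occur { MUST, FILTER, SHOULD, MUST_NOT }
  private static final Occur[] OCCURS = Occur.values();

  private final Map<Occur, List<Query>> byOccur = new EnumMap<>(Occur.class);

  ClauseQueriesByOccur() {
    for (Occur o : OCCURS) {
      byOccur.put(o, new ArrayList<>());
    }
  }

  void add(Occur occur, Query query) {
    byOccur.get(occur).add(query);
  }

  void visitAll(QueryVisitorLike visitor) {
    for (Occur occur : OCCURS) {            // cached array, no map iterator
      for (Query q : byOccur.get(occur)) {  // list iteration only
        visitor.visit(occur, q);
      }
    }
  }

  interface Query {}
  interface QueryVisitorLike { void visit(Occur occur, Query q); }
}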
78 changes: 47 additions & 31 deletions lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -18,7 +18,6 @@

import static org.apache.lucene.util.RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY;
import static org.apache.lucene.util.RamUsageEstimator.LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
import static org.apache.lucene.util.RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;

import java.io.IOException;
import java.util.ArrayList;
@@ -91,7 +90,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
private final Predicate<LeafReaderContext> leavesToCache;
// maps queries that are contained in the cache to a singleton so that this
// cache does not store several copies of the same query
private final Map<Query, Query> uniqueQueries;
private final Map<Query, Record> uniqueQueries;
// The contract between this set and the per-leaf caches is that per-leaf caches
// are only allowed to store sub-sets of the queries that are contained in
// mostRecentlyUsedQueries. This is why write operations are performed under a lock
@@ -176,6 +175,11 @@ public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000), 10);
}

// pkg-private for testing
Map<Query, Record> getUniqueQueries() {
return uniqueQueries;
}

// pkg-private for testing
static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {
private final int minSize;
@@ -300,32 +304,35 @@ CacheAndCount get(Query key, IndexReader.CacheHelper cacheHelper) {
return null;
}
// this get call moves the query to the most-recently-used position
final Query singleton = uniqueQueries.get(key);
if (singleton == null) {
final Record record = uniqueQueries.get(key);
if (record == null || record.query == null) {
onMiss(readerKey, key);
return null;
}
final CacheAndCount cached = leafCache.get(singleton);
final CacheAndCount cached = leafCache.get(record.query);
if (cached == null) {
onMiss(readerKey, singleton);
onMiss(readerKey, record.query);
} else {
onHit(readerKey, singleton);
onHit(readerKey, record.query);
}
return cached;
}

private void putIfAbsent(Query query, CacheAndCount cached, IndexReader.CacheHelper cacheHelper) {
public void putIfAbsent(Query query, CacheAndCount cached, IndexReader.CacheHelper cacheHelper) {
assert query instanceof BoostQuery == false;
assert query instanceof ConstantScoreQuery == false;
// under a lock to make sure that mostRecentlyUsedQueries and cache remain sync'ed
writeLock.lock();
try {
Query singleton = uniqueQueries.putIfAbsent(query, query);
if (singleton == null) {
onQueryCache(query, getRamBytesUsed(query));
} else {
query = singleton;
}
Record record =
uniqueQueries.computeIfAbsent(
query,
q -> {
long queryRamBytesUsed = getRamBytesUsed(q);
onQueryCache(q, queryRamBytesUsed);
return new Record(q, queryRamBytesUsed);
});
query = record.query;
final IndexReader.CacheKey key = cacheHelper.getKey();
LeafCache leafCache = cache.get(key);
if (leafCache == null) {
@@ -347,25 +354,24 @@ private void evictIfNecessary() {
assert writeLock.isHeldByCurrentThread();
// under a lock to make sure that mostRecentlyUsedQueries and cache keep sync'ed
if (requiresEviction()) {

Iterator<Query> iterator = mostRecentlyUsedQueries.iterator();
Iterator<Map.Entry<Query, Record>> iterator = uniqueQueries.entrySet().iterator();
do {
final Query query = iterator.next();
final int size = mostRecentlyUsedQueries.size();
final Map.Entry<Query, Record> entry = iterator.next();
final int size = uniqueQueries.size();
iterator.remove();
if (size == mostRecentlyUsedQueries.size()) {
if (size == uniqueQueries.size()) {
// size did not decrease, because the hash of the query changed since it has been
// put into the cache
throw new ConcurrentModificationException(
"Removal from the cache failed! This "
+ "is probably due to a query which has been modified after having been put into "
+ " the cache or a badly implemented clone(). Query class: ["
+ query.getClass()
+ entry.getKey().getClass()
+ "], query: ["
+ query
+ entry.getKey()
+ "]");
}
onEviction(query);
onEviction(entry.getKey(), entry.getValue().queryRamBytesUsed);
} while (iterator.hasNext() && requiresEviction());
}
}
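One detail worth noting about the eviction loop above: iterating uniqueQueries.entrySet() only yields eviction candidates in least-recently-used order because the map is an access-ordered LinkedHashMap (the "this get call moves the query to the most-recently-used position" comment earlier in the diff alludes to this). A small standalone sketch of that LinkedHashMap property:

import java.util.LinkedHashMap;
import java.util.Map;

public class AccessOrderSketch {
  public static void main(String[] args) {
    // accessOrder = true: iteration order is least- to most-recently used.
    Map<String, Integer> lru = new LinkedHashMap<>(16, 0.75f, true);
    lru.put("a", 1);
    lru.put("b", 2);
    lru.put("c", 3);

    lru.get("a"); // touching "a" moves it to the most-recently-used position

    // Prints [b, c, a]: "b" would be the first eviction candidate.
    System.out.println(lru.keySet());
  }
}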
@@ -394,18 +400,18 @@ public void clearCoreCacheKey(Object coreKey) {
public void clearQuery(Query query) {
writeLock.lock();
try {
final Query singleton = uniqueQueries.remove(query);
if (singleton != null) {
onEviction(singleton);
final Record record = uniqueQueries.remove(query);
if (record != null && record.query != null) {
onEviction(record.query, record.queryRamBytesUsed);
}
} finally {
writeLock.unlock();
}
}

private void onEviction(Query singleton) {
private void onEviction(Query singleton, long querySizeInBytes) {
assert writeLock.isHeldByCurrentThread();
onQueryEviction(singleton, getRamBytesUsed(singleton));
onQueryEviction(singleton, querySizeInBytes);
for (LeafCache leafCache : cache.values()) {
leafCache.remove(singleton);
}
@@ -426,10 +432,9 @@ public void clear() {
}

private static long getRamBytesUsed(Query query) {
return LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY
+ (query instanceof Accountable accountableQuery
? accountableQuery.ramBytesUsed()
: QUERY_DEFAULT_RAM_BYTES_USED);
// Here 32 represents a rough shallow size for a query object
long queryRamBytesUsed = RamUsageEstimator.sizeOf(query, 32);
return LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + queryRamBytesUsed;
}
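To illustrate the change in accounting (a hedged sketch, not part of the PR): a query that does not implement Accountable used to be charged the flat QUERY_DEFAULT_RAM_BYTES_USED (1024 bytes) regardless of its real footprint, whereas RamUsageEstimator.sizeOf(query, 32) estimates what the query actually retains:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.RamUsageEstimator;

public class QuerySizingSketch {
  public static void main(String[] args) {
    // TermQuery is not Accountable, so the old code charged both of these
    // a flat 1024 bytes in the cache accounting.
    TermQuery small = new TermQuery(new Term("foo", "bar"));
    TermQuery large = new TermQuery(new Term("foo", "a".repeat(1200)));

    // New accounting: estimate the retained size, using 32 bytes as the
    // assumed shallow size where the estimator cannot measure exactly.
    System.out.println(RamUsageEstimator.sizeOf(small, 32)); // small estimate
    System.out.println(RamUsageEstimator.sizeOf(large, 32)); // > 1024, reflects the 1200-char term
  }
}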

// pkg-private for testing
Expand Down Expand Up @@ -705,6 +710,17 @@ public long ramBytesUsed() {
}
}

// pkg-private for testing
static class Record {
final Query query;
final long queryRamBytesUsed;

Record(Query query, long queryRamBytesUsed) {
this.query = query;
this.queryRamBytesUsed = queryRamBytesUsed;
}
}
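Purely as an aside (a hypothetical simplification, not something this PR proposes): since this diff already uses Java 21+ features elsewhere (the unnamed _ lambda parameter in the tests), the holder could equivalently be a Java record:

// Hypothetical equivalent; Query is org.apache.lucene.search.Query as above,
// and call sites would use record.query() / record.queryRamBytesUsed().
record Record(Query query, long queryRamBytesUsed) {}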

private class CachingWrapperWeight extends ConstantScoreWeight {

private final Weight in;
lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -74,6 +74,7 @@
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SuppressForbidden;

public class TestLRUQueryCache extends LuceneTestCase {
@@ -1062,6 +1063,65 @@ public void testBooleanQueryCachesSubClauses() throws IOException {
dir.close();
}

public void testQuerySizeBytesAreCached() throws IOException {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
Document doc = new Document();
doc.add(new StringField("foo", "bar", Store.YES));
doc.add(new StringField("foo", "quux", Store.YES));
w.addDocument(doc);
w.commit();
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
final LRUQueryCache queryCache =
new LRUQueryCache(1000000, 10000000, _ -> true, Float.POSITIVE_INFINITY);
searcher.setQueryCache(queryCache);
searcher.setQueryCachingPolicy(ALWAYS_CACHE);
String term = "a".repeat(100);
TermQuery termQuery = new TermQuery(new Term("foo", term));
long expectedQueryInBytes =
LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + RamUsageEstimator.sizeOf(termQuery, 32);
searcher.search(new ConstantScoreQuery(termQuery), 1);

assertEquals(1, queryCache.getUniqueQueries().size());
assertEquals(
expectedQueryInBytes, queryCache.getUniqueQueries().get(termQuery).queryRamBytesUsed);
reader.close();
}
}

public void testCacheRamBytesWithALargeTermQuery() throws IOException {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
Document doc = new Document();
doc.add(new StringField("foo", "bar", Store.YES));
doc.add(new StringField("foo", "quux", Store.YES));
w.addDocument(doc);
w.commit();
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
final LRUQueryCache queryCache =
new LRUQueryCache(1000000, 10000000, _ -> true, Float.POSITIVE_INFINITY);
searcher.setQueryCache(queryCache);
searcher.setQueryCachingPolicy(ALWAYS_CACHE);
// Create a large term so the query certainly exceeds the default query size
// estimate, i.e. QUERY_DEFAULT_RAM_BYTES_USED (1024 bytes).
String longTerm = "a".repeat(1200);
TermQuery must = new TermQuery(new Term("foo", longTerm));
long queryInBytes = RamUsageEstimator.sizeOf(must, 32);
assertTrue(queryInBytes > QUERY_DEFAULT_RAM_BYTES_USED);
searcher.search(new ConstantScoreQuery(must), 1);

assertEquals(1, queryCache.cachedQueries().size());
assertTrue(queryCache.ramBytesUsed() >= queryInBytes);
reader.close();
}
}

private static Term randomTerm() {
final String term = RandomPicks.randomFrom(random(), Arrays.asList("foo", "bar", "baz"));
return new Term("foo", term);