diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index bc5fd58775d1..9fd20d0b5111 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -76,6 +76,8 @@ Bug Fixes returns a string which can be parsed back into the original node. (Peter Barna, Adam Schwartz) +* GITHUB#15133: Take the FST suffix cache into account when reporting FST ramBytesUsed (Anh Dung Bui) + * GITHUB#14847: Allow Faiss vector format to index >2GB of vectors per-field per-segment by using MemorySegment APIs (instead of ByteBuffer) to copy bytes to native memory. (Kaival Parikh) diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java index 43a3bfc62631..b99a80d09c92 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java @@ -1038,6 +1038,9 @@ public long fstRamBytesUsed() { if (dataOutput instanceof Accountable) { ramBytesUsed += ((Accountable) dataOutput).ramBytesUsed(); } + if (suffixDedupCache != null) { + ramBytesUsed += suffixDedupCache.ramBytesUsed(); + } return ramBytesUsed; } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTSuffixNodeCache.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTSuffixNodeCache.java index f33f09e90723..8171dbd26c18 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTSuffixNodeCache.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTSuffixNodeCache.java @@ -17,7 +17,9 @@ package org.apache.lucene.util.fst; import java.io.IOException; +import org.apache.lucene.util.Accountable; import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PagedGrowableWriter; @@ -48,7 +50,10 @@ * PagedGrowableWriter} to store the mapping, which allows efficient packing the hash & address long * values, and uses {@link ByteBlockPool} to store the actual node content (arcs & outputs). */ -final class FSTSuffixNodeCache { +final class FSTSuffixNodeCache implements Accountable { + + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(FSTSuffixNodeCache.class); // primary table -- we add nodes into this until it reaches the requested tableSizeLimit/2, then // we move it to fallback @@ -234,8 +239,18 @@ private long hash(FSTCompiler.UnCompiledNode node) { return h; } + @Override + public long ramBytesUsed() { + long ramBytesUsed = BASE_RAM_BYTES + primaryTable.ramBytesUsed(); + if (fallbackTable != null) { + ramBytesUsed += fallbackTable.ramBytesUsed(); + } + return ramBytesUsed; + } + /** Inner class because it needs access to hash function and FST bytes. */ - class PagedGrowableHash { + class PagedGrowableHash implements Accountable { + // storing the FST node address where the position is the masked hash of the node arcs private PagedGrowableWriter fstNodeAddress; // storing the local copiedNodes address in the same position as fstNodeAddress @@ -485,5 +500,12 @@ private FST.BytesReader getBytesReader(long nodeAddress, long hashSlot) { bytesReader.setPosDelta(nodeAddress - localAddress); return bytesReader; } + + @Override + public long ramBytesUsed() { + return copiedNodes.ramBytesUsed() + + fstNodeAddress.ramBytesUsed() + + copiedNodeAddress.ramBytesUsed(); + } } }