Implement Indexed Priority Queue to improve grouping performance (#251)

dilakshan · dilakshan · commit a65e75dbe269 · 2025-04-13T17:46:54.000Z
Signed-off-by: dilakshan <mdilakshan@isa.ae>
diff --git a/src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java b/src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java
@@ -28,7 +28,6 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
-import java.util.concurrent.PriorityBlockingQueue;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
@@ -46,6 +45,7 @@
 import org.opensearch.plugin.insights.core.service.grouper.MinMaxHeapQueryGrouper;
 import org.opensearch.plugin.insights.core.service.grouper.QueryGrouper;
 import org.opensearch.plugin.insights.core.utils.ExporterReaderUtils;
+import org.opensearch.plugin.insights.core.utils.IndexedPriorityQueue;
 import org.opensearch.plugin.insights.rules.model.AggregationType;
 import org.opensearch.plugin.insights.rules.model.Attribute;
 import org.opensearch.plugin.insights.rules.model.GroupingType;
@@ -99,7 +99,7 @@ public class TopQueriesService {
     /**
      * The internal thread-safe store that holds the top n queries insight data
      */
-    private final PriorityBlockingQueue<SearchQueryRecord> topQueriesStore;
+    private final IndexedPriorityQueue<SearchQueryRecord> topQueriesStore;
 
     /**
      * The AtomicReference of a snapshot of the current window top queries for getters to consume
@@ -144,7 +144,7 @@ public class TopQueriesService {
         this.topNSize = QueryInsightsSettings.DEFAULT_TOP_N_SIZE;
         this.windowSize = QueryInsightsSettings.DEFAULT_WINDOW_SIZE;
         this.windowStart = -1L;
-        topQueriesStore = new PriorityBlockingQueue<>(topNSize, (a, b) -> SearchQueryRecord.compare(a, b, metricType));
+        topQueriesStore = new IndexedPriorityQueue<>(topNSize, (a, b) -> SearchQueryRecord.compare(a, b, metricType));
         topQueriesCurrentSnapshot = new AtomicReference<>(new ArrayList<>());
         topQueriesHistorySnapshot = new AtomicReference<>(new ArrayList<>());
         queryGrouper = new MinMaxHeapQueryGrouper(
@@ -411,8 +411,7 @@ void consumeRecords(final List<SearchQueryRecord> records) {
         // add records in current window, if there are any, to the top n store
         addToTopNStore(recordsInThisWindow);
         // update the current window snapshot for getters to consume
-        final List<SearchQueryRecord> newSnapShot = new ArrayList<>(topQueriesStore);
-        newSnapShot.sort((a, b) -> SearchQueryRecord.compare(a, b, metricType));
+        final List<SearchQueryRecord> newSnapShot = topQueriesStore.toSortedList();
         topQueriesCurrentSnapshot.set(newSnapShot);
     }
 
@@ -422,8 +421,9 @@ private void addToTopNStore(final List<SearchQueryRecord> records) {
                 queryGrouper.add(record);
             }
         } else {
-            topQueriesStore.addAll(records);
-            // remove top elements for fix sizing priority queue
+            for (SearchQueryRecord record : records) {
+                topQueriesStore.insert(record);
+            }
             while (topQueriesStore.size() > topNSize) {
                 topQueriesStore.poll();
             }
@@ -442,7 +442,7 @@ private void rotateWindowIfNecessary(final long newWindowStart) {
             final List<SearchQueryRecord> history = new ArrayList<>();
             // rotate the current window to history store only if the data belongs to the last window
             if (windowStart == newWindowStart - windowSize.getMillis()) {
-                history.addAll(topQueriesStore);
+                history.addAll(topQueriesStore.getAllElements());
             }
             topQueriesHistorySnapshot.set(history);
             topQueriesStore.clear();
diff --git a/src/main/java/org/opensearch/plugin/insights/core/service/grouper/MinMaxHeapQueryGrouper.java b/src/main/java/org/opensearch/plugin/insights/core/service/grouper/MinMaxHeapQueryGrouper.java
@@ -11,10 +11,10 @@
 import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_QUERIES_MAX_GROUPS_EXCLUDING_N;
 
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.PriorityBlockingQueue;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.opensearch.common.collect.Tuple;
+import org.opensearch.plugin.insights.core.utils.IndexedPriorityQueue;
 import org.opensearch.plugin.insights.rules.model.AggregationType;
 import org.opensearch.plugin.insights.rules.model.Attribute;
 import org.opensearch.plugin.insights.rules.model.GroupingType;
@@ -58,14 +58,14 @@ public class MinMaxHeapQueryGrouper implements QueryGrouper {
     /**
      * Min heap to keep track of the Top N query groups and is passed from TopQueriesService as the topQueriesStore
      */
-    private final PriorityBlockingQueue<SearchQueryRecord> minHeapTopQueriesStore;
+    private final IndexedPriorityQueue<SearchQueryRecord> minHeapTopQueriesStore;
     /**
      * The Max heap is an overflow data structure used to manage records that exceed the capacity of the Min heap.
      * It stores all records not included in the Top N query results. When the aggregate measurement for one of these
      * records is updated and it now qualifies as part of the Top N, the record is moved from the Max heap to the Min heap,
      * and the records are rearranged accordingly.
      */
-    private final PriorityBlockingQueue<SearchQueryRecord> maxHeapQueryStore;
+    private final IndexedPriorityQueue<SearchQueryRecord> maxHeapQueryStore;
 
     /**
      * Top N size based on the configuration set
@@ -84,7 +84,7 @@ public MinMaxHeapQueryGrouper(
         final MetricType metricType,
         final GroupingType groupingType,
         final AggregationType aggregationType,
-        final PriorityBlockingQueue<SearchQueryRecord> topQueriesStore,
+        final IndexedPriorityQueue<SearchQueryRecord> topQueriesStore,
         final int topNSize
     ) {
         this.groupingType = groupingType;
@@ -94,7 +94,7 @@ public MinMaxHeapQueryGrouper(
         this.minHeapTopQueriesStore = topQueriesStore;
         this.topNSize = topNSize;
         this.maxGroups = QueryInsightsSettings.DEFAULT_GROUPS_EXCLUDING_TOPN_LIMIT;
-        this.maxHeapQueryStore = new PriorityBlockingQueue<>(maxGroups, (a, b) -> SearchQueryRecord.compare(b, a, metricType));
+        this.maxHeapQueryStore = new IndexedPriorityQueue<>(maxGroups, (a, b) -> SearchQueryRecord.compare(b, a, metricType));
     }
 
     /**
@@ -207,7 +207,7 @@ public void updateTopNSize(final int newSize) {
     }
 
     private void addToMinPQ(final SearchQueryRecord searchQueryRecord, final String groupId) {
-        minHeapTopQueriesStore.add(searchQueryRecord);
+        minHeapTopQueriesStore.insert(searchQueryRecord);
         groupIdToAggSearchQueryRecord.put(groupId, new Tuple<>(searchQueryRecord, true));
         overflow();
     }
@@ -232,7 +232,7 @@ private void addAndPromote(
     private void overflow() {
         if (minHeapTopQueriesStore.size() > topNSize) {
             SearchQueryRecord recordMovedFromMinToMax = minHeapTopQueriesStore.poll();
-            maxHeapQueryStore.add(recordMovedFromMinToMax);
+            maxHeapQueryStore.insert(recordMovedFromMinToMax);
             groupIdToAggSearchQueryRecord.put(recordMovedFromMinToMax.getGroupingId(), new Tuple<>(recordMovedFromMinToMax, false));
         }
     }
diff --git a/src/main/java/org/opensearch/plugin/insights/core/utils/IndexedPriorityQueue.java b/src/main/java/org/opensearch/plugin/insights/core/utils/IndexedPriorityQueue.java
@@ -0,0 +1,197 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugin.insights.core.utils;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Binary min-heap, ordered by the supplied comparator, that also keeps an element-to-position map so
+ * that membership tests and removal of arbitrary elements do not need a linear scan.
+ *
+ * <p>Every public method synchronizes on this instance. Elements act as map keys: an element equal to
+ * one already queued is rejected by {@link #insert}, and lookups only work while an element's
+ * {@code equals}/{@code hashCode} stay unchanged after insertion.
+ *
+ * @param <T> element type
+ */
+public class IndexedPriorityQueue<T> {
+
+    /** Heap-ordered backing list; the head (lowest element) is at index 0. */
+    private final List<T> heap;
+    /** Current position inside {@link #heap} of every queued element. */
+    private final Map<T, Integer> indexMap;
+    /** Ordering of the heap; the element that compares lowest is the head. */
+    private final Comparator<? super T> comparator;
+
+    /**
+     * Creates an empty queue.
+     *
+     * @param initialCapacity initial capacity of the backing list and map
+     * @param comparator ordering of the elements; the lowest element is polled first
+     */
+    public IndexedPriorityQueue(int initialCapacity, Comparator<? super T> comparator) {
+        this.comparator = comparator;
+        this.heap = new ArrayList<>(initialCapacity);
+        this.indexMap = new HashMap<>(initialCapacity);
+    }
+
+    /**
+     * Adds an element unless an equal one is already queued.
+     *
+     * @param item element to add
+     * @return {@code true} if the element was added, {@code false} if an equal element is present
+     */
+    public synchronized boolean insert(T item) {
+        if (indexMap.containsKey(item)) {
+            return false;
+        }
+        heap.add(item);
+        // siftUp records the element's final position in indexMap.
+        siftUp(heap.size() - 1);
+        return true;
+    }
+
+    /**
+     * Removes the queued element equal to {@code item}.
+     *
+     * @param item element to remove
+     * @return {@code true} if an element was removed, {@code false} if none was queued
+     */
+    public synchronized boolean remove(T item) {
+        Integer idx = indexMap.get(item);
+        if (idx == null) {
+            return false;
+        }
+        removeAt(idx);
+        return true;
+    }
+
+    /**
+     * @return a copy of the queued elements in heap order (not sorted)
+     */
+    public synchronized Collection<T> getAllElements() {
+        return new ArrayList<>(heap);
+    }
+
+    /**
+     * Removes and returns the head of the queue. The head is removed by position rather than through
+     * the index map, so a successful poll always shrinks the queue by one element.
+     *
+     * @return the lowest element, or {@code null} if the queue is empty
+     */
+    public synchronized T poll() {
+        return heap.isEmpty() ? null : removeAt(0);
+    }
+
+    /**
+     * @return the lowest element without removing it, or {@code null} if the queue is empty
+     */
+    public synchronized T peek() {
+        return heap.isEmpty() ? null : heap.get(0);
+    }
+
+    /**
+     * @param item element to look up
+     * @return {@code true} if an element equal to {@code item} is queued
+     */
+    public synchronized boolean contains(T item) {
+        return indexMap.containsKey(item);
+    }
+
+    /**
+     * @return the number of queued elements
+     */
+    public synchronized int size() {
+        return heap.size();
+    }
+
+    /**
+     * @return {@code true} if the queue holds no elements
+     */
+    public synchronized boolean isEmpty() {
+        return heap.isEmpty();
+    }
+
+    /** Removes every element. */
+    public synchronized void clear() {
+        heap.clear();
+        indexMap.clear();
+    }
+
+    /**
+     * @return a copy of the queued elements sorted by the queue's comparator, head first
+     */
+    public synchronized List<T> toSortedList() {
+        List<T> sorted = new ArrayList<>(heap);
+        sorted.sort(comparator);
+        return sorted;
+    }
+
+    /** Removes and returns the element at {@code idx}, filling the hole with the last element. */
+    private T removeAt(int idx) {
+        T removed = heap.get(idx);
+        // Shrink the heap first so that sifting never sees a stale copy of the last element.
+        T last = heap.remove(heap.size() - 1);
+        indexMap.remove(removed);
+        if (idx < heap.size()) {
+            heap.set(idx, last);
+            siftDown(idx);
+            if (heap.get(idx) == last) {
+                // It did not move down, so it may still belong closer to the root.
+                siftUp(idx);
+            }
+        }
+        return removed;
+    }
+
+    /** Moves the element at {@code idx} towards the root while it compares lower than its parent. */
+    private void siftUp(int idx) {
+        T item = heap.get(idx);
+        while (idx > 0) {
+            int parentIdx = (idx - 1) >>> 1;
+            T parent = heap.get(parentIdx);
+            if (comparator.compare(item, parent) >= 0) {
+                break;
+            }
+            heap.set(idx, parent);
+            indexMap.put(parent, idx);
+            idx = parentIdx;
+        }
+        heap.set(idx, item);
+        indexMap.put(item, idx);
+    }
+
+    /** Moves the element at {@code idx} towards the leaves while a child compares lower than it. */
+    private void siftDown(int idx) {
+        int size = heap.size();
+        int half = size >>> 1; // positions from half onwards have no children
+        T item = heap.get(idx);
+        while (idx < half) {
+            int child = (idx << 1) + 1;
+            int right = child + 1;
+            if (right < size && comparator.compare(heap.get(right), heap.get(child)) < 0) {
+                child = right;
+            }
+            T childItem = heap.get(child);
+            if (comparator.compare(item, childItem) <= 0) {
+                break;
+            }
+            heap.set(idx, childItem);
+            indexMap.put(childItem, idx);
+            idx = child;
+        }
+        heap.set(idx, item);
+        indexMap.put(item, idx);
+    }
+}
diff --git a/src/test/java/org/opensearch/plugin/insights/core/service/grouper/MinMaxHeapQueryGrouperTests.java b/src/test/java/org/opensearch/plugin/insights/core/service/grouper/MinMaxHeapQueryGrouperTests.java
@@ -11,9 +11,9 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
-import java.util.concurrent.PriorityBlockingQueue;
 import org.junit.Before;
 import org.opensearch.plugin.insights.QueryInsightsTestUtils;
+import org.opensearch.plugin.insights.core.utils.IndexedPriorityQueue;
 import org.opensearch.plugin.insights.rules.model.AggregationType;
 import org.opensearch.plugin.insights.rules.model.Attribute;
 import org.opensearch.plugin.insights.rules.model.GroupingType;
@@ -27,7 +27,7 @@
  */
 public class MinMaxHeapQueryGrouperTests extends OpenSearchTestCase {
     private MinMaxHeapQueryGrouper minMaxHeapQueryGrouper;
-    private PriorityBlockingQueue<SearchQueryRecord> topQueriesStore = new PriorityBlockingQueue<>(
+    private IndexedPriorityQueue<SearchQueryRecord> topQueriesStore = new IndexedPriorityQueue<>(
         100,
         (a, b) -> SearchQueryRecord.compare(a, b, MetricType.LATENCY)
     );