@@ -6,13 +6,13 @@ package utils
6
6
7
7
import (
8
8
"fmt"
9
+ "github.com/hashicorp/golang-lru/v2/expirable"
9
10
"net/http"
10
11
"net/url"
11
12
"regexp"
12
13
"strconv"
13
14
"time"
14
15
15
- lru "github.com/hashicorp/golang-lru"
16
16
"github.com/prometheus/client_golang/prometheus"
17
17
"github.com/prometheus/client_golang/prometheus/promauto"
18
18
)
@@ -25,39 +25,35 @@ var (
25
25
type FailedQueryCache struct {
26
26
regex * regexp.Regexp
27
27
errorExtract * regexp.Regexp
28
- lruCache * lru. Cache
28
+ lruCache * expirable. LRU [ string , int ]
29
29
cachedHits prometheus.Counter
30
30
cachedQueries prometheus.Gauge
31
31
}
32
32
33
- func NewFailedQueryCache (capacity int , reg prometheus.Registerer ) ( * FailedQueryCache , error ) {
33
+ func NewFailedQueryCache (capacity int , ttlDuration time. Duration , reg prometheus.Registerer ) * FailedQueryCache {
34
34
regex := regexp .MustCompile (`[\s\n\t]+` )
35
35
errorExtract := regexp .MustCompile (`Code\((\d+)\)` )
36
- lruCache , err := lru .New (capacity )
37
- if err != nil {
38
- lruCache = nil
39
- err = fmt .Errorf ("failed to create lru cache: %s" , err )
40
- return nil , err
41
- }
36
+ lruCacheWithTTL := expirable .NewLRU [string , int ](capacity , nil , ttlDuration )
37
+
42
38
cachedHits := promauto .With (reg ).NewCounter (prometheus.CounterOpts {
43
39
Namespace : "cortex" ,
44
- Name : "cached_failed_queries_count" ,
45
- Help : "Total number of queries that hit the failed query cache." ,
40
+ Name : "cached_failed_queries_count" ,
41
+ Help : "Total number of queries that hit the failed query cache." ,
46
42
})
47
43
cachedQueries := promauto .With (reg ).NewGauge (prometheus.GaugeOpts {
48
44
Namespace : "cortex" ,
49
- Name : "failed_query_cache_size" ,
50
- Help : "How many queries are cached in the failed query cache." ,
45
+ Name : "failed_query_cache_size" ,
46
+ Help : "How many queries are cached in the failed query cache." ,
51
47
})
52
48
cachedQueries .Set (0 )
53
49
54
50
return & FailedQueryCache {
55
51
regex : regex ,
56
52
errorExtract : errorExtract ,
57
- lruCache : lruCache ,
53
+ lruCache : lruCacheWithTTL ,
58
54
cachedHits : cachedHits ,
59
55
cachedQueries : cachedQueries ,
60
- }, err
56
+ }
61
57
}
62
58
63
59
// UpdateFailedQueryCache returns true if the query is cached, so that the call site can increase its counter, and returns a message string for the call site to log the outcome.
@@ -92,19 +88,20 @@ func (f *FailedQueryCache) updateFailedQueryCache(err error, queryExpressionNorm
92
88
93
89
func (f * FailedQueryCache ) addCacheEntry (queryExpressionNormalized string , queryExpressionRangeLength int ) {
94
90
// Checks if queryExpression is already in cache, and updates time range length value to min of stored and new value.
95
- if contains , _ := f .lruCache .ContainsOrAdd (queryExpressionNormalized , queryExpressionRangeLength ); contains {
91
+ if contains := f .lruCache .Contains (queryExpressionNormalized ); contains {
96
92
if oldValue , ok := f .lruCache .Get (queryExpressionNormalized ); ok {
97
- queryExpressionRangeLength = min (queryExpressionRangeLength , oldValue .( int ) )
93
+ queryExpressionRangeLength = min (queryExpressionRangeLength , oldValue )
98
94
}
99
- f .lruCache .Add (queryExpressionNormalized , queryExpressionRangeLength )
100
95
}
96
+ f .lruCache .Add (queryExpressionNormalized , queryExpressionRangeLength )
97
+
101
98
f .cachedQueries .Set (float64 (f .lruCache .Len ()))
102
99
}
103
100
104
101
// queryHitCache checks whether the LRU cache is hit and returns whether to increment the counter for cache hits, along with an appropriate message.
105
- func queryHitCache (queryExpressionNormalized string , queryExpressionRangeLength int , lruCache * lru. Cache , cachedHits prometheus.Counter ) (bool , string ) {
106
- if value , ok := lruCache .Get (queryExpressionNormalized ); ok && value .( int ) <= queryExpressionRangeLength {
107
- cachedQueryRangeSeconds := value .( int )
102
+ func queryHitCache (queryExpressionNormalized string , queryExpressionRangeLength int , lruCache * expirable. LRU [ string , int ] , cachedHits prometheus.Counter ) (bool , string ) {
103
+ if value , ok := lruCache .Get (queryExpressionNormalized ); ok && value <= queryExpressionRangeLength {
104
+ cachedQueryRangeSeconds := value
108
105
message := createLogMessage ("Retrieved query from cache" , queryExpressionNormalized , cachedQueryRangeSeconds , queryExpressionRangeLength , nil )
109
106
cachedHits .Inc ()
110
107
return true , message
@@ -159,7 +156,7 @@ func (f *FailedQueryCache) UpdateFailedQueryCache(err error, query url.Values, q
159
156
queryExpressionRangeLength := getQueryRangeSeconds (query )
160
157
// TODO(hc.zhu): add a flag for the threshold
161
158
// The current gateway timeout is 5 minutes, so we cache the failed query running longer than 5 minutes - 10 seconds.
162
- if queryResponseTime > time .Second * (60 * 5 - 10 ) {
159
+ if queryResponseTime > time .Second * (60 * 5 - 10 ) {
163
160
// Cache long running queries regardless of the error code. The most common case is "context canceled".
164
161
f .addCacheEntry (queryExpressionNormalized , queryExpressionRangeLength )
165
162
message := createLogMessage ("Cached a failed long running query" , queryExpressionNormalized , - 1 , queryExpressionRangeLength , err )
0 commit comments