Skip to content

Commit 56325f1

Browse files
committed
noticket: Better metrics for YdbSessionManager: Add counter-based metrics and min/max pool size gauges
1 parent 585f468 commit 56325f1

File tree

11 files changed

+232
-114
lines changed

11 files changed

+232
-114
lines changed
Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import io.prometheus.client.Collector;
44
import io.prometheus.client.GaugeMetricFamily;
55
import io.prometheus.client.SimpleCollector;
6-
import lombok.experimental.Accessors;
76
import org.slf4j.Logger;
87
import org.slf4j.LoggerFactory;
98
import tech.ydb.yoj.InternalApi;
@@ -13,11 +12,14 @@
1312
import java.util.function.Supplier;
1413

1514
@InternalApi
16-
public class GaugeSupplierCollector extends SimpleCollector<GaugeSupplierCollector.Child> implements Collector.Describable {
17-
private static final Logger log = LoggerFactory.getLogger(GaugeSupplierCollector.class);
15+
public class SupplierCollector extends SimpleCollector<SupplierCollector.Child> implements Collector.Describable {
16+
private static final Logger log = LoggerFactory.getLogger(SupplierCollector.class);
1817

19-
private GaugeSupplierCollector(Builder builder) {
18+
private final Collector.Type type;
19+
20+
/**
 * Creates the collector from its builder and remembers the metric type configured on that builder,
 * so that {@code collect()} can report samples under that type.
 *
 * @param builder builder carrying the common collector settings and the metric type
 */
private SupplierCollector(Builder builder) {
2021
super(builder);
22+
this.type = builder.type;
2123
}
2224

2325
public static Builder build() {
@@ -34,7 +36,7 @@ public List<MetricFamilySamples> collect() {
3436
log.error("Could not add child sample", e);
3537
}
3638
});
37-
return familySamplesList(Type.GAUGE, samples);
39+
return familySamplesList(type, samples);
3840
}
3941

4042
@Override
@@ -51,20 +53,26 @@ public void supplier(Supplier<Number> supplier) {
5153
this.noLabelsChild.supplier(supplier);
5254
}
5355

54-
@Accessors(fluent = true)
55-
public static class Builder extends SimpleCollector.Builder<Builder, GaugeSupplierCollector> {
56+
/**
 * Builder for {@link SupplierCollector}. The metric type is {@link Collector.Type#GAUGE} unless
 * overridden through {@link #type(Type)}.
 */
public static class Builder extends SimpleCollector.Builder<Builder, SupplierCollector> {
57+
// Metric type reported by the built collector; GAUGE unless type(...) is called.
private Collector.Type type = Collector.Type.GAUGE;
58+
59+
/**
 * Sets the metric type the built collector will report.
 *
 * @param type metric type to use
 * @return this builder, for call chaining
 */
public Builder type(Type type) {
60+
this.type = type;
61+
return this;
62+
}
63+
5664
/**
 * Instantiates the collector from the settings accumulated in this builder.
 */
@Override
57-
public GaugeSupplierCollector create() {
58-
return new GaugeSupplierCollector(this);
65+
public SupplierCollector create() {
66+
return new SupplierCollector(this);
5967
}
6068
}
6169

6270
public class Child {
6371
private Supplier<? extends Number> supplier = () -> 0.;
6472

65-
public GaugeSupplierCollector supplier(Supplier<? extends Number> supplier) {
73+
/**
 * Replaces the value supplier of this child.
 *
 * @param supplier supplier to store in this child
 * @return the enclosing collector (not this child), so further {@code labels(...)} calls can be chained
 */
public SupplierCollector supplier(Supplier<? extends Number> supplier) {
6674
this.supplier = supplier;
67-
return GaugeSupplierCollector.this;
75+
return SupplierCollector.this;
6876
}
6977

7078
public double getValue() {
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package tech.ydb.yoj.repository.ydb;
2+
3+
import lombok.Getter;
4+
import tech.ydb.core.grpc.GrpcTransport;
5+
import tech.ydb.scheme.SchemeClient;
6+
import tech.ydb.table.SessionPoolStats;
7+
import tech.ydb.table.TableClient;
8+
import tech.ydb.topic.TopicClient;
9+
import tech.ydb.yoj.repository.ydb.client.SessionManager;
10+
import tech.ydb.yoj.repository.ydb.client.YdbSchemaOperations;
11+
import tech.ydb.yoj.repository.ydb.client.YdbSessionManager;
12+
import tech.ydb.yoj.util.lang.Exceptions;
13+
14+
/*package*/ final class SessionClient implements AutoCloseable {
15+
private final TableClient tableClient;
16+
private final SchemeClient schemeClient;
17+
private final TopicClient topicClient;
18+
19+
@Getter
20+
private final SessionManager sessionManager;
21+
22+
@Getter
23+
private final YdbSchemaOperations schemaOperations;
24+
25+
private final SessionMetrics metrics;
26+
27+
/*package*/ SessionClient(YdbConfig config, YdbRepository.Settings repositorySettings, GrpcTransport transport) {
28+
this.tableClient = createClient(config, repositorySettings, transport);
29+
this.schemeClient = SchemeClient.newClient(transport).build();
30+
this.topicClient = TopicClient.newClient(transport).build();
31+
32+
this.sessionManager = new YdbSessionManager(tableClient, config.getSessionCreationTimeout());
33+
this.schemaOperations = new YdbSchemaOperations(
34+
config.getTablespace(), sessionManager, schemeClient, topicClient
35+
);
36+
37+
this.metrics = new SessionMetrics(tableClient, repositorySettings.metrics());
38+
}
39+
40+
public boolean isHealthy() {
41+
// We consider the database healthy if the number of sessions in the pool is greater than 0.
42+
// Bad sessions will be dropped either due to keep-alive or on the very first error that occurs in that session.
43+
//
44+
// If idleCount == 0, this may mean that the application has just started, or that the database cannot handle the load.
45+
// To account for that case, we check pendingAcquireCount (how many clients are waiting to acquire a session),
46+
// and if it’s more than maxSize of the client queue, we consider the database to be unhealthy.
47+
SessionPoolStats sessionPoolStats = tableClient.sessionPoolStats();
48+
return sessionPoolStats.getIdleCount() > 0 ||
49+
//todo: maybe we should consider pendingAcquireCount > 0 problematic, because there are clients waiting?
50+
sessionPoolStats.getPendingAcquireCount() <= sessionPoolStats.getMaxSize();
51+
}
52+
53+
@Override
54+
public void close() {
55+
Exceptions.closeAll(metrics, tableClient, schemeClient, topicClient);
56+
}
57+
58+
private static TableClient createClient(
59+
YdbConfig config, YdbRepository.Settings repositorySettings, GrpcTransport transport
60+
) {
61+
return buildTableClient(repositorySettings, transport)
62+
.keepQueryText(false)
63+
.sessionKeepAliveTime(config.getSessionKeepAliveTime())
64+
.sessionMaxIdleTime(config.getSessionMaxIdleTime())
65+
.sessionPoolSize(config.getSessionPoolMin(), config.getSessionPoolMax())
66+
.build();
67+
}
68+
69+
private static TableClient.Builder buildTableClient(
70+
YdbRepository.Settings repositorySettings, GrpcTransport transport
71+
) {
72+
// TODO(nvamelichev@): Replace this with expression switch with type pattern as soon as we migrate to Java 21+
73+
var queryImplementation = repositorySettings.queryImplementation();
74+
if (queryImplementation instanceof QueryImplementation.TableService) {
75+
return TableClient.newClient(transport);
76+
} else if (queryImplementation instanceof QueryImplementation.QueryService) {
77+
return tech.ydb.query.impl.TableClientImpl.newClient(transport);
78+
} else {
79+
throw new UnsupportedOperationException("Unknown QueryImplementation: <" + queryImplementation.getClass() + ">");
80+
}
81+
}
82+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package tech.ydb.yoj.repository.ydb;
2+
3+
import io.prometheus.client.Collector;
4+
import io.prometheus.client.Gauge;
5+
import tech.ydb.table.TableClient;
6+
import tech.ydb.yoj.repository.ydb.metrics.SupplierCollector;
7+
8+
/*package*/ final class SessionMetrics implements AutoCloseable {
9+
private static final SupplierCollector legacyCollector = SupplierCollector.build()
10+
.type(Collector.Type.GAUGE)
11+
.namespace("ydb")
12+
.subsystem("session_manager")
13+
.name("pool_stats")
14+
.help("YDB SDK Session pool statistics (as gauges with instant values)")
15+
.labelNames("type")
16+
.register();
17+
18+
private static final Gauge sessionPoolSettings = Gauge.build()
19+
.namespace("ydb")
20+
.subsystem("session_manager")
21+
.name("pool_settings")
22+
.help("YDB SDK Session pool settings")
23+
.labelNames("repository", "type")
24+
.register();
25+
private static final SupplierCollector sessionPoolCounters = SupplierCollector.build()
26+
.type(Collector.Type.COUNTER)
27+
.namespace("ydb")
28+
.subsystem("session_manager")
29+
.name("pool_counters")
30+
.help("YDB SDK Session pool statistics (as total counters)")
31+
.labelNames("repository", "type")
32+
.register();
33+
34+
private final String label;
35+
36+
/*package*/ SessionMetrics(TableClient tableClient, YdbRepository.Settings.Metrics metrics) {
37+
this.label = metrics.repositoryLabel();
38+
39+
legacyCollector
40+
.labels("pending_acquire_count").supplier(() -> tableClient.sessionPoolStats().getPendingAcquireCount())
41+
.labels("acquired_count").supplier(() -> tableClient.sessionPoolStats().getAcquiredCount())
42+
.labels("idle_count").supplier(() -> tableClient.sessionPoolStats().getIdleCount());
43+
44+
sessionPoolSettings.labels(label, "min_size").set(tableClient.sessionPoolStats().getMinSize());
45+
sessionPoolSettings.labels(label, "max_size").set(tableClient.sessionPoolStats().getMaxSize());
46+
47+
sessionPoolCounters
48+
.labels(label, "requested_total").supplier(() -> tableClient.sessionPoolStats().getRequestedTotal())
49+
.labels(label, "acquired_total").supplier(() -> tableClient.sessionPoolStats().getAcquiredTotal())
50+
.labels(label, "released_total").supplier(() -> tableClient.sessionPoolStats().getReleasedTotal())
51+
.labels(label, "created_total").supplier(() -> tableClient.sessionPoolStats().getCreatedTotal())
52+
.labels(label, "deleted_total").supplier(() -> tableClient.sessionPoolStats().getDeletedTotal())
53+
.labels(label, "failed_total").supplier(() -> tableClient.sessionPoolStats().getFailedTotal());
54+
}
55+
56+
@Override
57+
public void close() {
58+
sessionPoolSettings.remove(label, "min_size");
59+
sessionPoolSettings.remove(label, "max_size");
60+
61+
sessionPoolCounters.remove(label, "requested_total");
62+
sessionPoolCounters.remove(label, "acquired_total");
63+
sessionPoolCounters.remove(label, "released_total");
64+
sessionPoolCounters.remove(label, "created_total");
65+
sessionPoolCounters.remove(label, "deleted_total");
66+
sessionPoolCounters.remove(label, "failed_total");
67+
}
68+
}

0 commit comments

Comments
 (0)