Skip to content

Commit d99f184

Browse files
committed
WIP WIP WIP Handle indeterminate YDB request state
1 parent 26f2caf commit d99f184

File tree

19 files changed

+317
-123
lines changed

19 files changed

+317
-123
lines changed

aspect/src/main/java/tech/ydb/yoj/aspect/tx/YojTransactionAspect.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import org.aspectj.lang.annotation.Aspect;
77
import tech.ydb.yoj.repository.db.Tx;
88
import tech.ydb.yoj.repository.db.TxManager;
9+
import tech.ydb.yoj.repository.db.exception.ConditionallyRetryableException;
910
import tech.ydb.yoj.repository.db.exception.RetryableException;
1011

1112
/**
@@ -68,7 +69,7 @@ private Object doInTransaction(ProceedingJoinPoint pjp, YojTransactional transac
6869

6970
return localTx.tx(() -> safeCall(pjp));
7071
}
71-
} catch (CallRetryableException | CallException e) {
72+
} catch (CallRetryableException | CallConditionallyRetryableException | CallException e) {
7273
throw e.getCause();
7374
}
7475
}
@@ -88,17 +89,28 @@ Object safeCall(ProceedingJoinPoint pjp) {
8889
return pjp.proceed();
8990
} catch (RetryableException e) {
9091
throw new CallRetryableException(e);
92+
} catch (ConditionallyRetryableException e) {
93+
throw new CallConditionallyRetryableException(e);
9194
} catch (Throwable e) {
9295
throw new CallException(e);
9396
}
9497
}
9598

9699
/**
97-
* It's a hint for tx manager to retry was requested
100+
* It's a hint for tx manager that an unconditional retry was requested
98101
*/
99102
static class CallRetryableException extends RetryableException {
100103
CallRetryableException(RetryableException e) {
101-
super(e.getMessage(), e.getCause());
104+
super(e.getMessage(), e.getRetryPolicy(), e.getCause());
105+
}
106+
}
107+
108+
/**
109+
* It's a hint for tx manager that a conditional retry was requested
110+
*/
111+
static class CallConditionallyRetryableException extends ConditionallyRetryableException {
112+
CallConditionallyRetryableException(ConditionallyRetryableException e) {
113+
super(e.getMessage(), e.getRetryPolicy(), e.getCause());
102114
}
103115
}
104116

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
package tech.ydb.yoj.repository.ydb.exception;
22

33
import tech.ydb.yoj.repository.db.exception.RetryableException;
4+
import tech.ydb.yoj.util.retry.RetryPolicy;
45

56
/**
67
* Tried to use a no longer active or valid YDB session, e.g. on a node that is now down.
78
*/
89
public class BadSessionException extends RetryableException {
910
public BadSessionException(String message) {
10-
super(message);
11+
super(message, RetryPolicy.retryImmediately());
1112
}
1213
}

repository-ydb-common/src/main/java/tech/ydb/yoj/repository/ydb/exception/YdbClientInternalException.java

Lines changed: 0 additions & 23 deletions
This file was deleted.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package tech.ydb.yoj.repository.ydb.exception;
2+
3+
import lombok.Getter;
4+
import tech.ydb.yoj.ExperimentalApi;
5+
import tech.ydb.yoj.repository.db.exception.ConditionallyRetryableException;
6+
import tech.ydb.yoj.util.lang.Strings;
7+
import tech.ydb.yoj.util.retry.RetryPolicy;
8+
9+
/**
10+
* Base class for <em>conditionally-retryable</em> exceptions from the YDB database, the YDB Java SDK, and the GRPC client used by the YDB Java SDK.
11+
*
12+
* @see ConditionallyRetryableException conditionally-retryable exceptions
13+
*/
14+
// TODO(nvamelichev): Add subclasses of YdbConditionallyRetryableException as needed
15+
@ExperimentalApi(issue = "https://github.com/ydb-platform/yoj-project/issues/165")
16+
public class YdbConditionallyRetryableException extends ConditionallyRetryableException {
17+
private static final RetryPolicy UNDETERMINED_BACKOFF = RetryPolicy.expBackoff(5L, 500L, 0.1, 2.0);
18+
19+
@Getter
20+
private final Enum<?> statusCode;
21+
22+
public YdbConditionallyRetryableException(String message, Enum<?> statusCode, Object request, Object response) {
23+
super(Strings.join("\n", "[" + statusCode + "] " + message, request, response), UNDETERMINED_BACKOFF);
24+
this.statusCode = statusCode;
25+
}
26+
}

repository-ydb-common/src/main/java/tech/ydb/yoj/repository/ydb/exception/YdbUnauthenticatedException.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
import tech.ydb.yoj.repository.db.exception.RetryableException;
55
import tech.ydb.yoj.repository.db.exception.UnavailableException;
66
import tech.ydb.yoj.util.lang.Strings;
7+
import tech.ydb.yoj.util.retry.RetryPolicy;
78

89
/**
910
* YDB authentication failure, possibly a transient one. E.g., used a recently expired token.
1011
*/
1112
public class YdbUnauthenticatedException extends RetryableException {
1213
public YdbUnauthenticatedException(Object request, Object response) {
13-
super(Strings.join("\n", request, response));
14+
super(Strings.join("\n", request, response), RetryPolicy.retryImmediately());
1415
}
1516

1617
@Override

repository-ydb-common/src/main/java/tech/ydb/yoj/repository/ydb/exception/YdbUnauthorizedException.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44
import tech.ydb.yoj.repository.db.exception.RetryableException;
55
import tech.ydb.yoj.repository.db.exception.UnavailableException;
66
import tech.ydb.yoj.util.lang.Strings;
7+
import tech.ydb.yoj.util.retry.RetryPolicy;
78

89
/**
910
* YDB authorization failure, possibly a transient one. E.g., the principal tried to write to the database but has no
1011
* write-allowing role assigned.
1112
*/
1213
public class YdbUnauthorizedException extends RetryableException {
1314
public YdbUnauthorizedException(Object request, Object response) {
14-
super(Strings.join("\n", request, response));
15+
super(Strings.join("\n", request, response), RetryPolicy.retryImmediately());
1516
}
1617

1718
@Override

repository-ydb-v2/src/main/java/tech/ydb/yoj/repository/ydb/YdbRepositoryTransaction.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
import tech.ydb.yoj.repository.db.exception.IllegalTransactionScanException;
5757
import tech.ydb.yoj.repository.db.exception.OptimisticLockException;
5858
import tech.ydb.yoj.repository.db.exception.RepositoryException;
59-
import tech.ydb.yoj.repository.db.exception.UnavailableException;
6059
import tech.ydb.yoj.repository.db.readtable.ReadTableParams;
6160
import tech.ydb.yoj.repository.ydb.bulk.BulkMapper;
6261
import tech.ydb.yoj.repository.ydb.client.ResultSetConverter;
@@ -65,8 +64,7 @@
6564
import tech.ydb.yoj.repository.ydb.exception.BadSessionException;
6665
import tech.ydb.yoj.repository.ydb.exception.ResultTruncatedException;
6766
import tech.ydb.yoj.repository.ydb.exception.UnexpectedException;
68-
import tech.ydb.yoj.repository.ydb.exception.YdbComponentUnavailableException;
69-
import tech.ydb.yoj.repository.ydb.exception.YdbOverloadedException;
67+
import tech.ydb.yoj.repository.ydb.exception.YdbConditionallyRetryableException;
7068
import tech.ydb.yoj.repository.ydb.exception.YdbRepositoryException;
7169
import tech.ydb.yoj.repository.ydb.merge.QueriesMerger;
7270
import tech.ydb.yoj.repository.ydb.readtable.ReadTableMapper;
@@ -176,8 +174,11 @@ private void doCommit() {
176174
Status status = YdbOperations.safeJoin(session.commitTransaction(txId, new CommitTxSettings()));
177175
validatePkConstraint(status.getIssues());
178176
validate("commit", status.getCode(), status.toString());
179-
} catch (YdbComponentUnavailableException | YdbOverloadedException e) {
180-
throw new UnavailableException("Unknown transaction state: commit was sent, but result is unknown", e);
177+
} catch (YdbConditionallyRetryableException e) {
178+
throw switch (options.getRetryOptions().getConditionalRetryMode()) {
179+
case NEVER, UNTIL_COMMIT -> e.failImmediately();
180+
case ALWAYS -> e;
181+
};
181182
}
182183
}
183184

@@ -222,7 +223,7 @@ private boolean isFinalActionNeeded(String actionName) {
222223
return false;
223224
}
224225
if (options.isReadOnly() && options.getIsolationLevel() != IsolationLevel.SNAPSHOT) {
225-
transactionLocal.log().info("No-op %s: read-only tx @%s", actionName, options.getIsolationLevel());
226+
transactionLocal.log().info("No-op %s: read-only non-SNAPSHOT tx @%s", actionName, options.getIsolationLevel());
226227
return false;
227228
}
228229
if (txId == null) {

repository-ydb-v2/src/main/java/tech/ydb/yoj/repository/ydb/client/YdbSessionManager.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import tech.ydb.table.TableClient;
66
import tech.ydb.yoj.InternalApi;
77
import tech.ydb.yoj.repository.db.exception.QueryInterruptedException;
8-
import tech.ydb.yoj.repository.db.exception.RetryableException;
8+
import tech.ydb.yoj.repository.db.exception.RetryableExceptionBase;
99
import tech.ydb.yoj.repository.db.exception.UnavailableException;
1010
import tech.ydb.yoj.repository.ydb.metrics.GaugeSupplierCollector;
1111

@@ -78,7 +78,7 @@ public void warmup() {
7878
try {
7979
session = getSession();
8080
break;
81-
} catch (RetryableException ex) {
81+
} catch (RetryableExceptionBase ex) {
8282
if (i == maxRetrySessionCreateCount - 1) {
8383
throw ex;
8484
}

repository-ydb-v2/src/main/java/tech/ydb/yoj/repository/ydb/client/YdbValidator.java

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
import tech.ydb.yoj.repository.db.exception.OptimisticLockException;
1313
import tech.ydb.yoj.repository.db.exception.QueryCancelledException;
1414
import tech.ydb.yoj.repository.ydb.exception.BadSessionException;
15-
import tech.ydb.yoj.repository.ydb.exception.YdbClientInternalException;
1615
import tech.ydb.yoj.repository.ydb.exception.YdbComponentUnavailableException;
16+
import tech.ydb.yoj.repository.ydb.exception.YdbConditionallyRetryableException;
1717
import tech.ydb.yoj.repository.ydb.exception.YdbOverloadedException;
1818
import tech.ydb.yoj.repository.ydb.exception.YdbRepositoryException;
1919
import tech.ydb.yoj.repository.ydb.exception.YdbSchemaException;
@@ -39,63 +39,75 @@ public static void validate(String request, StatusCode statusCode, String respon
3939
}
4040

4141
// Current session can no longer be used. Retry immediately by creating a new session
42-
case BAD_SESSION,
43-
SESSION_EXPIRED,
44-
// Prepared statement or transaction was not found
45-
NOT_FOUND -> throw new BadSessionException(response);
42+
case BAD_SESSION, // This session is no longer available. Create a new session
43+
SESSION_EXPIRED, // The session has already expired. Create a new session
44+
NOT_FOUND -> { // Prepared statement or transaction was not found in current session. Create a new session
45+
throw new BadSessionException(response);
46+
}
4647

4748
// Transaction locks invalidated: somebody touched the same rows that we've read and/or changed in a SERIALIZABLE-level transaction.
4849
// Retry immediately
4950
case ABORTED -> throw new OptimisticLockException(response);
5051

52+
// The request was cancelled because the request timeout (CancelAfter) has expired. The request has been cancelled on the server.
53+
// Non-retryable
54+
case CANCELLED -> throw new QueryCancelledException(response);
55+
56+
// Client query timeouts. Non-retryable
57+
case CLIENT_DEADLINE_EXPIRED, // Deadline expired before the request was sent to the server
58+
CLIENT_DEADLINE_EXCEEDED -> { // Client could not get response from the server in time
59+
throw new DeadlineExceededException(response);
60+
}
61+
5162
// DB overloaded and similar conditions. Slow retry with exponential backoff
52-
case OVERLOADED,
53-
// DB took too long to respond
54-
TIMEOUT,
55-
// The request was cancelled because the request timeout (CancelAfter) has expired. The request has been cancelled on the server
56-
CANCELLED,
57-
// Not enough resources to process the request
58-
CLIENT_RESOURCE_EXHAUSTED,
59-
// Deadline expired before the request was sent to the server
60-
CLIENT_DEADLINE_EXPIRED,
61-
// The request was cancelled on the client, at the transport level (because the GRPC deadline expired)
62-
CLIENT_DEADLINE_EXCEEDED -> {
63+
case OVERLOADED, // A part of the system is overloaded. Retry the last action (query) and reduce the query rate.
64+
CLIENT_RESOURCE_EXHAUSTED -> { // Not enough resources to process the request
6365
checkGrpcContextStatus(response, null);
6466

65-
// The result of the request is unknown; it might have been cancelled... or it executed successfully!
6667
log.warn("""
6768
Database is overloaded, but we still got a reply from the DB
6869
Request: {}
6970
Response: {}""", request, response);
7071
throw new YdbOverloadedException(request, response);
7172
}
7273

73-
// Unknown error on the client side (most often at the transport level). Fast retry with fixed interval
74-
case CLIENT_CANCELLED,
75-
CLIENT_GRPC_ERROR,
76-
CLIENT_INTERNAL_ERROR -> {
74+
// The query cannot be executed in the current state. Non-retryable
75+
//
76+
// NB: Primary key/UNIQUE index violations are checked by YdbValidator.validatePkConstraint() separately,
77+
// before YdbValidator.validate() is called!
78+
// And all other, unknown "failed preconditions" are considered to be non-retryable.
79+
case PRECONDITION_FAILED -> throw new YdbRepositoryException(request, response);
80+
81+
// DB, one of its components, or a client-side resource (endpoint discovery, session limit) is temporarily unavailable. Fast retry with fixed interval
82+
case UNAVAILABLE, // DB responded that it or some of its subsystems are unavailable
83+
CLIENT_DISCOVERY_FAILED, // Error occurred while retrieving the list of endpoints
84+
CLIENT_LIMITS_REACHED, // Client-side session limit reached
85+
SESSION_BUSY -> { // Another query is being executed in this session, should retry with a new session
7786
checkGrpcContextStatus(response, null);
7887

7988
log.warn("""
80-
YDB SDK internal error or cancellation
89+
Some database components are not available, but we still got a reply from the DB
8190
Request: {}
8291
Response: {}""", request, response);
83-
throw new YdbClientInternalException(request, response);
92+
throw new YdbComponentUnavailableException(request, response);
8493
}
8594

86-
// DB, one of its components, or the transport is temporarily unavailable. Fast retry with fixed interval
87-
case UNAVAILABLE, // DB responded that it or some of its subsystems are unavailable
88-
TRANSPORT_UNAVAILABLE, // Network connectivity issues
89-
CLIENT_DISCOVERY_FAILED, // Error occurred while retrieving the list of endpoints
90-
CLIENT_LIMITS_REACHED, // Client-side session limit reached
91-
UNDETERMINED,
92-
SESSION_BUSY, // Another query is being executed in this session, should retry with a new session
93-
PRECONDITION_FAILED -> {
95+
// The result of the request is unknown: it might never have reached the server, might have been cancelled... or might have executed successfully!
96+
case TIMEOUT, // Query timeout expired. If the query is conditionally retryable, retry it
97+
UNDETERMINED, // Unknown transaction state. We don't know if it has been committed or not
98+
CLIENT_CANCELLED, // GRPC call to the server has been cancelled. We don't know if the server performed the request or not
99+
TRANSPORT_UNAVAILABLE, // Network connectivity issues. We don't know if the server performed the request or not
100+
CLIENT_INTERNAL_ERROR -> { // Internal YDB SDK error, assumed to be transient
94101
log.warn("""
95-
Some database components are not available, but we still got a reply from the DB
102+
Indeterminate request state: it's not known whether the request reached the DB and was performed
96103
Request: {}
97104
Response: {}""", request, response);
98-
throw new YdbComponentUnavailableException(request, response);
105+
throw new YdbConditionallyRetryableException(
106+
"Indeterminate request state: it's not known whether the request reached the DB and was performed",
107+
statusCode,
108+
request,
109+
response
110+
);
99111
}
100112

101113
// GRPC client reports that the request was not authenticated properly. Retry immediately.
@@ -124,13 +136,13 @@ public static void validate(String request, StatusCode statusCode, String respon
124136

125137
// Serious internal error. No retries
126138
case CLIENT_CALL_UNIMPLEMENTED,
139+
CLIENT_GRPC_ERROR,
127140
BAD_REQUEST,
128141
UNSUPPORTED,
129142
INTERNAL_ERROR,
130143
GENERIC_ERROR,
131144
UNUSED_STATUS,
132-
// This status is used by other YDB services (not the {Table,Query}Service). This is *NOT* a form of PRECONDITION_FAILED!
133-
ALREADY_EXISTS -> {
145+
ALREADY_EXISTS -> { // Used by other YDB services (not the {Table,Query}Service). This is *NOT* a form of PRECONDITION_FAILED!
134146
log.error("""
135147
Bad response status
136148
Request: {}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package tech.ydb.yoj.repository.db;
2+
3+
import tech.ydb.yoj.ExperimentalApi;
4+
5+
/**
6+
* Specifies how to retry YOJ tx on a {@link tech.ydb.yoj.repository.db.exception.ConditionallyRetryableException conditionally-retryable} error.
7+
* <p>The YOJ default is {@link #UNTIL_COMMIT}: the whole transaction body will be retried if a commit has not yet been attempted, or read-only
8+
* or scan mode is used.
9+
*/
10+
@ExperimentalApi(issue = "https://github.com/ydb-platform/yoj-project/issues/165")
11+
public enum ConditionalRetryMode {
12+
/**
13+
* Never retry conditionally-retryable errors, even if the transaction commit has not yet been attempted.
14+
*/
15+
NEVER,
16+
/**
17+
* Retry the whole transaction body on a conditionally-retryable error, but only if transaction commit has not yet been attempted,
18+
* or read-only or scan mode is used.
19+
*/
20+
UNTIL_COMMIT,
21+
/**
22+
* Retry the whole transaction body on a conditionally-retryable error, even if it occurred on a transaction commit.
23+
*/
24+
ALWAYS,
25+
}

0 commit comments

Comments
 (0)