48
48
import org .slf4j .LoggerFactory ;
49
49
50
50
import java .io .IOException ;
51
+ import java .util .ArrayList ;
51
52
import java .util .Collections ;
53
+ import java .util .List ;
52
54
import java .util .Map ;
55
+ import java .util .function .Function ;
53
56
import javax .annotation .Nullable ;
54
57
55
58
/**
@@ -100,9 +103,6 @@ public void run(ActionContext context) throws Exception {
100
103
// Enable legacy SQL
101
104
builder .setUseLegacySql (config .isLegacySQL ());
102
105
103
- // Location must match that of the dataset(s) referenced in the query.
104
- JobId jobId = JobId .newBuilder ().setRandomJob ().setLocation (config .getLocation ()).build ();
105
-
106
106
// API request - starts the query.
107
107
Credentials credentials = config .getServiceAccount () == null ?
108
108
null : GCPUtils .loadServiceAccountCredentials (config .getServiceAccount (),
@@ -126,13 +126,17 @@ public void run(ActionContext context) throws Exception {
126
126
127
127
QueryJobConfiguration queryConfig = builder .build ();
128
128
129
- Job queryJob = bigQuery .create (JobInfo .newBuilder (queryConfig ).setJobId (jobId ).build ());
130
129
131
- LOG . info ( "Executing SQL as job {}." , jobId . getJob ());
132
- LOG . debug ( "The BigQuery SQL is {}" , config . getSql ());
130
+ // Setting external retry strategy for BigQuery client due to BigQuery Client not retrying when a job clashes
131
+ // with another job, due to error being 400.
133
132
134
- // Wait for the query to complete
135
- queryJob .waitFor ();
133
+ final String retryableStringPattern = "Retrying the job with back-off" ;
134
+ List <Function <BigQueryException , Boolean >> retryRules = new ArrayList <>();
135
+ retryRules .add (
136
+ (BigQueryException e ) -> e .getCode () == 400
137
+ && (e .getMessage ().contains (retryableStringPattern ) || e .getReason ().contains (retryableStringPattern ))
138
+ );
139
+ Job queryJob = executeQueryJobWithCustomRetry (bigQuery , queryConfig , retryRules );
136
140
137
141
// Check for errors
138
142
if (queryJob .getStatus ().getError () != null ) {
@@ -169,6 +173,47 @@ public void run(ActionContext context) throws Exception {
169
173
context .getMetrics ().gauge (RECORDS_PROCESSED , rows );
170
174
}
171
175
176
+ /**
177
+ * Executes Query with added retry rules following:
178
+ * https://cloud.google.com/bigquery/sla
179
+ */
180
+ private Job executeQueryJobWithCustomRetry (BigQuery bigQuery , QueryJobConfiguration queryConfig ,
181
+ List <Function <BigQueryException , Boolean >> retryRules ) throws Exception {
182
+ // The longest amount of time to wait in-between retries.
183
+ final int maximum_backoff = 32 ;
184
+
185
+ // The maximum number of retries.
186
+ final int max_retries = 20 ;
187
+
188
+ int retries = 0 ;
189
+
190
+ while (true ) {
191
+ try {
192
+ // Location must match that of the dataset(s) referenced in the query.
193
+ JobId jobId = JobId .newBuilder ().setRandomJob ().setLocation (config .getLocation ()).build ();
194
+ Job queryJob = bigQuery .create (JobInfo .newBuilder (queryConfig ).setJobId (jobId ).build ());
195
+ LOG .info ("Executing SQL as job {}." , jobId .getJob ());
196
+ LOG .debug ("The BigQuery SQL is {}" , config .getSql ());
197
+
198
+ // Wait for the query to complete
199
+ queryJob .waitFor ();
200
+ return queryJob ;
201
+ } catch (BigQueryException bigQueryException ) {
202
+ if (retries >= max_retries ) {
203
+ LOG .error ("Run out of retries while executing query with backoff." );
204
+ throw bigQueryException ;
205
+ }
206
+ if (retryRules .stream ().noneMatch ((f -> f .apply (bigQueryException )))) {
207
+ throw bigQueryException ;
208
+ }
209
+ LOG .warn ("Received {} error from BigQuery, retrying..." , bigQueryException .getMessage ());
210
+ long sleep_time = Math .round ((Math .min (Math .pow (2 , retries ), maximum_backoff ) + Math .random ()) * 1000 );
211
+ Thread .sleep (sleep_time );
212
+ retries += 1 ;
213
+ }
214
+ }
215
+ }
216
+
172
217
@ Override
173
218
public AbstractBigQueryActionConfig getConfig () {
174
219
return config ;
0 commit comments