Skip to content

Commit b415021

Browse files
Add tests for retry logic based on response status code (#11)
* Add tests for retry logic based on response status code * Mark some tests fixed * Add comments linking to relevant docs * Retry 503 and 504 since they can be transient errors * Don't retry 409 None of the listed reasons seem like they could be transient https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes * Test current 429 behaviour * Fixup test setup name --------- Co-authored-by: Nick Robinson <[email protected]>
1 parent 8db6c5c commit b415021

File tree

4 files changed

+194
-6
lines changed

4 files changed

+194
-6
lines changed

Project.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@ version = "0.1.0"
44

55
[compat]
66
CloudBase = "1"
7+
HTTP = "1"
78
ReTestItems = "1"
9+
Sockets = "1"
810
Test = "1"
911
julia = "1.8"
1012

1113
[extras]
1214
CloudBase = "85eb1798-d7c4-4918-bb13-c944d38e27ed"
15+
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
1316
ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
17+
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
1418
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1519

1620
[targets]
17-
test = ["CloudBase", "ReTestItems", "Test"]
21+
test = ["CloudBase", "HTTP", "ReTestItems", "Sockets", "Test"]

test/azure_blobs_exception_tests.jl

Lines changed: 185 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
@test false # Should have thrown an error
4242
catch e
4343
@test e isa ErrorException
44-
@test occursin("400 Bad Request", e.msg)
44+
@test occursin("400 Bad Request", e.msg) # Should this be 403 Forbidden? We've seen that with invalid SAS tokens
4545
@test occursin("Authentication information is not given in the correct format", e.msg)
4646
end
4747

@@ -151,3 +151,187 @@
151151
@test res == 1 # Rust CResult::Error
152152
end
153153
end # @testitem
154+
155+
### See Azure Blob Storage docs: https://learn.microsoft.com/en-us/rest/api/storageservices
156+
### - "Common REST API error codes":
157+
### https://learn.microsoft.com/en-us/rest/api/storageservices/common-rest-api-error-codes
158+
### - "Azure Blob Storage error codes":
159+
### https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes
160+
### - "Get Blob"
161+
### https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob
162+
### - "Put Blob"
163+
### https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob
164+
@testitem "BlobStorage retries" setup=[InitializeObjectStore] begin
165+
using CloudBase.CloudTest: Azurite
166+
import CloudBase
167+
using ObjectStore: blob_get!, blob_put, AzureCredentials
168+
import HTTP
169+
import Sockets
170+
171+
max_retries = InitializeObjectStore.max_retries
172+
173+
function test_status(method, response_status, headers=nothing)
174+
@assert method === :GET || method === :PUT
175+
nrequests = Ref(0)
176+
response_body = "response body from the dummy server"
177+
account = "myaccount"
178+
container = "mycontainer"
179+
shared_key_from_azurite = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
180+
181+
(port, tcp_server) = Sockets.listenany(8081)
182+
http_server = HTTP.serve!(tcp_server) do request::HTTP.Request
183+
if request.method == "GET" && request.target == "/$account/$container/_this_file_does_not_exist"
184+
# This is the exploratory ping from connect_and_test in lib.rs
185+
return HTTP.Response(404, "Yup, still doesn't exist")
186+
end
187+
nrequests[] += 1
188+
response = isnothing(headers) ? HTTP.Response(response_status, response_body) : HTTP.Response(response_status, headers, response_body)
189+
return response
190+
end
191+
192+
baseurl = "http://127.0.0.1:$port/$account/$container/"
193+
creds = AzureCredentials(account, container, shared_key_from_azurite, baseurl)
194+
195+
try
196+
method === :GET && blob_get!(joinpath(baseurl, "blob"), zeros(UInt8, 5), creds)
197+
method === :PUT && blob_put(joinpath(baseurl, "blob"), codeunits("a,b,c"), creds)
198+
@test false # Should have thrown an error
199+
catch e
200+
@test e isa ErrorException
201+
@test occursin(string(response_status), e.msg)
202+
response_status < 500 && (@test occursin("response body from the dummy server", e.msg))
203+
finally
204+
close(http_server)
205+
end
206+
wait(http_server)
207+
return nrequests[]
208+
end
209+
210+
@testset "400: Bad Request" begin
211+
# Returned when there's an error in the request URI, headers, or body. The response body
212+
# contains an error message explaining what the specific problem is.
213+
# See https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes
214+
# See https://www.rfc-editor.org/rfc/rfc9110#status.400
215+
nrequests = test_status(:GET, 400)
216+
@test nrequests == 1
217+
nrequests = test_status(:PUT, 400)
218+
@test nrequests == 1
219+
end
220+
221+
@testset "403: Forbidden" begin
222+
# Returned when you pass an invalid api-key.
223+
# See https://www.rfc-editor.org/rfc/rfc9110#status.403
224+
nrequests = test_status(:GET, 403)
225+
@test nrequests == 1
226+
nrequests = test_status(:PUT, 403)
227+
@test nrequests == 1
228+
end
229+
230+
@testset "404: Not Found" begin
231+
# Returned when the container or the blob is not found.
232+
# See https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes
233+
# See https://www.rfc-editor.org/rfc/rfc9110#status.404
234+
nrequests = test_status(:GET, 404)
235+
@test nrequests == 1
236+
end
237+
238+
@testset "405: Method Not Supported" begin
239+
# See https://www.rfc-editor.org/rfc/rfc9110#status.405
240+
nrequests = test_status(:GET, 405, ["Allow" => "PUT"])
241+
@test nrequests == 1
242+
nrequests = test_status(:PUT, 405, ["Allow" => "GET"])
243+
@test nrequests == 1
244+
end
245+
246+
@testset "409: Conflict" begin
247+
# Returned when write operations conflict.
248+
# See https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes
249+
# See https://www.rfc-editor.org/rfc/rfc9110#status.409
250+
nrequests = test_status(:GET, 409)
251+
@test nrequests == 1
252+
nrequests = test_status(:PUT, 409)
253+
@test nrequests == 1
254+
end
255+
256+
@testset "412: Precondition Failed" begin
257+
# Returned when an If-Match or If-None-Match header's condition evaluates to false
258+
# See https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob#blob-custom-properties
259+
# See https://www.rfc-editor.org/rfc/rfc9110#status.412
260+
nrequests = test_status(:GET, 412)
261+
@test nrequests == 1
262+
nrequests = test_status(:PUT, 412)
263+
@test nrequests == 1
264+
end
265+
266+
@testset "413: Content Too Large" begin
267+
# See https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob#remarks
268+
# If you attempt to upload either a block blob that's larger than the maximum
269+
# permitted size for that service version or a page blob that's larger than 8 TiB,
270+
# the service returns status code 413 (Request Entity Too Large). Blob Storage also
271+
# returns additional information about the error in the response, including the
272+
# maximum permitted blob size, in bytes.
273+
# See https://www.rfc-editor.org/rfc/rfc9110#status.413
274+
nrequests = test_status(:PUT, 413)
275+
@test nrequests == 1
276+
end
277+
278+
@testset "429: Too Many Requests" begin
279+
# See https://www.rfc-editor.org/rfc/rfc6585#section-4
280+
nrequests = test_status(:GET, 429)
281+
@test nrequests == 1
282+
nrequests = test_status(:PUT, 429)
283+
@test nrequests == 1
284+
# See https://www.rfc-editor.org/rfc/rfc9110#field.retry-after
285+
# TODO: We probably should respect the Retry-After header, but we currently don't
286+
# (and we don't know if Azure actually sets it)
287+
# A 429 can happen when Azure is throttling us, so it might be a good idea to retry with some
288+
# larger initial backoff (very eager retries probably only make the situation worse).
289+
nrequests = test_status(:GET, 429, ["Retry-After" => 10])
290+
@test nrequests == 1 + max_retries broken=true
291+
nrequests = test_status(:PUT, 429, ["Retry-After" => 10])
292+
@test nrequests == 1 + max_retries broken=true
293+
end
294+
295+
@testset "502: Bad Gateway" begin
296+
# https://www.rfc-editor.org/rfc/rfc9110#status.502
297+
# The 502 (Bad Gateway) status code indicates that the server, while acting as a
298+
# gateway or proxy, received an invalid response from an inbound server it accessed
299+
# while attempting to fulfill the request.
300+
# This error can occur when you enter HTTP instead of HTTPS in the connection.
301+
nrequests = test_status(:GET, 502)
302+
@test nrequests == 1 + max_retries
303+
nrequests = test_status(:PUT, 502)
304+
@test nrequests == 1 + max_retries
305+
end
306+
307+
@testset "503: Service Unavailable" begin
308+
# See https://www.rfc-editor.org/rfc/rfc9110#status.503
309+
# The 503 (Service Unavailable) status code indicates that the server is currently
310+
# unable to handle the request due to a temporary overload or scheduled maintenance,
311+
# which will likely be alleviated after some delay. The server MAY send a Retry-After
312+
# header field (Section 10.2.3) to suggest an appropriate amount of time for the
313+
# client to wait before retrying the request.
314+
# See https://learn.microsoft.com/en-us/rest/api/storageservices/common-rest-api-error-codes
315+
# An operation on any of the Azure Storage services can return the following error codes:
316+
# Error code HTTP status code User message
317+
# ServerBusy Service Unavailable (503) The server is currently unable to receive requests. Please retry your request.
318+
# ServerBusy Service Unavailable (503) Ingress is over the account limit.
319+
# ServerBusy Service Unavailable (503) Egress is over the account limit.
320+
# ServerBusy Service Unavailable (503) Operations per second is over the account limit.
321+
nrequests = test_status(:GET, 503)
322+
@test nrequests == 1 + max_retries
323+
nrequests = test_status(:PUT, 503)
324+
@test nrequests == 1 + max_retries
325+
end
326+
327+
@testset "504: Gateway Timeout" begin
328+
# See https://www.rfc-editor.org/rfc/rfc9110#status.504
329+
# The 504 (Gateway Timeout) status code indicates that the server, while acting as
330+
# a gateway or proxy, did not receive a timely response from an upstream server it
331+
# needed to access in order to complete the request.
332+
nrequests = test_status(:GET, 504)
333+
@test nrequests == 1 + max_retries
334+
nrequests = test_status(:PUT, 504)
335+
@test nrequests == 1 + max_retries
336+
end
337+
end

test/common_testsetup.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
using ObjectStore
33
# Since we currently only support centralized configs, we need to have one that is compatible
44
# with all the tests (some of the tests would take too long if we use default values).
5-
max_retries = 5
6-
retry_timeout_sec = 5
5+
max_retries = 2
6+
retry_timeout_sec = 2
77
init_object_store(ObjectStoreConfig(max_retries, retry_timeout_sec))
88
end

test/runtests.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ using ReTestItems
22
using ObjectStore
33

44
withenv("RUST_BACKTRACE"=>1) do
5-
runtests(ObjectStore, testitem_timeout=120, nworkers=1)
6-
end
5+
runtests(ObjectStore, testitem_timeout=180, nworkers=1)
6+
end

0 commit comments

Comments
 (0)