Skip to content

Commit b08110e

Browse files
authored
Update to object_store_ffi v0.3 which supports multipart get and put (#23)
1 parent b2cc711 commit b08110e

File tree

6 files changed

+95
-6
lines changed

6 files changed

+95
-6
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "RustyObjectStore"
22
uuid = "1b5eed3d-1f46-4baa-87f3-a4a892b23610"
3-
version = "0.1.0"
3+
version = "0.2.0"
44

55
[deps]
66
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -16,7 +16,7 @@ ReTestItems = "1"
1616
Sockets = "1"
1717
Test = "1"
1818
julia = "1.8"
19-
object_store_ffi_jll = "0.2"
19+
object_store_ffi_jll = "0.3"
2020

2121
[extras]
2222
CloudBase = "85eb1798-d7c4-4918-bb13-c944d38e27ed"

README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,39 @@ nbytes_read = get_object!(buffer, "path/to/example.csv", config)
4141
@assert String(buffer[1:nbytes_read]) == input
4242
```
4343

44+
One-time global configuration can be set using a StaticConfig object passed to init\_object\_store():
45+
```julia
46+
test_config = StaticConfig(
47+
n_threads=0,
48+
cache_capacity=20,
49+
cache_ttl_secs=30 * 60,
50+
cache_tti_secs=5 * 60,
51+
multipart_put_threshold=8 * 1024 * 1024,
52+
multipart_get_threshold=8 * 1024 * 1024,
53+
multipart_get_part_size=8 * 1024 * 1024
54+
)
55+
init_object_store(test_config)
56+
```
57+
n\_threads is the number of Rust executor threads to use. The default of 0 means to use as many
58+
threads as there are cores.
59+
60+
cache\_capacity is the size of the LRU cache rust uses to cache connection objects. Here a connection
61+
means a unique combination of destination URL, credentials, and per-connection configuration such as
62+
timeouts; it does not mean an HTTP connection.
63+
64+
cache\_ttl\_secs is the time-to-live in seconds for entries in the Rust connection cache.
65+
66+
cache\_tti\_secs is the time in seconds that a connection can be idle before it is removed from the
67+
rust cache.
68+
69+
multipart\_put\_threshold is the size in bytes above which a put request will use a multipart
70+
upload. The put part size is determined by the Rust object\_store implementation, which
71+
uses 10MB.
72+
73+
multipart\_get\_threshold and multipart\_get\_part\_size configure automatic multipart gets. The part
74+
size can be greater than the threshold without breaking anything, but it may not make sense to do so.
75+
The default 8MB for these values was borrowed from CloudStore.jl.
76+
4477
## Design
4578

4679
#### Packaging

src/RustyObjectStore.jl

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,21 +45,34 @@ $TYPEDFIELDS
4545
cache_ttl_secs::Culonglong
4646
"The time-to-idle in seconds for entries in the client cache"
4747
cache_tti_secs::Culonglong
48+
"Put requests with a size in bytes greater than this will use multipart operations"
49+
multipart_put_threshold::Culonglong
50+
"Get requests with a size in bytes greater than this will use multipart operations"
51+
multipart_get_threshold::Culonglong
52+
"The size in bytes for each part of multipart get operations"
53+
multipart_get_part_size::Culonglong
54+
4855
end
4956

5057
function Base.show(io::IO, config::StaticConfig)
5158
print(io, "StaticConfig("),
5259
print(io, "n_threads=", Int(config.n_threads), ",")
5360
print(io, "cache_capacity=", Int(config.cache_capacity), ",")
5461
print(io, "cache_ttl_secs=", Int(config.cache_ttl_secs), ",")
55-
print(io, "cache_tti_secs=", Int(config.cache_tti_secs), ")")
62+
print(io, "cache_tti_secs=", Int(config.cache_tti_secs), ",")
63+
print(io, "multipart_put_threshold=", Int(config.multipart_put_threshold), ",")
64+
print(io, "multipart_get_threshold=", Int(config.multipart_get_threshold), ",")
65+
print(io, "multipart_get_part_size=", Int(config.multipart_get_part_size), ")")
5666
end
5767

5868
const DEFAULT_CONFIG = StaticConfig(
5969
n_threads=0,
6070
cache_capacity=20,
6171
cache_ttl_secs=30 * 60,
62-
cache_tti_secs=5 * 60
72+
cache_tti_secs=5 * 60,
73+
multipart_put_threshold=10 * 1024 * 1024,
74+
multipart_get_threshold=8 * 1024 * 1024,
75+
multipart_get_part_size=8 * 1024 * 1024
6376
)
6477

6578
const _OBJECT_STORE_STARTED = Ref(false)

test/azure_blobs_exception_tests.jl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,26 @@
3535
end
3636
end
3737

38+
@testset "Insufficient output buffer size multipart" begin
39+
input = "1,2,3,4,5,6,7,8,9,1\n" ^ 1_000_000
40+
buffer = Vector{UInt8}(undef, 20_000_000)
41+
@assert sizeof(input) == 20_000_000
42+
@assert sizeof(buffer) == sizeof(input)
43+
44+
nbytes_written = put_object(codeunits(input), "test100B.csv", config)
45+
@test nbytes_written == 20_000_000
46+
47+
try
48+
# Buffer is over multipart threshold but too small for object
49+
buffer = Vector{UInt8}(undef, 10_000_000)
50+
nbytes_read = get_object!(buffer, "test100B.csv", config)
51+
@test false # Should have thrown an error
52+
catch err
53+
@test err isa RustyObjectStore.GetException
54+
@test err.msg == "failed to process get with error: Supplied buffer was too small"
55+
end
56+
end
57+
3858
@testset "Malformed credentials" begin
3959
input = "1,2,3,4,5,6,7,8,9,1\n" ^ 5
4060
buffer = Vector{UInt8}(undef, 100)

test/basic_unified_tests.jl

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ function run_read_write_test_cases(read_config::AbstractConfig, write_config::Ab
6767
@test String(buffer[1:nbytes_read]) == input
6868
end
6969

70-
# Large files should eventually use multipart upload / download requests
70+
# Large files should use multipart upload / download requests
7171
@testset "20MB file, 20MB buffer" begin
7272
input = "1,2,3,4,5,6,7,8,9,1\n" ^ 1_000_000
7373
buffer = Vector{UInt8}(undef, 20_000_000)
@@ -93,6 +93,20 @@ function run_read_write_test_cases(read_config::AbstractConfig, write_config::Ab
9393
@test nbytes_read == 20_000_000
9494
@test String(buffer[1:nbytes_read]) == input
9595
end
96+
97+
@testset "1MB file, 20MB buffer" begin
98+
input = "1,2,3,4,5,6,7,8,9,1\n" ^ 50_000
99+
100+
nbytes_written = put_object(codeunits(input), "test100B.csv", write_config)
101+
@test nbytes_written == 1_000_000
102+
103+
# Edge case for multipart download: file is smaller than the threshold but the buffer is larger
104+
buffer = Vector{UInt8}(undef, 20_000_000)
105+
nbytes_read = get_object!(buffer, "test100B.csv", read_config)
106+
@test nbytes_read == 1_000_000
107+
@test String(buffer[1:nbytes_read]) == input
108+
end
109+
96110
end
97111
end # @testsetup
98112

test/common_testsetup.jl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
11
@testsetup module InitializeObjectStore
22
using RustyObjectStore
3-
init_object_store()
3+
test_config = StaticConfig(
4+
n_threads=0,
5+
cache_capacity=20,
6+
cache_ttl_secs=30 * 60,
7+
cache_tti_secs=5 * 60,
8+
multipart_put_threshold=8 * 1024 * 1024,
9+
multipart_get_threshold=8 * 1024 * 1024,
10+
multipart_get_part_size=8 * 1024 * 1024
11+
)
12+
init_object_store(test_config)
413
end

0 commit comments

Comments
 (0)