From 1cb8a718f30604c99e50654dc47524f10b1fdf9e Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Tue, 29 Jul 2025 19:14:06 -0400 Subject: [PATCH 1/6] First draft --- benchmark.jl | 28 ++++++++++++++++++++++++++++ src/array.jl | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 benchmark.jl diff --git a/benchmark.jl b/benchmark.jl new file mode 100644 index 0000000000..d85660f68b --- /dev/null +++ b/benchmark.jl @@ -0,0 +1,28 @@ +using CUDA +using Test + +a = CuArray([1, 2, 3, 4, 5, 6]) + +CUDA.fresize!(a, 2) +@test length(a) == 2 +@test Array(a) == [1, 2] + +CUDA.fresize!(a, 5) +@test length(a) == 5 +# @test Array(a)[1:3] == [1, 2, 3] +Array(a) + +# CUDA.fresize!(a, 2) +# @test length(a) == 2 +# @test Array(a)[1:2] == [1, 2] + +# # we should be able to resize an unsafe_wrapped array too, as it replaces the buffer +# b = unsafe_wrap(CuArray{Int}, pointer(a), 2) +# CUDA.fresize!(b, 3) +# @test length(b) == 3 +# @test Array(b)[1:2] == [1, 2] + +# b = CuArray{Int}(undef, 0) +# @test length(b) == 0 +# CUDA.fresize!(b, 1) +# @test length(b) == 1 \ No newline at end of file diff --git a/src/array.jl b/src/array.jl index d66396bfce..7c222c60c1 100644 --- a/src/array.jl +++ b/src/array.jl @@ -915,3 +915,39 @@ function Base.resize!(A::CuVector{T}, n::Integer) where T A end + + +function fresize!(A::CuVector{T}, n::Integer) where T + n == length(A) && return A + + # how to better choose the new size? + if n > length(A) || n < length(A) / 2 + len = n > length(A) ? max(n, 2 * length(A)) : n + + maxsize = len * aligned_sizeof(T) + bufsize = if isbitstype(T) + maxsize + else + # type tag array past the data + maxsize + n + end + + new_data = context!(context(A)) do + mem = pool_alloc(memory_type(A), bufsize) + ptr = convert(CuPtr{T}, mem) + m = min(length(A), n) + if m > 0 + GC.@preserve A unsafe_copyto!(ptr, pointer(A), m) + end + DataRef(pool_free, mem) + end + unsafe_free!(A) + A.data = new_data + A.maxsize = maxsize + end + + A.dims = (n,) + A.offset = 0 + + A +end \ No newline at end of file From 9d607b120bf503431eb7f39a094d494fa3a4cd43 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Wed, 30 Jul 2025 00:03:46 -0400 Subject: [PATCH 2/6] Test and benchmark --- src/array.jl | 9 ++++---- test/base/array.jl | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/src/array.jl b/src/array.jl index 7c222c60c1..ab16c87893 100644 --- a/src/array.jl +++ b/src/array.jl @@ -916,8 +916,8 @@ function Base.resize!(A::CuVector{T}, n::Integer) where T A end - -function fresize!(A::CuVector{T}, n::Integer) where T +# new version of resizing +function new_resize!(A::CuVector{T}, n::Integer) where T n == length(A) && return A # how to better choose the new size? @@ -929,7 +929,7 @@ function fresize!(A::CuVector{T}, n::Integer) where T maxsize else # type tag array past the data - maxsize + n + maxsize + len end new_data = context!(context(A)) do @@ -944,10 +944,9 @@ function fresize!(A::CuVector{T}, n::Integer) where T unsafe_free!(A) A.data = new_data A.maxsize = maxsize + A.offset = 0 end A.dims = (n,) - A.offset = 0 - A end \ No newline at end of file diff --git a/test/base/array.jl b/test/base/array.jl index 51fb2fc219..e4461c14ba 100644 --- a/test/base/array.jl +++ b/test/base/array.jl @@ -574,6 +574,63 @@ end @test length(b) == 0 resize!(b, 1) @test length(b) == 1 + + # new resizing + a = CuArray([1, 2, 3, 4, 5, 6]) + CUDA.new_resize!(a, 6) + @test length(a) == 6 + @test Array(a) == [1, 2, 3, 4, 5, 6] + + CUDA.new_resize!(a, 5) # cut less than half + @test length(a) == 5 + @test Array(a) == [1, 2, 3, 4, 5] + @test a.maxsize == 6 * sizeof(eltype(a)) + + CUDA.new_resize!(a, 2) # cut more than half + @test length(a) == 2 + @test Array(a) == [1, 2] + @test a.maxsize == 2 * sizeof(eltype(a)) + + CUDA.new_resize!(a, 1) # cut to half + @test length(a) == 1 + @test Array(a) == [1] + @test a.maxsize == 2 * sizeof(eltype(a)) + + CUDA.new_resize!(a, 2) # double the size + @test length(a) == 2 + @test Array(a)[1:1] == [1] + @test a.maxsize == 2 * sizeof(eltype(a)) + + CUDA.new_resize!(a, 3) # add less than half + @test length(a) == 3 + @test Array(a)[1:1] == [1] + @test a.maxsize == 4 * sizeof(eltype(a)) + + CUDA.new_resize!(a, 7) # add more than half + @test length(a) == 7 + @test Array(a)[1:1] == [1] + @test a.maxsize == 7 * sizeof(eltype(a)) + + a = CuArray([1, 2]) + # resizing an unsafe_wrapped array + b = unsafe_wrap(CuArray{Int}, pointer(a), 2) + CUDA.new_resize!(b, 3) + @test length(b) == 3 + @test Array(b)[1:2] == [1, 2] + @test b.maxsize == 4 * sizeof(eltype(b)) + + # corner cases + b = CuArray{Int}(undef, 0) + @test length(b) == 0 + CUDA.new_resize!(b, 1) + @test length(b) == 1 + @test b.maxsize == 1 * sizeof(eltype(b)) + + b = CuArray{Int}(undef, 1) + @test length(b) == 1 + CUDA.new_resize!(b, 0) + @test length(b) == 0 + @test b.maxsize == 0 * sizeof(eltype(b)) end @testset "aliasing" begin From 6197076b04b3f13b5c0760b6e0b9fca95c06fc00 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Wed, 30 Jul 2025 00:05:39 -0400 Subject: [PATCH 3/6] Remove benchmark --- benchmark.jl | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 benchmark.jl diff --git a/benchmark.jl b/benchmark.jl deleted file mode 100644 index d85660f68b..0000000000 --- a/benchmark.jl +++ /dev/null @@ -1,28 +0,0 @@ -using CUDA -using Test - -a = CuArray([1, 2, 3, 4, 5, 6]) - -CUDA.fresize!(a, 2) -@test length(a) == 2 -@test Array(a) == [1, 2] - -CUDA.fresize!(a, 5) -@test length(a) == 5 -# @test Array(a)[1:3] == [1, 2, 3] -Array(a) - -# CUDA.fresize!(a, 2) -# @test length(a) == 2 -# @test Array(a)[1:2] == [1, 2] - -# # we should be able to resize an unsafe_wrapped array too, as it replaces the buffer -# b = unsafe_wrap(CuArray{Int}, pointer(a), 2) -# CUDA.fresize!(b, 3) -# @test length(b) == 3 -# @test Array(b)[1:2] == [1, 2] - -# b = CuArray{Int}(undef, 0) -# @test length(b) == 0 -# CUDA.fresize!(b, 1) -# @test length(b) == 1 \ No newline at end of file From ac9cf4a2338c8b196450f669821426ebccd1a321 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Wed, 30 Jul 2025 00:06:57 -0400 Subject: [PATCH 4/6] Fix comments --- src/array.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/array.jl b/src/array.jl index ab16c87893..591cc570bc 100644 --- a/src/array.jl +++ b/src/array.jl @@ -880,8 +880,8 @@ Base.unsafe_convert(::Type{CuPtr{T}}, A::PermutedDimsArray) where {T} = resize!(a::CuVector, n::Integer) Resize `a` to contain `n` elements. If `n` is smaller than the current collection length, -the first `n` elements will be retained. If `n` is larger, the new elements are not -guaranteed to be initialized. +the first `n` elements will be retained. If `n` is larger, the new elements are initialized +with undefined values. """ function Base.resize!(A::CuVector{T}, n::Integer) where T n == length(A) && return A From 1f4b7da54e74cbdd992987cb929daa582f34e7a1 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Tue, 5 Aug 2025 14:46:23 -0400 Subject: [PATCH 5/6] Update --- src/array.jl | 93 ++++++++++++++++------------- test/base/array.jl | 143 ++++++++++++++++++++------------------------- 2 files changed, 113 insertions(+), 123 deletions(-) diff --git a/src/array.jl b/src/array.jl index 591cc570bc..53d97ece53 100644 --- a/src/array.jl +++ b/src/array.jl @@ -876,6 +876,8 @@ Base.unsafe_convert(::Type{CuPtr{T}}, A::PermutedDimsArray) where {T} = ## resizing +const RESIZE_THRESHOLD = 10 * 1024^2 # 10 MiB +const RESIZE_INCREMENT = 1 * 1024^2 # 1 MiB """ resize!(a::CuVector, n::Integer) @@ -886,51 +888,25 @@ with undefined values. function Base.resize!(A::CuVector{T}, n::Integer) where T n == length(A) && return A - # TODO: add additional space to allow for quicker resizing - maxsize = n * aligned_sizeof(T) - bufsize = if isbitstype(T) - maxsize - else - # type tag array past the data - maxsize + n - end + cap = A.maxsize ÷ aligned_sizeof(T) - # replace the data with a new one. this 'unshares' the array. - # as a result, we can safely support resizing unowned buffers. - new_data = context!(context(A)) do - mem = pool_alloc(memory_type(A), bufsize) - ptr = convert(CuPtr{T}, mem) - m = min(length(A), n) - if m > 0 - GC.@preserve A unsafe_copyto!(ptr, pointer(A), m) + # do nothing when new length is smaller than maxsize + if n > cap # n > length(A) + + # if maxsize is larger than 10 MiB + if A.maxsize > RESIZE_THRESHOLD + len = max(cap + RESIZE_INCREMENT ÷ aligned_sizeof(T), n) # add at least 1 MiB + else + len = max(n, 2 * length(A)) end - DataRef(pool_free, mem) - end - unsafe_free!(A) - - A.data = new_data - A.dims = (n,) - A.maxsize = maxsize - A.offset = 0 - - A -end - -# new version of resizing -function new_resize!(A::CuVector{T}, n::Integer) where T - n == length(A) && return A - - # how to better choose the new size? - if n > length(A) || n < length(A) / 2 - len = n > length(A) ? max(n, 2 * length(A)) : n maxsize = len * aligned_sizeof(T) - bufsize = if isbitstype(T) - maxsize - else - # type tag array past the data - maxsize + len - end + bufsize = if isbitstype(T) + maxsize + else + # type tag array past the data + maxsize + len + end new_data = context!(context(A)) do mem = pool_alloc(memory_type(A), bufsize) @@ -949,4 +925,37 @@ function new_resize!(A::CuVector{T}, n::Integer) where T A.dims = (n,) A -end \ No newline at end of file +end + +# function Base.resize!(A::CuVector{T}, n::Integer) where T +# n == length(A) && return A + +# if n > length(A) || n < length(A) / 2 +# len = n > length(A) ? max(n, 2 * length(A)) : n + +# maxsize = len * aligned_sizeof(T) +# bufsize = if isbitstype(T) +# maxsize +# else +# # type tag array past the data +# maxsize + len +# end + +# new_data = context!(context(A)) do +# mem = pool_alloc(memory_type(A), bufsize) +# ptr = convert(CuPtr{T}, mem) +# m = min(length(A), n) +# if m > 0 +# GC.@preserve A unsafe_copyto!(ptr, pointer(A), m) +# end +# DataRef(pool_free, mem) +# end +# unsafe_free!(A) +# A.data = new_data +# A.maxsize = maxsize +# A.offset = 0 +# end + +# A.dims = (n,) +# A +# end \ No newline at end of file diff --git a/test/base/array.jl b/test/base/array.jl index e4461c14ba..0b8c8ffa0f 100644 --- a/test/base/array.jl +++ b/test/base/array.jl @@ -550,87 +550,68 @@ end end @testset "resizing" begin - a = CuArray([1,2,3]) - - resize!(a, 3) - @test length(a) == 3 - @test Array(a) == [1,2,3] - - resize!(a, 5) - @test length(a) == 5 - @test Array(a)[1:3] == [1,2,3] - - resize!(a, 2) - @test length(a) == 2 - @test Array(a)[1:2] == [1,2] - - # we should be able to resize an unsafe_wrapped array too, as it replaces the buffer - b = unsafe_wrap(CuArray{Int}, pointer(a), 2) - resize!(b, 3) - @test length(b) == 3 - @test Array(b)[1:2] == [1,2] - - b = CuArray{Int}(undef, 0) - @test length(b) == 0 - resize!(b, 1) - @test length(b) == 1 - - # new resizing - a = CuArray([1, 2, 3, 4, 5, 6]) - CUDA.new_resize!(a, 6) - @test length(a) == 6 - @test Array(a) == [1, 2, 3, 4, 5, 6] - - CUDA.new_resize!(a, 5) # cut less than half - @test length(a) == 5 - @test Array(a) == [1, 2, 3, 4, 5] - @test a.maxsize == 6 * sizeof(eltype(a)) - - CUDA.new_resize!(a, 2) # cut more than half - @test length(a) == 2 - @test Array(a) == [1, 2] - @test a.maxsize == 2 * sizeof(eltype(a)) - - CUDA.new_resize!(a, 1) # cut to half - @test length(a) == 1 - @test Array(a) == [1] - @test a.maxsize == 2 * sizeof(eltype(a)) - - CUDA.new_resize!(a, 2) # double the size - @test length(a) == 2 - @test Array(a)[1:1] == [1] - @test a.maxsize == 2 * sizeof(eltype(a)) - - CUDA.new_resize!(a, 3) # add less than half - @test length(a) == 3 - @test Array(a)[1:1] == [1] - @test a.maxsize == 4 * sizeof(eltype(a)) - - CUDA.new_resize!(a, 7) # add more than half - @test length(a) == 7 - @test Array(a)[1:1] == [1] - @test a.maxsize == 7 * sizeof(eltype(a)) - - a = CuArray([1, 2]) - # resizing an unsafe_wrapped array - b = unsafe_wrap(CuArray{Int}, pointer(a), 2) - CUDA.new_resize!(b, 3) - @test length(b) == 3 - @test Array(b)[1:2] == [1, 2] - @test b.maxsize == 4 * sizeof(eltype(b)) - - # corner cases - b = CuArray{Int}(undef, 0) - @test length(b) == 0 - CUDA.new_resize!(b, 1) - @test length(b) == 1 - @test b.maxsize == 1 * sizeof(eltype(b)) - - b = CuArray{Int}(undef, 1) - @test length(b) == 1 - CUDA.new_resize!(b, 0) - @test length(b) == 0 - @test b.maxsize == 0 * sizeof(eltype(b)) + # 1) small arrays (<10 MiB): should still use doubling policy + a = CuArray([1, 2, 3]) + + # reallocation (add less than half) + CUDA.resize!(a, 4) + @test length(a) == 4 + @test Array(a)[1:3] == [1, 2, 3] + @test a.maxsize == max(4, 2*3) * sizeof(eltype(a)) + + # no reallocation + CUDA.resize!(a, 5) + @test length(a) == 5 + @test Array(a)[1:3] == [1, 2, 3] + @test a.maxsize == 6 * sizeof(eltype(a)) + + # reallocation (add more than half) + CUDA.resize!(a, 12) + @test length(a) == 12 + @test Array(a)[1:3] == [1, 2, 3] + @test a.maxsize == max(12, 2*5) * sizeof(eltype(a)) + + # 2) large arrays (>10 MiB): should use 1 MiB increments + b = CUDA.fill(1, 2*1024^2) + maxsize = b.maxsize + + # should bump by exactly 1 MiB + CUDA.resize!(b, 2*1024^2 + 1) + @test length(b) == 2*1024^2 + 1 + @test b.maxsize == maxsize + CUDA.RESIZE_INCREMENT + @test all(Array(b)[1:2*1024^2] .== 1) + + b = CUDA.fill(1, 2*1024^2) + maxsize = b.maxsize + + # should bump greater than 1 MiB + new = CUDA.RESIZE_INCREMENT ÷ sizeof(eltype(b)) + CUDA.resize!(b, 2*1024^2 + new + 1) + @test length(b) == 2*1024^2 + new + 1 + @test b.maxsize > maxsize + CUDA.RESIZE_INCREMENT + @test all(Array(b)[1:2*1024^2] .== 1) + + b = CUDA.fill(1, 2*1024^2) + maxsize = b.maxsize + + # no reallocation + CUDA.resize!(b, 2*1024^2 - 1) + @test length(b) == 2*1024^2 - 1 + @test b.maxsize == maxsize + @test all(Array(b)[1:2*1024^2 - 1] .== 1) + + # 3) corner cases + c = CuArray{Int}(undef, 0) + @test length(c) == 0 + CUDA.resize!(c, 1) + @test length(c) == 1 + @test c.maxsize == 1 * sizeof(eltype(c)) + + c = CuArray{Int}(undef, 1) + @test length(c) == 1 + CUDA.resize!(c, 0) + @test length(c) == 0 + @test c.maxsize == 1 * sizeof(eltype(c)) end @testset "aliasing" begin From 81b67a413bb340b991e6a0680bfef89f54500fa4 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Tue, 5 Aug 2025 16:34:53 -0400 Subject: [PATCH 6/6] Fix comments --- src/array.jl | 33 --------------------------------- test/base/array.jl | 2 +- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/src/array.jl b/src/array.jl index 53d97ece53..64a5685043 100644 --- a/src/array.jl +++ b/src/array.jl @@ -926,36 +926,3 @@ function Base.resize!(A::CuVector{T}, n::Integer) where T A.dims = (n,) A end - -# function Base.resize!(A::CuVector{T}, n::Integer) where T -# n == length(A) && return A - -# if n > length(A) || n < length(A) / 2 -# len = n > length(A) ? max(n, 2 * length(A)) : n - -# maxsize = len * aligned_sizeof(T) -# bufsize = if isbitstype(T) -# maxsize -# else -# # type tag array past the data -# maxsize + len -# end - -# new_data = context!(context(A)) do -# mem = pool_alloc(memory_type(A), bufsize) -# ptr = convert(CuPtr{T}, mem) -# m = min(length(A), n) -# if m > 0 -# GC.@preserve A unsafe_copyto!(ptr, pointer(A), m) -# end -# DataRef(pool_free, mem) -# end -# unsafe_free!(A) -# A.data = new_data -# A.maxsize = maxsize -# A.offset = 0 -# end - -# A.dims = (n,) -# A -# end \ No newline at end of file diff --git a/test/base/array.jl b/test/base/array.jl index 0b8c8ffa0f..a0573c73f8 100644 --- a/test/base/array.jl +++ b/test/base/array.jl @@ -550,7 +550,7 @@ end end @testset "resizing" begin - # 1) small arrays (<10 MiB): should still use doubling policy + # 1) small arrays (<=10 MiB): should still use doubling policy a = CuArray([1, 2, 3]) # reallocation (add less than half)