From 9add95a768a6e1ce4f1f4ed72f7cfc89096273ef Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Fri, 11 Apr 2025 12:09:09 +0200 Subject: [PATCH 1/6] Add test case for zero fill concatdiskarray --- test/runtests.jl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 2218cc8..3e13d07 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -492,6 +492,34 @@ end @test slic == Float64[1, 2, 3, 4, 1, 2, 3, 4] end + @testset "Concat DiskArray with fill zero tiles" begin + a = zeros(Int, 3, 4) + b = ones(Int, 2, 4) + c = fill(2, 3, 5) + d = fill(0, 2, 5) + aconc = DiskArrays.ConcatDiskArray(reshape([a, b, c, 0], 2, 2)) + abase = [a c; b d] + @test all(isequal.(aconc[:, :], abase)) + @test all(isequal.(aconc[3:4, 4:6], abase[3:4, 4:6])) + ch = DiskArrays.eachchunk(aconc) + @test ch.chunks[1] == [1:3, 4:5] + @test ch.chunks[2] == [1:4, 5:9] + + a = ones(100, 50) + b = [rem(i.I[3], 5) == 0 ? 0 : a for i in CartesianIndices((1, 1, 100))] + b[1] = 0 + a_conc = DiskArrays.ConcatDiskArray(b) + ch = eachchunk(a_conc) + @test ch.chunks[1] == [1:100] + @test ch.chunks[2] == [1:50] + @test ch.chunks[3] === DiskArrays.RegularChunks(1, 0, 100) + + @test all(isequal.(a_conc[2, 2, 1:5], [0, 1.0, 1.0, 1.0, 0])) + @test all(isequal.(a_conc[end, end, 95:100], [0, 1.0, 1.0, 1.0, 1.0, 0])) + + end + + @testset "Concat DiskArray with missing tiles" begin a = zeros(Int, 3, 4) b = ones(Int, 2, 4) @@ -518,6 +546,7 @@ end @test all(isequal.(a_conc[end, end, 95:100], [missing, 1.0, 1.0, 1.0, 1.0, missing])) end + end @testset "Broadcast with length 1 and 0 final dim" begin From b744d4fa65905d99b24a782b728f229fd7abfcc0 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Fri, 11 Apr 2025 23:37:49 +0200 Subject: [PATCH 2/6] Enable arbitrary single values in concatDiskArray --- src/cat.jl | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/cat.jl b/src/cat.jl index 9d700b0..457e17e 
100644 --- a/src/cat.jl +++ b/src/cat.jl @@ -1,4 +1,3 @@ - """ ConcatDiskArray <: AbstractDiskArray @@ -15,7 +14,7 @@ Returned from `cat` on disk arrays. It is also useful on its own as it can easily concatenate an array of disk arrays. """ -struct ConcatDiskArray{T,N,P,C,HC,ID} <: AbstractDiskArray{T,N} +struct ConcatDiskArray{T,N,P,C,HC, ID} <: AbstractDiskArray{T,N} parents::P startinds::NTuple{N,Vector{Int}} size::NTuple{N,Int} @@ -24,23 +23,25 @@ struct ConcatDiskArray{T,N,P,C,HC,ID} <: AbstractDiskArray{T,N} innerdims::Val{ID} end -function ConcatDiskArray(arrays::AbstractArray{Union{<:AbstractArray,Missing}}) +function ConcatDiskArray(arrays::AbstractArray{Union{<:AbstractArray,Missing}}; fill=missing) et = Base.nonmissingtype(eltype(arrays)) - T = Union{Missing,eltype(et)} + T = promotetype(typeof(fill), eltype(et)) N = ndims(arrays) M = ndims(et) _ConcatDiskArray(arrays, T, Val(N), Val(M)) end + function infer_eltypes(arrays) foldl(arrays, init=(-1, Union{})) do (M, T), a - if ismissing(a) - (M, promote_type(Missing, T)) + if !isa(a, AbstractArray) + (M, promote_type(typeof(a), T)) else M == -1 || ndims(a) == M || throw(ArgumentError("All arrays to concatenate must have equal ndims")) (ndims(a), promote_type(eltype(a), T)) end end end + function ConcatDiskArray(arrays::AbstractArray{<:AbstractArray}) N = ndims(arrays) T = eltype(eltype(arrays)) @@ -90,7 +91,7 @@ function arraysize_and_startinds(arrays1) sizes = map(i -> zeros(Int, i), size(arrays1)) for i in CartesianIndices(arrays1) ai = arrays1[i] - ismissing(ai) && continue + !isa(ai, AbstractArray) && continue sizecur = extenddims(size(ai), size(arrays1), 1) foreach(sizecur, i.I, sizes) do si, ind, sizeall if sizeall[ind] == 0 @@ -123,10 +124,11 @@ function readblock!(a::ConcatDiskArray, aout, inds::AbstractUnitRange...) # Find affected blocks and indices in blocks _concat_diskarray_block_io(a, inds...) do outer_range, array_range, I vout = view(aout, outer_range...) 
- if ismissing(I) - vout .= missing - else + #@show size(vout) + if I isa CartesianIndex readblock!(a.parents[I], vout, array_range...) + else + vout .= I end end end @@ -170,10 +172,10 @@ function _concat_diskarray_block_io(f, a::ConcatDiskArray, inds...) #Shorten array range to shape of actual array array_range = ntuple(j -> array_range[j], ID) outer_range = fix_outerrangeshape(outer_range, array_range) - if ismissing(myar) - f(outer_range, array_range, missing) - else + if myar isa AbstractArray f(outer_range, array_range, cI) + else + f(outer_range, array_range, myar) end end end @@ -189,13 +191,13 @@ function concat_chunksize(parents) newchunks = map(s -> Vector{Union{RegularChunks,IrregularChunks}}(undef, s), size(parents)) for i in CartesianIndices(parents) array = parents[i] - ismissing(array) && continue + !isa(array,AbstractArray) && continue chunks = eachchunk(array) foreach(chunks.chunks, i.I, newchunks) do c, ind, newc if !isassigned(newc, ind) newc[ind] = c elseif c != newc[ind] - throw(ArgumentError("Chunk sizes don't forma grid")) + throw(ArgumentError("Chunk sizes don't form a grid")) end end end From 2284968c26fc142f5931b077195339a7088358fb Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Sat, 12 Apr 2025 22:27:11 +0200 Subject: [PATCH 3/6] Update src/cat.jl Co-authored-by: Rafael Schouten --- src/cat.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cat.jl b/src/cat.jl index 457e17e..b6ea31b 100644 --- a/src/cat.jl +++ b/src/cat.jl @@ -91,7 +91,7 @@ function arraysize_and_startinds(arrays1) sizes = map(i -> zeros(Int, i), size(arrays1)) for i in CartesianIndices(arrays1) ai = arrays1[i] - !isa(ai, AbstractArray) && continue + ai isa AbstractArray || continue sizecur = extenddims(size(ai), size(arrays1), 1) foreach(sizecur, i.I, sizes) do si, ind, sizeall if sizeall[ind] == 0 From eaadc95d0f47002087595dd0bef486f0187cc0a0 Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Sat, 12 Apr 2025 22:27:32 +0200 Subject: [PATCH 4/6] 
Update src/cat.jl Co-authored-by: Rafael Schouten --- src/cat.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cat.jl b/src/cat.jl index b6ea31b..26756d6 100644 --- a/src/cat.jl +++ b/src/cat.jl @@ -124,7 +124,6 @@ function readblock!(a::ConcatDiskArray, aout, inds::AbstractUnitRange...) # Find affected blocks and indices in blocks _concat_diskarray_block_io(a, inds...) do outer_range, array_range, I vout = view(aout, outer_range...) - #@show size(vout) if I isa CartesianIndex readblock!(a.parents[I], vout, array_range...) else From afe15cc7a32c82a44d06e2c672d2639583c178fb Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Sat, 12 Apr 2025 22:37:47 +0200 Subject: [PATCH 5/6] Apply review Co-authored-by: Rafael Schouten --- src/cat.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cat.jl b/src/cat.jl index 26756d6..a4e2bb7 100644 --- a/src/cat.jl +++ b/src/cat.jl @@ -23,9 +23,9 @@ struct ConcatDiskArray{T,N,P,C,HC, ID} <: AbstractDiskArray{T,N} innerdims::Val{ID} end -function ConcatDiskArray(arrays::AbstractArray{Union{<:AbstractArray,Missing}}; fill=missing) +function ConcatDiskArray(arrays::AbstractArray{Union{<:AbstractArray,Missing}}) et = Base.nonmissingtype(eltype(arrays)) - T = promotetype(typeof(fill), eltype(et)) + T = Union{Missing,eltype(et)} N = ndims(arrays) M = ndims(et) _ConcatDiskArray(arrays, T, Val(N), Val(M)) @@ -190,7 +190,7 @@ function concat_chunksize(parents) newchunks = map(s -> Vector{Union{RegularChunks,IrregularChunks}}(undef, s), size(parents)) for i in CartesianIndices(parents) array = parents[i] - !isa(array,AbstractArray) && continue + array isa AbstractArray || continue chunks = eachchunk(array) foreach(chunks.chunks, i.I, newchunks) do c, ind, newc if !isassigned(newc, ind) From 6781eae0548131f7cc32132f4dd051f1710a00fa Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Tue, 13 May 2025 15:20:03 +0200 Subject: [PATCH 6/6] Wrap fill values in MissingTile for ConcatDiskArray This introduces a 
MissingTile type to wrap the values for ConcatDiskArray. Wrapping the fill value makes it possible to use values that are themselves arrays (for example vectors) as fill values. --- src/cat.jl | 28 ++++++++++++++++------------ src/subarray.jl | 2 +- test/runtests.jl | 46 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 20 deletions(-) diff --git a/src/cat.jl b/src/cat.jl index a4e2bb7..682751e 100644 --- a/src/cat.jl +++ b/src/cat.jl @@ -14,7 +14,7 @@ Returned from `cat` on disk arrays. It is also useful on its own as it can easily concatenate an array of disk arrays. """ -struct ConcatDiskArray{T,N,P,C,HC, ID} <: AbstractDiskArray{T,N} +struct ConcatDiskArray{T,N,P,C,HC,ID} <: AbstractDiskArray{T,N} parents::P startinds::NTuple{N,Vector{Int}} size::NTuple{N,Int} @@ -33,14 +33,13 @@ end function infer_eltypes(arrays) foldl(arrays, init=(-1, Union{})) do (M, T), a - if !isa(a, AbstractArray) - (M, promote_type(typeof(a), T)) - else + if a isa AbstractArray M == -1 || ndims(a) == M || throw(ArgumentError("All arrays to concatenate must have equal ndims")) - (ndims(a), promote_type(eltype(a), T)) + M = ndims(a) + end + (M, promote_type(eltype(a), T)) end end -end function ConcatDiskArray(arrays::AbstractArray{<:AbstractArray}) N = ndims(arrays) @@ -79,6 +78,11 @@ function ConcatDiskArray(arrays1::AbstractArray, T, ::Val{D},::Val{ID}) where {D return ConcatDiskArray{T,D,typeof(arrays1),typeof(chunks),typeof(hc),ID}(arrays1, startinds, sizes, chunks, hc,Val(ID)) end +struct MissingTile{F} + fillvalue::F +end +Base.eltype(::Type{MissingTile{F}}) where F = F + function extenddims(a::Tuple{Vararg{Any,N}}, b::Tuple{Vararg{Any,M}}, fillval) where {N,M} length(a) > length(b) && error("Wrong") extenddims((a..., fillval), b, fillval) @@ -91,7 +95,7 @@ function arraysize_and_startinds(arrays1) sizes = map(i -> zeros(Int, i), size(arrays1)) for i in CartesianIndices(arrays1) ai = arrays1[i] - ai isa AbstractArray || continue + ai isa MissingTile && continue sizecur = extenddims(size(ai), size(arrays1), 1) 
foreach(sizecur, i.I, sizes) do si, ind, sizeall if sizeall[ind] == 0 @@ -127,7 +131,7 @@ function readblock!(a::ConcatDiskArray, aout, inds::AbstractUnitRange...) if I isa CartesianIndex readblock!(a.parents[I], vout, array_range...) else - vout .= I + vout .= (I.fillvalue,) end end end @@ -171,10 +175,10 @@ function _concat_diskarray_block_io(f, a::ConcatDiskArray, inds...) #Shorten array range to shape of actual array array_range = ntuple(j -> array_range[j], ID) outer_range = fix_outerrangeshape(outer_range, array_range) - if myar isa AbstractArray - f(outer_range, array_range, cI) - else + if myar isa MissingTile f(outer_range, array_range, myar) + else + f(outer_range, array_range, cI) end end end @@ -190,7 +194,7 @@ function concat_chunksize(parents) newchunks = map(s -> Vector{Union{RegularChunks,IrregularChunks}}(undef, s), size(parents)) for i in CartesianIndices(parents) array = parents[i] - array isa AbstractArray || continue + array isa MissingTile && continue chunks = eachchunk(array) foreach(chunks.chunks, i.I, newchunks) do c, ind, newc if !isassigned(newc, ind) diff --git a/src/subarray.jl b/src/subarray.jl index 847752a..76b282d 100644 --- a/src/subarray.jl +++ b/src/subarray.jl @@ -44,7 +44,7 @@ function eachchunk_view(::Chunked, vv) end eachchunk_view(::Unchunked, a) = estimate_chunksize(a) -# Implementaion macro +# Implementation macro macro implement_subarray(t) t = esc(t) diff --git a/test/runtests.jl b/test/runtests.jl index 3e13d07..3cb9e11 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -497,17 +497,18 @@ end b = ones(Int, 2, 4) c = fill(2, 3, 5) d = fill(0, 2, 5) - aconc = DiskArrays.ConcatDiskArray(reshape([a, b, c, 0], 2, 2)) + aconc = DiskArrays.ConcatDiskArray(reshape([a, b, c, DiskArrays.MissingTile(0)], 2, 2)) abase = [a c; b d] @test all(isequal.(aconc[:, :], abase)) @test all(isequal.(aconc[3:4, 4:6], abase[3:4, 4:6])) ch = DiskArrays.eachchunk(aconc) @test ch.chunks[1] == [1:3, 4:5] @test ch.chunks[2] == [1:4, 5:9] + 
@test eltype(aconc) == Int a = ones(100, 50) - b = [rem(i.I[3], 5) == 0 ? 0 : a for i in CartesianIndices((1, 1, 100))] - b[1] = 0 + b = [rem(i.I[3], 5) == 0 ? DiskArrays.MissingTile(0) : a for i in CartesianIndices((1, 1, 100))] + b[1] = DiskArrays.MissingTile(0) a_conc = DiskArrays.ConcatDiskArray(b) ch = eachchunk(a_conc) @test ch.chunks[1] == [1:100] @@ -525,17 +526,18 @@ end b = ones(Int, 2, 4) c = fill(2, 3, 5) d = fill(missing, 2, 5) - aconc = DiskArrays.ConcatDiskArray(reshape([a, b, c, missing], 2, 2)) + aconc = DiskArrays.ConcatDiskArray(reshape([a, b, c, DiskArrays.MissingTile(missing)], 2, 2)) abase = [a c; b d] @test all(isequal.(aconc[:, :], abase)) @test all(isequal.(aconc[3:4, 4:6], abase[3:4, 4:6])) ch = DiskArrays.eachchunk(aconc) @test ch.chunks[1] == [1:3, 4:5] @test ch.chunks[2] == [1:4, 5:9] + @test eltype(aconc) == Union{Int, Missing} a = ones(100, 50) - b = [rem(i.I[3], 5) == 0 ? missing : a for i in CartesianIndices((1, 1, 100))] - b[1] = missing + b = [rem(i.I[3], 5) == 0 ? DiskArrays.MissingTile(missing) : a for i in CartesianIndices((1, 1, 100))] + b[1] = DiskArrays.MissingTile(missing) a_conc = DiskArrays.ConcatDiskArray(b) ch = eachchunk(a_conc) @test ch.chunks[1] == [1:100] @@ -547,6 +549,34 @@ end end + @testset "Concat DiskArray with fill zero vector tiles" begin + a = fill([1,1], 3, 4) + b = fill([1,2], 2, 4) + c = fill([2,1], 3, 5) + d = fill([2,2], 2, 5) + aconc = DiskArrays.ConcatDiskArray(reshape([a, b, c, DiskArrays.MissingTile([2,2])], 2, 2)) + abase = [a c; b d] + @test all(isequal.(aconc[:, :], abase)) + @test all(isequal.(aconc[3:4, 4:6], abase[3:4, 4:6])) + ch = DiskArrays.eachchunk(aconc) + @test ch.chunks[1] == [1:3, 4:5] + @test ch.chunks[2] == [1:4, 5:9] + @test eltype(aconc) == Vector{Int} + + a = fill([1,1], 100, 50) + b = [rem(i.I[3], 5) == 0 ? 
DiskArrays.MissingTile([0,0]) : a for i in CartesianIndices((1, 1, 100))] + b[1] = DiskArrays.MissingTile([0,0]) + a_conc = DiskArrays.ConcatDiskArray(b) + ch = eachchunk(a_conc) + @test ch.chunks[1] == [1:100] + @test ch.chunks[2] == [1:50] + @test ch.chunks[3] === DiskArrays.RegularChunks(1, 0, 100) + + @test all(isequal.(a_conc[2, 2, 1:5], [[0,0], [1,1],[1,1] , [1,1], [0,0]])) + @test all(isequal.(a_conc[end, end, 95:100], [[0,0], [1,1], [1,1], [1,1],[1,1], [0,0]])) + + end + end @testset "Broadcast with length 1 and 0 final dim" begin @@ -958,8 +988,10 @@ struct TestArray{T,N} <: AbstractArray{T,N} end DiskArrays.@implement_array_methods TestArray DiskArrays.@implement_permutedims TestArray DiskArrays.@implement_subarray TestArray - DiskArrays.@implement_diskarray TestArray @test DiskArrays.isdisk(TestArray) == true + DiskArrays.@implement_diskarray TestArray2 + @test DiskArrays.isdisk(TestArray2) == true + end # issue #123