From bf66dd1113bfb6047519ae6324a980b5e55ef55b Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Sat, 22 Feb 2025 21:45:15 +0000 Subject: [PATCH 1/4] Fix splay tree amortized time, deletions Fixes two bugs in the splay tree implementation: 1. Not splaying on failed searches or redundant insertions can break the O(log n) amortized time guarantee. Fix: Always splay at the end of search_node! 2. Deletion did not check that the node returned from search_node! had the desired key, so trying to delete keys not in the splay tree could cause it to accidentally delete unrelated keys. Fix: Check node.data for the return value of search_node!, as is done already for other operations. I've also added a regression test for bug (2). --- src/DataStructures.jl | 2 +- src/splay_tree.jl | 20 +++++++++----------- test/test_splay_tree.jl | 32 +++++++++++++++++++++++++++----- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 5e5d767ec..fd30f01f4 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -51,7 +51,7 @@ module DataStructures export DiBitVector export RBTree, search_node, minimum_node - export SplayTree, maximum_node + export SplayTree, search_node!, maximum_node export AVLTree, sorted_rank export SplayTree, maximum_node diff --git a/src/splay_tree.jl b/src/splay_tree.jl index e6e804516..4767e8b7e 100644 --- a/src/splay_tree.jl +++ b/src/splay_tree.jl @@ -126,7 +126,7 @@ function _join!(tree::SplayTree, s::Union{SplayTreeNode, Nothing}, t::Union{Spla end end -function search_node(tree::SplayTree{K}, d::K) where K +function search_node!(tree::SplayTree{K}, d::K) where K node = tree.root prev = nothing while node != nothing && node.data != d @@ -137,7 +137,9 @@ function search_node(tree::SplayTree{K}, d::K) where K node = node.leftChild end end - return (node == nothing) ? prev : node + last = (node == nothing) ? prev : node + (last == nothing) || splay!(tree, last) + return last end function Base.haskey(tree::SplayTree{K}, d::K) where K @@ -145,11 +147,9 @@ function Base.haskey(tree::SplayTree{K}, d::K) where K if node === nothing return false else - node = search_node(tree, d) + node = search_node!(tree, d) (node === nothing) && return false - is_found = (node.data == d) - is_found && splay!(tree, node) - return is_found + return (node.data == d) end end @@ -157,13 +157,11 @@ Base.in(key, tree::SplayTree) = haskey(tree, key) function Base.delete!(tree::SplayTree{K}, d::K) where K node = tree.root - x = search_node(tree, d) - (x == nothing) && return tree + x = search_node!(tree, d) + (x == nothing || x.data != d) && return tree t = nothing s = nothing - splay!(tree, x) - if x.rightChild !== nothing t = x.rightChild t.parent = nothing @@ -183,7 +181,7 @@ end function Base.push!(tree::SplayTree{K}, d0) where K d = convert(K, d0) - is_present = search_node(tree, d) + is_present = search_node!(tree, d) if (is_present !== nothing) && (is_present.data == d) return tree end diff --git a/test/test_splay_tree.jl b/test/test_splay_tree.jl index bcd404174..44801a845 100644 --- a/test/test_splay_tree.jl +++ b/test/test_splay_tree.jl @@ -42,6 +42,28 @@ @test length(t) == 100 end + @testset "deleting missing values" begin + t = SplayTree{Int}() + for i in 1:100 + push!(t, i) + end + for i in 101:200 + delete!(t, i) + end + + @test length(t) == 100 + + for i in 1:100 + @test haskey(t, i) + end + + for i in 101:200 + push!(t, i) + end + + @test length(t) == 200 + end + @testset "handling different cases of delete!" begin t2 = SplayTree() for i in 1:100000 @@ -84,16 +106,16 @@ @test !in('c', t4) end - @testset "search_node" begin + @testset "search_node!" begin t5 = SplayTree() for i in 1:32 push!(t5, i) end - n1 = search_node(t5, 21) + n1 = search_node!(t5, 21) @test n1.data == 21 - n2 = search_node(t5, 35) + n2 = search_node!(t5, 35) @test n2.data == 32 - n3 = search_node(t5, 0) + n3 = search_node!(t5, 0) @test n3.data == 1 end @@ -131,4 +153,4 @@ node = node.rightChild end end -end \ No newline at end of file +end From a4f5182fdc92bbf808c90112e05eeaf12bcf1be9 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Sat, 22 Feb 2025 23:07:22 +0000 Subject: [PATCH 2/4] Add some basic tree benchmarks --- benchmark/bench_trees.jl | 69 ++++++++++++++++++++++++++++++++++++++++ benchmark/benchmarks.jl | 1 + src/DataStructures.jl | 1 - 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 benchmark/bench_trees.jl diff --git a/benchmark/bench_trees.jl b/benchmark/bench_trees.jl new file mode 100644 index 000000000..547967d3e --- /dev/null +++ b/benchmark/bench_trees.jl @@ -0,0 +1,69 @@ +module BenchTrees + +using DataStructures +using BenchmarkTools +using Random + +suite = BenchmarkGroup() + +rand_setup = ( + Random.seed!(1234); + idx1 = rand(1:30000, 1000); + didx1 = rand(1:1000, 500); +) + +# insert a bunch of keys, search for all of them, delete half of them randomly, +# then search for all of them again. +function test_rand(T) + t = T{Int}() + for i in idx1 + push!(t, i) + end + for i in idx1 + haskey(t, i) + end + for i in didx1 + delete!(t, idx1[i]) + end + for i in idx1 + haskey(t, i) + end +end + +# insert 1, ..., N, then push 1 many times. tests a regression from an older +# splay tree implementation where splays didn't happen on redundant pushes. +function test_redundant(T, N=100000) + t = T{Int}() + for i in 1:N + push!(t, i) + end + for i in 1:N + push!(t, 1) + end +end + +# insert 1, ..., N, then access element 1 for N iterations. splay trees should +# perform best here. +function test_biased(T, N=100000) + t = T{Int}() + for i in 1:N + push!(t, i) + end + for _ in 1:N + haskey(t, 1) + end +end + +trees = Dict("splay" => SplayTree, "avl" => AVLTree, "rb" => RBTree) +for (T_name, T) in trees + suite[T_name]["rand"] = + @benchmarkable test_rand($(T)) setup=rand_setup + suite[T_name]["redundant"] = + @benchmarkable test_redundant($(T)) + suite[T_name]["biased"] = + @benchmarkable test_biased($T) +end + +end # module + +BenchTrees.suite diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 38e03e632..2b4b52e2d 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -12,3 +12,4 @@ const SUITE = BenchmarkGroup() SUITE["heap"] = include("bench_heap.jl") SUITE["SparseIntSet"] = include("bench_sparse_int_set.jl") +SUITE["trees"] = include("bench_trees.jl") diff --git a/src/DataStructures.jl b/src/DataStructures.jl index fd30f01f4..76ebc7cfd 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -53,7 +53,6 @@ module DataStructures export RBTree, search_node, minimum_node export SplayTree, search_node!, maximum_node export AVLTree, sorted_rank - export SplayTree, maximum_node export findkey From 5f228eee6ee815c29905810a6b4fb47f0b9233b3 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Fri, 21 Mar 2025 03:27:46 +0000 Subject: [PATCH 3/4] Undo renaming of splay tree search_node! command --- src/DataStructures.jl | 2 +- src/splay_tree.jl | 8 ++++---- test/test_splay_tree.jl | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 76ebc7cfd..6ca01a0dd 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -51,7 +51,7 @@ module DataStructures export DiBitVector export RBTree, search_node, minimum_node - export SplayTree, search_node!, maximum_node + export SplayTree, search_node, maximum_node export AVLTree, sorted_rank export findkey diff --git a/src/splay_tree.jl b/src/splay_tree.jl index 4767e8b7e..ecc60697f 100644 --- a/src/splay_tree.jl +++ b/src/splay_tree.jl @@ -126,7 +126,7 @@ function _join!(tree::SplayTree, s::Union{SplayTreeNode, Nothing}, t::Union{Spla end end -function search_node!(tree::SplayTree{K}, d::K) where K +function search_node(tree::SplayTree{K}, d::K) where K node = tree.root prev = nothing while node != nothing && node.data != d @@ -147,7 +147,7 @@ function Base.haskey(tree::SplayTree{K}, d::K) where K if node === nothing return false else - node = search_node!(tree, d) + node = search_node(tree, d) (node === nothing) && return false return (node.data == d) end @@ -157,7 +157,7 @@ Base.in(key, tree::SplayTree) = haskey(tree, key) function Base.delete!(tree::SplayTree{K}, d::K) where K node = tree.root - x = search_node!(tree, d) + x = search_node(tree, d) (x == nothing || x.data != d) && return tree t = nothing s = nothing @@ -181,7 +181,7 @@ end function Base.push!(tree::SplayTree{K}, d0) where K d = convert(K, d0) - is_present = search_node!(tree, d) + is_present = search_node(tree, d) if (is_present !== nothing) && (is_present.data == d) return tree end diff --git a/test/test_splay_tree.jl b/test/test_splay_tree.jl index 44801a845..cf993516e 100644 --- a/test/test_splay_tree.jl +++ b/test/test_splay_tree.jl @@ -106,16 +106,16 @@ @test !in('c', t4) end - @testset "search_node!" begin + @testset "search_node" begin t5 = SplayTree() for i in 1:32 push!(t5, i) end - n1 = search_node!(t5, 21) + n1 = search_node(t5, 21) @test n1.data == 21 - n2 = search_node!(t5, 35) + n2 = search_node(t5, 35) @test n2.data == 32 - n3 = search_node!(t5, 0) + n3 = search_node(t5, 0) @test n3.data == 1 end From fd2997df8c62df8e9e1b0ee29191cf9bc98ccdf3 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Sat, 22 Mar 2025 19:36:14 +0000 Subject: [PATCH 4/4] comment about why we don't "!" --- src/splay_tree.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/splay_tree.jl b/src/splay_tree.jl index ecc60697f..60a79e5d0 100644 --- a/src/splay_tree.jl +++ b/src/splay_tree.jl @@ -126,6 +126,11 @@ function _join!(tree::SplayTree, s::Union{SplayTreeNode, Nothing}, t::Union{Spla end end +# search_node must do a splay even in the case of failed searches to guarantee +# the O(log n) amortized time bound. This might change the structure of the +# tree, but does NOT change the user-visible state (has_key, in-order +# traversals, etc., all keep the same results before and after). Hence we don't +# mark it with a "!". function search_node(tree::SplayTree{K}, d::K) where K node = tree.root prev = nothing