diff --git a/Project.toml b/Project.toml index b4c4590..4940ee8 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SimpleTraits = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" @@ -24,6 +25,7 @@ JSON3 = "^1.0.1" PyCall = "^1.91.2" PyPlot = "^2.8.2" SimpleTraits = "^0.9.4" +SparseArrays = "^1.0.0" StatsBase = "^0.34.0" StructTypes = "^1.0.1" julia = "^1.0.0" diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index c27e022..8f5357e 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -9,6 +9,7 @@ using PyPlot using JSON3 using Random using LinearAlgebra +using SparseArrays using SimpleTraits export AbstractHypergraph, AbstractSimpleHypergraph @@ -33,14 +34,18 @@ export modularity export randompartition export AbstractCommunityFinder, CFModularityRandom, CFModularityCNMLike, CFLabelPropagationFinder export findcommunities +export quad_clustering_coefficient export nhv, nhe export random_walk export get_connected_components export conductance -export AbstractDistance, SnodeDistanceDijkstra, SedgeDistanceDijkstra +export AbstractDistance +export SnodeDistanceDijkstra, SedgeDistanceDijkstra, SnodeDistanceBFS, SedgeDistanceBFS export distance +# export SparseHypergraphView + export HyperNetX, GraphBased export draw @@ -82,12 +87,14 @@ include("models/bipartite.jl") include("models/twosection.jl") include("models/random-models.jl") include("models/dual.jl") +# include("models/sparse.jl") include("algorithms/conductance.jl") include("algorithms/distance.jl") include("algorithms/community/modularity.jl") include("algorithms/community/label-propagation.jl") +include("algorithms/community/clustering.jl") include("viz/drawing.jl") 
include("viz/widget.jl")
diff --git a/src/algorithms/community/clustering.jl b/src/algorithms/community/clustering.jl
new file mode 100644
index 0000000..4acf031
--- /dev/null
+++ b/src/algorithms/community/clustering.jl
@@ -0,0 +1,56 @@
+# Number of quads through vertex `i`: one for each pair of hyperedges containing `i` and each other
+# vertex `j` that lies in both hyperedges of the pair.
+# NOTE(review): relies on `hg.v2he[i]` / `hg.he2v[α]` being dictionaries keyed by hyperedge / vertex
+# index, as in `Hypergraph` - confirm for other `AbstractSimpleHypergraph` subtypes.
+function _num_quads(hg::H, i::Int) where {H <: AbstractSimpleHypergraph}
+    # Only hyperedges containing `i` can form a quad through `i`; all others are skipped.
+    hes = collect(keys(hg.v2he[i]))
+    quads = 0
+    for a in eachindex(hes), b in (a + 1):lastindex(hes)
+        vs_a = hg.he2v[hes[a]]
+        vs_b = hg.he2v[hes[b]]
+        quads += count(j -> j != i && haskey(vs_b, j), keys(vs_a))
+    end
+    return quads
+end
+
+# Upper bound on the number of quads through vertex `i`: a pair of hyperedges `α`, `β` containing `i`
+# can share at most `min(|α|, |β|) - 1` further vertices.
+function _max_num_quads(hg::H, i::Int) where {H <: AbstractSimpleHypergraph}
+    sizes = [length(hg.he2v[α]) for α in keys(hg.v2he[i])]
+    qmax = 0
+    for a in eachindex(sizes), b in (a + 1):lastindex(sizes)
+        qmax += min(sizes[a], sizes[b]) - 1
+    end
+    return qmax
+end
+
+
+"""
+    quad_clustering_coefficient(hg::H, i::Int) where {H <: AbstractSimpleHypergraph}
+    quad_clustering_coefficient(hg::H) where {H <: AbstractSimpleHypergraph}
+
+Return the "quad clustering coefficient" (QCC) of vertex `i` of the hypergraph `hg`, or the vector of the
+QCCs of all vertices of `hg` when `i` is omitted. The QCC is described in:
+Ha, Neri, and Annibale, Chaos 34, 043102 (2024), DOI: 10.1063/5.0188246
+
+A *quad* is the shortest simple cycle in a hypergraph, consisting of two vertices `i` and `j` that are both
+incident on the same two hyperedges `α` and `β`. The QCC is a density, describing the fraction of all possible
+quads a particular vertex `i` participates in, so `0 <= QCC <= 1` always holds.
+
+The QCC is `0.0` when `i` is incident on fewer than two hyperedges, and also when no quad through `i` is
+possible at all (every pair of hyperedges containing `i` includes a hyperedge whose only vertex is `i`).
+"""
+function quad_clustering_coefficient(hg::H, i::Int) where {H <: AbstractSimpleHypergraph}
+    length(hg.v2he[i]) < 2 && return 0.0
+
+    qmax = _max_num_quads(hg, i)
+    # Without this guard the ratio below would evaluate 0 / 0, i.e. NaN.
+    qmax == 0 && return 0.0
+
+    return _num_quads(hg, i) / qmax
+end
+
+function quad_clustering_coefficient(hg::H) where {H <: AbstractSimpleHypergraph}
+    return [quad_clustering_coefficient(hg, i) for i in 1:nhv(hg)]
+end
\ No newline at end of file
diff --git a/src/algorithms/distance.jl b/src/algorithms/distance.jl
index c202342..5b6d3d0 100644
--- a/src/algorithms/distance.jl
+++ b/src/algorithms/distance.jl
@@ -33,16 +33,16 @@ end
 
 
 """
     distance(h::H, distance_method::SnodeDistanceDijkstra) where {H<:AbstractSimpleHypergraph}
 
-Return the shortest `distance_method.s`-walk distance between the `distance_method.source_node` and
-the node `distance_method.target_node` in the hypergraph `h`.
+Return the shortest `distance_method.s`-walk distance between the `distance_method.source_node` and the node
+`distance_method.target_node` in the hypergraph `h` using Dijkstra's algorithm (`SnodeDistanceDijkstra`).
 
 NOTE
-The concepts of `s`-distance and `s`-walk have been defined in the
-Python library [HyperNetX](https://github.com/pnnl/HyperNetX)
+The concepts of `s`-distance and `s`-walk have been defined in the Python library
+[HyperNetX](https://github.com/pnnl/HyperNetX).
 
-From [HyperNetX](https://pnnl.github.io/HyperNetX/build/_modules/classes/hypergraph.html#Hypergraph.distance)
+From [HyperNetX](https://pnnl.github.io/HyperNetX/build/_modules/classes/hypergraph.html#Hypergraph.distance):
 The `s`-distance is the shortest `s`-walk length between two nodes.
 An `s`-walk between nodes is a sequence of nodes that pairwise share at least `s` edges.
The length of the shortest `s`-walk is 1 less than
@@ -61,14 +61,14 @@ end
 """
     distance(h::H, distance_method::SedgeDistanceDijkstra) where {H<:AbstractSimpleHypergraph}
 
-Return the shortest `distance_method.s`-walk distance between the `distance_method.source_edge` and
-the node `distance_method.target_edge` in the hypergraph `h`.
+Return the shortest `distance_method.s`-walk distance between the `distance_method.source_edge` and the edge
+`distance_method.target_edge` in the hypergraph `h` using Dijkstra's algorithm (`SedgeDistanceDijkstra`).
 
 NOTE
-The concepts of `s`-distance and `s`-walk have been defined in the
-Python library [HyperNetX](https://github.com/pnnl/HyperNetX)
+The concepts of `s`-distance and `s`-walk have been defined in the Python library
+[HyperNetX](https://github.com/pnnl/HyperNetX).
 
-From [HyperNetX](https://pnnl.github.io/HyperNetX/build/_modules/classes/hypergraph.html#Hypergraph.edge_distance)
+From [HyperNetX](https://pnnl.github.io/HyperNetX/build/_modules/classes/hypergraph.html#Hypergraph.edge_distance):
 The `s`-distance is the shortest `s`-walk length between the edges.
 An `s`-walk between edges is a sequence of edges such that consecutive pairwise
 edges intersect in at least `s` nodes. The length of the shortest `s`-walk is 1 less than
@@ -83,5 +83,41 @@ function distance(h::H, distance_method::SedgeDistanceDijkstra) where {H<:Abstra
     dj.dists[distance_method.target_edge]
 end
 
 
-# TODO: distance in a directed hypergraph
\ No newline at end of file
+# Longest shortest-path distance between any two vertices of the graph built from the adjacency
+# matrix `A`, found by running Dijkstra's algorithm once from every vertex.
+function _dijkstra_diameter(A)
+    g = Graphs.Graph(A)
+    diam = 0
+    for v in 1:Graphs.nv(g)
+        # Fold each distance vector into the running maximum; no dense n × n matrix is needed.
+        diam = max(diam, maximum(Graphs.dijkstra_shortest_paths(g, v).dists))
+    end
+    return diam
+end
+
+"""
+    Graphs.diameter(h::H, distance_method::SnodeDistanceDijkstra) where {H<:AbstractSimpleHypergraph}
+
+Return the diameter of the hypergraph `h`: the maximum `distance_method.s`-walk distance between any two
+nodes, obtained by running Dijkstra's algorithm from each node of `h`. The result is `typemax(Int)` when
+some pair of nodes is not connected by any `s`-walk.
+
+Only `distance_method.s` is read; `distance_method.source_node` and `distance_method.target_node` are ignored.
+"""
+function Graphs.diameter(h::H, distance_method::SnodeDistanceDijkstra) where {H<:AbstractSimpleHypergraph}
+    return _dijkstra_diameter(adjacency_matrix(h; s=distance_method.s))
+end
+
+"""
+    Graphs.diameter(h::H, distance_method::SedgeDistanceDijkstra) where {H<:AbstractSimpleHypergraph}
+
+Return the diameter of the hypergraph `h`: the maximum `distance_method.s`-walk distance between any two
+edges, obtained by running Dijkstra's algorithm from each edge of `h`. The result is `typemax(Int)` when
+some pair of edges is not connected by any `s`-walk.
+
+Only `distance_method.s` is read; `distance_method.source_edge` and `distance_method.target_edge` are ignored.
+"""
+function Graphs.diameter(h::H, distance_method::SedgeDistanceDijkstra) where {H<:AbstractSimpleHypergraph}
+    return _dijkstra_diameter(edge_adjacency_matrix(h; s=distance_method.s))
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 1537cb6..77dec82 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -294,6 +294,41 @@ end;
     @test nhv(H∂) == 20
 end;
 
+@testset "SimpleHypergraphs quad clustering coefficient" begin
+    m1 = [
+        true true
+        true nothing
+        true nothing
+        nothing true
+        nothing true
+        nothing true
+    ]
+    hg1 = Hypergraph(m1)
+    @test quad_clustering_coefficient(hg1) == zeros(6)
+    @test quad_clustering_coefficient(hg1, 1) == 0.0
+
+    m2 = [
+        true true
+        true nothing
+        true true
+        nothing true
+        nothing true
+    ]
+    hg2 = Hypergraph(m2)
+    @test quad_clustering_coefficient(hg2, 1) == 0.5
+    @test quad_clustering_coefficient(hg2, 2) == 0.0
+    @test quad_clustering_coefficient(hg2) == [0.5, 0.0, 0.5, 0.0, 0.0]
+
+    m3 = [
+        true true
+        true true
+        true true
+        nothing true
+    ]
+    hg3 = Hypergraph(m3)
+    @test quad_clustering_coefficient(hg3) == [1.0, 1.0, 1.0, 0.0]
+end
+
 @testset "SimpleHypergraphs
modularity " begin Random.seed!(1234); @@ -591,4 +626,8 @@ end; @test distance(h, SedgeDistanceDijkstra(1, 3, 1)) == 1 @test distance(h, SedgeDistanceDijkstra(2, 3, 3)) == 1 @test distance(h, SedgeDistanceDijkstra(1, 3, 3)) == typemax(Int) + + @test Graphs.diameter(h, SnodeDistanceDijkstra(1,1,1)) == 2 + @test Graphs.diameter(h, SnodeDistanceDijkstra(1,1,2)) == typemax(Int) + @test Graphs.diameter(h, SedgeDistanceDijkstra(1,1,1)) == 1 end;