diff --git a/src/Graphs.jl b/src/Graphs.jl index 86bc0946..8f20ec0d 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -210,6 +210,8 @@ export # connectivity connected_components, + connected_components!, + count_connected_components, strongly_connected_components, strongly_connected_components_kosaraju, strongly_connected_components_tarjan, diff --git a/src/connectivity.jl b/src/connectivity.jl index 18e0aca9..83a5ff3c 100644 --- a/src/connectivity.jl +++ b/src/connectivity.jl @@ -1,26 +1,33 @@ # Parts of this code were taken / derived from Graphs.jl. See LICENSE for # licensing details. """ - connected_components!(label, g) + connected_components!(label, g, [search_queue]) Fill `label` with the `id` of the connected component in the undirected graph `g` to which it belongs. Return a vector representing the component assigned to each vertex. The component value is the smallest vertex ID in the component. -### Performance +## Optional arguments +- `search_queue`, an empty `Vector{eltype(edgetype(g))}`, can be provided to avoid + reallocating this work array repeatedly on repeated calls of `connected_components!`. + If not provided, it is automatically instantiated. + +## Performance This algorithm is linear in the number of edges of the graph. 
""" -function connected_components!(label::AbstractVector, g::AbstractGraph{T}) where {T} +function connected_components!( + label::AbstractVector{T}, g::AbstractGraph{T}, search_queue::Vector{T}=Vector{T}() +) where {T} + empty!(search_queue) for u in vertices(g) label[u] != zero(T) && continue label[u] = u - Q = Vector{T}() - push!(Q, u) - while !isempty(Q) - src = popfirst!(Q) + push!(search_queue, u) + while !isempty(search_queue) + src = popfirst!(search_queue) for vertex in all_neighbors(g, src) if label[vertex] == zero(T) - push!(Q, vertex) + push!(search_queue, vertex) label[vertex] = u end end @@ -129,9 +136,74 @@ julia> is_connected(g) true ``` """ -function is_connected(g::AbstractGraph) +function is_connected(g::AbstractGraph{T}) where {T} mult = is_directed(g) ? 2 : 1 - return mult * ne(g) + 1 >= nv(g) && length(connected_components(g)) == 1 + if mult * ne(g) + 1 >= nv(g) + label = zeros(T, nv(g)) + connected_components!(label, g) + return allequal(label) + else + return false + end +end + +""" + count_connected_components(g, [label, search_queue]; reset_label::Bool=false) + +Return the number of connected components in `g`. + +Equivalent to `length(connected_components(g))` but uses fewer allocations by not +materializing the component vectors explicitly. + +## Optional arguments +Mutated work arrays, `label` and `search_queue`, can be provided to avoid allocating these +arrays repeatedly on repeated calls of `count_connected_components`. +For `g :: AbstractGraph{T}`, `label` must be a zero-initialized `Vector{T}` of length +`nv(g)` and `search_queue` a `Vector{T}`. See also [`connected_components!`](@ref). + +## Keyword arguments +- `reset_label :: Bool` (default, `false`): if `true`, `label` is reset to a zero-vector + before returning. 
+ +## Example +``` +julia> using Graphs + +julia> g = Graph(Edge.([1=>2, 2=>3, 3=>1, 4=>5, 5=>6, 6=>4, 7=>8])); + +julia> connected_components(g) +3-element Vector{Vector{Int64}}: + [1, 2, 3] + [4, 5, 6] + [7, 8] + +julia> count_connected_components(g) +3 +``` +""" +function count_connected_components( + g::AbstractGraph{T}, + label::AbstractVector{T}=zeros(T, nv(g)), + search_queue::Vector{T}=Vector{T}(); + reset_label::Bool=false, +) where {T} + connected_components!(label, g, search_queue) + c = count_unique(label) + reset_label && fill!(label, zero(eltype(label))) + return c +end + +function count_unique(label::Vector{T}) where {T} + # effectively does `length(Set(label))` but faster, since `Set(label)` sizehints + # aggressively and assumes that most elements of `label` will be unique, which very + # rarely will be the case for caller `count_connected_components` + seen = Set{T}() + for l in label + # faster than direct `push!(seen, l)` when `label` has few unique elements relative + # to `length(label)` + l ∉ seen && push!(seen, l) + end + return length(seen) end """ diff --git a/test/operators.jl b/test/operators.jl index bf4931eb..f849b5bc 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -268,6 +268,7 @@ for i in 3:4 @testset "Tensor Product: $g" for g in testgraphs(path_graph(i)) @test length(connected_components(tensor_product(g, g))) == 2 + @test count_connected_components(tensor_product(g, g)) == 2 end end diff --git a/test/spanningtrees/boruvka.jl b/test/spanningtrees/boruvka.jl index dfabbaeb..552b77be 100644 --- a/test/spanningtrees/boruvka.jl +++ b/test/spanningtrees/boruvka.jl @@ -21,14 +21,18 @@ g1t = GenericGraph(SimpleGraph(edges1)) @test res1.weight == cost_mst # acyclic graphs have n - c edges - @test nv(g1t) - length(connected_components(g1t)) == ne(g1t) + @test nv(g1t) - ne(g1t) == + length(connected_components(g1t)) == + count_connected_components(g1t) @test nv(g1t) == nv(g) res2 = boruvka_mst(g, distmx; minimize=false) edges2 = 
[Edge(src(e), dst(e)) for e in res2.mst] g2t = GenericGraph(SimpleGraph(edges2)) @test res2.weight == cost_max_vec_mst - @test nv(g2t) - length(connected_components(g2t)) == ne(g2t) + @test nv(g2t) - ne(g2t) == + length(connected_components(g2t)) == + count_connected_components(g2t) @test nv(g2t) == nv(g) end # second test @@ -60,14 +64,18 @@ edges3 = [Edge(src(e), dst(e)) for e in res3.mst] g3t = GenericGraph(SimpleGraph(edges3)) @test res3.weight == weight_vec2 - @test nv(g3t) - length(connected_components(g3t)) == ne(g3t) + @test nv(g3t) - ne(g3t) == + length(connected_components(g3t)) == + count_connected_components(g3t) @test nv(g3t) == nv(gx) res4 = boruvka_mst(g, distmx_sec; minimize=false) edges4 = [Edge(src(e), dst(e)) for e in res4.mst] g4t = GenericGraph(SimpleGraph(edges4)) @test res4.weight == weight_max_vec2 - @test nv(g4t) - length(connected_components(g4t)) == ne(g4t) + @test nv(g4t) - ne(g4t) == + length(connected_components(g4t)) == + count_connected_components(g4t) @test nv(g4t) == nv(gx) end @@ -123,14 +131,18 @@ edges5 = [Edge(src(e), dst(e)) for e in res5.mst] g5t = GenericGraph(SimpleGraph(edges5)) @test res5.weight == weight_vec3 - @test nv(g5t) - length(connected_components(g5t)) == ne(g5t) + @test nv(g5t) - ne(g5t) == + length(connected_components(g5t)) == + count_connected_components(g5t) @test nv(g5t) == nv(gd) res6 = boruvka_mst(g, distmx_third; minimize=false) edges6 = [Edge(src(e), dst(e)) for e in res6.mst] g6t = GenericGraph(SimpleGraph(edges6)) @test res6.weight == weight_max_vec3 - @test nv(g6t) - length(connected_components(g6t)) == ne(g6t) + @test nv(g6t) - ne(g6t) == + length(connected_components(g6t)) == + count_connected_components(g6t) @test nv(g6t) == nv(gd) end end