Update GNNChain #202

Merged
merged 7 commits on Jul 29, 2022
4 changes: 2 additions & 2 deletions Project.toml
@@ -1,7 +1,7 @@
name = "GraphNeuralNetworks"
uuid = "cffab07f-9bc2-4db1-8861-388f63bf7694"
authors = ["Carlo Lucibello and contributors"]
version = "0.4.4"
version = "0.4.5"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -29,7 +29,7 @@ Adapt = "3"
CUDA = "3.3"
ChainRulesCore = "1"
DataStructures = "0.18"
Flux = "0.13"
Flux = "0.13.4"
Functors = "0.2, 0.3"
Graphs = "1.4"
KrylovKit = "0.5"
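The Flux compat bump to 0.13.4 lines up with the `ADAM` → `Adam` renames throughout this PR; presumably 0.13.4 is the first Flux release that ships the new optimiser spelling. A quick sanity-check sketch (nothing project-specific assumed):

```julia
using Pkg
Pkg.status("Flux")     # expect a 0.13.x entry, at least 0.13.4

using Flux
opt = Flux.Adam(1f-3)  # the optimiser spelling this PR standardises on
```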
2 changes: 1 addition & 1 deletion docs/src/index.md
@@ -54,7 +54,7 @@ model = GNNChain(GCNConv(16 => 64),
Dense(64, 1)) |> device

ps = Flux.params(model)
opt = ADAM(1f-4)
opt = Adam(1f-4)
```

### Training
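The `### Training` section is cut off in this view; for orientation, a minimal training-loop sketch using the `ps` and `opt` set up in the snippet above (the graph `g`, features `X`, targets `y`, and the mean-squared-error loss are illustrative assumptions):

```julia
using Flux

loss(g, X, y) = Flux.Losses.mse(vec(model(g, X)), y)

for epoch in 1:100
    gs = Flux.gradient(() -> loss(g, X, y), ps)  # implicit-parameter gradients
    Flux.Optimise.update!(opt, ps, gs)           # one Adam step
end
```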
4 changes: 2 additions & 2 deletions docs/src/tutorials/gnn_intro_pluto.jl
@@ -266,7 +266,7 @@ Since everything in our model is differentiable and parameterized, we can add so
Here, we make use of a semi-supervised or transductive learning procedure: We simply train against one node per class, but are allowed to make use of the complete input graph data.

Training our model is very similar to training any other Flux model.
In addition to defining our network architecture, we define a loss criterion (here, `logitcrossentropy`) and initialize a stochastic gradient optimizer (here, `ADAM`).
In addition to defining our network architecture, we define a loss criterion (here, `logitcrossentropy`) and initialize a stochastic gradient optimizer (here, `Adam`).
After that, we perform multiple rounds of optimization, where each round consists of a forward and backward pass to compute the gradients of our model parameters w.r.t. the loss derived from the forward pass.
If you are not new to Flux, this scheme should appear familiar to you.

@@ -285,7 +285,7 @@ Let us now start training and see how our node embeddings evolve over time (best
begin
model = GCN(num_features, num_classes)
ps = Flux.params(model)
opt = ADAM(1e-2)
opt = Adam(1e-2)
epochs = 2000

emb = h
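The tutorial text above describes each round of optimization as a forward pass followed by a backward pass; a compact sketch of one such round, assuming (as in the tutorial) that `model(g, X)` returns per-node logits and that `ps`, `opt`, `y`, and `train_mask` are already defined:

```julia
using Flux

loss(X, y) = Flux.logitcrossentropy(model(g, X)[:, train_mask], y[:, train_mask])

gs = Flux.gradient(() -> loss(X, y), ps)  # backward pass w.r.t. the parameters
Flux.Optimise.update!(opt, ps, gs)        # one Adam update, i.e. one training round
```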
2 changes: 1 addition & 1 deletion docs/src/tutorials/graph_classification_pluto.jl
@@ -202,7 +202,7 @@ function train!(model; epochs=200, η=1e-2, infotime=10)
device = Flux.cpu
model = model |> device
ps = Flux.params(model)
opt = ADAM(1e-3)
opt = Adam(1e-3)


function report(epoch)
2 changes: 1 addition & 1 deletion examples/graph_classification_tudataset.jl
@@ -82,7 +82,7 @@ function train(; kws...)
Dense(nhidden, 1)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

# LOGGING FUNCTION

2 changes: 1 addition & 1 deletion examples/link_prediction_pubmed.jl
@@ -77,7 +77,7 @@ function train(; kws...)
pred = DotPredictor()

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

### LOSS FUNCTION ############

2 changes: 1 addition & 1 deletion examples/neural_ode_cora.jl
@@ -48,7 +48,7 @@ model = GNNChain(GCNConv(nin => nhidden, relu),
ps = Flux.params(model);

# ## Optimizer
opt = ADAM(0.01)
opt = Adam(0.01)


function eval_loss_accuracy(X, y, mask)
2 changes: 1 addition & 1 deletion examples/node_classification_cora.jl
@@ -57,7 +57,7 @@ function train(; kws...)
Dense(nhidden, nout)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

display(g)

2 changes: 1 addition & 1 deletion perf/neural_ode_mnist.jl
@@ -40,7 +40,7 @@ model = Chain(Flux.flatten,
ps = Flux.params(model);

# ## Optimizer
opt = ADAM(0.01)
opt = Adam(0.01)

function eval_loss_accuracy(X, y)
ŷ = model(X)
2 changes: 1 addition & 1 deletion perf/node_classification_cora_geometricflux.jl
@@ -59,7 +59,7 @@ function train(; kws...)
Dense(nhidden, nout)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

@info g

134 changes: 79 additions & 55 deletions src/layers/basic.jl
@@ -49,20 +49,6 @@ WithGraph(model, g::GNNGraph; traingraph=false) = WithGraph(model, g, traingraph
@functor WithGraph
Flux.trainable(l::WithGraph) = l.traingraph ? (; l.model, l.g) : (; l.model,)

# Work around
# https://github.com/FluxML/Flux.jl/issues/1733
# Revisit after
# https://github.com/FluxML/Flux.jl/pull/1742
function Flux.destructure(m::WithGraph)
@assert m.traingraph == false # TODO
p, re = Flux.destructure(m.model)
function re_withgraph(x)
WithGraph(re(x), m.g, m.traingraph)
end

return p, re_withgraph
end

(l::WithGraph)(g::GNNGraph, x...; kws...) = l.model(g, x...; kws...)
(l::WithGraph)(x...; kws...) = l.model(l.g, x...; kws...)
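A plausible reading of the deleted `Flux.destructure` workaround above: with the Flux 0.13.4 compat bound, `destructure` is the generic Optimisers.jl-based implementation, which already walks `@functor` types such as `WithGraph`, so the hand-written method is no longer needed. A rough sketch of the behaviour now relied upon (toy layer sizes, not taken from the PR):

```julia
using Flux, GraphNeuralNetworks

g  = rand_graph(4, 8)
wg = WithGraph(GNNChain(GCNConv(2 => 2)), g)  # traingraph = false by default

p, re = Flux.destructure(wg)  # flat vector of trainable parameters + reconstructor
wg2    = re(p)                # rebuilds a WithGraph around the same graph
```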

@@ -85,74 +71,112 @@ and if names are given, `m[:name] == m[1]` etc.
# Examples

```juliarepl
julia> m = GNNChain(GCNConv(2=>5), BatchNorm(5), x -> relu.(x), Dense(5, 4));
julia> using Flux, GraphNeuralNetworks

julia> m = GNNChain(GCNConv(2=>5),
BatchNorm(5),
x -> relu.(x),
Dense(5, 4))
GNNChain(GCNConv(2 => 5), BatchNorm(5), #7, Dense(5 => 4))

julia> x = randn(Float32, 2, 3);

julia> g = GNNGraph([1,1,2,3], [2,3,1,1]);
julia> g = rand_graph(3, 6)
GNNGraph:
num_nodes = 3
num_edges = 6

julia> m(g, x)
4×3 Matrix{Float32}:
0.157941 0.15443 0.193471
0.0819516 0.0503105 0.122523
0.225933 0.267901 0.241878
-0.0134364 -0.0120716 -0.0172505
-0.795592 -0.795592 -0.795592
-0.736409 -0.736409 -0.736409
0.994925 0.994925 0.994925
0.857549 0.857549 0.857549

julia> m2 = GNNChain(enc = m,
dec = DotDecoder())
GNNChain(enc = GNNChain(GCNConv(2 => 5), BatchNorm(5), #7, Dense(5 => 4)), dec = DotDecoder())

julia> m2(g, x)
1×6 Matrix{Float32}:
2.90053 2.90053 2.90053 2.90053 2.90053 2.90053

julia> m2[:enc](g, x) == m(g, x)
true
```
"""
struct GNNChain{T} <: GNNLayer
struct GNNChain{T<:Union{Tuple, NamedTuple, AbstractVector}} <: GNNLayer
layers::T

GNNChain(xs...) = new{typeof(xs)}(xs)

function GNNChain(; kw...)
:layers in Base.keys(kw) && throw(ArgumentError("a GNNChain cannot have a named layer called `layers`"))
isempty(kw) && return new{Tuple{}}(())
new{typeof(values(kw))}(values(kw))
end
end

@forward GNNChain.layers Base.getindex, Base.length, Base.first, Base.last,
Base.iterate, Base.lastindex, Base.keys
@functor GNNChain

Flux.functor(::Type{<:GNNChain}, c) = c.layers, ls -> GNNChain(ls...)
Flux.functor(::Type{<:GNNChain}, c::Tuple) = c, ls -> GNNChain(ls...)
GNNChain(xs...) = GNNChain(xs)

# input from graph
applylayer(l, g::GNNGraph) = GNNGraph(g, ndata=l(node_features(g)))
applylayer(l::GNNLayer, g::GNNGraph) = l(g)
function GNNChain(; kw...)
:layers in Base.keys(kw) && throw(ArgumentError("a GNNChain cannot have a named layer called `layers`"))
isempty(kw) && return GNNChain(())
GNNChain(values(kw))
end

@forward GNNChain.layers Base.getindex, Base.length, Base.first, Base.last,
Base.iterate, Base.lastindex, Base.keys, Base.firstindex
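The `@forward` macro used above just delegates each listed `Base` method to the wrapped `layers` field, which is what lets a `GNNChain` be indexed and iterated like its underlying tuple. A toy illustration of that delegation pattern (the `Wrapper` type is hypothetical, not part of the package):

```julia
# Stand-in for what `@forward GNNChain.layers Base.getindex, Base.length, ...` provides.
struct Wrapper{T}
    layers::T
end

Base.getindex(w::Wrapper, i) = getindex(w.layers, i)
Base.length(w::Wrapper)      = length(w.layers)
Base.iterate(w::Wrapper, state...) = iterate(w.layers, state...)

w = Wrapper((sin, cos, tanh))
w[2]        # cos
length(w)   # 3
```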

(c::GNNChain)(g::GNNGraph, x) = _applychain(c.layers, g, x)
(c::GNNChain)(g::GNNGraph) = _applychain(c.layers, g)

## TODO see if this is faster for small chains
## see https://github.com/FluxML/Flux.jl/pull/1809#discussion_r781691180
# @generated function _applychain(layers::Tuple{Vararg{<:Any,N}}, g::GNNGraph, x) where {N}
# symbols = vcat(:x, [gensym() for _ in 1:N])
# calls = [:($(symbols[i+1]) = _applylayer(layers[$i], $(symbols[i]))) for i in 1:N]
# Expr(:block, calls...)
# end
# _applychain(layers::NamedTuple, g, x) = _applychain(Tuple(layers), x)
Comment on lines +128 to +135

Member Author:

note to myself: remember to benchmark this before merging

Member Author:

Benchmarked on a small graph / net:

        n, deg = 10, 4
        din, d, dout = 3, 4, 2

        g = GNNGraph(random_regular_graph(n, deg), 
                    graph_type=GRAPH_T,
                    ndata= randn(Float32, din, n))
        x = g.ndata.x

        gnn = GNNChain(GCNConv(din => d),
                       BatchNorm(d),
                       x -> tanh.(x),
                       GraphConv(d => d, tanh),
                       Dropout(0.5),
                       Dense(d, dout))

There is a performance increase with the generated `_applychain`, but it is not large enough for the change to be worthwhile.

julia> using BenchmarkTools

### without @generated _applychain

julia> @btime gnn(g, x)
  7.469 μs (84 allocations: 12.48 KiB)
2×10 Matrix{Float32}:
 -0.8186    -0.570312  -0.777638    -0.641642  -0.684857  -0.975505
  0.305567   0.559996   0.631279      0.4687     0.479899   0.321139

julia> @btime gradient(x -> sum(gnn(g, x)), x)
  515.917 μs (2422 allocations: 160.52 KiB)
(Float32[0.3974119 -0.5917164  -0.9200875 1.1957061; -0.54502636 -1.5056851  -2.6915464 2.5114572; -0.97105116 0.7726713  1.0995824 -1.5013595],)

### with @generated _applychain

julia> @btime gnn(g, x)
  6.825 μs (73 allocations: 11.55 KiB)
2×10 Matrix{Float32}:
 -0.8186    -0.570312  -0.777638    -0.641642  -0.684857  -0.975505
  0.305567   0.559996   0.631279      0.4687     0.479899   0.321139

julia> @btime gradient(x -> sum(gnn(g, x)), x)
  454.750 μs (2157 allocations: 161.00 KiB)
(Float32[-0.564121 0.3105453  0.19531891 -0.22819248; -0.6428803 0.13550264  0.9421329 -0.79201597; 0.7816532 -0.4734739  0.23667078 0.033573348],)

In both cases the gradient is very slow; this should be investigated further.


function _applychain(layers, g::GNNGraph, x) # type-unstable path, helps compile times
for l in layers
x = _applylayer(l, g, x)
end
return x
end

# explicit input
applylayer(l, g::GNNGraph, x) = l(x)
applylayer(l::GNNLayer, g::GNNGraph, x) = l(g, x)
function _applychain(layers, g::GNNGraph) # type-unstable path, helps compile times
for l in layers
g = _applylayer(l, g)
end
return g
end

# Handle Flux.Parallel
applylayer(l::Parallel, g::GNNGraph) = GNNGraph(g, ndata=applylayer(l, g, node_features(g)))
applylayer(l::Parallel, g::GNNGraph, x::AbstractArray) = mapreduce(f -> applylayer(f, g, x), l.connection, l.layers)
# # explicit input
_applylayer(l, g::GNNGraph, x) = l(x)
_applylayer(l::GNNLayer, g::GNNGraph, x) = l(g, x)

# input from graph
applychain(::Tuple{}, g::GNNGraph) = g
applychain(fs::Tuple, g::GNNGraph) = applychain(tail(fs), applylayer(first(fs), g))

# explicit input
applychain(::Tuple{}, g::GNNGraph, x) = x
applychain(fs::Tuple, g::GNNGraph, x) = applychain(tail(fs), g, applylayer(first(fs), g, x))
_applylayer(l, g::GNNGraph) = GNNGraph(g, ndata=l(node_features(g)))
_applylayer(l::GNNLayer, g::GNNGraph) = l(g)

(c::GNNChain)(g::GNNGraph, x) = applychain(Tuple(c.layers), g, x)
(c::GNNChain)(g::GNNGraph) = applychain(Tuple(c.layers), g)
# # Handle Flux.Parallel
_applylayer(l::Parallel, g::GNNGraph) = GNNGraph(g, ndata=_applylayer(l, g, node_features(g)))

function _applylayer(l::Parallel, g::GNNGraph, x::AbstractArray)
closures = map(f -> (x -> _applylayer(f, g, x)), l.layers)
return Parallel(l.connection, closures)(x)
end
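For context, a small sketch of the kind of model that exercises this `Parallel` path, mirroring the residual-connection pattern used in the tests further below (the `AddResidual` helper and layer sizes are illustrative):

```julia
using Flux, GraphNeuralNetworks

AddResidual(l) = Parallel(+, identity, l)  # skip connection around a layer

g = rand_graph(10, 40)
x = randn(Float32, 4, 10)

m = GNNChain(GraphConv(4 => 4, tanh),
             AddResidual(GraphConv(4 => 4, tanh)),  # dispatches to _applylayer(::Parallel, g, x)
             Dense(4, 2))

m(g, x)  # 2×10 output
```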

Base.getindex(c::GNNChain, i::AbstractArray) = GNNChain(c.layers[i]...)
Base.getindex(c::GNNChain{<:NamedTuple}, i::AbstractArray) =
GNNChain(; NamedTuple{Base.keys(c)[i]}(Tuple(c.layers)[i])...)
Base.getindex(c::GNNChain, i::AbstractArray) = GNNChain(c.layers[i])
Base.getindex(c::GNNChain{<:NamedTuple}, i::AbstractArray) =
GNNChain(NamedTuple{keys(c)[i]}(Tuple(c.layers)[i]))

function Base.show(io::IO, c::GNNChain)
print(io, "GNNChain(")
_show_layers(io, c.layers)
print(io, ")")
end

_show_layers(io, layers::Tuple) = join(io, layers, ", ")
_show_layers(io, layers::NamedTuple) = join(io, ["$k = $v" for (k, v) in pairs(layers)], ", ")

_show_layers(io, layers::AbstractVector) = (print(io, "["); join(io, layers, ", "); print(io, "]"))

"""
DotDecoder()
@@ -181,5 +205,5 @@ struct DotDecoder <: GNNLayer end

function (::DotDecoder)(g, x)
check_num_nodes(g, x)
apply_edges(xi_dot_xj, g, xi=x, xj=x)
return apply_edges(xi_dot_xj, g, xi=x, xj=x)
end
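For reference, a minimal usage sketch of `DotDecoder` (the only change above is the explicit `return`); the node embeddings here are random placeholders where an encoder GNN would normally produce `x`:

```julia
using GraphNeuralNetworks

g = rand_graph(5, 12)
x = randn(Float32, 8, 5)     # 8-dimensional embedding per node

scores = DotDecoder()(g, x)  # 1×12 matrix: one dot product xᵢ ⋅ xⱼ per edge
```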
2 changes: 1 addition & 1 deletion test/examples/node_classification_cora.jl
@@ -53,7 +53,7 @@ function train(Layer; verbose=false, kws...)
Dense(nhidden, nout)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)


## TRAINING
27 changes: 25 additions & 2 deletions test/layers/basic.jl
@@ -1,11 +1,13 @@
@testset "basic" begin
@testset "GNNChain" begin
n, din, d, dout = 10, 3, 4, 2
deg = 4

g = GNNGraph(random_regular_graph(n, 4),
g = GNNGraph(random_regular_graph(n, deg),
graph_type=GRAPH_T,
ndata= randn(Float32, din, n))

x = g.ndata.x

gnn = GNNChain(GCNConv(din => d),
BatchNorm(d),
x -> tanh.(x),
@@ -17,6 +19,27 @@

test_layer(gnn, g, rtol=1e-5, exclude_grad_fields=[:μ, :σ²])

@testset "constructor with names" begin
m = GNNChain(GCNConv(din=>d),
BatchNorm(d),
x -> relu.(x),
Dense(d, dout))

m2 = GNNChain(enc = m,
dec = DotDecoder())

@test m2[:enc] === m
@test m2(g, x) == m2[:dec](g, m2[:enc](g, x))
end

@testset "constructor with vector" begin
m = GNNChain(GCNConv(din=>d),
BatchNorm(d),
x -> relu.(x),
Dense(d, dout))
m2 = GNNChain([m.layers...])
@test m2(g, x) == m(g, x)
end

@testset "Parallel" begin
AddResidual(l) = Parallel(+, identity, l)