Upgrade train! to work with explicit parameters #2029

Closed · wants to merge 5 commits

21 changes: 16 additions & 5 deletions NEWS.md
@@ -1,11 +1,22 @@
# Flux Release Notes

## v0.14

* The use of Zygote's implicit parameters (with `Flux.params` and global variables) is deprecated in favour of the explicit style.
The function `train!` has new methods (accepting the model itself) to handle this; see the sketch after this list.

* Sub-module `Flux.Optimise` has been removed, in favour of using [Optimisers.jl](https://github.com/FluxML/Optimisers.jl) more deeply.
The function `train!` now lives in sub-module `Flux.Train`, and has re-written internals.

* One-hot arrays have moved to a new package [OneHotArrays.jl](https://github.com/FluxML/OneHotArrays.jl)
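As a rough illustration of the first two points (not part of this diff), the explicit style is expected to look roughly like the following. The `train!(loss, model, data, state)` form and the toy model and data are assumptions based on the PR description; the optimiser calls use only the documented Optimisers.jl API:

```julia
using Flux, Optimisers

# A toy model and a few fake data batches; all names here are illustrative.
model = Chain(Dense(2 => 8, relu), Dense(8 => 1))
data  = [(randn(Float32, 2, 32), randn(Float32, 1, 32)) for _ in 1:10]

# Explicit style: the optimiser state is set up against the model itself.
opt_state = Optimisers.setup(Optimisers.Adam(1e-3), model)

# The loss receives the model as its first argument. This `train!` signature is
# assumed from the PR description ("new methods accepting the model itself"),
# not copied verbatim from the diff.
Flux.train!(model, data, opt_state) do m, x, y
    Flux.Losses.mse(m(x), y)
end
```

For the last point, a short reminder of the one-hot API, which is unchanged by the move to a separate package:

```julia
using OneHotArrays   # previously provided by Flux itself

v = onehot(:b, [:a, :b, :c])       # a OneHotVector of length 3
m = onehotbatch("abca", 'a':'c')   # a 3×4 OneHotMatrix
onecold(m, 'a':'c')                # recovers ['a', 'b', 'c', 'a']
```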

## v0.13.4
* Added [`PairwiseFusion` layer](https://github.com/FluxML/Flux.jl/pull/1983)

## v0.13
## v0.13 (April 2022)

* After a deprecations cycle, the datasets in `Flux.Data` have
been removed in favour of MLDatasets.jl.
been removed in favour of [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl).
* `params` is not exported anymore since it is a common name and is also exported by Distributions.jl
* `flatten` is not exported anymore due to clash with Iterators.flatten.
* Remove Juno.jl progress bar support as it is now obsolete.
@@ -48,7 +59,7 @@ been removed in favour of MLDatasets.jl.
* CUDA.jl 3.0 support
* Bug fixes and optimizations.

## v0.12.0
## v0.12.0 (March 2021)

* Add [identity_init](https://github.com/FluxML/Flux.jl/pull/1524).
* Add [Orthogonal Matrix initialization](https://github.com/FluxML/Flux.jl/pull/1496) as described in [Exact solutions to the nonlinear dynamics of learning in deep linear neural networks](https://arxiv.org/abs/1312.6120).
Expand All @@ -73,7 +84,7 @@ been removed in favour of MLDatasets.jl.
* Adds the [AdaBelief](https://arxiv.org/abs/2010.07468) optimiser.
* Other new features and bug fixes (see GitHub releases page)

## v0.11
## v0.11 (July 2020)

* Moved CUDA compatibility to use [CUDA.jl instead of CuArrays.jl](https://github.com/FluxML/Flux.jl/pull/1204)
* Add [kaiming initialization](https://arxiv.org/abs/1502.01852) methods: [kaiming_uniform and kaiming_normal](https://github.com/FluxML/Flux.jl/pull/1243)
@@ -101,7 +112,7 @@ keyword argument. The `Dropout` struct (whose behavior is left unchanged) is the

See GitHub's releases.

## v0.10.0
## v0.10.0 (November 2019)

* The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
- The dependency on Tracker.jl has been removed.
4 changes: 3 additions & 1 deletion Project.toml
@@ -39,6 +39,7 @@ ProgressLogging = "0.1"
Reexport = "0.2, 1.0"
SpecialFunctions = "1.8.2, 2.1.2"
StatsBase = "0.33"
Yota = "0.7.4"
Zygote = "0.6.34"
julia = "1.6"

@@ -49,6 +50,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Yota = "cd998857-8626-517d-b929-70ad188a48f0"

[targets]
test = ["Test", "Documenter", "IterTools", "LinearAlgebra", "FillArrays", "ComponentArrays"]
test = ["Test", "Documenter", "IterTools", "LinearAlgebra", "FillArrays", "ComponentArrays", "Yota"]
19 changes: 10 additions & 9 deletions src/Flux.jl
@@ -11,7 +11,7 @@ import Optimisers: Optimisers, trainable, destructure # before v0.13, Flux owne

using Zygote, ChainRulesCore
using Zygote: Params, @adjoint, gradient, pullback, @nograd
export gradient
# export gradient # stop exporting this, to make people say "using Zygote", and make easier to replace

# Pirate error to catch a common mistake. (Internal function `base` because overloading `update!` is more likely to give ambiguities.)
Optimisers.base(dx::Zygote.Grads) = error("Optimisers.jl cannot be used with Zygote.jl's implicit gradients, `Params` & `Grads`")
@@ -25,14 +25,15 @@ export Chain, Dense, Maxout, SkipConnection, Parallel, PairwiseFusion,
fmap, cpu, gpu, f32, f64,
testmode!, trainmode!

include("optimise/Optimise.jl")
using .Optimise
using .Optimise: @epochs
using .Optimise: skip
export Descent, Adam, Momentum, Nesterov, RMSProp,
AdaGrad, AdaMax, AdaDelta, AMSGrad, NAdam, OAdam,
AdamW, RAdam, AdaBelief, InvDecay, ExpDecay,
WeightDecay, ClipValue, ClipNorm
include("train/Train.jl")
using .Train
export train!
# Stop exporting these, since Optimisers.jl exports the same names,
# and with this PR, Flux.Adam() is literally a wrapper around Adam().
# export Descent, Adam, Momentum, Nesterov, RMSProp,
# AdaGrad, AdaMax, AdaDelta, AMSGrad, NAdam, OAdam,
# AdamW, RAdam, AdaBelief, InvDecay, ExpDecay,
# WeightDecay, ClipValue, ClipNorm

using CUDA
const use_cuda = Ref{Union{Nothing,Bool}}(nothing)
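Taken together, the commented-out exports above mean that user code can no longer rely on `using Flux` alone to bring `gradient` or the optimiser names into scope. A hedged sketch of one way such code might adapt, assuming `Flux.Adam` becomes the thin wrapper described in the comment (the model, loss, and learning rate below are illustrative only):

```julia
using Flux
import Optimisers
using Zygote: gradient   # `gradient` is no longer re-exported by Flux in this PR

model = Dense(4 => 2)
grads = gradient(m -> sum(abs2, m(ones(Float32, 4))), model)[1]

# Optimiser rules are taken from Optimisers.jl rather than from Flux's exports:
opt_state = Optimisers.setup(Optimisers.Adam(3f-4), model)
Optimisers.update!(opt_state, model, grads)
```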
8 changes: 3 additions & 5 deletions src/deprecations.jl
@@ -34,11 +34,6 @@ struct Zeros
end
Zeros(args...) = Zeros() # was used both Dense(10, 2, initb = Zeros) and Dense(rand(2,10), Zeros())

function Optimise.update!(x::AbstractArray, x̄)
Base.depwarn("`Flux.Optimise.update!(x, x̄)` was not used internally and has been removed. Please write `x .-= x̄` instead.", :update!)
x .-= x̄
end

function Diagonal(size::Integer...; kw...)
Base.depwarn("Flux.Diagonal is now Flux.Scale, and also allows an activation function.", :Diagonal)
Scale(size...; kw...)
@@ -80,3 +75,6 @@ Base.@deprecate_binding RADAM RAdam
Base.@deprecate_binding OADAM OAdam
Base.@deprecate_binding ADAGrad AdaGrad
Base.@deprecate_binding ADADelta AdaDelta

# What remains from the Optimise sub-module has moved to Train:
Base.@deprecate_binding Optimise Train
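With this binding deprecation, code that still refers to the old sub-module by name is expected to keep working, with a deprecation warning, because `Flux.Optimise` now points at `Flux.Train`. A hypothetical check of that behaviour:

```julia
using Flux

# Old-style qualified reference; under `Base.@deprecate_binding Optimise Train`
# this is assumed to resolve to the same function as `Flux.Train.train!`,
# emitting a depwarn when warnings are enabled (e.g. `julia --depwarn=yes`).
Flux.Optimise.train! === Flux.Train.train!   # expected to be true
```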
15 changes: 0 additions & 15 deletions src/optimise/Optimise.jl

This file was deleted.
