remove Flux's optimisers, add new train! functions
mcabbott committed Jul 26, 2022
1 parent b8bdc2d commit a68470c
Showing 9 changed files with 432 additions and 914 deletions.
21 changes: 16 additions & 5 deletions NEWS.md
@@ -1,11 +1,22 @@
# Flux Release Notes

## v0.14

* The use of Zygote's implicit parameters (with `Flux.params` and global variables) is deprecated in favour of the explicit style.
The function `train!` has new methods (accepting the model itself) to handle this; a sketch of the explicit style follows this list.

* Sub-module `Flux.Optimise` has been removed, in favour of using [Optimisers.jl](https://github.com/FluxML/Optimisers.jl) more deeply.
The function `train!` now lives in the sub-module `Flux.Train`, and its internals have been rewritten.

* One-hot arrays have moved to a new package, [OneHotArrays.jl](https://github.com/FluxML/OneHotArrays.jl).
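
A minimal sketch of the explicit style, written directly against Optimisers.jl and Zygote (the model, data, loss, and helper function below are invented for illustration; the exact signatures of the new `train!` methods are not shown here):

```julia
using Flux, Optimisers
using Zygote: gradient   # `gradient` is no longer re-exported by Flux

model = Chain(Dense(2 => 8, relu), Dense(8 => 1))                       # toy model
data  = [(randn(Float32, 2, 16), randn(Float32, 1, 16)) for _ in 1:10]  # toy batches
opt_state = Optimisers.setup(Optimisers.Adam(1f-3), model)              # explicit optimiser state

function train_explicit!(model, data, opt_state)   # hypothetical helper, not Flux API
    for (x, y) in data
        grads = gradient(m -> Flux.mse(m(x), y), model)   # gradient w.r.t. the model itself
        opt_state, model = Optimisers.update!(opt_state, model, grads[1])
    end
    return model, opt_state
end

model, opt_state = train_explicit!(model, data, opt_state)
```

The new `train!` methods are intended to wrap a loop of this shape, taking the model itself rather than `Flux.params(model)`.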

## v0.13.4
* Added [`PairwiseFusion` layer](https://github.com/FluxML/Flux.jl/pull/1983)

## v0.13
## v0.13 (April 2022)

* After a deprecations cycle, the datasets in `Flux.Data` have
been removed in favour of MLDatasets.jl.
been removed in favour of [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl).
* `params` is no longer exported, since it is a common name and is also exported by Distributions.jl.
* `flatten` is no longer exported, due to a clash with `Iterators.flatten`.
* Remove Juno.jl progress bar support as it is now obsolete.
@@ -48,7 +59,7 @@ been removed in favour of MLDatasets.jl.
* CUDA.jl 3.0 support
* Bug fixes and optimizations.

## v0.12.0
## v0.12.0 (March 2021)

* Add [identity_init](https://github.com/FluxML/Flux.jl/pull/1524).
* Add [Orthogonal Matrix initialization](https://github.com/FluxML/Flux.jl/pull/1496) as described in [Exact solutions to the nonlinear dynamics of learning in deep linear neural networks](https://arxiv.org/abs/1312.6120).
@@ -73,7 +84,7 @@ been removed in favour of MLDatasets.jl.
* Adds the [AdaBelief](https://arxiv.org/abs/2010.07468) optimiser.
* Other new features and bug fixes (see GitHub releases page)

## v0.11
## v0.11 (July 2020)

* Moved CUDA compatibility to use [CUDA.jl instead of CuArrays.jl](https://github.com/FluxML/Flux.jl/pull/1204)
* Add [kaiming initialization](https://arxiv.org/abs/1502.01852) methods: [kaiming_uniform and kaiming_normal](https://github.com/FluxML/Flux.jl/pull/1243)
@@ -101,7 +112,7 @@ keyword argument. The `Dropout` struct (whose behavior is left unchanged) is the

See GitHub's releases.

## v0.10.0
## v0.10.0 (November 2019)

* The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
- The dependency on Tracker.jl has been removed.
19 changes: 10 additions & 9 deletions src/Flux.jl
@@ -11,7 +11,7 @@ import Optimisers: Optimisers, trainable, destructure # before v0.13, Flux owne

using Zygote, ChainRulesCore
using Zygote: Params, @adjoint, gradient, pullback, @nograd
export gradient
# export gradient # stop exporting this, to make people say "using Zygote", and to make it easier to replace

# Pirate error to catch a common mistake. (Internal function `base` because overloading `update!` is more likely to give ambiguities.)
Optimisers.base(dx::Zygote.Grads) = error("Optimisers.jl cannot be used with Zygote.jl's implicit gradients, `Params` & `Grads`")
@@ -25,14 +25,15 @@ export Chain, Dense, Maxout, SkipConnection, Parallel, PairwiseFusion,
fmap, cpu, gpu, f32, f64,
testmode!, trainmode!

include("optimise/Optimise.jl")
using .Optimise
using .Optimise: @epochs
using .Optimise: skip
export Descent, Adam, Momentum, Nesterov, RMSProp,
AdaGrad, AdaMax, AdaDelta, AMSGrad, NAdam, OAdam,
AdamW, RAdam, AdaBelief, InvDecay, ExpDecay,
WeightDecay, ClipValue, ClipNorm
include("train/Train.jl")
using .Train
export train!
# Stop exporting these, since Optimisers.jl exports the same names,
# and with this PR, Flux.Adam() is literally a wrapper around Adam().
# export Descent, Adam, Momentum, Nesterov, RMSProp,
# AdaGrad, AdaMax, AdaDelta, AMSGrad, NAdam, OAdam,
# AdamW, RAdam, AdaBelief, InvDecay, ExpDecay,
# WeightDecay, ClipValue, ClipNorm

using CUDA
const use_cuda = Ref{Union{Nothing,Bool}}(nothing)
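
For reference, a hedged sketch of where the affected names resolve once these exports are gone (the package combination and values here are assumptions for illustration, not part of this diff):

```julia
using Flux                # still exports Chain, Dense, train!, ...
using Optimisers          # Descent, Adam, Momentum, WeightDecay, ... now come from here
using Zygote: gradient    # `gradient` must now be brought in explicitly

rule = Adam(3f-4)         # resolves to Optimisers.Adam, with no clash since Flux no longer exports Adam
# Per the comment above, Flux.Adam() is now a thin wrapper around Optimisers.Adam(),
# so Flux.Adam(3f-4) is expected to build an equivalent rule (treat this as the
# commit's intent rather than a guaranteed API).
```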
11 changes: 7 additions & 4 deletions src/deprecations.jl
@@ -34,10 +34,10 @@ struct Zeros
end
Zeros(args...) = Zeros() # was used as both Dense(10, 2, initb = Zeros) and Dense(rand(2,10), Zeros())

function Optimise.update!(x::AbstractArray, x̄)
Base.depwarn("`Flux.Optimise.update!(x, x̄)` was not used internally and has been removed. Please write `x .-= x̄` instead.", :update!)
x .-= x̄
end
# function Optimise.update!(x::AbstractArray, x̄)
# Base.depwarn("`Flux.Optimise.update!(x, x̄)` was not used internally and has been removed. Please write `x .-= x̄` instead.", :update!)
# x .-= x̄
# end

function Diagonal(size::Integer...; kw...)
Base.depwarn("Flux.Diagonal is now Flux.Scale, and also allows an activation function.", :Diagonal)
@@ -80,3 +80,6 @@ Base.@deprecate_binding RADAM RAdam
Base.@deprecate_binding OADAM OAdam
Base.@deprecate_binding ADAGrad AdaGrad
Base.@deprecate_binding ADADelta AdaDelta

# What remains from the Optimise sub-module has moved to Train:
Base.@deprecate_binding Optimise Train
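
A hedged sketch of what these deprecations mean for downstream code (the array, loss, and values below are invented for illustration):

```julia
using Flux
using Zygote: gradient

w  = randn(Float32, 3)
x̄  = gradient(w -> sum(abs2, w), w)[1]   # some gradient with the same shape as w

# Previously: Flux.Optimise.update!(w, x̄) — removed above.
w .-= x̄                                  # the suggested replacement: plain broadcasting

# The old module name still resolves, via the deprecated binding added above:
Flux.Optimise === Flux.Train             # true (may print a depwarn, depending on --depwarn)
```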
15 changes: 0 additions & 15 deletions src/optimise/Optimise.jl

This file was deleted.
