diff --git a/docs/make.jl b/docs/make.jl index c65e51723b..7c2ddfa9e6 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,9 +15,6 @@ makedocs( "Fitting a Line" => "getting_started/overview.md", "Gradients and Layers" => "getting_started/basics.md", ], - "Tutorials" => [ - "Linear Regression" => "tutorials/linear_regression.md", - ], "Building Models" => [ "Built-in Layers 📚" => "models/layers.md", "Recurrence" => "models/recurrence.md", @@ -44,11 +41,12 @@ makedocs( "Flat vs. Nested 📚" => "destructure.md", "Functors.jl 📚 (`fmap`, ...)" => "models/functors.md", ], + "Tutorials" => [ + "Linear Regression" => "tutorials/linear_regression.md", + "Custom Layers" => "tutorials/advanced.md", # TODO move freezing to Training + ], "Performance Tips" => "performance.md", "Flux's Ecosystem" => "ecosystem.md", - "Tutorials" => [ # TODO, maybe - "Custom Layers" => "models/advanced.md", # TODO move freezing to Training - ], ], format = Documenter.HTML( sidebar_sitename = false, diff --git a/docs/src/index.md b/docs/src/index.md index 60a300e0e4..f394bbe8b0 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -16,9 +16,9 @@ Other closely associated packages, also installed automatically, include [Zygote ## Learning Flux -The [quick start](models/quickstart.md) page trains a simple neural network. +The [quick start](getting_started/quickstart.md) page trains a simple neural network. -This rest of this documentation provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](models/overview.md). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts. +This rest of this documentation provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](getting_started/overview.md). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts. Sections with 📚 contain API listings. The same text is avalable at the Julia prompt, by typing for example `?gpu`. diff --git a/docs/src/models/activation.md b/docs/src/models/activation.md index 5e6e718098..ae14750aeb 100644 --- a/docs/src/models/activation.md +++ b/docs/src/models/activation.md @@ -1,5 +1,4 @@ - -# Activation Functions from NNlib.jl +# [Activation Functions from NNlib.jl](@id man-activations) These non-linearities used between layers of your model are exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package. diff --git a/docs/src/models/functors.md b/docs/src/models/functors.md index 72b8db8318..7ad152cfa8 100644 --- a/docs/src/models/functors.md +++ b/docs/src/models/functors.md @@ -4,7 +4,7 @@ Flux models are deeply nested structures, and [Functors.jl](https://github.com/F New layers should be annotated using the `Functors.@functor` macro. This will enable [`params`](@ref Flux.params) to see the parameters inside, and [`gpu`](@ref) to move them to the GPU. -`Functors.jl` has its own [notes on basic usage](https://fluxml.ai/Functors.jl/stable/#Basic-Usage-and-Implementation) for more details. Additionally, the [Advanced Model Building and Customisation](../models/advanced.md) page covers the use cases of `Functors` in greater details. +`Functors.jl` has its own [notes on basic usage](https://fluxml.ai/Functors.jl/stable/#Basic-Usage-and-Implementation) for more details. Additionally, the [Advanced Model Building and Customisation](../tutorials/advanced.md) page covers the use cases of `Functors` in greater details. ```@docs Functors.@functor diff --git a/docs/src/training/optimisers.md b/docs/src/training/optimisers.md index 9d619f8d10..066196b4ba 100644 --- a/docs/src/training/optimisers.md +++ b/docs/src/training/optimisers.md @@ -4,7 +4,7 @@ CurrentModule = Flux # Optimisers -Consider a [simple linear regression](../getting_started/linear_regression.md). We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters `W` and `b`. +Consider a [simple linear regression](../tutorials/linear_regression.md). We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters `W` and `b`. ```julia using Flux diff --git a/docs/src/training/training.md b/docs/src/training/training.md index 70fa39a510..e119b8914e 100644 --- a/docs/src/training/training.md +++ b/docs/src/training/training.md @@ -36,7 +36,7 @@ Flux.Optimise.train! ``` There are plenty of examples in the [model zoo](https://github.com/FluxML/model-zoo), and -more information can be found on [Custom Training Loops](../models/advanced.md). +more information can be found on [Custom Training Loops](../tutorials/advanced.md). ## Loss Functions @@ -68,7 +68,7 @@ The model to be trained must have a set of tracked parameters that are used to c Such an object contains a reference to the model's parameters, not a copy, such that after their training, the model behaves according to their updated values. -Handling all the parameters on a layer by layer basis is explained in the [Layer Helpers](../getting_started/basics.md) section. Also, for freezing model parameters, see the [Advanced Usage Guide](../models/advanced.md). +Handling all the parameters on a layer by layer basis is explained in the [Layer Helpers](../getting_started/basics.md) section. Also, for freezing model parameters, see the [Advanced Usage Guide](../tutorials/advanced.md). ```@docs Flux.params diff --git a/docs/src/models/advanced.md b/docs/src/tutorials/advanced.md similarity index 100% rename from docs/src/models/advanced.md rename to docs/src/tutorials/advanced.md diff --git a/src/losses/functions.jl b/src/losses/functions.jl index 1bb14b2e74..ffda2ff99a 100644 --- a/src/losses/functions.jl +++ b/src/losses/functions.jl @@ -273,7 +273,7 @@ Return the binary cross-entropy loss, computed as agg(@.(-y * log(ŷ + ϵ) - (1 - y) * log(1 - ŷ + ϵ))) -Where typically, the prediction `ŷ` is given by the output of a [sigmoid](@ref Activation-Functions) activation. +Where typically, the prediction `ŷ` is given by the output of a [sigmoid](@ref man-activations) activation. The `ϵ` term is included to avoid infinity. Using [`logitbinarycrossentropy`](@ref) is recomended over `binarycrossentropy` for numerical stability.