Merge pull request #171 from JuliaGNI/adjust-to-new-interface
Adjust to new interface
michakraus authored Nov 13, 2024
2 parents 388d07b + af47e48 commit 4bc5fc5
Showing 25 changed files with 110 additions and 69 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -32,7 +32,7 @@ UpdateJulia = "770da0de-323d-4d28-9202-0e205c1e0aff"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
AbstractNeuralNetworks = "0.3"
AbstractNeuralNetworks = "0.3.2"
BandedMatrices = "0.17, 1"
ChainRules = "1"
ChainRulesCore = "1"
7 changes: 6 additions & 1 deletion docs/make.jl
@@ -352,7 +352,8 @@ _latex_pages = [
]

_keys = [page[1] for page in _latex_pages]
filter!(key -> (key ≠ "HOME") & (key ≠ "Index of Docstrings") & (key ≠ "References"), _keys)
# don't generate docstring indices for specific chapters (introduction, conclusion, ...)
filter!(key -> (key ≠ "HOME") & (key ≠ "Index of Docstrings") & (key ≠ "References") & (key ≠ "Summary and Outlook"), _keys)
index_latex_pages = vcat([Dict(_latex_pages)[key] for key in _keys]...)

makedocs(;
@@ -371,3 +372,7 @@ deploydocs(;
devurl = "latest",
devbranch = "main",
)

# got the error "exception = GLFWError (NOT_INITIALIZED): The GLFW library is not initialized" before; also see https://discourse.julialang.org/t/warning-error-closing-screen/111939
import GLMakie
GLMakie.closeall()
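
The `GLMakie.closeall()` call above addresses the GLFW warning referenced in the comment. As a hedged variation (not part of this commit), the cleanup can be guarded so that a headless build machine, where GLFW may never initialize, does not turn the workaround itself into an error:

```julia
# Sketch only: same cleanup, but tolerant of an uninitialized GL backend.
try
    import GLMakie
    GLMakie.closeall()   # close any GL screens left open by @example blocks
catch err
    @warn "Skipping GLMakie cleanup" err
end
```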
3 changes: 2 additions & 1 deletion docs/src/GeometricMachineLearning.bib
@@ -27,7 +27,8 @@ @book{hairer2006geometric
title={Geometric Numerical integration: structure-preserving algorithms for ordinary differential equations},
author={Hairer, Ernst and Lubich, Christian and Wanner, Gerhard},
year={2006},
publisher={Springer}
publisher={Springer},
address={Heidelberg}
}


22 changes: 11 additions & 11 deletions docs/src/docstring_index.md
@@ -7,66 +7,66 @@
### Manifolds

```@index
Pages = Dict(index_latex_pages)["Manifolds"]
Pages = Dict(Main.index_latex_pages)["Manifolds"]
```

### Geometric Structure

```@index
Pages = Dict(index_latex_pages)["Geometric Structure"]
Pages = Dict(Main.index_latex_pages)["Geometric Structure"]
```

### Reduced Order Modeling

```@index
Pages = Dict(index_latex_pages)["Reduced Order Modeling"]
Pages = Dict(Main.index_latex_pages)["Reduced Order Modeling"]
```

### General Framework for Manifold Optimization

```@index
Pages = Dict(index_latex_pages)["General Framework for Manifold Optimization"]
Pages = Dict(Main.index_latex_pages)["General Framework for Manifold Optimization"]
```

### Optimizer Methods

```@index
Pages = Dict(index_latex_pages)["Optimizer Methods"]
Pages = Dict(Main.index_latex_pages)["Optimizer Methods"]
```

### Layers

```@index
Pages = Dict(index_latex_pages)["Layers"]
Pages = Dict(Main.index_latex_pages)["Layers"]
```

### Architectures

```@index
Pages = Dict(index_latex_pages)["Architectures"]
Pages = Dict(Main.index_latex_pages)["Architectures"]
```


### Transformers with Structure

```@index
Pages = Dict(index_latex_pages)["Transformers with Structure"]
Pages = Dict(Main.index_latex_pages)["Transformers with Structure"]
```

### Learning Nonlinear Spaces

```@index
Pages = [Dict(index_latex_pages)["Learning Nonlinear Spaces"]]
Pages = [Dict(Main.index_latex_pages)["Learning Nonlinear Spaces"]]
```

### Data Loader

```@index
Pages = Dict(index_latex_pages)["Data Loader"]
Pages = Dict(Main.index_latex_pages)["Data Loader"]
```

### Special Arrays, Tensors and Pullbacks

```@index
Pages = Dict(index_latex_pages)["Special Arrays, Tensors and Pullbacks"]
Pages = Dict(Main.index_latex_pages)["Special Arrays, Tensors and Pullbacks"]
```
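
The `Main.` prefix added throughout this page reflects how Documenter evaluates the code in these blocks: each page runs in its own sandbox module, so a global created by `make.jl` (which executes in `Main`) is only reachable when qualified explicitly. A minimal, self-contained sketch of that behaviour, with made-up page names standing in for the real `index_latex_pages`:

```julia
# Assumed data; the real index_latex_pages is assembled in docs/make.jl.
index_latex_pages = ["Manifolds" => ["manifolds/basic_topology.md"],
                     "Layers"    => ["layers/attention_layer.md"]]

module DocsSandbox   # stands in for the module a docs page is evaluated in
    # Referring to `index_latex_pages` directly here raises an UndefVarError;
    # qualifying it with `Main.` makes the binding from make.jl visible.
    pages = Dict(Main.index_latex_pages)["Manifolds"]
end

DocsSandbox.pages   # ["manifolds/basic_topology.md"]
```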
8 changes: 4 additions & 4 deletions docs/src/optimizers/manifold_related/parallel_transport.md
@@ -7,7 +7,7 @@ A precise definition of parallel transport needs a notion of a *connection* [lan
```@eval
Main.theorem(raw"Given two elements ``B^A_1, B^A_2\in{}T_AG`` the parallel transport of ``B^A_2`` along the geodesic of ``B^A_1`` is given by
" * Main.indentation * raw"```math
" * Main.indentation * raw"\Pi_{A\to\gamma_{B^A_1}(t)} = A\exp(t\cdot{}A^{-1}B^A_1)A^{-1}B^A_2 = A\exp(t\cdot{}B_1)B_2,
" * Main.indentation * raw"\Pi_{A\to\gamma_{B^A_1}(t)}B^A_2 = A\exp(t\cdot{}A^{-1}B^A_1)A^{-1}B^A_2 = A\exp(t\cdot{}B_1)B_2,
" * Main.indentation * raw"```
" * Main.indentation * raw"where ``B_i := A^{-1}B^A_i.``")
```
@@ -19,18 +19,18 @@ For the Stiefel manifold this is not much more complicated[^1]:
```@eval
Main.theorem(raw"Given two elements ``\Delta_1, \Delta_2\in{}T_Y\mathcal{M}``, the parallel transport of ``\Delta_2`` along the geodesic of ``\Delta_1`` is given by
" * Main.indentation * raw"```math
" * Main.indentation * raw"\Pi_{Y\to\gamma_{\Delta_1}(t)} = \exp(t\cdot\Omega(Y, \Delta_1))\Delta_2 = \lambda(Y)\exp(\bar{B}_1)\lambda(Y)^{-1}\Delta_2,
" * Main.indentation * raw"\Pi_{Y\to\gamma_{\Delta_1}(t)}\Delta_2 = \exp(t\cdot\Omega(Y, \Delta_1))\Delta_2 = \lambda(Y)\exp(\bar{B}_1)\lambda(Y)^{-1}\Delta_2,
" * Main.indentation * raw"```
" * Main.indentation * raw"where ``\bar{B}_1 = \lambda(Y)^{-1}\Omega(Y, \Delta_1)\lambda(Y).``")
```

We can further modify the expression of parallel transport for the Stiefel manifold:

```math
\Pi_{Y\to\gamma_{\Delta_1}(t)} = \lambda(Y)\exp(B_1)\lambda(Y)\Omega(Y, \Delta_2)Y = \lambda(Y)\exp(B_1)B_2E,
\Pi_{Y\to\gamma_{\Delta_1}(t)}\Delta_2 = \lambda(Y)\exp(B_1)\lambda(Y)\Omega(Y, \Delta_2)Y = \lambda(Y)\exp(B_1)B_2E,
```

where ``B_2 = \lambda(Y)^{-1}\Omega(Y, \Delta_2)\lambda(Y).`` We can now define explicit updating rules for the [global section](@ref "Global Sections") ``\Lambda^{(\cdot)}``, the element of the homogeneous space ``Y^{(\cdot)}``, the tangent vector ``\Delta^{(\cdot)}`` and ``D^{(\cdot)} = (\Lambda^{(\cdot)})^{-1}\Omega(\Delta^{(\cdot)})\Lambda^{(cdot)}``, its representation in ``\mathfrak{g}^\mathrm{hor}``.
where ``B_2 = \lambda(Y)^{-1}\Omega(Y, \Delta_2)\lambda(Y).`` We can now define explicit updating rules for the [global section](@ref "Global Sections") ``\Lambda^{(\cdot)}``, the element of the homogeneous space ``Y^{(\cdot)}``, the tangent vector ``\Delta^{(\cdot)}`` and ``D^{(\cdot)} = (\Lambda^{(\cdot)})^{-1}\Omega(\Delta^{(\cdot)})\Lambda^{(\cdot)}``, its representation in ``\mathfrak{g}^\mathrm{hor}``.

We thus have:
1. ``\Lambda^{(t)} \leftarrow \Lambda^{(t-1)}\exp(B^{(t-1)}),``
7 changes: 5 additions & 2 deletions docs/src/tutorials/adjusting_the_loss_function.md
@@ -41,7 +41,10 @@ using LinearAlgebra: norm # hide
# norm of parameters for single layer
network_parameter_norm(params::NamedTuple) = sum([norm(params[i]) for i in 1:length(params)])
# norm of parameters for entire network
network_parameter_norm(params) = sum([network_parameter_norm(param) for param in params])
function network_parameter_norm(params::NeuralNetworkParameters)
sum([network_parameter_norm(params[key]) for key in keys(params)])
end
network_parameter_norm(nn.params)
```
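
The new method iterates over `keys(params)` because a `NeuralNetworkParameters` object is addressed by per-layer symbols (`:L1`, `:L2`, ...) rather than by integer indices. A stand-alone sketch of the same pattern, with a plain `NamedTuple` standing in for `NeuralNetworkParameters` and invented layer shapes:

```julia
using LinearAlgebra: norm

# norm of the parameters of a single layer (a NamedTuple of arrays)
layer_norm(layer::NamedTuple) = sum(norm(layer[i]) for i in 1:length(layer))

# stand-in for nn.params; the keys mirror the per-layer naming scheme
params = (L1 = (weight = ones(4, 4), bias = zeros(4)),
          L2 = (weight = ones(2, 4), bias = zeros(2)))

sum(layer_norm(params[key]) for key in keys(params))   # total parameter norm
```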

@@ -55,7 +58,7 @@ We now implement a custom loss such that:
struct CustomLoss <: GeometricMachineLearning.NetworkLoss end
const λ = .1
function (loss::CustomLoss)(model::Chain, params::Tuple, input::CT, output::CT) where {
function (loss::CustomLoss)(model::Chain, params::NeuralNetworkParameters, input::CT, output::CT) where {
T,
AT<:AbstractArray{T, 3},
CT<:@NamedTuple{q::AT, p::AT}
5 changes: 2 additions & 3 deletions docs/src/tutorials/grassmann_layer.md
@@ -112,7 +112,6 @@ We then *lift* the neural network parameters via [`GlobalSection`](@ref).

```@example rosenbrock
λY = GlobalSection(nn.params)
nothing # hide
```

@@ -258,7 +257,7 @@ where `np` is the number of points in ``\mathcal{D}_2`` and ``W_2`` is the *Wass
where ``\nabla{}W_2`` is equivalent to the function `compute_wasserstein_gradient`.

```@example rosenbrock
function compute_gradient(ps::Tuple)
function compute_gradient(ps::NeuralNetworkParameters)
samples = randn(2, size(xyz_points, 2))
estimate, nn_pullback = Zygote.pullback(ps -> model(samples, ps), ps)
@@ -283,7 +282,7 @@ loss_array = zeros(training_steps)
for i in 1:training_steps
val, dp = compute_gradient(nn.params)
loss_array[i] = val
optimization_step!(optimizer, λY, nn.params, dp)
optimization_step!(optimizer, λY, nn.params, dp.params)
end
```
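
The gradient is passed as `dp.params` because a Zygote pullback taken with respect to a parameter struct returns a `NamedTuple` mirroring the struct's fields, so the per-layer gradients sit one level down. A minimal sketch of that behaviour with a hypothetical wrapper type (not the package's own):

```julia
using Zygote

struct WrappedParams{PT}   # stands in for NeuralNetworkParameters
    params::PT
end

ps = WrappedParams((L1 = (weight = [1.0 2.0; 3.0 4.0],),))
val, pb = Zygote.pullback(p -> sum(abs2, p.params.L1.weight), ps)
dp = pb(one(val))[1]    # a NamedTuple with the single field :params
dp.params.L1.weight     # the gradient the optimizer actually needs
```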

9 changes: 5 additions & 4 deletions docs/src/tutorials/mnist/mnist_tutorial.md
@@ -151,10 +151,11 @@ accuracy_score2 = data["accuracy_score2"]
accuracy_score3 = data["accuracy_score3"]
accuracy_score4 = data["accuracy_score4"]
nn1 = NeuralNetwork(nn1.architecture, nn1.model, data["nn1weights"], CPU())
nn2 = NeuralNetwork(nn2.architecture, nn2.model, data["nn2weights"], CPU())
nn3 = NeuralNetwork(nn3.architecture, nn3.model, data["nn3weights"], CPU())
nn4 = NeuralNetwork(nn4.architecture, nn4.model, data["nn4weights"], CPU())
_nnp(ps::Tuple) = NeuralNetworkParameters{Tuple(Symbol("L$(i)") for i in 1:length(ps))}(ps)
nn1 = NeuralNetwork(nn1.architecture, nn1.model, _nnp(data["nn1weights"]), CPU())
nn2 = NeuralNetwork(nn2.architecture, nn2.model, _nnp(data["nn2weights"]), CPU())
nn3 = NeuralNetwork(nn3.architecture, nn3.model, _nnp(data["nn3weights"]), CPU())
nn4 = NeuralNetwork(nn4.architecture, nn4.model, _nnp(data["nn4weights"]), CPU())
morange = RGBf(255 / 256, 127 / 256, 14 / 256) # hide
mred = RGBf(214 / 256, 39 / 256, 40 / 256) # hide
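The `_nnp` helper re-attaches layer names to weights that were saved as a plain tuple before the interface change; the same helper reappears in the tutorials below. A toy sketch of what it produces, with a bare `NamedTuple` in place of `NeuralNetworkParameters` and invented weights:

```julia
# Weights as they might come out of an older JLD2 file: one tuple entry per layer.
ps_tuple = ((weight = ones(2, 2), bias = zeros(2)),
            (weight = ones(1, 2), bias = zeros(1)))

layer_keys = Tuple(Symbol("L$(i)") for i in 1:length(ps_tuple))   # (:L1, :L2)
ps_named   = NamedTuple{layer_keys}(ps_tuple)                     # keyed like NeuralNetworkParameters
ps_named.L2.weight
```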
10 changes: 5 additions & 5 deletions docs/src/tutorials/symplectic_autoencoder.md
@@ -156,7 +156,8 @@ sae_nn_cpu = mtc(sae_nn_gpu)
using JLD2
sae_trained_parameters = load("sae_parameters.jld2")["sae_parameters"]
sae_nn_cpu = NeuralNetwork(sae_arch, Chain(sae_arch), sae_trained_parameters, CPU())
_nnp(ps::Tuple) = NeuralNetworkParameters{Tuple(Symbol("L$(i)") for i in 1:length(ps))}(ps)
sae_nn_cpu = NeuralNetwork(sae_arch, Chain(sae_arch), _nnp(sae_trained_parameters), CPU())
nothing # hide
```
@@ -284,7 +285,6 @@ dl = dl_cpu # hide
dl_integration = DataLoader(dl; autoencoder = false)
integrator_batch = Batch(integrator_batch_size, seq_length)
nothing # hide
```
```julia
@@ -301,7 +301,7 @@ We can now evaluate the solution:

```@example toda_lattice
nn_integrator_parameters = load("integrator_parameters.jld2")["integrator_parameters"] # hide
integrator_nn = NeuralNetwork(integrator_architecture, Chain(integrator_architecture), nn_integrator_parameters, backend) # hide
integrator_nn = NeuralNetwork(integrator_architecture, Chain(integrator_architecture), _nnp(nn_integrator_parameters), backend) # hide
ics = encoder(sae_nn_cpu)((q = dl.input.q[:, 1:seq_length, 1], p = dl.input.p[:, 1:seq_length, 1])) # hide
iterate(mtc(integrator_nn), ics; n_points = length(sol.t), prediction_window = seq_length) # hide
@time "time stepping with transformer" time_series = iterate(mtc(integrator_nn), ics; n_points = length(sol.t), prediction_window = seq_length)
@@ -487,7 +487,7 @@ train_integrator_loss2 = o_integrator(integrator_nn2, dl_integration, integrator

```@setup toda_lattice
nn_integrator_parameters2 = load("integrator_parameters_psd.jld2")["integrator_parameters"] # hide
integrator_nn2 = NeuralNetwork(integrator_architecture2, Chain(integrator_architecture2), nn_integrator_parameters2, backend) # hide
integrator_nn2 = NeuralNetwork(integrator_architecture2, Chain(integrator_architecture2), _nnp(nn_integrator_parameters2), backend) # hide
ics = encoder(psd_nn2)((q = dl_cpu.input.q[:, 1:seq_length, 1], p = dl_cpu.input.p[:, 1:seq_length, 1])) # hide
nothing # hide
```
@@ -509,7 +509,7 @@ time_steps
Now we do so with:

```@example toda_lattice
time_steps = (0, 2, 4)
time_steps = (0, 4, 5)
nothing # hide
```

@@ -214,9 +214,10 @@ nn_st = mtc(nn_st)
using JLD2 # hide
# get correct parameters from jld2 file # hide
f = load("transformer_rigid_body.jld2") # hide
nn_vpff = NeuralNetwork(nn_vpff.architecture, nn_vpff.model, f["nn_vpff_params"], nn_vpff.backend) # hide
nn_vpt = NeuralNetwork(nn_vpt.architecture, nn_vpt.model, f["nn_vpt_arb_params"], nn_vpt.backend) # hide
nn_st = NeuralNetwork(nn_st.architecture, nn_st.model, f["nn_st_params"], nn_st.backend) # hide
_nnp(ps::Tuple) = NeuralNetworkParameters{Tuple(Symbol("L$(i)") for i in 1:length(ps))}(ps) # hide
nn_vpff = NeuralNetwork(nn_vpff.architecture, nn_vpff.model, _nnp(f["nn_vpff_params"]), nn_vpff.backend) # hide
nn_vpt = NeuralNetwork(nn_vpt.architecture, nn_vpt.model, _nnp(f["nn_vpt_arb_params"]), nn_vpt.backend) # hide
nn_st = NeuralNetwork(nn_st.architecture, nn_st.model, _nnp(f["nn_st_params"]), nn_st.backend) # hide
nothing # hide
```

1 change: 1 addition & 0 deletions src/GeometricMachineLearning.jl
@@ -41,6 +41,7 @@ module GeometricMachineLearning
export Dense, Linear
export initialparameters
export parameterlength
export NeuralNetworkParameters

export σ, sigmoid, softmax

14 changes: 11 additions & 3 deletions src/architectures/autoencoder.jl
@@ -85,7 +85,8 @@ using GeometricMachineLearning
using GeometricMachineLearning: UnknownEncoder
model = Chain(Dense(5, 3, tanh; use_bias = false), Dense(3, 2, identity; use_bias = false))
nn = NeuralNetwork(UnknownEncoder(5, 2, 2), model, initialparameters(model), CPU())
params = NeuralNetworkParameters(initialparameters(model))
nn = NeuralNetwork(UnknownEncoder(5, 2, 2), model, params, CPU())
typeof(nn) <: NeuralNetwork{<:GeometricMachineLearning.Encoder}
@@ -171,7 +172,8 @@ end
# """
function encoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_encoder_layers = length(encoder_model(nn.architecture).layers)
nn.params[1:n_encoder_layers]
keys = Tuple(Symbol.(["L$(i)" for i in 1:n_encoder_layers]))
NeuralNetworkParameters(NamedTuple{keys}(Tuple([nn.params[key] for key in keys])))
end

# """
@@ -181,7 +183,13 @@ end
# """
function decoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_decoder_layers = length(decoder_model(nn.architecture).layers)
nn.params[(end - (n_decoder_layers - 1)):end]
all_keys = keys(nn.params)
# "old keys" are the ones describing the correct parameters in nn.params
keys_old = Tuple(Symbol.(["L$(i)" for i in (length(all_keys) - (n_decoder_layers - 1)):length(all_keys)]))
n_keys = length(keys_old)
# "new keys" are the ones describing the keys in the new NamedTuple
keys_new = Tuple(Symbol.(["L$(i)" for i in 1:n_keys]))
NeuralNetworkParameters(NamedTuple{keys_new}(Tuple([nn.params[key] for key in keys_old])))
end

function Chain(arch::AutoEncoder)
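A stand-alone sketch of the key-renaming step in `decoder_parameters` above, with plain `NamedTuple`s and made-up values: the decoder owns the last layers of `nn.params`, but inside the decoder network they have to be addressed as `:L1`, `:L2`, ... again:

```julia
all_params = (L1 = (w = 1,), L2 = (w = 2,), L3 = (w = 3,), L4 = (w = 4,))
n_decoder_layers = 2

all_keys = keys(all_params)
keys_old = Tuple(Symbol.(["L$(i)" for i in (length(all_keys) - (n_decoder_layers - 1)):length(all_keys)]))
keys_new = Tuple(Symbol.(["L$(i)" for i in 1:length(keys_old)]))

NamedTuple{keys_new}(Tuple([all_params[key] for key in keys_old]))   # (L1 = (w = 3,), L2 = (w = 4,))
```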
3 changes: 2 additions & 1 deletion src/architectures/neural_network_integrator.jl
@@ -73,7 +73,8 @@ using GeometricMachineLearning
model = ResNet(3, 0, identity)
weight = [1 0 0; 0 2 0; 0 0 1]
bias = [0, 0, 1]
nn = NeuralNetwork(model, Chain(model), ((weight = weight, bias = bias), ), CPU())
ps = NeuralNetworkParameters((L1 = (weight = weight, bias = bias), ))
nn = NeuralNetwork(model, Chain(model), ps, CPU())
ics = [1, 1, 1]
iterate(nn, ics; n_points = 4)
12 changes: 7 additions & 5 deletions src/data_loader/batch.jl
@@ -19,17 +19,18 @@ Consider the following example for drawing batches of size 2 for an instance of
using GeometricMachineLearning
import Random
Random.seed!(123)
rng = Random.TaskLocalRNG()
Random.seed!(rng, 123)
dl = DataLoader(rand(5))
dl = DataLoader(rand(rng, 5))
batch = Batch(2)
batch(dl)
# output
[ Info: You have provided a matrix as input. The axes will be interpreted as (i) system dimension and (ii) number of parameters.
([(1, 4), (1, 3)], [(1, 2), (1, 1)], [(1, 5)])
([(1, 5), (1, 3)], [(1, 4), (1, 1)], [(1, 2)])
```
Here the first index is always 1 (the time dimension). We get a total number of 3 batches.
@@ -106,6 +107,7 @@ Here the distinction is between data that are *time-series like* and data that a
using GeometricMachineLearning
using GeometricMachineLearning: number_of_batches
import Random
Random.seed!(123)
dat = [1, 2, 3, 4, 5]
@@ -123,8 +125,8 @@ println(stdout, batch(dl₁), "\n", batch(dl₂))
Number of batches of dl₁: 2
Number of batches of dl₂: 2
([(1, 1), (3, 1), (4, 1)], [(2, 1)])
([(1, 4), (1, 3), (1, 2)], [(1, 5), (1, 1)])
([(1, 1), (4, 1), (2, 1)], [(3, 1)])
([(1, 3), (1, 2), (1, 4)], [(1, 1), (1, 5)])
```
Here we see that in the *autoencoder case* that last minibatch has an additional element.
2 changes: 1 addition & 1 deletion src/data_loader/data_loader.jl
@@ -483,7 +483,7 @@ Compute the accuracy of a neural network classifier.
This needs an instance of [`DataLoader`](@ref) that stores the *test data*.
"""
function accuracy(model::Chain, ps::Tuple, dl::DataLoader{T, AT, BT}) where {T, T1<:Integer, AT<:AbstractArray{T}, BT<:AbstractArray{T1}}
function accuracy(model::Chain, ps::NeuralNetworkParameters, dl::DataLoader{T, AT, BT}) where {T, T1<:Integer, AT<:AbstractArray{T}, BT<:AbstractArray{T1}}
output_tensor = model(dl.input, ps)
output_estimate = assign_output_estimate(output_tensor, dl.output_time_steps)
backend = KernelAbstractions.get_backend(output_estimate)
8 changes: 6 additions & 2 deletions src/data_loader/optimize.jl
@@ -47,7 +47,7 @@ number_of_batches(dl, batch)
3
```
"""
function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T}, batch::Batch, loss::Union{typeof(loss), NetworkLoss}, λY) where T
function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{NeuralNetworkParameters, NamedTuple}, dl::DataLoader{T}, batch::Batch, loss::Union{typeof(loss), NetworkLoss}, λY) where T
count = 0
total_error = T(0)
batches = batch(dl)
@@ -61,12 +61,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu
Zygote.pullback(ps -> loss(model, ps, input_nt_output_nt), ps)
end
total_error += loss_value
dp = pullback(one(loss_value))[1]
dp = return_correct_named_tuple(pullback(one(loss_value))[1])
optimization_step!(opt, λY, ps, dp)
end
total_error / count
end

# this is needed because of the specific way in which we store nn parameters
return_correct_named_tuple(dx::NamedTuple{(:params, )}) = dx.params
return_correct_named_tuple(dx) = dx

_copy(a::AbstractArray) = copy(a)
_copy(qp::QPT) = (q = copy(qp.q), p = copy(qp.p))
_copy(t::Tuple{<:QPTOAT, <:QPTOAT}) = _copy.(t)
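A toy illustration of the two `return_correct_named_tuple` methods added above (definitions repeated here so the snippet runs on its own; values are invented): a gradient that arrives wrapped in a `(:params,)` `NamedTuple` is unwrapped, anything else passes through unchanged.

```julia
return_correct_named_tuple(dx::NamedTuple{(:params, )}) = dx.params
return_correct_named_tuple(dx) = dx

return_correct_named_tuple((params = (L1 = (weight = 1.0,),),))   # -> (L1 = (weight = 1.0,),)
return_correct_named_tuple((L1 = (weight = 1.0,),))               # -> passed through unchanged
```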