Merge pull request #138 from JuliaGNI/clean_up_symplectic_autoencoder_script

Symplectic Autoencoder and PSD architecture
benedict-96 authored May 2, 2024
2 parents df18b20 + 08a4345 commit 7aa4e60
Showing 13 changed files with 454 additions and 13 deletions.
5 changes: 5 additions & 0 deletions src/GeometricMachineLearning.jl
@@ -250,6 +250,8 @@ module GeometricMachineLearning
include("architectures/regular_transformer_integrator.jl")
include("architectures/sympnet.jl")
include("architectures/autoencoder.jl")
include("architectures/symplectic_autoencoder.jl")
include("architectures/psd.jl")
include("architectures/fixed_width_network.jl")
include("architectures/hamiltonian_neural_network.jl")
include("architectures/lagrangian_neural_network.jl")
@@ -269,6 +271,9 @@ module GeometricMachineLearning
export LSTMNeuralNetwork
export ClassificationTransformer
export VolumePreservingFeedForward
export SymplecticAutoencoder, PSDArch

export solve!, encoder, decoder

export train!, apply!, jacobian!
export iterate
85 changes: 84 additions & 1 deletion src/architectures/autoencoder.jl
@@ -1,2 +1,85 @@
"""
Abstract `AutoEncoder` type. If a custom `<:AutoEncoder` architecture is implemented, it should have the fields `full_dim`, `reduced_dim`, `n_encoder_blocks` and `n_decoder_blocks`. Furthermore, the routines `encoder`, `decoder`, `encoder_parameters` and `decoder_parameters` should be extended.
"""
abstract type AutoEncoder <: Architecture end
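
# Editor's sketch (not part of the original file): a minimal custom autoencoder
# only needs the four fields named in the docstring above; the name
# `MyAutoEncoder` is hypothetical. `encoder_layers_from_iteration` and
# `decoder_layers_from_iteration` (defined below) then have to be extended for it.
struct MyAutoEncoder <: AutoEncoder
full_dim::Int
reduced_dim::Int
n_encoder_blocks::Int
n_decoder_blocks::Int
end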

"""
Abstract `Encoder` type. If a custom `<:Encoder` architecture is implemented, it should have the fields `full_dim`, `reduced_dim` and `n_encoder_blocks`.
"""
abstract type Encoder <: Architecture end

"""
Abstract `Decoder` type. If a custom `<:Decoder` architecture is implemented, it should have the fields `full_dim`, `reduced_dim` and `n_decoder_blocks`.
"""
abstract type Decoder <: Architecture end

struct UnknownEncoder <: Encoder
full_dim::Int
reduced_dim::Int
n_encoder_blocks::Int
end

struct UnknownDecoder <: Decoder
full_dim::Int
reduced_dim::Int
n_decoder_blocks::Int
end

"""
This function computes the dimensions of the intermediate steps between the reduced and the full dimension. The iterations are returned in ascending order.
"""
function compute_iterations(full_dim::Integer, reduced_dim::Integer, n_blocks::Integer)
iterations = Vector{Int}(reduced_dim : (full_dim - reduced_dim) ÷ (n_blocks - 1) : full_dim)
iterations[end] = full_dim
iterations
end
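
# Worked example (editor's note): for full_dim = 10, reduced_dim = 2 and
# n_blocks = 3 the step size is (10 - 2) ÷ 2 = 4, so
# compute_iterations(10, 2, 3) == [2, 6, 10]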

function compute_encoder_iterations(arch::AutoEncoder)
compute_iterations(arch.full_dim, arch.reduced_dim, arch.n_encoder_blocks)
end

function compute_decoder_iterations(arch::AutoEncoder)
compute_iterations(arch.full_dim, arch.reduced_dim, arch.n_decoder_blocks)
end

"""
Takes as input the autoencoder architecture and a vector of integers specifying the layer dimensions in the encoder. Has to return a tuple of `AbstractExplicitLayer`s.
"""
encoder_layers_from_iteration(::AutoEncoder, ::AbstractVector{<:Integer}) = error("You have to implement `encoder_layers_from_iteration` for this autoencoder architecture!")

"""
Takes as input the autoencoder architecture and a vector of integers specifying the layer dimensions in the decoder. Has to return a tuple of `AbstractExplicitLayer`s.
"""
decoder_layers_from_iteration(::AutoEncoder, ::AbstractVector{<:Integer}) = error("You have to implement `decoder_layers_from_iteration` for this autoencoder architecture!")

function encoder_model(arch::AutoEncoder)
encoder_iterations = reverse(compute_encoder_iterations(arch))
Chain(encoder_layers_from_iteration(arch, encoder_iterations)...)
end

function decoder_model(arch::AutoEncoder)
decoder_iterations = compute_decoder_iterations(arch)
Chain(decoder_layers_from_iteration(arch, decoder_iterations)...)
end

function encoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_encoder_layers = length(encoder_model(nn.architecture).layers)
nn.params[1:n_encoder_layers]
end

function decoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_decoder_layers = length(decoder_model(nn.architecture).layers)
nn.params[(end - (n_decoder_layers - 1)):end]
end

function Chain(arch::AutoEncoder)
Chain(encoder_model(arch).layers..., decoder_model(arch).layers...)
end

function encoder(nn::NeuralNetwork{<:AutoEncoder})
NeuralNetwork(UnknownEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), encoder_model(nn.architecture), encoder_parameters(nn), get_backend(nn))
end

function decoder(nn::NeuralNetwork{<:AutoEncoder})
NeuralNetwork(UnknownDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_decoder_blocks), decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
end
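
# Hedged usage sketch (editor's note; assumes a concrete architecture
# `arch <: AutoEncoder` and an initialized `nn = NeuralNetwork(arch)`):
#
# enc = encoder(nn) # NeuralNetwork with an `UnknownEncoder` architecture
# dec = decoder(nn) # NeuralNetwork with an `UnknownDecoder` architecture
#
# The two networks share their parameter arrays with `nn`: `encoder_parameters`
# and `decoder_parameters` slice `nn.params` by layer count rather than copying.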
50 changes: 50 additions & 0 deletions src/architectures/psd.jl
@@ -0,0 +1,50 @@
struct PSDArch <: AutoEncoder
full_dim::Int
reduced_dim::Int
n_encoder_blocks::Int
n_decoder_blocks::Int
end

function PSDArch(full_dim::Integer, reduced_dim::Integer)
@assert iseven(full_dim) && iseven(reduced_dim) "The full and the reduced dimension have to be even!"
PSDArch(full_dim, reduced_dim, 2, 2)
end
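
# Editor's note: the two-argument constructor fixes both block counts at 2,
# i.e. PSDArch(20, 4) === PSDArch(20, 4, 2, 2).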

function encoder_layers_from_iteration(arch::PSDArch, encoder_iterations::AbstractVector{<:Integer})
@assert length(encoder_iterations) == 2
@assert arch.full_dim == encoder_iterations[1]
@assert arch.reduced_dim == encoder_iterations[2]

(PSDLayer(arch.full_dim, arch.reduced_dim), )
end

function decoder_layers_from_iteration(arch::PSDArch, decoder_iterations::AbstractVector{<:Integer})
@assert length(decoder_iterations) == 2
@assert arch.full_dim == decoder_iterations[2]
@assert arch.reduced_dim == decoder_iterations[1]

(PSDLayer(arch.reduced_dim, arch.full_dim), )
end

# this performs Proper Symplectic Decomposition (PSD): the layer weights are computed directly from an SVD of the snapshot data instead of being trained iteratively
function solve!(nn::NeuralNetwork{<:PSDArch}, input::AbstractMatrix)
half_of_dimension_in_big_space = nn.architecture.full_dim ÷ 2
@views input_qp = hcat(input[1 : half_of_dimension_in_big_space, :], input[(half_of_dimension_in_big_space + 1) : end, :])
U_term = svd(input_qp).U
@views nn.params[1].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]
@views nn.params[2].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]

AutoEncoderLoss()(nn, input)
end
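
# Editor's sketch of the underlying cotangent lift (standalone and runnable;
# the sizes are illustrative). `solve!` above takes the SVD of the horizontally
# stacked snapshots [Q P] and stores U_r = U[:, 1:reduced_dim ÷ 2] in both PSD
# layers; the implied decoder is the block matrix Φ = [U_r 0; 0 U_r], which is
# orthonormal and also symplectic, since Φ' J_{2N} Φ = J_{2n}.
using LinearAlgebra
Q, P = rand(10, 50), rand(10, 50)        # N = 10, 50 snapshots
U_r = svd(hcat(Q, P)).U[:, 1:2]          # reduced_dim = 4, so reduced_dim ÷ 2 = 2
Φ = [U_r zeros(10, 2); zeros(10, 2) U_r]
@assert Φ' * Φ ≈ I(4)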

function solve!(nn::NeuralNetwork{<:PSDArch}, input::AbstractArray{T, 3}) where T
solve!(nn, reshape(input, size(input, 1), size(input, 2) * size(input, 3)))
end

function solve!(nn::NeuralNetwork{<:PSDArch}, dl::DataLoader{T, AT, <:Any, :RegularData}) where {T, AT <: AbstractArray{T}}
solve!(nn, dl.input)
end

function solve!(nn::NeuralNetwork{<:PSDArch}, dl::DataLoader{T, NT, <:Any, :RegularData}) where {T, AT <: AbstractArray{T}, NT <: NamedTuple{(:q, :p), Tuple{AT, AT}}}
solve!(nn, vcat(dl.input.q, dl.input.p))
end
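
# Hedged usage sketch (editor's note; `snapshots` is a hypothetical 20 × m
# matrix with the q-rows stacked above the p-rows, and we assume the package's
# `NeuralNetwork` constructor initializes the parameters of the architecture):
#
# nn = NeuralNetwork(PSDArch(20, 4))
# err = solve!(nn, snapshots)
#
# This fits both PSD layers directly via SVD (no gradient-based training) and
# returns the projection error as measured by `AutoEncoderLoss`.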
101 changes: 101 additions & 0 deletions src/architectures/symplectic_autoencoder.jl
@@ -0,0 +1,101 @@
struct SymplecticAutoencoder{EncoderInit, DecoderInit, AT} <: AutoEncoder
full_dim::Int
reduced_dim::Int
n_encoder_layers::Int
n_encoder_blocks::Int
n_decoder_layers::Int
n_decoder_blocks::Int
sympnet_upscale::Int
activation::AT
end

struct SymplecticEncoder{AT} <: Encoder
full_dim::Int
reduced_dim::Int
n_encoder_layers::Int
n_encoder_blocks::Int
sympnet_upscale::Int
activation::AT
end

struct SymplecticDecoder{AT} <: Decoder
full_dim::Int
reduced_dim::Int
n_decoder_layers::Int
n_decoder_blocks::Int
sympnet_upscale::Int
activation::AT
end

function SymplecticAutoencoder(full_dim::Integer, reduced_dim::Integer; n_encoder_layers::Integer = 4, n_encoder_blocks::Integer = 2, n_decoder_layers::Integer = 1, n_decoder_blocks::Integer = 3, sympnet_upscale::Integer = 5, activation = tanh, encoder_init_q::Bool = true, decoder_init_q::Bool = true)
@assert full_dim ≥ reduced_dim "The dimension of the full-order model has to be larger than the dimension of the reduced-order model!"
@assert iseven(full_dim) && iseven(reduced_dim) "The full-order model and the reduced-order model need to be even dimensional!"

if encoder_init_q && decoder_init_q
SymplecticAutoencoder{:EncoderInitQ, :DecoderInitQ, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
elseif encoder_init_q && !decoder_init_q
SymplecticAutoencoder{:EncoderInitQ, :DecoderInitP, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
elseif !encoder_init_q && decoder_init_q
SymplecticAutoencoder{:EncoderInitP, :DecoderInitQ, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
elseif !encoder_init_q && !decoder_init_q
SymplecticAutoencoder{:EncoderInitP, :DecoderInitP, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
end
end
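
# Editor's example: with the default keywords,
# SymplecticAutoencoder(12, 4) isa SymplecticAutoencoder{:EncoderInitQ, :DecoderInitQ, typeof(tanh)}
# holds; passing encoder_init_q = false switches the first parameter to :EncoderInitP.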

"""
This function computes the dimensions of the intermediate steps between the reduced and the full dimension for the symplectic case. The iterations are returned in ascending order; only even dimensions occur here.
"""
function compute_iterations_for_symplectic_system(full_dim::Integer, reduced_dim::Integer, n_blocks::Integer)
full_dim2 = full_dim ÷ 2
reduced_dim2 = reduced_dim ÷ 2
iterations = Vector{Int}(reduced_dim2 : (full_dim2 - reduced_dim2) ÷ (n_blocks - 1) : full_dim2)
iterations[end] = full_dim2
iterations * 2
end
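
# Worked example (editor's note): for full_dim = 12, reduced_dim = 4 and
# n_blocks = 3 the half dimensions are 6 and 2 with step (6 - 2) ÷ 2 = 2, so
# compute_iterations_for_symplectic_system(12, 4, 3) == [4, 8, 12]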

function compute_encoder_iterations(arch::SymplecticAutoencoder)
compute_iterations_for_symplectic_system(arch.full_dim, arch.reduced_dim, arch.n_encoder_blocks)
end

function compute_decoder_iterations(arch::SymplecticAutoencoder)
compute_iterations_for_symplectic_system(arch.full_dim, arch.reduced_dim, arch.n_decoder_blocks)
end

function encoder_or_decoder_layers_from_iteration(arch::SymplecticAutoencoder, encoder_iterations::AbstractVector{<:Integer}, n_encoder_layers::Integer, _determine_layer_type)
encoder_layers = ()
encoder_iterations_reduced = encoder_iterations[1:(end - 1)]
for (i, it) in zip(axes(encoder_iterations_reduced, 1), encoder_iterations_reduced)
for layer_index in 1:n_encoder_layers
layer_constructor = _determine_layer_type(layer_index) ? GradientLayerQ : GradientLayerP
encoder_layers = (encoder_layers..., layer_constructor(it, arch.sympnet_upscale * it, arch.activation))
end
encoder_layers = (encoder_layers..., PSDLayer(it, encoder_iterations[i + 1]))
end

encoder_layers
end

function encoder_layers_from_iteration(arch::SymplecticAutoencoder{:EncoderInitQ}, encoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, encoder_iterations, arch.n_encoder_layers, isodd)
end

function encoder_layers_from_iteration(arch::SymplecticAutoencoder{:EncoderInitP}, encoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, encoder_iterations, arch.n_encoder_layers, iseven)
end

function decoder_layers_from_iteration(arch::SymplecticAutoencoder{<:Any, :DecoderInitQ}, decoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, decoder_iterations, arch.n_decoder_layers, isodd)
end

function decoder_layers_from_iteration(arch::SymplecticAutoencoder{<:Any, :DecoderInitP}, decoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, decoder_iterations, arch.n_decoder_layers, iseven)
end

function encoder(nn::NeuralNetwork{<:SymplecticAutoencoder})
arch = SymplecticEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_layers, nn.architecture.n_encoder_blocks, nn.architecture.sympnet_upscale, nn.architecture.activation)
NeuralNetwork(arch, encoder_model(nn.architecture), encoder_parameters(nn), get_backend(nn))
end

function decoder(nn::NeuralNetwork{<:SymplecticAutoencoder})
arch = SymplecticDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_decoder_layers, nn.architecture.n_decoder_blocks, nn.architecture.sympnet_upscale, nn.architecture.activation)
NeuralNetwork(arch, decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
end
32 changes: 32 additions & 0 deletions src/data_loader/batch.jl
@@ -168,6 +168,38 @@ function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT}, batch::
input, output
end

function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT, Nothing, :RegularData}, batch::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, AT<:AbstractArray{T, 3}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}}
backend = KernelAbstractions.get_backend(dl.input)

# the batch size is smaller for the last batch
_batch_size = length(batch_indices_tuple)

batch_indices = convert_vector_of_tuples_to_matrix(backend, batch_indices_tuple)

q_input = KernelAbstractions.allocate(backend, T, dl.input_dim ÷ 2, batch.seq_length, _batch_size)
p_input = similar(q_input)

assign_input_from_vector_of_tuples! = assign_input_from_vector_of_tuples_kernel!(backend)
assign_input_from_vector_of_tuples!(q_input, p_input, dl.input, batch_indices, ndrange=(dl.input_dim ÷ 2, batch.seq_length, _batch_size))

(q = q_input, p = p_input)
end

function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT, Nothing, :RegularData}, batch::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, BT<:AbstractArray{T, 3}}
backend = KernelAbstractions.get_backend(dl.input)

# the batch size is smaller for the last batch
_batch_size = length(batch_indices_tuple)

batch_indices = convert_vector_of_tuples_to_matrix(backend, batch_indices_tuple)

input = KernelAbstractions.allocate(backend, T, dl.input_dim, batch.seq_length, _batch_size)

assign_input_from_vector_of_tuples! = assign_input_from_vector_of_tuples_kernel!(backend)
assign_input_from_vector_of_tuples!(input, dl.input, batch_indices, ndrange=(dl.input_dim, batch.seq_length, _batch_size))

input
end

# for the case when the DataLoader also contains an output
function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT, OT}, ::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, T1, BT<:AbstractArray{T, 3}, OT<:AbstractArray{T1, 3}}
22 changes: 22 additions & 0 deletions src/data_loader/optimize.jl
@@ -31,6 +31,22 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu
total_error / count
end

function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, <:Any, Nothing, :RegularData}, batch::Batch, loss::Union{typeof(loss), NetworkLoss}) where T
count = 0
total_error = T(0)
batches = batch(dl)
@views for batch_indices in batches
count += 1
# these `copy`s should not be necessary! They stem from a Zygote problem.
input_nt = convert_input_and_batch_indices_to_array(dl, batch, batch_indices)
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_nt), ps)
total_error += loss_value
dp = pullback(one(loss_value))[1]
optimization_step!(opt, model, ps, dp)
end
total_error / count
end
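
# Editor's note: `Zygote.pullback` returns the loss together with a pullback
# closure; seeding that closure with `one(loss_value)` yields the gradient of
# the scalar loss with respect to `ps` as its first entry, which is then passed
# to `optimization_step!`.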

@doc raw"""
A functor for `Optimizer`. It is called with:
- `nn::NeuralNetwork`
@@ -48,6 +64,7 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch
loss_array[i] = optimize_for_one_epoch!(o, nn.model, nn.params, dl, batch, loss)
ProgressMeter.next!(progress_object; showvalues = [(:TrainingLoss, loss_array[i])])
end

loss_array
end

@@ -60,3 +77,8 @@ function (o::Optimizer)(nn::NeuralNetwork{<:NeuralNetworkIntegrator}, dl::DataLo
loss = FeedForwardLoss()
o(nn, dl, batch, n_epochs, loss)
end

function (o::Optimizer)(nn::NeuralNetwork{<:AutoEncoder}, dl::DataLoader, batch::Batch{:FeedForward}, n_epochs::Int=1)
loss = AutoEncoderLoss()
o(nn, dl, batch, n_epochs, loss)
end
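
# Hedged usage sketch (editor's note; assumes `Batch(32)` constructs a
# feed-forward batch and that `nn`, `dl` and `opt` are set up as above):
#
# loss_array = opt(nn, dl, Batch(32), 100)
#
# For an `AutoEncoder` architecture this now picks `AutoEncoderLoss()`
# automatically, mirroring the `FeedForwardLoss()` default for
# `NeuralNetworkIntegrator`s.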
12 changes: 11 additions & 1 deletion src/loss/losses.jl
@@ -56,4 +56,14 @@ function (loss::ClassificationTransformerLoss)(model::Union{Chain, AbstractExpli
norm(predicted_output_cropped - output) / norm(output)
end

struct FeedForwardLoss <: NetworkLoss end

struct AutoEncoderLoss <: NetworkLoss end

function (loss::AutoEncoderLoss)(nn::NeuralNetwork, input)
loss(nn.model, nn.params, input, input)
end

function (loss::AutoEncoderLoss)(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input)
loss(model, ps, input, input)
end
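
# Editor's note: both methods forward to the four-argument `NetworkLoss` call
# (defined elsewhere) with the input doubling as the target, i.e. an
# autoencoder is trained to reproduce its own input:
# AutoEncoderLoss()(nn, input) measures the reconstruction error of `input`.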
16 changes: 8 additions & 8 deletions src/reduced_system/reduced_system.jl
@@ -12,26 +12,26 @@ It can be called using the following constructor: ReducedSystem(N, n, encoder, d
- ics: the initial condition for the full-order system.
- projection_error: the error $||M - \mathcal{R}\circ\mathcal{P}(M)||$, where $M$ is the snapshot matrix and $\mathcal{P}$ and $\mathcal{R}$ are the reduction and the reconstruction, respectively.
"""
struct ReducedSystem{T, ST<:SystemType, ET <: Encoder, DT <: Decoder, IT}
N::Integer
n::Integer
encoder::ET
decoder::DT
full_vector_field
reduced_vector_field
integrator
params
tspan::Tuple{Int, Int}
tstep::T
ics::IT

function ReducedSystem(N::Integer, n::Integer, encoder, decoder, full_vector_field, reduced_vector_field, params, tspan, tstep, ics; integrator=ImplicitMidpoint(), system_type=Symplectic(), T=Float64)
new{T, typeof(system_type), typeof(encoder), typeof(decoder), typeof(ics)}(N, n, encoder, decoder, full_vector_field, reduced_vector_field, integrator, params, tspan, tstep, ics)
end
end

function ReducedSystem(N::Integer, n::Integer, encoder, decoder, full_vector_field, params, tspan, tstep, ics; integrator=ImplicitMidpoint(), system_type=Symplectic(), T=Float64)
ReducedSystem{T, typeof(system_type), typeof(encoder), typeof(decoder), typeof(ics)}(
N, n, encoder, decoder, full_vector_field, build_reduced_vector_field(full_vector_field, decoder, N, n, T), integrator, params, tspan, tstep, ics
)
end
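
# Hedged construction sketch (editor's note; all argument names are
# placeholders): with a trained autoencoder `nn`, a full vector field `v_full`
# and initial condition `ics`,
#
# rs = ReducedSystem(N, n, encoder(nn), decoder(nn), v_full, params, (0, 100), 0.1, ics)
#
# builds the reduced vector field from `v_full` and the decoder automatically.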