Merge pull request #138 from JuliaGNI/clean_up_symplectic_autoencoder_script

Symplectic Autoencoder and PSD architecture
benedict-96 authored May 2, 2024
2 parents df18b20 + 08a4345 commit 7aa4e60
Showing 13 changed files with 454 additions and 13 deletions.
5 changes: 5 additions & 0 deletions src/GeometricMachineLearning.jl
@@ -250,6 +250,8 @@ module GeometricMachineLearning
include("architectures/regular_transformer_integrator.jl")
include("architectures/sympnet.jl")
include("architectures/autoencoder.jl")
include("architectures/symplectic_autoencoder.jl")
include("architectures/psd.jl")
include("architectures/fixed_width_network.jl")
include("architectures/hamiltonian_neural_network.jl")
include("architectures/lagrangian_neural_network.jl")
@@ -269,6 +271,9 @@ module GeometricMachineLearning
export LSTMNeuralNetwork
export ClassificationTransformer
export VolumePreservingFeedForward
export SymplecticAutoencoder, PSDArch

export solve!, encoder, decoder

export train!, apply!, jacobian!
export iterate
85 changes: 84 additions & 1 deletion src/architectures/autoencoder.jl
@@ -1,2 +1,85 @@
"""
Abstract `AutoEncoder` type. If a custom `<:AutoEncoder` architecture is implemented, it should have the fields `full_dim`, `reduced_dim`, `n_encoder_blocks` and `n_decoder_blocks`. Furthermore, the routines `encoder`, `decoder`, `encoder_parameters` and `decoder_parameters` should be extended.
"""
abstract type AutoEncoder <: Architecture end
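
# Editor's sketch (not part of the original file): a minimal custom autoencoder
# only needs the four fields named in the docstring above; the name
# `MyAutoEncoder` is hypothetical. `encoder_layers_from_iteration` and
# `decoder_layers_from_iteration` (defined below) then have to be extended for it.
struct MyAutoEncoder <: AutoEncoder
full_dim::Int
reduced_dim::Int
n_encoder_blocks::Int
n_decoder_blocks::Int
end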

"""
Abstract `Encoder` type. If a custom `<:Encoder` architecture is implemented, it should have the fields `full_dim`, `reduced_dim` and `n_encoder_blocks`.
"""
abstract type Encoder <: Architecture end

"""
Abstract `Decoder` type. If a custom `<:Decoder` architecture is implemented, it should have the fields `full_dim`, `reduced_dim` and `n_decoder_blocks`.
"""
abstract type Decoder <: Architecture end

struct UnknownEncoder <: Encoder
full_dim::Int
reduced_dim::Int
n_encoder_blocks::Int
end

struct UnknownDecoder <: Decoder
full_dim::Int
reduced_dim::Int
n_decoder_blocks::Int
end

"""
This function computes the dimensions of the intermediate steps between the reduced and the full dimension. The iterations are returned in ascending order.
"""
function compute_iterations(full_dim::Integer, reduced_dim::Integer, n_blocks::Integer)
iterations = Vector{Int}(reduced_dim : (full_dim - reduced_dim) ÷ (n_blocks - 1) : full_dim)
iterations[end] = full_dim
iterations
end
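
# Worked example (editor's note): for full_dim = 10, reduced_dim = 2 and
# n_blocks = 3 the step size is (10 - 2) ÷ 2 = 4, so
# compute_iterations(10, 2, 3) == [2, 6, 10]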

function compute_encoder_iterations(arch::AutoEncoder)
compute_iterations(arch.full_dim, arch.reduced_dim, arch.n_encoder_blocks)
end

function compute_decoder_iterations(arch::AutoEncoder)
compute_iterations(arch.full_dim, arch.reduced_dim, arch.n_decoder_blocks)
end

"""
Takes as input the autoencoder architecture and a vector of integers specifying the layer dimensions in the encoder. Has to return a tuple of `AbstractExplicitLayer`s.
"""
encoder_layers_from_iteration(::AutoEncoder, ::AbstractVector{<:Integer}) = error("You have to implement `encoder_layers_from_iteration` for this autoencoder architecture!")

"""
Takes as input the autoencoder architecture and a vector of integers specifying the layer dimensions in the decoder. Has to return a tuple of `AbstractExplicitLayer`s.
"""
decoder_layers_from_iteration(::AutoEncoder, ::AbstractVector{<:Integer}) = error("You have to implement `decoder_layers_from_iteration` for this autoencoder architecture!")

function encoder_model(arch::AutoEncoder)
encoder_iterations = reverse(compute_encoder_iterations(arch))
Chain(encoder_layers_from_iteration(arch, encoder_iterations)...)
end

function decoder_model(arch::AutoEncoder)
decoder_iterations = compute_decoder_iterations(arch)
Chain(decoder_layers_from_iteration(arch, decoder_iterations)...)
end

function encoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_encoder_layers = length(encoder_model(nn.architecture).layers)
nn.params[1:n_encoder_layers]
end

function decoder_parameters(nn::NeuralNetwork{<:AutoEncoder})
n_decoder_layers = length(decoder_model(nn.architecture).layers)
nn.params[(end - (n_decoder_layers - 1)):end]
end

function Chain(arch::AutoEncoder)
Chain(encoder_model(arch).layers..., decoder_model(arch).layers...)
end

function encoder(nn::NeuralNetwork{<:AutoEncoder})
NeuralNetwork(UnknownEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_blocks), encoder_model(nn.architecture), encoder_parameters(nn), get_backend(nn))
end

function decoder(nn::NeuralNetwork{<:AutoEncoder})
NeuralNetwork(UnknownDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_decoder_blocks), decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
end
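
# Hedged usage sketch (editor's note; assumes a concrete architecture
# `arch <: AutoEncoder` and an initialized `nn = NeuralNetwork(arch)`):
#
# enc = encoder(nn) # NeuralNetwork with an `UnknownEncoder` architecture
# dec = decoder(nn) # NeuralNetwork with an `UnknownDecoder` architecture
#
# The two networks share their parameter arrays with `nn`: `encoder_parameters`
# and `decoder_parameters` slice `nn.params` by layer count rather than copying.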
50 changes: 50 additions & 0 deletions src/architectures/psd.jl
@@ -0,0 +1,50 @@
struct PSDArch <: AutoEncoder
full_dim::Int
reduced_dim::Int
n_encoder_blocks::Int
n_decoder_blocks::Int
end

function PSDArch(full_dim::Integer, reduced_dim::Integer)
@assert iseven(full_dim) && iseven(reduced_dim) "The full and the reduced dimension have to be even!"
PSDArch(full_dim, reduced_dim, 2, 2)
end
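
# Editor's note: the two-argument constructor fixes both block counts at 2,
# i.e. PSDArch(20, 4) === PSDArch(20, 4, 2, 2).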

function encoder_layers_from_iteration(arch::PSDArch, encoder_iterations::AbstractVector{<:Integer})
@assert length(encoder_iterations) == 2
@assert arch.full_dim == encoder_iterations[1]
@assert arch.reduced_dim == encoder_iterations[2]

(PSDLayer(arch.full_dim, arch.reduced_dim), )
end

function decoder_layers_from_iteration(arch::PSDArch, decoder_iterations::AbstractVector{<:Integer})
@assert length(decoder_iterations) == 2
@assert arch.full_dim == decoder_iterations[2]
@assert arch.reduced_dim == decoder_iterations[1]

(PSDLayer(arch.reduced_dim, arch.full_dim), )
end

# this performs Proper Symplectic Decomposition (PSD): the layer weights are computed directly from an SVD of the snapshot data instead of being trained iteratively
function solve!(nn::NeuralNetwork{<:PSDArch}, input::AbstractMatrix)
half_of_dimension_in_big_space = nn.architecture.full_dim ÷ 2
@views input_qp = hcat(input[1 : half_of_dimension_in_big_space, :], input[(half_of_dimension_in_big_space + 1) : end, :])
U_term = svd(input_qp).U
@views nn.params[1].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]
@views nn.params[2].weight.A .= U_term[:, 1 : nn.architecture.reduced_dim ÷ 2]

AutoEncoderLoss()(nn, input)
end
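
# Editor's sketch of the underlying cotangent lift (standalone and runnable;
# the sizes are illustrative). `solve!` above takes the SVD of the horizontally
# stacked snapshots [Q P] and stores U_r = U[:, 1:reduced_dim ÷ 2] in both PSD
# layers; the implied decoder is the block matrix Φ = [U_r 0; 0 U_r], which is
# orthonormal and also symplectic, since Φ' J_{2N} Φ = J_{2n}.
using LinearAlgebra
Q, P = rand(10, 50), rand(10, 50)        # N = 10, 50 snapshots
U_r = svd(hcat(Q, P)).U[:, 1:2]          # reduced_dim = 4, so reduced_dim ÷ 2 = 2
Φ = [U_r zeros(10, 2); zeros(10, 2) U_r]
@assert Φ' * Φ ≈ I(4)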

function solve!(nn::NeuralNetwork{<:PSDArch}, input::AbstractArray{T, 3}) where T
solve!(nn, reshape(input, size(input, 1), size(input, 2) * size(input, 3)))
end

function solve!(nn::NeuralNetwork{<:PSDArch}, dl::DataLoader{T, AT, <:Any, :RegularData}) where {T, AT <: AbstractArray{T}}
solve!(nn, dl.input)
end

function solve!(nn::NeuralNetwork{<:PSDArch}, dl::DataLoader{T, NT, <:Any, :RegularData}) where {T, AT <: AbstractArray{T}, NT <: NamedTuple{(:q, :p), Tuple{AT, AT}}}
solve!(nn, vcat(dl.input.q, dl.input.p))
end
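
# Hedged usage sketch (editor's note; `snapshots` is a hypothetical 20 × m
# matrix with the q-rows stacked above the p-rows, and we assume the package's
# `NeuralNetwork` constructor initializes the parameters of the architecture):
#
# nn = NeuralNetwork(PSDArch(20, 4))
# err = solve!(nn, snapshots)
#
# This fits both PSD layers directly via SVD (no gradient-based training) and
# returns the projection error as measured by `AutoEncoderLoss`.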
101 changes: 101 additions & 0 deletions src/architectures/symplectic_autoencoder.jl
@@ -0,0 +1,101 @@
struct SymplecticAutoencoder{EncoderInit, DecoderInit, AT} <: AutoEncoder
full_dim::Int
reduced_dim::Int
n_encoder_layers::Int
n_encoder_blocks::Int
n_decoder_layers::Int
n_decoder_blocks::Int
sympnet_upscale::Int
activation::AT
end

struct SymplecticEncoder{AT} <: Encoder
full_dim::Int
reduced_dim::Int
n_encoder_layers::Int
n_encoder_blocks::Int
sympnet_upscale::Int
activation::AT
end

struct SymplecticDecoder{AT} <: Decoder
full_dim::Int
reduced_dim::Int
n_decoder_layers::Int
n_decoder_blocks::Int
sympnet_upscale::Int
activation::AT
end

function SymplecticAutoencoder(full_dim::Integer, reduced_dim::Integer; n_encoder_layers::Integer = 4, n_encoder_blocks::Integer = 2, n_decoder_layers::Integer = 1, n_decoder_blocks::Integer = 3, sympnet_upscale::Integer = 5, activation = tanh, encoder_init_q::Bool = true, decoder_init_q::Bool = true)
@assert full_dim ≥ reduced_dim "The dimension of the full-order model has to be larger than the dimension of the reduced-order model!"
@assert iseven(full_dim) && iseven(reduced_dim) "The full-order model and the reduced-order model need to be even dimensional!"

if encoder_init_q && decoder_init_q
SymplecticAutoencoder{:EncoderInitQ, :DecoderInitQ, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
elseif encoder_init_q && !decoder_init_q
SymplecticAutoencoder{:EncoderInitQ, :DecoderInitP, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
elseif !encoder_init_q && decoder_init_q
SymplecticAutoencoder{:EncoderInitP, :DecoderInitQ, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
elseif !encoder_init_q && !decoder_init_q
SymplecticAutoencoder{:EncoderInitP, :DecoderInitP, typeof(activation)}(full_dim, reduced_dim, n_encoder_layers, n_encoder_blocks, n_decoder_layers, n_decoder_blocks, sympnet_upscale, activation)
end
end
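
# Editor's example: with the default keywords,
# SymplecticAutoencoder(12, 4) isa SymplecticAutoencoder{:EncoderInitQ, :DecoderInitQ, typeof(tanh)}
# holds; passing encoder_init_q = false switches the first parameter to :EncoderInitP.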

"""
This function computes the dimensions of the intermediate steps between the reduced and the full dimension for the symplectic case. The iterations are returned in ascending order; only even dimensions occur here.
"""
function compute_iterations_for_symplectic_system(full_dim::Integer, reduced_dim::Integer, n_blocks::Integer)
full_dim2 = full_dim ÷ 2
reduced_dim2 = reduced_dim ÷ 2
iterations = Vector{Int}(reduced_dim2 : (full_dim2 - reduced_dim2) ÷ (n_blocks - 1) : full_dim2)
iterations[end] = full_dim2
iterations * 2
end
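
# Worked example (editor's note): for full_dim = 12, reduced_dim = 4 and
# n_blocks = 3 the half dimensions are 6 and 2 with step (6 - 2) ÷ 2 = 2, so
# compute_iterations_for_symplectic_system(12, 4, 3) == [4, 8, 12]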

function compute_encoder_iterations(arch::SymplecticAutoencoder)
compute_iterations_for_symplectic_system(arch.full_dim, arch.reduced_dim, arch.n_encoder_blocks)
end

function compute_decoder_iterations(arch::SymplecticAutoencoder)
compute_iterations_for_symplectic_system(arch.full_dim, arch.reduced_dim, arch.n_decoder_blocks)
end

function encoder_or_decoder_layers_from_iteration(arch::SymplecticAutoencoder, encoder_iterations::AbstractVector{<:Integer}, n_encoder_layers::Integer, _determine_layer_type)
encoder_layers = ()
encoder_iterations_reduced = encoder_iterations[1:(end - 1)]
for (i, it) in zip(axes(encoder_iterations_reduced, 1), encoder_iterations_reduced)
for layer_index in 1:n_encoder_layers
layer_constructor = _determine_layer_type(layer_index) ? GradientLayerQ : GradientLayerP
encoder_layers = (encoder_layers..., layer_constructor(it, arch.sympnet_upscale * it, arch.activation))
end
encoder_layers = (encoder_layers..., PSDLayer(it, encoder_iterations[i + 1]))
end

encoder_layers
end

function encoder_layers_from_iteration(arch::SymplecticAutoencoder{:EncoderInitQ}, encoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, encoder_iterations, arch.n_encoder_layers, isodd)
end

function encoder_layers_from_iteration(arch::SymplecticAutoencoder{:EncoderInitP}, encoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, encoder_iterations, arch.n_encoder_layers, iseven)
end

function decoder_layers_from_iteration(arch::SymplecticAutoencoder{<:Any, :DecoderInitQ}, decoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, decoder_iterations, arch.n_decoder_layers, isodd)
end

function decoder_layers_from_iteration(arch::SymplecticAutoencoder{<:Any, :DecoderInitP}, decoder_iterations::AbstractVector{<:Integer})
encoder_or_decoder_layers_from_iteration(arch, decoder_iterations, arch.n_decoder_layers, iseven)
end

function encoder(nn::NeuralNetwork{<:SymplecticAutoencoder})
arch = SymplecticEncoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_encoder_layers, nn.architecture.n_encoder_blocks, nn.architecture.sympnet_upscale, nn.architecture.activation)
NeuralNetwork(arch, encoder_model(nn.architecture), encoder_parameters(nn), get_backend(nn))
end

function decoder(nn::NeuralNetwork{<:SymplecticAutoencoder})
arch = SymplecticDecoder(nn.architecture.full_dim, nn.architecture.reduced_dim, nn.architecture.n_decoder_layers, nn.architecture.n_decoder_blocks, nn.architecture.sympnet_upscale, nn.architecture.activation)
NeuralNetwork(arch, decoder_model(nn.architecture), decoder_parameters(nn), get_backend(nn))
end
32 changes: 32 additions & 0 deletions src/data_loader/batch.jl
@@ -168,6 +168,38 @@ function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT}, batch::
input, output
end

function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT, Nothing, :RegularData}, batch::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, AT<:AbstractArray{T, 3}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}}
backend = KernelAbstractions.get_backend(dl.input)

# the batch size is smaller for the last batch
_batch_size = length(batch_indices_tuple)

batch_indices = convert_vector_of_tuples_to_matrix(backend, batch_indices_tuple)

q_input = KernelAbstractions.allocate(backend, T, dl.input_dim ÷ 2, batch.seq_length, _batch_size)
p_input = similar(q_input)

assign_input_from_vector_of_tuples! = assign_input_from_vector_of_tuples_kernel!(backend)
assign_input_from_vector_of_tuples!(q_input, p_input, dl.input, batch_indices, ndrange=(dl.input_dim ÷ 2, batch.seq_length, _batch_size))

(q = q_input, p = p_input)
end

function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT, Nothing, :RegularData}, batch::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, BT<:AbstractArray{T, 3}}
backend = KernelAbstractions.get_backend(dl.input)

# the batch size is smaller for the last batch
_batch_size = length(batch_indices_tuple)

batch_indices = convert_vector_of_tuples_to_matrix(backend, batch_indices_tuple)

input = KernelAbstractions.allocate(backend, T, dl.input_dim, batch.seq_length, _batch_size)

assign_input_from_vector_of_tuples! = assign_input_from_vector_of_tuples_kernel!(backend)
assign_input_from_vector_of_tuples!(input, dl.input, batch_indices, ndrange=(dl.input_dim, batch.seq_length, _batch_size))

input
end

# for the case when the DataLoader also contains an output
function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT, OT}, ::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, T1, BT<:AbstractArray{T, 3}, OT<:AbstractArray{T1, 3}}
22 changes: 22 additions & 0 deletions src/data_loader/optimize.jl
@@ -31,6 +31,22 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu
total_error / count
end

function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, <:Any, Nothing, :RegularData}, batch::Batch, loss::Union{typeof(loss), NetworkLoss}) where T
count = 0
total_error = T(0)
batches = batch(dl)
@views for batch_indices in batches
count += 1
# these `copy`s should not be necessary! They stem from a Zygote problem.
input_nt = convert_input_and_batch_indices_to_array(dl, batch, batch_indices)
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_nt), ps)
total_error += loss_value
dp = pullback(one(loss_value))[1]
optimization_step!(opt, model, ps, dp)
end
total_error / count
end
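
# Editor's note: `Zygote.pullback` returns the loss together with a pullback
# closure; seeding that closure with `one(loss_value)` yields the gradient of
# the scalar loss with respect to `ps` as its first entry, which is then passed
# to `optimization_step!`.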

@doc raw"""
A functor for `Optimizer`. It is called with:
- `nn::NeuralNetwork`
@@ -48,6 +64,7 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch
loss_array[i] = optimize_for_one_epoch!(o, nn.model, nn.params, dl, batch, loss)
ProgressMeter.next!(progress_object; showvalues = [(:TrainingLoss, loss_array[i])])
end

loss_array
end

@@ -60,3 +77,8 @@ function (o::Optimizer)(nn::NeuralNetwork{<:NeuralNetworkIntegrator}, dl::DataLo
loss = FeedForwardLoss()
o(nn, dl, batch, n_epochs, loss)
end

function (o::Optimizer)(nn::NeuralNetwork{<:AutoEncoder}, dl::DataLoader, batch::Batch{:FeedForward}, n_epochs::Int=1)
loss = AutoEncoderLoss()
o(nn, dl, batch, n_epochs, loss)
end
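
# Hedged usage sketch (editor's note; assumes `Batch(32)` constructs a
# feed-forward batch and that `nn`, `dl` and `opt` are set up as above):
#
# loss_array = opt(nn, dl, Batch(32), 100)
#
# For an `AutoEncoder` architecture this now picks `AutoEncoderLoss()`
# automatically, mirroring the `FeedForwardLoss()` default for
# `NeuralNetworkIntegrator`s.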
12 changes: 11 additions & 1 deletion src/loss/losses.jl
@@ -56,4 +56,14 @@ function (loss::ClassificationTransformerLoss)(model::Union{Chain, AbstractExpli
norm(predicted_output_cropped - output) / norm(output)
end

struct FeedForwardLoss <: NetworkLoss end

struct AutoEncoderLoss <: NetworkLoss end

function (loss::AutoEncoderLoss)(nn::NeuralNetwork, input)
loss(nn.model, nn.params, input, input)
end

function (loss::AutoEncoderLoss)(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input)
loss(model, ps, input, input)
end
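
# Editor's note: both methods forward to the four-argument `NetworkLoss` call
# (defined elsewhere) with the input doubling as the target, i.e. an
# autoencoder is trained to reproduce its own input:
# AutoEncoderLoss()(nn, input) measures the reconstruction error of `input`.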
16 changes: 8 additions & 8 deletions src/reduced_system/reduced_system.jl
@@ -12,26 +12,26 @@ It can be called using the following constructor: ReducedSystem(N, n, encoder, d
- ics: the initial condition for the full-order system.
- projection_error: the error $||M - \mathcal{R}\circ\mathcal{P}(M)||$, where $M$ is the snapshot matrix and $\mathcal{P}$ and $\mathcal{R}$ are the reduction and the reconstruction, respectively.
"""
struct ReducedSystem{T, ST<:SystemType, ET <: Encoder, DT <: Decoder, IT}
N::Integer
n::Integer
encoder::ET
decoder::DT
full_vector_field
reduced_vector_field
integrator
params
tspan::Tuple{Int, Int}
tstep::T
ics::IT

function ReducedSystem(N::Integer, n::Integer, encoder, decoder, full_vector_field, reduced_vector_field, params, tspan, tstep, ics; integrator=ImplicitMidpoint(), system_type=Symplectic(), T=Float64)
new{T, typeof(system_type), typeof(encoder), typeof(decoder), typeof(ics)}(N, n, encoder, decoder, full_vector_field, reduced_vector_field, integrator, params, tspan, tstep, ics)
end
end

function ReducedSystem(N::Integer, n::Integer, encoder, decoder, full_vector_field, params, tspan, tstep, ics; integrator=ImplicitMidpoint(), system_type=Symplectic(), T=Float64)
ReducedSystem{T, typeof(system_type), typeof(encoder), typeof(decoder), typeof(ics)}(
N, n, encoder, decoder, full_vector_field, build_reduced_vector_field(full_vector_field, decoder, N, n, T), integrator, params, tspan, tstep, ics
)
end
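
# Hedged construction sketch (editor's note; all argument names are
# placeholders): with a trained autoencoder `nn`, a full vector field `v_full`
# and initial condition `ics`,
#
# rs = ReducedSystem(N, n, encoder(nn), decoder(nn), v_full, params, (0, 100), 0.1, ics)
#
# builds the reduced vector field from `v_full` and the decoder automatically.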