diff --git a/Project.toml b/Project.toml
index 62caa680..aaaccc94 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,7 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReliabilityDiagrams = "e5f51471-6270-49e4-a15a-f1cfbff4f856"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
 julia = "1"
diff --git a/examples/batchensemble.jl b/examples/batchensemble.jl
new file mode 100644
index 00000000..cb44d2a5
--- /dev/null
+++ b/examples/batchensemble.jl
@@ -0,0 +1,207 @@
+## Classification of the MNIST dataset
+## with the convolutional neural network known as LeNet5.
+## This script also combines various
+## packages from the Julia ecosystem with Flux.
+using Flux
+using Flux.Data: DataLoader
+using Flux.Optimise: Optimiser, WeightDecay
+using Flux: onehotbatch, onecold, glorot_normal, label_smoothing
+using Flux.Losses: logitcrossentropy
+using Statistics, Random
+using Logging: with_logger
+using TensorBoardLogger: TBLogger, tb_overwrite, set_step!, set_step_increment!
+using ProgressMeter: @showprogress
+import MLDatasets
+import BSON
+using CUDA
+using Formatting
+
+using DeepUncertainty
+
+# LeNet5 "constructor".
+# The model can be adapted to any image size
+# and any number of output classes.
+function LeNet5(args; imgsize = (28, 28, 1), nclasses = 10)
+    out_conv_size = (imgsize[1] ÷ 4 - 3, imgsize[2] ÷ 4 - 3, 16)
+
+    return Chain(
+        ConvBatchEnsemble((5, 5), imgsize[end] => 6, args.rank, args.ensemble_size, relu),
+        MaxPool((2, 2)),
+        ConvBatchEnsemble((5, 5), 6 => 16, args.rank, args.ensemble_size, relu),
+        MaxPool((2, 2)),
+        flatten,
+        DenseBatchEnsemble(prod(out_conv_size), 120, args.rank, args.ensemble_size, relu),
+        DenseBatchEnsemble(120, 84, args.rank, args.ensemble_size, relu),
+        DenseBatchEnsemble(84, nclasses, args.rank, args.ensemble_size),
+    )
+end
+
+function get_data(args)
+    xtrain, ytrain = MLDatasets.MNIST.traindata(Float32)
+    xtest, ytest = MLDatasets.MNIST.testdata(Float32)
+
+    xtrain = reshape(xtrain, 28, 28, 1, :)
+    xtest = reshape(xtest, 28, 28, 1, :)
+
+    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)
+
+    train_loader = DataLoader(
+        (xtrain, ytrain),
+        batchsize = args.batchsize,
+        shuffle = true,
+        partial = false,
+    )
+    test_loader = DataLoader((xtest, ytest), batchsize = args.batchsize, partial = false)
+
+    return train_loader, test_loader
+end
+
+loss(ŷ, y) = logitcrossentropy(ŷ, y)
+
+function accuracy(preds, labels)
+    acc = sum(onecold(preds |> cpu) .== onecold(labels |> cpu))
+    return acc
+end
+
+function eval_loss_accuracy(args, loader, model, device)
+    l = [0.0f0 for x = 1:args.ensemble_size]
+    acc = [0 for x = 1:args.ensemble_size]
+    ece_list = [0.0f0 for x = 1:args.ensemble_size]
+    ntot = 0
+    mean_l = 0
+    mean_acc = 0
+    mean_ece = 0
+    for (x, y) in loader
+        x = repeat(x, 1, 1, 1, args.ensemble_size)
+        x, y = x |> device, y |> device
+        # Perform the forward pass
+        ŷ = model(x)
+        ŷ = softmax(ŷ, dims = 1)
+        # Reshape the predictions into [classes, batch_size, ensemble_size]
+        reshaped_ŷ = reshape(ŷ, size(ŷ)[1], args.batchsize, args.ensemble_size)
+        # Loop through each model's predictions
+        for ensemble = 1:args.ensemble_size
+            model_predictions = reshaped_ŷ[:, :, ensemble]
+            # Calculate the individual loss
+            l[ensemble] += loss(model_predictions, y) * size(model_predictions)[end]
+            acc[ensemble] += accuracy(model_predictions, y)
+            ece_list[ensemble] +=
+                ExpectedCalibrationError(model_predictions |> cpu, onecold(y |> cpu)) *
+                args.batchsize
+        end
+        # Get the mean predictions
+        mean_predictions = mean(reshaped_ŷ, dims = ndims(reshaped_ŷ))
+        mean_predictions = dropdims(mean_predictions, dims = ndims(mean_predictions))
+        mean_l += loss(mean_predictions, y) * size(mean_predictions)[end]
+        mean_acc += accuracy(mean_predictions, y)
+        mean_ece +=
+            ExpectedCalibrationError(mean_predictions |> cpu, onecold(y |> cpu)) *
+            args.batchsize
+        ntot += size(mean_predictions)[end]
+    end
+    # Normalize the per-model metrics
+    losses = [loss / ntot |> round4 for loss in l]
+    acc = [a / ntot * 100 |> round4 for a in acc]
+    ece_list = [x / ntot |> round4 for x in ece_list]
+    # Calculate the mean metrics
+    mean_l = mean_l / ntot |> round4
+    mean_acc = mean_acc / ntot * 100 |> round4
+    mean_ece = mean_ece / ntot |> round4
+
+    # Print each ensemble member's loss, accuracy and ECE
+    for ensemble = 1:args.ensemble_size
+        @info (format(
+            "Model {} Loss: {} Accuracy: {} ECE: {}",
+            ensemble,
+            losses[ensemble],
+            acc[ensemble],
+            ece_list[ensemble],
+        ))
+    end
+    @info (format(
+        "Mean Loss: {} Mean Accuracy: {} Mean ECE: {}",
+        mean_l,
+        mean_acc,
+        mean_ece,
+    ))
+    @info "==========================================================="
+    return nothing
+end
+
+## utility functions
+num_params(model) = sum(length, Flux.params(model))
+round4(x) = round(x, digits = 4)
+
+# arguments for the `train` function
+Base.@kwdef mutable struct Args
+    η = 3e-4             # learning rate
+    λ = 0                # L2 regularizer param, implemented as weight decay
+    batchsize = 32       # batch size
+    epochs = 10          # number of epochs
+    seed = 0             # set seed > 0 for reproducibility
+    use_cuda = true      # if true use cuda (if available)
+    infotime = 1         # report every `infotime` epochs
+    checktime = 5        # Save the model every `checktime` epochs. Set to 0 for no checkpoints.
+    savepath = "runs/"   # results path
+    rank = 1
+    ensemble_size = 4
+end
+
+function train(; kws...)
+    args = Args(; kws...)
+    args.seed > 0 && Random.seed!(args.seed)
+    use_cuda = args.use_cuda && CUDA.functional()
+
+    if use_cuda
+        device = gpu
+        @info "Training on GPU"
+    else
+        device = cpu
+        @info "Training on CPU"
+    end
+
+    ## DATA
+    train_loader, test_loader = get_data(args)
+    @info "Dataset MNIST: $(train_loader.nobs) train and $(test_loader.nobs) test examples"
+
+    ## MODEL AND OPTIMIZER
+    model = LeNet5(args) |> device
+    @info "LeNet5 model: $(num_params(model)) trainable params"
+
+    ps = Flux.params(model)
+
+    opt = ADAM(args.η)
+    if args.λ > 0 # add weight decay, equivalent to L2 regularization
+        opt = Optimiser(WeightDecay(args.λ), opt)
+    end
+
+    function report(epoch)
+        # @info "Train Metrics"
+        # eval_loss_accuracy(args, train_loader, model, device)
+        @info "Test metrics"
+        eval_loss_accuracy(args, test_loader, model, device)
+    end
+
+    ## TRAINING
+    @info "Start Training"
+    report(0)
+    for epoch = 1:args.epochs
+        @showprogress for (x, y) in train_loader
+            # Make copies of the batch, one per ensemble member
+            x = repeat(x, 1, 1, 1, args.ensemble_size)
+            y = repeat(y, 1, args.ensemble_size)
+            x, y = x |> device, y |> device
+            gs = Flux.gradient(ps) do
+                ŷ = model(x)
+                loss(ŷ, y)
+            end
+
+            Flux.Optimise.update!(opt, ps, gs)
+        end
+
+        ## Printing and logging
+        epoch % args.infotime == 0 && report(epoch)
+    end
+end
+
+train()
diff --git a/src/DeepUncertainty.jl b/src/DeepUncertainty.jl
index 5aacd657..74de5589 100644
--- a/src/DeepUncertainty.jl
+++ b/src/DeepUncertainty.jl
@@ -1,10 +1,17 @@
 module DeepUncertainty
+using Flux
+using Random
+using Flux: @functor, glorot_normal, create_bias
+
 # Export layers
 export MCLayer, MCDense, MCConv
+export DenseBatchEnsemble, ConvBatchEnsemble
 
 export mean_loglikelihood, brier_score, ExpectedCalibrationError, prediction_metrics
 
 include("metrics.jl")
 include("layers/mclayers.jl")
+include("layers/BatchEnsemble/dense.jl")
+include("layers/BatchEnsemble/conv.jl")
 
 end
diff --git a/src/layers/BatchEnsemble/conv.jl b/src/layers/BatchEnsemble/conv.jl
new file mode 100644
index 00000000..564c943b
--- /dev/null
+++ b/src/layers/BatchEnsemble/conv.jl
@@ -0,0 +1,145 @@
+"""
+    ConvBatchEnsemble(filter, in => out, rank,
+                      ensemble_size, σ = identity;
+                      stride = 1, pad = 0, dilation = 1,
+                      groups = 1, [bias, weight, init])
+    ConvBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
+
+Creates a convolutional BatchEnsemble layer. BatchEnsemble is a memory-efficient
+alternative to deep ensembles: a deep ensemble of size N trains N separate models,
+making the time and memory complexity O(N * complexity of one network).
+BatchEnsemble instead derives each member's weights from a shared weight matrix W
+and a pair of rank-1 vectors R (alpha) and S (gamma); the outer product RS' is
+multiplied element-wise with W. R and S are also called fast weights.
+
+Reference - https://arxiv.org/abs/2002.06715
+
+During both training and testing, we repeat the samples along the batch dimension
+N times, where N is the ensemble_size. For example, if each mini batch has 10 samples
+and our ensemble size is 4, then the actual input to the layer has 40 samples.
+The output of the layer has 40 samples as well, and each group of 10 samples can be
+considered the output of one ensemble member.
+
+# Fields
+- `layer`: The conv layer which transforms the perturbed input to output
+- `alpha`: The first fast weight of size (in_dim, ensemble_size)
+- `gamma`: The second fast weight of size (out_dim, ensemble_size)
+- `ensemble_bias`: Bias added to the ensemble output, separate from the conv layer bias
+- `ensemble_act`: The activation function applied to the ensemble output
+- `rank`: Rank of the fast weights (rank > 1 doesn't work on GPU for now)
+
+# Arguments
+- `filter::NTuple{N,Integer}`: Kernel dimensions, e.g. (5, 5)
+- `ch::Pair{<:Integer,<:Integer}`: Input channels => output channels
+- `rank::Integer`: Rank of the fast weights
+- `ensemble_size::Integer`: Number of models in the ensemble
+- `σ::F=identity`: Activation of the conv layer, defaults to identity
+- `init=glorot_normal`: Initialization function, defaults to glorot_normal
+- `alpha_init=glorot_normal`: Initialization function for the alpha fast weight,
+    defaults to glorot_normal
+- `gamma_init=glorot_normal`: Initialization function for the gamma fast weight,
+    defaults to glorot_normal
+- `bias::Bool=true`: Toggle the usage of bias in the conv layer
+- `ensemble_bias::Bool=true`: Toggle the usage of ensemble bias
+- `ensemble_act::F=identity`: Activation function for ensemble outputs
+"""
+struct ConvBatchEnsemble{L,F,M,B}
+    layer::L
+    alpha::M
+    gamma::M
+    ensemble_bias::B
+    ensemble_act::F
+    rank::Any
+    function ConvBatchEnsemble(
+        layer::L,
+        alpha::M,
+        gamma::M,
+        ensemble_bias = true,
+        ensemble_act::F = identity,
+        rank = 1,
+    ) where {M,F,L}
+        ensemble_bias = create_bias(gamma, ensemble_bias, size(gamma)[1], size(gamma)[2])
+        new{typeof(layer),F,M,typeof(ensemble_bias)}(
+            layer,
+            alpha,
+            gamma,
+            ensemble_bias,
+            ensemble_act,
+            rank,
+        )
+    end
+end
+
+function ConvBatchEnsemble(
+    k::NTuple{N,Integer},
+    ch::Pair{<:Integer,<:Integer},
+    rank::Integer,
+    ensemble_size::Integer,
+    σ = identity;
+    init = glorot_normal,
+    alpha_init = glorot_normal,
+    gamma_init = glorot_normal,
+    stride = 1,
+    pad = 0,
+    dilation = 1,
+    groups = 1,
+    bias = true,
+    ensemble_bias = true,
+    ensemble_act = identity,
+) where {N}
+    layer = Flux.Conv(
+        k,
+        ch,
+        σ;
+        stride = stride,
+        pad = pad,
+        dilation = dilation,
+        init = init,
+        groups = groups,
+        bias = bias,
+    )
+    in_dim = ch[1]
+    out_dim = ch[2]
+    if rank >= 1
+        alpha_shape = (in_dim, ensemble_size)
+        gamma_shape = (out_dim, ensemble_size)
+    else
+        error("Rank must be >= 1.")
+    end
+    alpha = alpha_init(alpha_shape)
+    gamma = gamma_init(gamma_shape)
+
+    return ConvBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
+end
+
+@functor ConvBatchEnsemble
+
+function (be::ConvBatchEnsemble)(x)
+    # Conv BatchEnsemble params
+    layer = be.layer
+    alpha = be.alpha
+    gamma = be.gamma
+    e_b = be.ensemble_bias
+    e_σ = be.ensemble_act
+
+    batch_size = size(x)[end]
+    in_size = size(alpha)[1]
+    out_size = size(gamma)[1]
+    ensemble_size = size(alpha)[2]
+    samples_per_model = batch_size ÷ ensemble_size
+
+    # Alpha, gamma shapes - [channels, ensemble_size]
+    e_b = repeat(e_b, samples_per_model)
+    alpha = repeat(alpha, samples_per_model)
+    gamma = repeat(gamma, samples_per_model)
+    # Reshape to [1, 1, channels, batch_size] for broadcasting over feature maps
+    e_b = reshape(e_b, (1, 1, out_size, batch_size))
+    alpha = reshape(alpha, (1, 1, in_size, batch_size))
+    gamma = reshape(gamma, (1, 1, out_size, batch_size))
+
+    perturbed_x = x .* alpha
+    output = layer(perturbed_x) .* gamma
+    output = e_σ.(output .+ e_b)
+
+    return output
+end
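The rank-1 construction described in the docstrings above is small enough to verify by hand. The sketch below is illustrative only and not part of this patch; it assumes a plain dense-style weight `W` with no bias and made-up dimensions, and checks that perturbing the input by `alpha` and the output by `gamma` matches using the explicit per-member weight `W .* (gamma * alpha')`.

```julia
# Illustrative sketch, not part of the patch: BatchEnsemble's per-member weight
# is the shared weight W scaled element-wise by a rank-1 outer product of the
# fast weights; scaling the input by alpha and the output by gamma gives the
# same result without materialising a separate weight per member.
using LinearAlgebra

in_dim, out_dim, ensemble_size = 5, 3, 4
W = randn(Float32, out_dim, in_dim)             # shared (slow) weight
alpha = randn(Float32, in_dim, ensemble_size)   # fast weight R
gamma = randn(Float32, out_dim, ensemble_size)  # fast weight S

x = randn(Float32, in_dim)
i = 2  # any ensemble member

W_i = W .* (gamma[:, i] * alpha[:, i]')         # explicit member weight, W ∘ (S_i R_i')
y_explicit = W_i * x
y_fast = (W * (x .* alpha[:, i])) .* gamma[:, i]

@assert y_explicit ≈ y_fast
```

This equivalence is why the layers below only store `alpha` and `gamma` per member and keep a single shared `Dense`/`Conv` layer.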
diff --git a/src/layers/BatchEnsemble/dense.jl b/src/layers/BatchEnsemble/dense.jl
new file mode 100644
index 00000000..9ef92921
--- /dev/null
+++ b/src/layers/BatchEnsemble/dense.jl
@@ -0,0 +1,151 @@
+"""
+    DenseBatchEnsemble(in, out, rank,
+                       ensemble_size,
+                       σ=identity;
+                       bias=true,
+                       init=glorot_normal,
+                       alpha_init=glorot_normal,
+                       gamma_init=glorot_normal)
+    DenseBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
+
+Creates a dense BatchEnsemble layer. BatchEnsemble is a memory-efficient
+alternative to deep ensembles: a deep ensemble of size N trains N separate models,
+making the time and memory complexity O(N * complexity of one network).
+BatchEnsemble instead derives each member's weights from a shared weight matrix W
+and a pair of rank-1 vectors R (alpha) and S (gamma); the outer product RS' is
+multiplied element-wise with W. R and S are also called fast weights.
+
+Reference - https://arxiv.org/abs/2002.06715
+
+During both training and testing, we repeat the samples along the batch dimension
+N times, where N is the ensemble_size. For example, if each mini batch has 10 samples
+and our ensemble size is 4, then the actual input to the layer has 40 samples.
+The output of the layer has 40 samples as well, and each group of 10 samples can be
+considered the output of one ensemble member.
+
+# Fields
+- `layer`: The dense layer which transforms the perturbed input to output
+- `alpha`: The first fast weight of size (in_dim, ensemble_size)
+- `gamma`: The second fast weight of size (out_dim, ensemble_size)
+- `ensemble_bias`: Bias added to the ensemble output, separate from the dense layer bias
+- `ensemble_act`: The activation function applied to the ensemble output
+- `rank`: Rank of the fast weights (rank > 1 doesn't work on GPU for now)
+
+# Arguments
+- `in::Integer`: Input dimension of features
+- `out::Integer`: Output dimension of features
+- `rank::Integer`: Rank of the fast weights
+- `ensemble_size::Integer`: Number of models in the ensemble
+- `σ::F=identity`: Activation of the dense layer, defaults to identity
+- `init=glorot_normal`: Initialization function, defaults to glorot_normal
+- `alpha_init=glorot_normal`: Initialization function for the alpha fast weight,
+    defaults to glorot_normal
+- `gamma_init=glorot_normal`: Initialization function for the gamma fast weight,
+    defaults to glorot_normal
+- `bias::Bool=true`: Toggle the usage of bias in the dense layer
+- `ensemble_bias::Bool=true`: Toggle the usage of ensemble bias
+- `ensemble_act::F=identity`: Activation function for ensemble outputs
+"""
+struct DenseBatchEnsemble{L,F,M,B}
+    layer::L
+    alpha::M
+    gamma::M
+    ensemble_bias::B
+    ensemble_act::F
+    rank::Any
+    function DenseBatchEnsemble(
+        layer::L,
+        alpha::M,
+        gamma::M,
+        ensemble_bias = true,
+        ensemble_act::F = identity,
+        rank = 1,
+    ) where {M,F,L}
+        ensemble_bias = create_bias(gamma, ensemble_bias, size(gamma)[1], size(gamma)[2])
+        new{typeof(layer),F,M,typeof(ensemble_bias)}(
+            layer,
+            alpha,
+            gamma,
+            ensemble_bias,
+            ensemble_act,
+            rank,
+        )
+    end
+end
+
+function DenseBatchEnsemble(
+    in::Integer,
+    out::Integer,
+    rank::Integer,
+    ensemble_size::Integer,
+    σ = identity;
+    init = glorot_normal,
+    alpha_init = glorot_normal,
+    gamma_init = glorot_normal,
+    bias = true,
+    ensemble_bias = true,
+    ensemble_act = identity,
+)
+    layer = Flux.Dense(in, out, σ; init = init, bias = bias)
+    if rank >= 1
+        alpha_shape = (in, ensemble_size, rank)
+        gamma_shape = (out, ensemble_size, rank)
+    else
+        error("Rank must be >= 1.")
+    end
+    alpha = alpha_init(alpha_shape)
+    gamma = gamma_init(gamma_shape)
+
+    return DenseBatchEnsemble(layer, alpha, gamma, ensemble_bias, ensemble_act, rank)
+end
+
+@functor DenseBatchEnsemble
+
+"""
+The forward pass for a DenseBatchEnsemble layer. The input is initially perturbed
+using the first fast weight, then passed through the dense layer, and finally
+multiplied by the second fast weight.
+
+# Arguments
+- `x::AbstractVecOrMat`: Input tensors
+"""
+function (be::DenseBatchEnsemble)(x)
+    layer = be.layer
+    alpha = be.alpha
+    gamma = be.gamma
+    e_b = be.ensemble_bias
+    e_σ = be.ensemble_act
+    rank = be.rank
+
+    batch_size = size(x)[end]
+    in_size = size(alpha)[1]
+    out_size = size(gamma)[1]
+    ensemble_size = size(alpha)[2]
+    samples_per_model = batch_size ÷ ensemble_size
+
+    # Alpha, gamma shapes - [units, ensembles, rank]
+    alpha = reshape(alpha, (in_size, ensemble_size * rank))
+    gamma = reshape(gamma, (out_size, ensemble_size * rank))
+    # Repeat breaks on GPU when input dims > 2
+    alpha = repeat(alpha, samples_per_model)
+    gamma = repeat(gamma, samples_per_model)
+    # Reshape alpha, gamma to [units, batch_size, rank]
+    alpha = reshape(alpha, (in_size, batch_size, rank))
+    gamma = reshape(gamma, (out_size, batch_size, rank))
+    # Reshape inputs to [units, batch_size, 1] for broadcasting
+    x = Flux.unsqueeze(x, (ndims(x) + 1))
+    # Perturb the inputs
+    perturbed_x = x .* alpha
+    # Dense layer forward pass
+    outputs = layer(perturbed_x) .* gamma
+    # Reduce the rank dimension by summing over it
+    outputs = sum(outputs, dims = 3)
+    outputs = reshape(outputs, (out_size, samples_per_model, ensemble_size))
+    # Reshape the ensemble bias for broadcasting
+    e_b = Flux.unsqueeze(e_b, ndims(e_b))
+
+    outputs = e_σ.(outputs .+ e_b)
+    outputs = reshape(outputs, (out_size, batch_size))
+    return outputs
+end
diff --git a/test/cuda/layers/batchensemble_gpu.jl b/test/cuda/layers/batchensemble_gpu.jl
new file mode 100644
index 00000000..2d6ed695
--- /dev/null
+++ b/test/cuda/layers/batchensemble_gpu.jl
@@ -0,0 +1,77 @@
+@testset "Dense batchensemble" begin
+    ensemble_size = 4
+    samples_per_model = 4
+    input_dim = 5
+    output_dim = 5
+    rank = 1
+    inputs = rand(Float32, input_dim, samples_per_model)
+    layer = DenseBatchEnsemble(
+        input_dim,
+        output_dim,
+        rank,
+        ensemble_size;
+        alpha_init = ones,
+        gamma_init = ones,
+    )
+    layer = layer |> gpu
+    batch_inputs = gpu(repeat(inputs, 1, ensemble_size))
+    batch_outputs = layer(batch_inputs)
+    # Do the computation in a for loop to compare outputs
+    layer = layer |> cpu
+    loop_outputs = []
+    for i = 1:ensemble_size
+        perturbed_inputs = inputs .* layer.alpha[i]
+        outputs = layer.layer(perturbed_inputs) .* layer.gamma[i]
+        outputs = layer.ensemble_act.(outputs .+ layer.ensemble_bias[i])
+        push!(loop_outputs, outputs)
+    end
+    loop_outputs = Flux.batch(loop_outputs)
+    loop_outputs = reshape(loop_outputs, (output_dim, samples_per_model * ensemble_size))
+    @test batch_outputs isa CuArray
+    @test size(batch_outputs) == size(loop_outputs)
+    @test isapprox(cpu(batch_outputs), loop_outputs, atol = 0.05)
+end
+
+@testset "ConvBatchEnsemble" begin
+    ensemble_size = 4
+    samples_per_model = 4
+    input_dim = 5
+    output_dim = 10
+    rank = 1
+    inputs = rand(Float32, 10, 10, input_dim, samples_per_model)
+    beconv = ConvBatchEnsemble(
+        (5, 5),
+        5 => 10,
+        rank,
+        ensemble_size,
+        relu;
+        alpha_init = ones,
+        gamma_init = ones,
+    )
+    beconv = beconv |> gpu
+    batch_inputs = gpu(repeat(inputs, 1, 1, 1, ensemble_size))
+    batch_outputs = beconv(batch_inputs)
+    # Do the computation in a for loop to compare outputs
+    beconv = beconv |> cpu
+    loop_outputs = []
+    for i = 1:ensemble_size
+        perturbed_inputs = inputs .* beconv.alpha[i]
+        outputs = beconv.layer(perturbed_inputs) .* beconv.gamma[i]
+        outputs = beconv.ensemble_act.(outputs .+ beconv.ensemble_bias[i])
+        push!(loop_outputs, outputs)
+    end
+    loop_outputs = Flux.batch(loop_outputs)
+    loop_outputs_size = size(batch_outputs)
+    loop_outputs = reshape(
+        loop_outputs,
+        (
+            loop_outputs_size[1],
+            loop_outputs_size[2],
+            output_dim,
+            samples_per_model * ensemble_size,
+        ),
+    )
+    @test batch_outputs isa CuArray
+    @test size(batch_outputs) == size(loop_outputs)
+    @test isapprox(cpu(batch_outputs), loop_outputs, atol = 0.05)
+end
diff --git a/test/cuda/layers/mclayers_gpu.jl b/test/cuda/layers/mclayers_gpu.jl
new file mode 100644
index 00000000..f176cfbf
--- /dev/null
+++ b/test/cuda/layers/mclayers_gpu.jl
@@ -0,0 +1,37 @@
+function test_sparsity(x, target_sparsity; atol = 0.05)
+    number_of_zeros = count(ele -> (ele == 0.0), x)
+    sparsity = number_of_zeros / sum(length, x)
+    @test isapprox(target_sparsity, sparsity; atol)
+end
+
+@testset "MC Dense GPU" begin
+    dropout_rate = 0.35
+    # Test the MC Dense layer
+    a = gpu(rand(Float32, 8, 32))
+    layer = gpu(MCDense(8, 16, dropout_rate))
+    output = layer(a)
+    # Test that the output is a CuArray
+    @test output isa CuArray
+    @test isequal(size(output), (16, 32))
+    test_sparsity(output, dropout_rate)
+    # Test the MC dense dropout toggle
+    output = layer(a, dropout = false)
+    test_sparsity(output, 0)
+end
+
+@testset "MC Conv GPU" begin
+    dropout_rate = 0.4
+    # Test the MC conv layer
+    a = gpu(rand(Float32, 32, 32, 3, 32))
+    layer = MCConv((5, 5), 3 => 6, dropout_rate) |> gpu
+    output = layer(a)
+    # Test that the output is a CuArray
+    @test output isa CuArray
+    # Test the output shape
+    @test isequal(size(output), (28, 28, 6, 32))
+    # Test the sparsity percentage in the array
+    test_sparsity(output, dropout_rate)
+    # Test the MC conv dropout toggle
+    output = layer(a, dropout = false)
+    test_sparsity(output, 0)
+end
diff --git a/test/cuda/runtests.jl b/test/cuda/runtests.jl
new file mode 100644
index 00000000..8e9a4141
--- /dev/null
+++ b/test/cuda/runtests.jl
@@ -0,0 +1,7 @@
+using Flux, Test, CUDA
+
+@info "Testing GPU Support"
+CUDA.allowscalar(false)
+
+include("layers/mclayers_gpu.jl")
+include("layers/batchensemble_gpu.jl")
diff --git a/test/layers/batchensemble.jl b/test/layers/batchensemble.jl
new file mode 100644
index 00000000..4107178e
--- /dev/null
+++ b/test/layers/batchensemble.jl
@@ -0,0 +1,72 @@
+@testset "Dense batchensemble" begin
+    ensemble_size = 4
+    samples_per_model = 4
+    input_dim = 5
+    output_dim = 5
+    rank = 1
+    inputs = rand(Float32, input_dim, samples_per_model)
+    layer = DenseBatchEnsemble(
+        input_dim,
+        output_dim,
+        rank,
+        ensemble_size;
+        alpha_init = ones,
+        gamma_init = ones,
+    )
+    batch_inputs = repeat(inputs, 1, ensemble_size)
+    batch_outputs = layer(batch_inputs)
+    # Do the computation in a for loop to compare outputs
+    loop_outputs = []
+    for i = 1:ensemble_size
+        perturbed_inputs = inputs .* layer.alpha[i]
+        outputs = layer.layer(perturbed_inputs) .* layer.gamma[i]
+        outputs = layer.ensemble_act.(outputs .+ layer.ensemble_bias[i])
+        push!(loop_outputs, outputs)
+    end
+    loop_outputs = Flux.batch(loop_outputs)
+    loop_outputs = reshape(loop_outputs, (output_dim, samples_per_model * ensemble_size))
+    @test size(batch_outputs) == size(loop_outputs)
+    @test isapprox(batch_outputs, loop_outputs, atol = 0.05)
+end
+
+@testset "ConvBatchEnsemble" begin
+    ensemble_size = 4
+    samples_per_model = 4
+    input_dim = 5
+    output_dim = 10
+    rank = 1
+    inputs = rand(Float32, 10, 10, input_dim, samples_per_model)
+    beconv = ConvBatchEnsemble(
+        (5, 5),
+        5 => 10,
+        rank,
+        ensemble_size,
+        relu;
+        alpha_init = ones,
+        gamma_init = ones,
+    )
+    batch_inputs = repeat(inputs, 1, 1, 1, ensemble_size)
+    batch_outputs = beconv(batch_inputs)
+
+    # Do the computation in a for loop to compare outputs
+    loop_outputs = []
+    for i = 1:ensemble_size
+        perturbed_inputs = inputs .* beconv.alpha[i]
+        outputs = beconv.layer(perturbed_inputs) .* beconv.gamma[i]
+        outputs = beconv.ensemble_act.(outputs .+ beconv.ensemble_bias[i])
+        push!(loop_outputs, outputs)
+    end
+    loop_outputs = Flux.batch(loop_outputs)
+    loop_outputs_size = size(batch_outputs)
+    loop_outputs = reshape(
+        loop_outputs,
+        (
+            loop_outputs_size[1],
+            loop_outputs_size[2],
+            output_dim,
+            samples_per_model * ensemble_size,
+        ),
+    )
+    @test size(batch_outputs) == size(loop_outputs)
+    @test isapprox(batch_outputs, loop_outputs, atol = 0.05)
+end
diff --git a/test/layers/mclayers_test.jl b/test/layers/mclayers.jl
similarity index 50%
rename from test/layers/mclayers_test.jl
rename to test/layers/mclayers.jl
index 9d963b4a..1e8c2627 100644
--- a/test/layers/mclayers_test.jl
+++ b/test/layers/mclayers.jl
@@ -1,5 +1,8 @@
-using Test
-using DeepUncertainty: MCDense, MCConv
+function test_sparsity(x, target_sparsity; atol = 0.05)
+    number_of_zeros = count(ele -> (ele == 0.0), x)
+    sparsity = number_of_zeros / sum(length, x)
+    @test isapprox(target_sparsity, sparsity; atol)
+end
 
 @testset "MC Dense" begin
     dropout_rate = 0.35
@@ -7,16 +10,11 @@ using DeepUncertainty: MCDense, MCConv
     a = rand(Float32, 8, 32)
     layer = MCDense(8, 16, dropout_rate)
     output = layer(a)
-    number_of_zeros = count(x -> (x == 0.0), output)
-    sparsity = number_of_zeros / sum(length, output)
     @test isequal(size(output), (16, 32))
-    @test isapprox(dropout_rate, sparsity; atol = 0.05)
-
+    test_sparsity(output, dropout_rate)
     # Test MC dense dropout toggle
     output = layer(a, dropout = false)
-    number_of_zeros = count(x -> (x == 0.0), output)
-    sparsity = number_of_zeros / sum(length, output)
-    @test isapprox(0, sparsity; atol = 0.05)
+    test_sparsity(output, 0)
 end
 
 @testset "MC Conv" begin
@@ -25,16 +23,11 @@ end
     a = rand(Float32, 32, 32, 3, 32)
     layer = MCConv((5, 5), 3 => 6, dropout_rate)
     output = layer(a)
-    number_of_zeros = count(x -> (x == 0.0), output)
-    sparsity = number_of_zeros / sum(length, output)
     # Test the output shape
     @test isequal(size(output), (28, 28, 6, 32))
     # Test the sparsity percentage in the array
-    @test isapprox(dropout_rate, sparsity; atol = 0.05)
-
+    test_sparsity(output, dropout_rate)
     # Test MC conv dropout toggle
     output = layer(a, dropout = false)
-    number_of_zeros = count(x -> (x == 0.0), output)
-    sparsity = number_of_zeros / sum(length, output)
-    @test isapprox(0, sparsity; atol = 0.05)
+    test_sparsity(output, 0)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 99d1e32a..476d7605 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,7 +1,20 @@
 using DeepUncertainty
 using Test
+using Flux
+using Flux.CUDA
+using Flux: cpu, gpu
 
 @testset "Layers" begin
-    # Test the layers
-    include("./layers/mclayers_test.jl")
+    # MC layers
+    include("./layers/mclayers.jl")
+    # Batch ensemble layers
+    include("./layers/batchensemble.jl")
+end
+
+@testset "CUDA" begin
+    if CUDA.functional()
+        include("cuda/runtests.jl")
+    else
+        @warn "CUDA unavailable, not testing GPU support"
+    end
 end
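For reviewers who want to exercise the new layers outside the MNIST script, the snippet below shows the intended call pattern: repeat the batch `ensemble_size` times, run the forward pass, and reshape to recover per-member predictions. It is a hedged sketch rather than part of the diff; it relies only on the `DenseBatchEnsemble` export added above, uses made-up dimensions, and assumes the batch size is a multiple of `ensemble_size` (as the layers require).

```julia
# Minimal usage sketch (not part of the diff), mirroring examples/batchensemble.jl.
using Flux, Statistics
using DeepUncertainty

ensemble_size, rank = 4, 1
model = Chain(
    DenseBatchEnsemble(10, 32, rank, ensemble_size, relu),
    DenseBatchEnsemble(32, 3, rank, ensemble_size),
)

x = rand(Float32, 10, 8)              # a mini batch of 8 samples
x = repeat(x, 1, ensemble_size)       # 32 samples: one copy per ensemble member
ŷ = softmax(model(x), dims = 1)

# Split the batch back into [classes, samples, members] and average over members
per_member = reshape(ŷ, 3, 8, ensemble_size)
mean_prediction = dropdims(mean(per_member, dims = 3), dims = 3)
```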