Skip to content

Releases: hellomoto-ai/splatoon2-ml

VAE-GAN with Adaptive Beta 2

30 May 05:54
48b3a7f
Compare
Choose a tag to compare

This model is at Epoch 31, where its GAN part starts to collapse.

The detail can be found here.

Model: VaeGan(
  (vae): VAE(
    (encoder): Encoder(
      (convs): Sequential(
        (0): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
        (1): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
        (2): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
      )
      (map): Linear(in_features=36864, out_features=2048, bias=True)
    )
    (decoder): Decoder(
      (map): Linear(in_features=1024, out_features=36864, bias=True)
      (convs): Sequential(
        (0): DecoderBlock(
          (0): ConvTranspose2d(256, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (1): DecoderBlock(
          (0): ConvTranspose2d(256, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (2): DecoderBlock(
          (0): ConvTranspose2d(128, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (3): ReflectionPad2d((2, 2, 2, 2))
        (4): Conv2d(64, 3, kernel_size=(5, 5), stride=(1, 1))
      )
    )
  )
  (discriminator): Discriminator(
    (convs): Sequential(
      (0): ReflectionPad2d((2, 2, 2, 2))
      (1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
      (2): ReflectionPad2d((2, 2, 2, 2))
      (3): Conv2d(32, 128, kernel_size=(5, 5), stride=(2, 2))
      (4): ReflectionPad2d((2, 2, 2, 2))
      (5): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2))
      (6): ReflectionPad2d((2, 2, 2, 2))
      (7): Conv2d(256, 256, kernel_size=(5, 5), stride=(2, 2))
    )
    (fc): Sequential(
      (0): Linear(in_features=36864, out_features=512, bias=True)
      (1): ReLU(inplace)
      (2): Linear(in_features=512, out_features=1, bias=True)
      (3): Sigmoid()
    )
  )
)
Optimizers: encoder: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
decoder: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
discriminator: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
Beta: Beta: 10.0
Beta Step: 0.1
Target KLD: 0.1

VAE-GAN with Adaptive Beta

24 May 04:09
4cc2090
Compare
Choose a tag to compare

This model is at Epoch 30, where its GAN part starts to collapse.

The details can be found here.

Model: VaeGan(
  (vae): VAE(
    (encoder): Encoder(
      (convs): Sequential(
        (0): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
        (1): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
        (2): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
      )
      (map): Linear(in_features=36864, out_features=2048, bias=True)
    )
    (decoder): Decoder(
      (map): Linear(in_features=1024, out_features=36864, bias=True)
      (convs): Sequential(
        (0): DecoderBlock(
          (0): ConvTranspose2d(256, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (1): DecoderBlock(
          (0): ConvTranspose2d(256, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (2): DecoderBlock(
          (0): ConvTranspose2d(128, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (3): ReflectionPad2d((2, 2, 2, 2))
        (4): Conv2d(64, 3, kernel_size=(5, 5), stride=(1, 1))
      )
    )
  )
  (discriminator): Discriminator(
    (convs): Sequential(
      (0): ReflectionPad2d((2, 2, 2, 2))
      (1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
      (2): ReflectionPad2d((2, 2, 2, 2))
      (3): Conv2d(32, 128, kernel_size=(5, 5), stride=(2, 2))
      (4): ReflectionPad2d((2, 2, 2, 2))
      (5): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2))
      (6): ReflectionPad2d((2, 2, 2, 2))
      (7): Conv2d(256, 256, kernel_size=(5, 5), stride=(2, 2))
    )
    (fc): Sequential(
      (0): Linear(in_features=36864, out_features=512, bias=True)
      (1): ReLU(inplace)
      (2): Linear(in_features=512, out_features=1, bias=True)
      (3): Sigmoid()
    )
  )
)
Initial Beta: 10.0
Beta Step: 0.1
Target KLD: 0.1

AE-GAN with BN Cheating

16 May 16:12
dd01034
Compare
Choose a tag to compare

AE with Batch Normalization at the last layer of the encoder.

The detail can be found here.

This model is at Epoch 43, which still produces some fake images.

Model: AeGan(
  (ae): AE(
    (encoder): Encoder(
      (convs): Sequential(
        (0): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
        (1): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
        (2): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): ReLU()
        )
      )
      (map): Sequential(
        (0): Linear(in_features=36864, out_features=1024, bias=False)
        (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (decoder): Decoder(
      (map): Linear(in_features=1024, out_features=36864, bias=True)
      (convs): Sequential(
        (0): DecoderBlock(
          (0): ConvTranspose2d(256, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (1): DecoderBlock(
          (0): ConvTranspose2d(256, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (2): DecoderBlock(
          (0): ConvTranspose2d(128, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
          (2): LeakyReLU(negative_slope=0.01)
        )
        (3): ReflectionPad2d((2, 2, 2, 2))
        (4): Conv2d(64, 3, kernel_size=(5, 5), stride=(1, 1))
      )
    )
  )
  (discriminator): Discriminator(
    (convs): Sequential(
      (0): ReflectionPad2d((2, 2, 2, 2))
      (1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
      (2): ReflectionPad2d((2, 2, 2, 2))
      (3): Conv2d(32, 128, kernel_size=(5, 5), stride=(2, 2))
      (4): ReflectionPad2d((2, 2, 2, 2))
      (5): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2))
      (6): ReflectionPad2d((2, 2, 2, 2))
      (7): Conv2d(256, 256, kernel_size=(5, 5), stride=(2, 2))
    )
    (fc): Sequential(
      (0): Linear(in_features=36864, out_features=512, bias=True)
      (1): ReLU(inplace)
      (2): Linear(in_features=512, out_features=1, bias=True)
      (3): Sigmoid()
    )
  )
)

Simple VAE-GAN v3

19 Apr 04:01
09a2fe7
Compare
Choose a tag to compare

The model structure is the same as in the previous releases, but the update methods are tweaked; training is more stable and yields better-quality results.

The detail can be found here.

Simple VAE-GAN v2

16 Apr 05:00
Compare
Choose a tag to compare

Trained with the same setting as v1.0 except fake images generated from randomly sampled latent variables are removed.

No noticeable difference was observed. The details can be found here.

VAE-GAN - Initial Model

13 Apr 16:14
978e9f0
Compare
Choose a tag to compare

First successfully trained VAE-GAN model.

Results are described here.

Trained for 61 epochs, 38735 steps with batch size 64.

Torch version: 1.0.1.post2

Note on implementation difference from VAEGAN-PYTORCH

  • Batch normalization was replaced with instance normalization.
  • No non-linear activation in feature extraction part of the discriminator.
  • No loss-based balancing
  • No gradient clipping
Model: VaeGan(
  (vae): VAE(
    (encoder): Encoder(
      (convs): Sequential(
        (0): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=False, track_running_stats=False)
          (3): ReLU()
        )
        (1): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=False, track_running_stats=False)
          (3): ReLU()
        )
        (2): EncoderBlock(
          (0): ReflectionPad2d((2, 2, 2, 2))
          (1): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), bias=False)
          (2): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=False, track_running_stats=False)
          (3): ReLU()
        )
      )
      (map): Linear(in_features=36864, out_features=2048, bias=True)
    )
    (decoder): Decoder(
      (map): Linear(in_features=1024, out_features=36864, bias=True)
      (convs): Sequential(
        (0): DecoderBlock(
          (0): ConvTranspose2d(256, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=False, track_running_stats=False)
          (2): ReLU()
        )
        (1): DecoderBlock(
          (0): ConvTranspose2d(256, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=False, track_running_stats=False)
          (2): ReLU()
        )
        (2): DecoderBlock(
          (0): ConvTranspose2d(128, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
          (1): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=False, track_running_stats=False)
          (2): ReLU()
        )
        (3): ReflectionPad2d((2, 2, 2, 2))
        (4): Conv2d(64, 3, kernel_size=(5, 5), stride=(1, 1))
      )
    )
  )
  (discriminator): Discriminator(
    (convs): Sequential(
      (0): ReflectionPad2d((2, 2, 2, 2))
      (1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
      (2): ReflectionPad2d((2, 2, 2, 2))
      (3): Conv2d(32, 128, kernel_size=(5, 5), stride=(2, 2))
      (4): ReflectionPad2d((2, 2, 2, 2))
      (5): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2))
      (6): ReflectionPad2d((2, 2, 2, 2))
      (7): Conv2d(256, 256, kernel_size=(5, 5), stride=(2, 2))
    )  # -> Output used for feature matching
    # ReLU applied here before fc
    (fc): Sequential(
      (0): Linear(in_features=36864, out_features=512, bias=True)
      (1): ReLU(inplace)
      (2): Linear(in_features=512, out_features=1, bias=True)
      (3): Sigmoid()
    )
  )
)
Optimizers: encoder: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
decoder: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
discriminator: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)