From 2b1ef28a17ffe1354ed7e929b00cf3c41f133118 Mon Sep 17 00:00:00 2001 From: jbart Date: Sat, 15 Oct 2022 19:37:38 +1000 Subject: [PATCH 01/26] initial commit made necessary files --- recognition/45819061-VQVAE-OASIS/README.md | 0 recognition/45819061-VQVAE-OASIS/dataset.py | 0 recognition/45819061-VQVAE-OASIS/modules.py | 0 recognition/45819061-VQVAE-OASIS/predict.py | 0 recognition/45819061-VQVAE-OASIS/train.py | 0 recognition/45819061-VQVAE-OASIS/utils.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 recognition/45819061-VQVAE-OASIS/README.md create mode 100644 recognition/45819061-VQVAE-OASIS/dataset.py create mode 100644 recognition/45819061-VQVAE-OASIS/modules.py create mode 100644 recognition/45819061-VQVAE-OASIS/predict.py create mode 100644 recognition/45819061-VQVAE-OASIS/train.py create mode 100644 recognition/45819061-VQVAE-OASIS/utils.py diff --git a/recognition/45819061-VQVAE-OASIS/README.md b/recognition/45819061-VQVAE-OASIS/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recognition/45819061-VQVAE-OASIS/predict.py b/recognition/45819061-VQVAE-OASIS/predict.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/recognition/45819061-VQVAE-OASIS/utils.py b/recognition/45819061-VQVAE-OASIS/utils.py new file mode 100644 index 0000000000..e69de29bb2 From bc8ad99e30fc67e69632414e135c4992fa8e4b5f Mon Sep 17 00:00:00 2001 From: jbart Date: Sun, 16 Oct 2022 17:46:00 +1000 Subject: [PATCH 02/26] made vector quantizer --- recognition/45819061-VQVAE-OASIS/modules.py | 50 +++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index e69de29bb2..ea53d8f16e 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -0,0 +1,50 @@ +from matplotlib.cbook import flatten +import numpy as np +import tensorflow as tf +from tensorflow.keras.layers import Dense, Conv2D, Conv2DTranspose, Flatten, Reshape + + +class VectorQuantizer(tf.layers.Layer): + def __init__(self, num_embeddings, embedding_dim, beta=0.25): + super.__init__() + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + self.beta = beta + + # Initialise embeddings + w_init = tf.random_uniform_initializer() + self.embeddings = tf.Variable( + initial_value=w_init(shape=(self.embedding_dim, self.num_embeddings), dtype='float64'), + trainable=True, + name="VQ" + ) + + def call(self, x): + input_shape = tf.shape(x) + flattened = tf.reshape(x, (-1, self.embedding_dim)) + + # Quantization + encoding_indices = self.get_code_indices(flattened) + encodings = tf.one_hot(encoding_indices, self.num_embeddings) + quantized = tf.matmul(encodings, self.embeddings, transpose_b=True) + + quantized = tf.reshape(quantized, input_shape) + + commitment_loss = tf.norm(tf.stop_gradient(quantized) - x)**2 + codebook_loss = tf.norm(tf.stop_gradient(x) - quantized)**2 + self.add_loss(self.beta * commitment_loss + codebook_loss) + + quantized = x + tf.stop_gradient(quantized - x) + 
return quantized + + def get_code_indices(self, flattened_inputs): + similarity = tf.matmul(flattened_inputs, self.embeddings) + distances = ( + tf.reduce_sum(flattened_inputs**2, axis=1, keepdims=True) + + tf.reduce_sum(self.embeddings**2, axis=0) + - 2 * similarity + ) + + encoding_indices = tf.argmin(distances, axis=1) + return encoding_indices + From 70a3a229fbabb1e2f1054a8ce378e007b65d2d6a Mon Sep 17 00:00:00 2001 From: jbart Date: Sun, 16 Oct 2022 22:42:55 +1000 Subject: [PATCH 03/26] made rough vqvae model --- recognition/45819061-VQVAE-OASIS/modules.py | 34 ++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index ea53d8f16e..a9fb4e57b1 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -1,7 +1,7 @@ from matplotlib.cbook import flatten import numpy as np import tensorflow as tf -from tensorflow.keras.layers import Dense, Conv2D, Conv2DTranspose, Flatten, Reshape +from tensorflow.keras.layers import Input, Dense, Conv2D, Conv2DTranspose, Flatten, Reshape class VectorQuantizer(tf.layers.Layer): @@ -48,3 +48,35 @@ def get_code_indices(self, flattened_inputs): encoding_indices = tf.argmin(distances, axis=1) return encoding_indices +class VQVAE(tf.keras.Model): + def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 3)): + super().__init__() + self.latent_dim = latent_dim + + # Build encoder + encoder_in = Input(shape=input_shape) + x = Conv2D(32, 3, strides=2, activation='leakyrelu', padding='same')(encoder_in) + x = Conv2D(64, 3, strides=2, activation='leakyrelu', padding='same')(x) + x = Conv2D(64, 3, strides=2, activation='leakyrelu', padding='same')(x) + encoder_out = Conv2D(latent_dim, 1, padding='same')(x) + self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') + + # Build decoder + decoder_in = Input(shape=self.encoder.output.shape[1:]) + x = Conv2DTranspose(64, 3, strides=2, activation='leakyrelu', padding='same')(encoder_in) + x = Conv2D(64, 3, strides=2, activation='leakyrelu', padding='same')(x) + x = Conv2D(32, 3, strides=2, activation='leakyrelu', padding='same')(x) + decoder_out = Conv2D(1, 3, padding='same')(x) + self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') + + # Add VQ layer + self.vq_layer = VectorQuantizer(num_embeddings=num_embeddings, embedding_dim=latent_dim, name='vq') + + + def call(self, x, training=False): + x = self.encoder(x) + quantized = self.vq_layer(x) + return self.decoder(quantized) + + + \ No newline at end of file From c7fc18f5b1ed46fa06d7aa59746f32407a19120d Mon Sep 17 00:00:00 2001 From: jbart Date: Sun, 16 Oct 2022 22:59:50 +1000 Subject: [PATCH 04/26] edited gitignore to ignore dataset stored locally --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 92459a9d2f..cdb0859737 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -recognition/s4481540_Zhuoxiao_Chen/data/ +recognition/45819061-VQVAE-OASIS/data/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] From eefebeb56271c92dcebcd589fde9a80ebd48e8c8 Mon Sep 17 00:00:00 2001 From: jbart Date: Sun, 16 Oct 2022 23:25:16 +1000 Subject: [PATCH 05/26] fixed typo --- recognition/45819061-VQVAE-OASIS/modules.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 
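
The quantizer above follows the standard VQ-VAE recipe: every latent vector is replaced by its nearest codebook column, the commitment and codebook losses pull the encoder outputs and the embeddings toward each other, and the straight-through line quantized = x + stop_gradient(quantized - x) copies the decoder gradient straight past the non-differentiable argmin. A minimal NumPy sketch of the lookup and the two loss terms (names and shapes here are illustrative assumptions, not the repository code):

import numpy as np

def quantize(latents, codebook, beta=0.25):
    # latents: (N, D) encoder outputs; codebook: (D, K) embedding matrix.
    # Squared Euclidean distance from every latent to every codebook column: (N, K).
    distances = (
        (latents ** 2).sum(axis=1, keepdims=True)
        - 2.0 * latents @ codebook
        + (codebook ** 2).sum(axis=0)
    )
    indices = distances.argmin(axis=1)        # index of the nearest codebook entry
    quantized = codebook[:, indices].T        # (N, D) quantized latents
    # In the layer above, stop_gradient routes these two terms to different weights
    # (encoder vs. codebook); numerically they are the same quantity in this sketch.
    commitment_loss = ((quantized - latents) ** 2).mean()
    codebook_loss = ((latents - quantized) ** 2).mean()
    return quantized, indices, beta * commitment_loss + codebook_loss

rng = np.random.default_rng(0)
z = rng.normal(size=(5, 8))       # five 8-dimensional latent vectors
emb = rng.normal(size=(8, 16))    # codebook of 16 embeddings
q, idx, vq_loss = quantize(z, emb)
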
a9fb4e57b1..6229cef728 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -64,9 +64,9 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 3)): # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) x = Conv2DTranspose(64, 3, strides=2, activation='leakyrelu', padding='same')(encoder_in) - x = Conv2D(64, 3, strides=2, activation='leakyrelu', padding='same')(x) - x = Conv2D(32, 3, strides=2, activation='leakyrelu', padding='same')(x) - decoder_out = Conv2D(1, 3, padding='same')(x) + x = Conv2DTranspose(64, 3, strides=2, activation='leakyrelu', padding='same')(x) + x = Conv2DTranspose(32, 3, strides=2, activation='leakyrelu', padding='same')(x) + decoder_out = Conv2DTranspose(1, 3, padding='same')(x) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') # Add VQ layer From ebb6ff0bed24274317e63103ceffa594d3563477 Mon Sep 17 00:00:00 2001 From: jbart Date: Sun, 16 Oct 2022 23:42:42 +1000 Subject: [PATCH 06/26] loaded datasets and applied scaling --- recognition/45819061-VQVAE-OASIS/dataset.py | 31 +++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index e69de29bb2..1841ab55bb 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -0,0 +1,31 @@ +import tensorflow as tf +import numpy as np + +DATA_DIR = './data/keras_png_slices_data' +TRAIN_DATA = DATA_DIR + '/keras_png_slices_train/' +TEST_DATA = DATA_DIR + '/keras_png_slices_test/' +VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate/' + +x_train = tf.keras.utils.image_dataset_from_directory(TRAIN_DATA, labels=None) +x_validate = tf.keras.utils.image_dataset_from_directory(VALIDATE_DATA, labels=None) +x_test = tf.keras.utils.image_dataset_from_directory(TEST_DATA, labels=None) + +# get mean, variance of training set +data_info = x_train.reduce((0, 0, 0), + lambda x, y: x + ( + 1, # n + y, # running mean + (x[0]*y - x[1])**2/(x[0]*(x[0]+1)) # running variance + ) + ) + +data_mean = data_info[1]/data_info[0] +data_var = data_info[2]/data_info[0] + +# Basic scaling and preprocessing +def scaling(data): + return (data/255.0) - data/255.0 + +x_train = x_train.apply(scaling) +x_test = x_test.apply(scaling) +x_validate = x_validate.apply(scaling) From 02dafd4aca078417537f99dbb681ab8e043e971a Mon Sep 17 00:00:00 2001 From: jbart Date: Mon, 17 Oct 2022 11:41:43 +1000 Subject: [PATCH 07/26] made dataloading function and littl preprocessing --- recognition/45819061-VQVAE-OASIS/dataset.py | 66 ++++++++++++--------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index 1841ab55bb..a5d038837e 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -1,31 +1,41 @@ +import os import tensorflow as tf import numpy as np +from tqdm import tqdm -DATA_DIR = './data/keras_png_slices_data' -TRAIN_DATA = DATA_DIR + '/keras_png_slices_train/' -TEST_DATA = DATA_DIR + '/keras_png_slices_test/' -VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate/' - -x_train = tf.keras.utils.image_dataset_from_directory(TRAIN_DATA, labels=None) -x_validate = tf.keras.utils.image_dataset_from_directory(VALIDATE_DATA, labels=None) -x_test = tf.keras.utils.image_dataset_from_directory(TEST_DATA, labels=None) - -# get mean, variance of training set 
-data_info = x_train.reduce((0, 0, 0), - lambda x, y: x + ( - 1, # n - y, # running mean - (x[0]*y - x[1])**2/(x[0]*(x[0]+1)) # running variance - ) - ) - -data_mean = data_info[1]/data_info[0] -data_var = data_info[2]/data_info[0] - -# Basic scaling and preprocessing -def scaling(data): - return (data/255.0) - data/255.0 - -x_train = x_train.apply(scaling) -x_test = x_test.apply(scaling) -x_validate = x_validate.apply(scaling) +BATCH_SIZE = 128 +DATA_DIR = 'data/keras_png_slices_data' +TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' +TEST_DATA = DATA_DIR + '/keras_png_slices_test' +VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate' + +def reader(f): + return tf.io.decode_png(tf.io.read_file(f), channels=1) + +def load(files): + lst = map(reader, tqdm(files)) + imgs = np.asarray(list(lst), dtype='float32') + return imgs + + +def get_data(): + files_train = [os.path.join(TRAIN_DATA, f) for f in os.listdir(TRAIN_DATA) if os.path.isfile(os.path.join(TRAIN_DATA, f))] + files_test = [os.path.join(TEST_DATA, f) for f in os.listdir(TEST_DATA) if os.path.isfile(os.path.join(TEST_DATA, f))] + files_validate = [os.path.join(VALIDATE_DATA, f) for f in os.listdir(VALIDATE_DATA) if os.path.isfile(os.path.join(VALIDATE_DATA, f))] + + print("Loading data") + x_train = load(files_train) + x_test = load(files_test) + x_validate = load(files_validate) + + mean = np.mean(x_train) + var = np.mean(x_train) + + x_train -= 127.5 + x_train /= 127.5 + x_test -= 127.5 + x_test /= 127.5 + x_validate -= 127.5 + x_validate /= 127.5 + + return x_train, x_test, x_validate, mean, var From 8083f6696fda512c83cbb2337b60aaf1dfe932d7 Mon Sep 17 00:00:00 2001 From: jbart Date: Mon, 17 Oct 2022 11:42:01 +1000 Subject: [PATCH 08/26] made model trainer --- recognition/45819061-VQVAE-OASIS/train.py | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index e69de29bb2..79ce49454f 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -0,0 +1,45 @@ +import tensorflow as tf +import numpy as np +from dataset import get_data +from modules import VQVAE + + +class VQVAETrainer (tf.kears.models.Model): + def __init__(self, train_variance, latent_dim=32, num_embeddings=128): + super(VQVAETrainer, self).__init__() + self.train_variance = train_variance + self.latent_dim = latent_dim + self.num_embeddings = num_embeddings + + self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1)) + self.total_loss_tracker = tf.keras.metrics.Mean(name='total_loss') + self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name='reconstruction_loss') + self.vq_loss_tracker = tf.keras.metrics.Mean(name='vq_loss') + + @property + def metrics(self): + return [ + self.total_loss_tracker, + self.reconstruction_loss_tracker, + self.vq_loss_tracker + ] + + def train_step(self, x): + with tf.GradientTape() as tape: + reconstructions = self.model(x) + + reconstruction_loss = (tf.reduce_mean((x - reconstructions)**2)/self.train_variance) + total_loss = reconstruction_loss + sum(self.model.losses) + + grads = tape.gradient(total_loss, self.model.trainable_variables) + self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables)) + + self.total_loss_tracker.update_state(total_loss) + self.reconstruction_loss_tracker.update_state(reconstruction_loss) + self.vq_loss_tracker.update_state(sum(self.model.losses)) + + return { + "loss": self.total_loss_tracker.result(), + 
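
The trainer above is the standard Keras custom-training pattern: subclass tf.keras.Model, override train_step, compute the reconstruction loss plus whatever the quantizer registered through add_loss inside a GradientTape, apply the gradients, and report everything through metric trackers so fit() can log it. A stripped-down sketch of that pattern, with a generic inner model standing in for the VQ-VAE (an assumption for illustration, not the repository code):

import tensorflow as tf

class ReconstructionTrainer(tf.keras.Model):
    def __init__(self, inner_model):
        super().__init__()
        self.inner_model = inner_model
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        # Listing the trackers here lets Keras reset them at the start of each epoch.
        return [self.loss_tracker]

    def train_step(self, x):
        with tf.GradientTape() as tape:
            reconstructions = self.inner_model(x, training=True)
            # Reconstruction term plus any layer losses (e.g. the VQ losses).
            loss = tf.reduce_mean(tf.square(x - reconstructions)) + sum(self.inner_model.losses)
        grads = tape.gradient(loss, self.inner_model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.inner_model.trainable_variables))
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

After compile(optimizer=...) such a trainer is used with fit() exactly like a built-in model, which is what the calls further down do.
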
"reconstruction_loss": self.reconstruction_loss_tracker.result(), + "vqvae_loss": self.vq_loss_tracker.result() + } \ No newline at end of file From bf5fabb480c5f02f8d194d932b7218627eb9695d Mon Sep 17 00:00:00 2001 From: jbart Date: Mon, 17 Oct 2022 12:32:00 +1000 Subject: [PATCH 09/26] bug fixing and training testing --- recognition/45819061-VQVAE-OASIS/modules.py | 32 +++++++++++---------- recognition/45819061-VQVAE-OASIS/train.py | 14 +++++++-- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 6229cef728..fa1bfc5d23 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -1,22 +1,22 @@ from matplotlib.cbook import flatten import numpy as np import tensorflow as tf -from tensorflow.keras.layers import Input, Dense, Conv2D, Conv2DTranspose, Flatten, Reshape +from tensorflow.keras.layers import Input, Layer, Dense, Conv2D, Conv2DTranspose, Flatten, Reshape -class VectorQuantizer(tf.layers.Layer): - def __init__(self, num_embeddings, embedding_dim, beta=0.25): - super.__init__() +class VectorQuantizer(Layer): + def __init__(self, num_embeddings, embedding_dim, beta=0.25, name="VQ"): + super().__init__() self.num_embeddings = num_embeddings self.embedding_dim = embedding_dim self.beta = beta - # Initialise embeddings + # Initialise flattenedembeddings w_init = tf.random_uniform_initializer() self.embeddings = tf.Variable( - initial_value=w_init(shape=(self.embedding_dim, self.num_embeddings), dtype='float64'), + initial_value=w_init(shape=(self.embedding_dim, self.num_embeddings), dtype='float32'), trainable=True, - name="VQ" + name=name ) def call(self, x): @@ -49,29 +49,31 @@ def get_code_indices(self, flattened_inputs): return encoding_indices class VQVAE(tf.keras.Model): - def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 3)): + def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): super().__init__() self.latent_dim = latent_dim # Build encoder encoder_in = Input(shape=input_shape) - x = Conv2D(32, 3, strides=2, activation='leakyrelu', padding='same')(encoder_in) - x = Conv2D(64, 3, strides=2, activation='leakyrelu', padding='same')(x) - x = Conv2D(64, 3, strides=2, activation='leakyrelu', padding='same')(x) + x = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) + x = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x) + x = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x) encoder_out = Conv2D(latent_dim, 1, padding='same')(x) self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) - x = Conv2DTranspose(64, 3, strides=2, activation='leakyrelu', padding='same')(encoder_in) - x = Conv2DTranspose(64, 3, strides=2, activation='leakyrelu', padding='same')(x) - x = Conv2DTranspose(32, 3, strides=2, activation='leakyrelu', padding='same')(x) - decoder_out = Conv2DTranspose(1, 3, padding='same')(x) + x = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(decoder_in) + x = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(x) + x = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(x) + decoder_out = Conv2DTranspose(1, 4, padding='same')(x) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') # Add VQ layer self.vq_layer = 
VectorQuantizer(num_embeddings=num_embeddings, embedding_dim=latent_dim, name='vq') + #self.summary() + def call(self, x, training=False): x = self.encoder(x) diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 79ce49454f..7eb8547a85 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -1,10 +1,10 @@ import tensorflow as tf import numpy as np -from dataset import get_data +from dataset import BATCH_SIZE, get_data from modules import VQVAE -class VQVAETrainer (tf.kears.models.Model): +class VQVAETrainer (tf.keras.models.Model): def __init__(self, train_variance, latent_dim=32, num_embeddings=128): super(VQVAETrainer, self).__init__() self.train_variance = train_variance @@ -42,4 +42,12 @@ def train_step(self, x): "loss": self.total_loss_tracker.result(), "reconstruction_loss": self.reconstruction_loss_tracker.result(), "vqvae_loss": self.vq_loss_tracker.result() - } \ No newline at end of file + } + + +x_train, x_test, x_validate, mean, variance = get_data() + +vqvae_trainer = VQVAETrainer(variance, 32, 128) +vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) +vqvae_trainer.fit(x_train, epochs=5, batch_size=BATCH_SIZE, use_multiprocessing=True, validation_data=x_validate) +vqvae_trainer.evaluate(x_test) \ No newline at end of file From 3c16cf431cf902c078780040573138a074fef000 Mon Sep 17 00:00:00 2001 From: jbart Date: Mon, 17 Oct 2022 12:34:29 +1000 Subject: [PATCH 10/26] refactoring to modules --- recognition/45819061-VQVAE-OASIS/modules.py | 18 +++++++++--------- recognition/45819061-VQVAE-OASIS/train.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index fa1bfc5d23..04409b84b6 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -55,23 +55,23 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): # Build encoder encoder_in = Input(shape=input_shape) - x = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) - x = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x) - x = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x) - encoder_out = Conv2D(latent_dim, 1, padding='same')(x) + x1 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) + x2 = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x1) + x3 = Conv2D(64, 3, strides=2, activation='leaky_relu', padding='same')(x2) + encoder_out = Conv2D(latent_dim, 1, padding='same')(x3) self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) - x = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(decoder_in) - x = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(x) - x = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(x) - decoder_out = Conv2DTranspose(1, 4, padding='same')(x) + y1 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(decoder_in) + y2 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y1) + y3 = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y2) + decoder_out = Conv2DTranspose(1, 4, padding='same')(y3) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') # Add VQ 
layer self.vq_layer = VectorQuantizer(num_embeddings=num_embeddings, embedding_dim=latent_dim, name='vq') - + #self.summary() diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 7eb8547a85..86c275cf72 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -49,5 +49,5 @@ def train_step(self, x): vqvae_trainer = VQVAETrainer(variance, 32, 128) vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) -vqvae_trainer.fit(x_train, epochs=5, batch_size=BATCH_SIZE, use_multiprocessing=True, validation_data=x_validate) +vqvae_trainer.fit(x_train, epochs=5, batch_size=BATCH_SIZE, use_multiprocessing=True) vqvae_trainer.evaluate(x_test) \ No newline at end of file From 6c15f7dcbe3825b1a13a0a97693d5214c995c4e7 Mon Sep 17 00:00:00 2001 From: jbart Date: Mon, 17 Oct 2022 17:02:35 +1000 Subject: [PATCH 11/26] added residual layers and refactored accordingly --- recognition/45819061-VQVAE-OASIS/dataset.py | 9 ++---- recognition/45819061-VQVAE-OASIS/modules.py | 26 +++++++++++----- recognition/45819061-VQVAE-OASIS/train.py | 33 ++++++++++++++++++--- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index a5d038837e..95a2811bb8 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -31,11 +31,8 @@ def get_data(): mean = np.mean(x_train) var = np.mean(x_train) - x_train -= 127.5 - x_train /= 127.5 - x_test -= 127.5 - x_test /= 127.5 - x_validate -= 127.5 - x_validate /= 127.5 + x_train = x_train/255.0 - 0.5 + x_test = x_test/255.0 - 0.5 + x_validate = x_validate/255.0 - 0.5 return x_train, x_test, x_validate, mean, var diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 04409b84b6..0bdbf25b14 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -1,7 +1,8 @@ +from base64 import decode from matplotlib.cbook import flatten import numpy as np import tensorflow as tf -from tensorflow.keras.layers import Input, Layer, Dense, Conv2D, Conv2DTranspose, Flatten, Reshape +from tensorflow.keras.layers import Input, Layer, ReLU, Add, Dense, Conv2D, Conv2DTranspose, Flatten, Reshape class VectorQuantizer(Layer): @@ -11,7 +12,7 @@ def __init__(self, num_embeddings, embedding_dim, beta=0.25, name="VQ"): self.embedding_dim = embedding_dim self.beta = beta - # Initialise flattenedembeddings + # Initialise flattened embeddings w_init = tf.random_uniform_initializer() self.embeddings = tf.Variable( initial_value=w_init(shape=(self.embedding_dim, self.num_embeddings), dtype='float32'), @@ -48,6 +49,15 @@ def get_code_indices(self, flattened_inputs): encoding_indices = tf.argmin(distances, axis=1) return encoding_indices + +def resblock(x, filters): + skip = Conv2D(filters, 1, strides=1, padding='same')(x) + x = Conv2D(filters, 3, strides=1, padding='same')(x) + x = ReLU()(x) + x = Conv2D(filters, 1, strides=1, padding='same')(x) + out = Add()([x, skip]) + return ReLU(out) + class VQVAE(tf.keras.Model): def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): super().__init__() @@ -57,16 +67,16 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): encoder_in = Input(shape=input_shape) x1 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) x2 = Conv2D(64, 4, strides=2, 
activation='leaky_relu', padding='same')(x1) - x3 = Conv2D(64, 3, strides=2, activation='leaky_relu', padding='same')(x2) - encoder_out = Conv2D(latent_dim, 1, padding='same')(x3) + x3 = resblock(x2, 256) + encoder_out = resblock(x3, 256) self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) - y1 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(decoder_in) - y2 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y1) - y3 = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y2) - decoder_out = Conv2DTranspose(1, 4, padding='same')(y3) + y1 = resblock(decoder_in, 256) + y2 = resblock(y1, 256) + y3 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y2) + decoder_out = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y3) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') # Add VQ layer diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 86c275cf72..4500386eb6 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -1,3 +1,4 @@ +from matplotlib import pyplot as plt import tensorflow as tf import numpy as np from dataset import BATCH_SIZE, get_data @@ -25,6 +26,7 @@ def metrics(self): ] def train_step(self, x): + with tf.GradientTape() as tape: reconstructions = self.model(x) @@ -45,9 +47,32 @@ def train_step(self, x): } -x_train, x_test, x_validate, mean, variance = get_data() +x_train, x_test, x_validate, mean, data_variance = get_data() -vqvae_trainer = VQVAETrainer(variance, 32, 128) +data_variance = np.var(x_train / 255.0) + +vqvae_trainer = VQVAETrainer(data_variance, 128, 256) vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) -vqvae_trainer.fit(x_train, epochs=5, batch_size=BATCH_SIZE, use_multiprocessing=True) -vqvae_trainer.evaluate(x_test) \ No newline at end of file +vqvae_trainer.fit(x_train, epochs=30, batch_size=BATCH_SIZE, use_multiprocessing=True) + +def show_subplot(original, reconstructed): + plt.subplot(1, 2, 1) + plt.imshow(original.squeeze() + 0.5) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstructed.squeeze() + 0.5) + plt.title("Reconstructed") + plt.axis("off") + + plt.show() + + +trained_vqvae_model = vqvae_trainer.model +idx = np.random.choice(len(x_test), 10) +test_images = x_test[idx] +reconstructions_test = trained_vqvae_model.predict(test_images) + +for test_image, reconstructed_image in zip(test_images, reconstructions_test): + show_subplot(test_image, reconstructed_image) \ No newline at end of file From b9fa0e98afdad60118a6a343da58a9f163628a67 Mon Sep 17 00:00:00 2001 From: jbart Date: Mon, 17 Oct 2022 17:05:02 +1000 Subject: [PATCH 12/26] fixed a silly typo --- recognition/45819061-VQVAE-OASIS/modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 0bdbf25b14..c37d9d6e94 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -56,7 +56,7 @@ def resblock(x, filters): x = ReLU()(x) x = Conv2D(filters, 1, strides=1, padding='same')(x) out = Add()([x, skip]) - return ReLU(out) + return ReLU()(out) class VQVAE(tf.keras.Model): def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): From 
fa5557122f24d6ea8203adabd0e097a60bb4c143 Mon Sep 17 00:00:00 2001 From: jbart Date: Wed, 19 Oct 2022 19:04:48 +1000 Subject: [PATCH 13/26] edit gitignore, added save model --- recognition/45819061-VQVAE-OASIS/.gitignore | 6 +++++ recognition/45819061-VQVAE-OASIS/dataset.py | 2 +- recognition/45819061-VQVAE-OASIS/modules.py | 15 +++++------ recognition/45819061-VQVAE-OASIS/train.py | 28 ++++++++++++++++++--- 4 files changed, 40 insertions(+), 11 deletions(-) create mode 100644 recognition/45819061-VQVAE-OASIS/.gitignore diff --git a/recognition/45819061-VQVAE-OASIS/.gitignore b/recognition/45819061-VQVAE-OASIS/.gitignore new file mode 100644 index 0000000000..e815fdc86c --- /dev/null +++ b/recognition/45819061-VQVAE-OASIS/.gitignore @@ -0,0 +1,6 @@ + +# ignore data folder +data/ + +# constructed model +mymodel/ \ No newline at end of file diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index 95a2811bb8..1457c7bd61 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -3,7 +3,7 @@ import numpy as np from tqdm import tqdm -BATCH_SIZE = 128 +BATCH_SIZE = 64 DATA_DIR = 'data/keras_png_slices_data' TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' TEST_DATA = DATA_DIR + '/keras_png_slices_test' diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index c37d9d6e94..afaafbf26c 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -2,7 +2,7 @@ from matplotlib.cbook import flatten import numpy as np import tensorflow as tf -from tensorflow.keras.layers import Input, Layer, ReLU, Add, Dense, Conv2D, Conv2DTranspose, Flatten, Reshape +from tensorflow.keras.layers import Input, Layer, ReLU, Add, Conv2D, Conv2DTranspose class VectorQuantizer(Layer): @@ -50,7 +50,7 @@ def get_code_indices(self, flattened_inputs): return encoding_indices -def resblock(x, filters): +def resblock(x, filters=256): skip = Conv2D(filters, 1, strides=1, padding='same')(x) x = Conv2D(filters, 3, strides=1, padding='same')(x) x = ReLU()(x) @@ -67,16 +67,17 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): encoder_in = Input(shape=input_shape) x1 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) x2 = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x1) - x3 = resblock(x2, 256) - encoder_out = resblock(x3, 256) + x3 = resblock(x2, 64) + x4 = resblock(x3, 64) + encoder_out = Conv2D(latent_dim, 1, padding="same")(x4) self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) - y1 = resblock(decoder_in, 256) - y2 = resblock(y1, 256) + y1 = resblock(decoder_in, 64) + y2 = resblock(y1, 64) y3 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y2) - decoder_out = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y3) + decoder_out = Conv2DTranspose(1, 4, strides=2, activation='leaky_relu', padding='same')(y3) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') # Add VQ layer diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 4500386eb6..8082cf1cc2 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -51,9 +51,11 @@ def train_step(self, x): data_variance = np.var(x_train / 255.0) 
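
Dividing the mean-squared error by the variance of the training images, as train_step does with data_variance above, makes the reconstruction term scale-free, so its balance against the VQ losses does not depend on how the pixels happen to be scaled. A tiny self-contained illustration on synthetic arrays (values purely illustrative):

import numpy as np

rng = np.random.default_rng(0)
x = rng.random((8, 64, 64, 1)).astype("float32")                  # stand-in training batch
reconstructions = x + 0.05 * rng.standard_normal(x.shape).astype("float32")

data_variance = np.var(x)                                         # variance of the training data
mse = np.mean((x - reconstructions) ** 2)                         # raw reconstruction error
normalized_mse = mse / data_variance                              # the term actually minimised
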
-vqvae_trainer = VQVAETrainer(data_variance, 128, 256) +vqvae_trainer = VQVAETrainer(data_variance, 16, 64) vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) -vqvae_trainer.fit(x_train, epochs=30, batch_size=BATCH_SIZE, use_multiprocessing=True) +vqvae_trainer.fit(x_train, epochs=2, batch_size=BATCH_SIZE, use_multiprocessing=True) + +vqvae_trainer.model.save('mymodel') def show_subplot(original, reconstructed): plt.subplot(1, 2, 1) @@ -75,4 +77,24 @@ def show_subplot(original, reconstructed): reconstructions_test = trained_vqvae_model.predict(test_images) for test_image, reconstructed_image in zip(test_images, reconstructions_test): - show_subplot(test_image, reconstructed_image) \ No newline at end of file + show_subplot(test_image, reconstructed_image) + +encoder = vqvae_trainer.model.get_layer("encoder") +quantizer = vqvae_trainer.model.get_layer("vector_quantizer") + +encoded_outputs = encoder.predict(test_images) +flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) +codebook_indices = quantizer.get_code_indices(flat_enc_outputs) +codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + +for i in range(len(test_images)): + plt.subplot(1, 2, 1) + plt.imshow(test_images[i].squeeze() + 0.5) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i]) + plt.title("Code") + plt.axis("off") + plt.show() \ No newline at end of file From 5696e84191b5971db7b1791debdb0651ca67af81 Mon Sep 17 00:00:00 2001 From: jbart Date: Wed, 19 Oct 2022 22:32:38 +1000 Subject: [PATCH 14/26] add multiprocessing option to dataloading --- recognition/45819061-VQVAE-OASIS/dataset.py | 25 +++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index 1457c7bd61..c213346f3b 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -3,6 +3,7 @@ import numpy as np from tqdm import tqdm + BATCH_SIZE = 64 DATA_DIR = 'data/keras_png_slices_data' TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' @@ -12,21 +13,31 @@ def reader(f): return tf.io.decode_png(tf.io.read_file(f), channels=1) -def load(files): - lst = map(reader, tqdm(files)) +def load(files, use_multiprocessing=False): + if use_multiprocessing: + import multiprocessing + pool = multiprocessing.Pool(use_multiprocessing) + lst = pool.map(reader, tqdm(files)) + else: + lst = map(reader, tqdm(files)) + imgs = np.asarray(list(lst), dtype='float32') return imgs - -def get_data(): +""" + Load data from predefined paths TRAIN_DATA, TEST_DATA, VALIDATE_DATA. 
+ optional argument use_multiprocessing defaults to false can specify and integer to spawn child + processes to load faster on machines with sufficient capabilities +""" +def get_data(use_multiprocessing=False): files_train = [os.path.join(TRAIN_DATA, f) for f in os.listdir(TRAIN_DATA) if os.path.isfile(os.path.join(TRAIN_DATA, f))] files_test = [os.path.join(TEST_DATA, f) for f in os.listdir(TEST_DATA) if os.path.isfile(os.path.join(TEST_DATA, f))] files_validate = [os.path.join(VALIDATE_DATA, f) for f in os.listdir(VALIDATE_DATA) if os.path.isfile(os.path.join(VALIDATE_DATA, f))] print("Loading data") - x_train = load(files_train) - x_test = load(files_test) - x_validate = load(files_validate) + x_train = load(files_train, use_multiprocessing) + x_test = load(files_test, use_multiprocessing) + x_validate = load(files_validate, use_multiprocessing) mean = np.mean(x_train) var = np.mean(x_train) From 5c1424e81c5e1fec74f977adb00b7c7cda18a199 Mon Sep 17 00:00:00 2001 From: jbart Date: Wed, 19 Oct 2022 23:23:17 +1000 Subject: [PATCH 15/26] variable residual hidden layers added and add png to gitignnore --- .gitignore | 6 +----- recognition/45819061-VQVAE-OASIS/modules.py | 10 +++++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index cdb0859737..84f0261cb9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -recognition/45819061-VQVAE-OASIS/data/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -120,10 +119,7 @@ venv.bak/ # mypy .mypy_cache/ .dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +encoder_out.pyre/ # vscode config file .vscode/ diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index afaafbf26c..b41c77b08c 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -59,7 +59,7 @@ def resblock(x, filters=256): return ReLU()(out) class VQVAE(tf.keras.Model): - def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): + def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), residual_hiddens=64): super().__init__() self.latent_dim = latent_dim @@ -67,15 +67,15 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1)): encoder_in = Input(shape=input_shape) x1 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) x2 = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x1) - x3 = resblock(x2, 64) - x4 = resblock(x3, 64) + x3 = resblock(x2, residual_hiddens) + x4 = resblock(x3, residual_hiddens) encoder_out = Conv2D(latent_dim, 1, padding="same")(x4) self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) - y1 = resblock(decoder_in, 64) - y2 = resblock(y1, 64) + y1 = resblock(decoder_in, residual_hiddens) + y2 = resblock(y1, residual_hiddens) y3 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y2) decoder_out = Conv2DTranspose(1, 4, strides=2, activation='leaky_relu', padding='same')(y3) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') From f607136e30749f0db15cf5127da9f2e09d23ba4e Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 15:27:26 +1000 Subject: [PATCH 16/26] made pixelcnn and trainer --- recognition/45819061-VQVAE-OASIS/modules.py | 45 ++++++++++++ recognition/45819061-VQVAE-OASIS/train.py | 77 ++++++++++++++++++--- 2 files changed, 112 insertions(+), 10 
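
The load helper added in the data-loading patch above optionally fans the PNG decoding out over a pool of worker processes: passing an integer spawns that many children, while the default keeps everything in the parent process. A rough sketch of the same pattern with a placeholder reader (the reader body and file names are assumptions, not the repository code):

import multiprocessing

def read_image(path):
    # Placeholder reader; dataset.py decodes each PNG with tf.io.decode_png instead.
    with open(path, "rb") as f:
        return f.read()

def load(paths, workers=None):
    if workers:
        # e.g. workers=8 decodes files in eight child processes in parallel
        with multiprocessing.Pool(workers) as pool:
            return pool.map(read_image, paths)
    return [read_image(p) for p in paths]

One practical point: the function handed to Pool.map has to be picklable, which is one reason reader is defined at module level in dataset.py.
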
deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index b41c77b08c..ec09996569 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -58,6 +58,51 @@ def resblock(x, filters=256): out = Add()([x, skip]) return ReLU()(out) + +class PixelCNN(Layer): + def __init__(self, mask_type, **kwargs): + super(PixelCNN, self).__init__() + self.mask_type = mask_type + self.conv = Conv2D(**kwargs) + + def build(self, input_shape): + self.conv.build(input_shape) + kernel_shape = self.conv.kernel.get_shape() + self.mask = np.zeros(shape=kernel_shape) + self.mask[:kernel_shape[0]//2, ...] = 1.0 + self.mask[kernel_shape[0]//2, kernel_shape[1]//2, ...] = 1.0 + if self.mask = 'B': + self.mask[kernel_shape[0]//2, kernel_shape[1]//2, kernel_shape[1]//2, ...] = 1.0 + + def call(self, inputs): + self.conv.kernel.assign(self.conv.kernel * self.mask) + return self.conv(inputs) + +class ResidualBlock(Layer): + def __init__(self, filters): + super(ResidualBlock, self).__init__() + self.conv1 = Conv2D(filters=filters, kernel_size=1, activation='leaky_relu') + self.pixelcnn = PixelCNN(mask_type='B', filters=filters//2, kernel_size=3, activation='leaky_relu', padding='same') + self.conv2 = Conv2D(filters=filters, kernel_size=1, activation='leaky_relu') + + def call(self, inputs): + x = self.conv1(inputs) + x = self.pixel_cnn(x) + x = self.conv2(x) + return Add([inputs, x]) + +def get_pixelcnn(input_shape, num_embeddings, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs): + pixelcnn_inputs = Input(shape=input_shape) + onehot = tf.one_hot(pixelcnn_inputs, num_embeddings) + x = PixelCNN(mask_type='A', filters=128, kernel_size=7, activation='leaky_relu', padding='same')(onehot) + for _ in range(num_residual_blocks): + x = ResidualBlock(filters=128) + for _ in range(num_pixelcnn_layers): + x = PixelCNN(mask_type='B', filters=128, kernel_size=1, strides=1, activation='leaky_relu', padding='same') + out = Conv2D(filters=num_embeddings, kernel_size=1, strides=1, padding="valid")(x) + return tf.keras.Model(pixelcnn_inputs, out, name='pixelcnn') + + class VQVAE(tf.keras.Model): def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), residual_hiddens=64): super().__init__() diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 8082cf1cc2..fc0d99376f 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -2,7 +2,7 @@ import tensorflow as tf import numpy as np from dataset import BATCH_SIZE, get_data -from modules import VQVAE +from modules import VQVAE, PixelCNN, get_pixelcnn class VQVAETrainer (tf.keras.models.Model): @@ -12,8 +12,8 @@ def __init__(self, train_variance, latent_dim=32, num_embeddings=128): self.latent_dim = latent_dim self.num_embeddings = num_embeddings - self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1)) - self.total_loss_tracker = tf.keras.metrics.Mean(name='total_loss') + self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1), residual_hiddens=16) + self.total_loss_tracker = tf.keras.metrics.Mean(namsamee='total_loss') self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name='reconstruction_loss') self.vq_loss_tracker = tf.keras.metrics.Mean(name='vq_loss') @@ -22,7 +22,7 @@ def metrics(self): return [ self.total_loss_tracker, self.reconstruction_loss_tracker, - self.vq_loss_tracker + self.vq_loss_tracker, ] def 
train_step(self, x): @@ -43,17 +43,57 @@ def train_step(self, x): return { "loss": self.total_loss_tracker.result(), "reconstruction_loss": self.reconstruction_loss_tracker.result(), - "vqvae_loss": self.vq_loss_tracker.result() + "vqvae_loss": self.vq_loss_tracker.result(), + "ssim": tf.image.ssim(x, reconstructions, max_val=1.0) } + + def test_step(self, x): + x, _ = x + reconstructions = self.model(x, training=False) + return { + "ssim": tf.image.ssim(x, reconstructions, max_val=1.0) + } + -x_train, x_test, x_validate, mean, data_variance = get_data() +x_train, x_test, x_validate, mean, data_variance = get_data(0) data_variance = np.var(x_train / 255.0) +LATENT_DIM = 8 +NUM_EMBEDDINGS = 16 +vqvae_trainer = VQVAETrainer(data_variance, LATENT_DIM, NUM_EMBEDDINGS) +vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) +history = vqvae_trainer.fit( + x=x_train, + epochs=20, + batch_size=BATCH_SIZE, + use_multiprocessing=True, + validation_data=(x_validate, x_validate), + shuffle=True, + validation_freq=1 +) + +# plot loss +plt.plot(history.history['loss']) +plt.plot(history.history['reconstruction_loss']) +plt.plot(history.history['vqvae_loss']) +plt.title('Model Loss') +plt.ylabel('loss') +plt.xlabel('epoch') +plt.ylim((0, 5000)) +plt.legend(['total loss', 'reconstruction loss', 'vqvae loss']) +plt.savefig('losses') +plt.close() + +plt.plot(history.history['ssim']) +plt.plot(history.history['val_ssim']) +plt.title('Model SSIM') +plt.ylabel('ssim') +plt.xlabel('epoch') +plt.legend(['training set', 'validation set']) +plt.savefig('ssim') +plt.close() -vqvae_trainer = VQVAETrainer(data_variance, 16, 64) -vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) -vqvae_trainer.fit(x_train, epochs=2, batch_size=BATCH_SIZE, use_multiprocessing=True) vqvae_trainer.model.save('mymodel') @@ -97,4 +137,21 @@ def show_subplot(original, reconstructed): plt.imshow(codebook_indices[i]) plt.title("Code") plt.axis("off") - plt.show() \ No newline at end of file + plt.show() + +encoded_training = encoder.predict(x_train) +flat_enc_training = encoded_training.reshape(-1, encoded_training.shape[-1]) +codebook_indices_training = quantizer.get_code_indices(flat_enc_training) +codebook_indices_training = codebook_indices_training.numpy().reshape(encoded_training.shape[:-1]) + +encoded_validation = encoder.predict(x_validate) +flat_enc_validation = encoded_validation.reshape(-1, encoded_validation.shape[-1]) +codebook_indices_validation = quantizer.get_code_indices(flat_enc_validation) +codebook_indices_validation = codebook_indices_validation.numpy().reshape(encoded_validation.shape[:-1]) + + +pixelcnn = get_pixelcnn(num_embeddings=NUM_EMBEDDINGS) +pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) +pixelcnn.fit(x=codebook_indices_training, y=codebook_indices_training, batch_size=BATCH_SIZE, epochs=30, validation_data=(codebook_indices_validation, codebook_indices_validation)) + + From 837ad8701b918c44c19db972c4aa1df0f48db0cb Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 15:37:06 +1000 Subject: [PATCH 17/26] made sampler and testing for pixel cnn --- recognition/45819061-VQVAE-OASIS/modules.py | 12 +++++--- recognition/45819061-VQVAE-OASIS/train.py | 33 ++++++++++++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index ec09996569..6f8fd55aba 100644 --- 
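
The PixelCNN layer defined above is a masked convolution: part of the kernel is zeroed before every call so the prediction at a given position can only depend on positions above it, or to its left in the same row. A type 'A' mask (first layer) also hides the centre position, while type 'B' masks (later layers) may look at it. A sketch of the standard masking scheme for a (height, width, in_channels, out_channels) kernel, written in plain NumPy for clarity (illustrative, not the repository code):

import numpy as np

def pixelcnn_mask(kernel_shape, mask_type="B"):
    # kernel_shape = (kh, kw, in_channels, out_channels), as for a Conv2D kernel.
    kh, kw = kernel_shape[0], kernel_shape[1]
    mask = np.zeros(kernel_shape, dtype="float32")
    mask[: kh // 2, ...] = 1.0               # every row strictly above the centre row
    mask[kh // 2, : kw // 2, ...] = 1.0      # same row, strictly left of the centre
    if mask_type == "B":
        mask[kh // 2, kw // 2, ...] = 1.0    # type B may also see the centre position
    return mask

first_layer_mask = pixelcnn_mask((7, 7, 16, 128), "A")   # centre entry stays zero
later_layer_mask = pixelcnn_mask((3, 3, 128, 128), "B")

Multiplying the kernel by such a mask before convolving, as the call method above does, is what makes the network autoregressive and therefore sampleable one position at a time.
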
a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -3,7 +3,7 @@ import numpy as np import tensorflow as tf from tensorflow.keras.layers import Input, Layer, ReLU, Add, Conv2D, Conv2DTranspose - +import tensorflow_probability as tfp class VectorQuantizer(Layer): def __init__(self, num_embeddings, embedding_dim, beta=0.25, name="VQ"): @@ -71,7 +71,7 @@ def build(self, input_shape): self.mask = np.zeros(shape=kernel_shape) self.mask[:kernel_shape[0]//2, ...] = 1.0 self.mask[kernel_shape[0]//2, kernel_shape[1]//2, ...] = 1.0 - if self.mask = 'B': + if self.mask == 'B': self.mask[kernel_shape[0]//2, kernel_shape[1]//2, kernel_shape[1]//2, ...] = 1.0 def call(self, inputs): @@ -136,5 +136,9 @@ def call(self, x, training=False): quantized = self.vq_layer(x) return self.decoder(quantized) - - \ No newline at end of file +def get_pixelcnn_sampler(pixelcnn): + inputs = Input(shape=pixelcnn.input_shape[1:]) + outputs = pixelcnn(inputs, training=False) + categorical_layer = tfp.layers.DistributionLambda(tfp.distributions.Categorical) + outputs = categorical_layer(outputs) + return tf.keras.Model(inputs, outputs) \ No newline at end of file diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index fc0d99376f..3a59ba267c 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -2,7 +2,7 @@ import tensorflow as tf import numpy as np from dataset import BATCH_SIZE, get_data -from modules import VQVAE, PixelCNN, get_pixelcnn +from modules import VQVAE, PixelCNN, get_pixelcnn, get_pixelcnn_sampler class VQVAETrainer (tf.keras.models.Model): @@ -155,3 +155,34 @@ def show_subplot(original, reconstructed): pixelcnn.fit(x=codebook_indices_training, y=codebook_indices_training, batch_size=BATCH_SIZE, epochs=30, validation_data=(codebook_indices_validation, codebook_indices_validation)) +sampler = get_pixelcnn_sampler(pixelcnn) + +prior_batch_size = 10 +priors = np.zeros(shape=(prior_batch_size,) + pixelcnn.input_shape[1:]) +batch, rows, cols = priors.shape + +for row in range(rows): + for col in range(cols): + probs = sampler.predict(priors) + priors[:, row, col] = probs[:, row, col] + +pretrained_embeddings = quantizer.embeddings +prior_onehot = tf.one_hot(priors.astype("int32"), NUM_EMBEDDINGS).numpy() +quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True) +quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) + +# Generate novel images. 
+decoder = vqvae_trainer.model.get_layer("decoder") +generated_samples = decoder.predict(quantized) + +for i in range(batch): + plt.subplot(1, 2, 1) + plt.imshow(priors[i]) + plt.title("Code") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(generated_samples[i].squeeze() + 0.5) + plt.title("Generated Sample") + plt.axis("off") + plt.show() \ No newline at end of file From 6059faa587275030f75cbe4e5723e692a9aec202 Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 18:07:50 +1000 Subject: [PATCH 18/26] code refactoring --- recognition/45819061-VQVAE-OASIS/.gitignore | 5 +- recognition/45819061-VQVAE-OASIS/dataset.py | 12 +- recognition/45819061-VQVAE-OASIS/modules.py | 6 +- recognition/45819061-VQVAE-OASIS/train.py | 260 +++++++++++--------- 4 files changed, 149 insertions(+), 134 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/.gitignore b/recognition/45819061-VQVAE-OASIS/.gitignore index e815fdc86c..f301c338a8 100644 --- a/recognition/45819061-VQVAE-OASIS/.gitignore +++ b/recognition/45819061-VQVAE-OASIS/.gitignore @@ -3,4 +3,7 @@ data/ # constructed model -mymodel/ \ No newline at end of file +mymodel/ + +# all images +**.png \ No newline at end of file diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index c213346f3b..9f4da6a8a8 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -39,11 +39,9 @@ def get_data(use_multiprocessing=False): x_test = load(files_test, use_multiprocessing) x_validate = load(files_validate, use_multiprocessing) - mean = np.mean(x_train) - var = np.mean(x_train) + # scale image data to [-1, 1] range + x_train = x_train/127.5 - 1.0 + x_test = x_test/127.5 - 1.0 + x_validate = x_validate/127.5 - 1.0 - x_train = x_train/255.0 - 0.5 - x_test = x_test/255.0 - 0.5 - x_validate = x_validate/255.0 - 0.5 - - return x_train, x_test, x_validate, mean, var + return x_train, x_test, x_validate diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 6f8fd55aba..25833a1ccf 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -110,8 +110,8 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), # Build encoder encoder_in = Input(shape=input_shape) - x1 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) - x2 = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x1) + x1 = Conv2D(16, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) + x2 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(x1) x3 = resblock(x2, residual_hiddens) x4 = resblock(x3, residual_hiddens) encoder_out = Conv2D(latent_dim, 1, padding="same")(x4) @@ -121,7 +121,7 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), decoder_in = Input(shape=self.encoder.output.shape[1:]) y1 = resblock(decoder_in, residual_hiddens) y2 = resblock(y1, residual_hiddens) - y3 = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y2) + y3 = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y2) decoder_out = Conv2DTranspose(1, 4, strides=2, activation='leaky_relu', padding='same')(y3) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 3a59ba267c..a1bc24c7c1 100644 --- 
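
Generation works exactly as the loop above suggests: the grid of discrete codes starts at zero and is filled one position at a time, re-running the PixelCNN at every step so each new code is drawn conditioned on the codes already placed. A framework-free sketch of that loop, where predict_logits stands in for the trained PixelCNN (an assumed callable, for illustration only):

import numpy as np

def sample_codes(predict_logits, shape, num_embeddings, seed=0):
    # predict_logits(codes) -> (batch, rows, cols, num_embeddings) array of logits.
    rng = np.random.default_rng(seed)
    batch, rows, cols = shape
    codes = np.zeros(shape, dtype="int32")
    for r in range(rows):
        for c in range(cols):
            logits = predict_logits(codes)[:, r, c, :]
            # softmax over the codebook entries for this position
            probs = np.exp(logits - logits.max(axis=-1, keepdims=True))
            probs /= probs.sum(axis=-1, keepdims=True)
            for b in range(batch):
                codes[b, r, c] = rng.choice(num_embeddings, p=probs[b])
    return codes

The sampled indices are then one-hot encoded, multiplied against the learned codebook to recover latent vectors, and pushed through the decoder, which is what the tf.one_hot, tf.matmul and decoder.predict calls above do.
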
a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -2,7 +2,7 @@ import tensorflow as tf import numpy as np from dataset import BATCH_SIZE, get_data -from modules import VQVAE, PixelCNN, get_pixelcnn, get_pixelcnn_sampler +from modules import VQVAE, get_pixelcnn, get_pixelcnn_sampler class VQVAETrainer (tf.keras.models.Model): @@ -13,9 +13,10 @@ def __init__(self, train_variance, latent_dim=32, num_embeddings=128): self.num_embeddings = num_embeddings self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1), residual_hiddens=16) - self.total_loss_tracker = tf.keras.metrics.Mean(namsamee='total_loss') + self.total_loss_tracker = tf.keras.metrics.Mean(name='total_loss') self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name='reconstruction_loss') self.vq_loss_tracker = tf.keras.metrics.Mean(name='vq_loss') + self.ssim_tracker = tf.keras.metrics.Mean(name='ssim') @property def metrics(self): @@ -23,6 +24,7 @@ def metrics(self): self.total_loss_tracker, self.reconstruction_loss_tracker, self.vq_loss_tracker, + self.ssim_tracker ] def train_step(self, x): @@ -40,149 +42,161 @@ def train_step(self, x): self.reconstruction_loss_tracker.update_state(reconstruction_loss) self.vq_loss_tracker.update_state(sum(self.model.losses)) + ssim = tf.image.ssim(x, reconstructions, max_val=2.0) + self.ssim_tracker.update_state(tf.reduce_mean(ssim)) + return { "loss": self.total_loss_tracker.result(), "reconstruction_loss": self.reconstruction_loss_tracker.result(), "vqvae_loss": self.vq_loss_tracker.result(), - "ssim": tf.image.ssim(x, reconstructions, max_val=1.0) + "ssim": self.ssim_tracker.result() } def test_step(self, x): x, _ = x reconstructions = self.model(x, training=False) + ssim = tf.image.ssim(x, reconstructions, max_val=2.0) + self.ssim_tracker.update_state(tf.reduce_mean(ssim)) return { - "ssim": tf.image.ssim(x, reconstructions, max_val=1.0) + "ssim": self.ssim_tracker.result() } - - -x_train, x_test, x_validate, mean, data_variance = get_data(0) - -data_variance = np.var(x_train / 255.0) LATENT_DIM = 8 NUM_EMBEDDINGS = 16 -vqvae_trainer = VQVAETrainer(data_variance, LATENT_DIM, NUM_EMBEDDINGS) -vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) -history = vqvae_trainer.fit( - x=x_train, - epochs=20, - batch_size=BATCH_SIZE, - use_multiprocessing=True, - validation_data=(x_validate, x_validate), - shuffle=True, - validation_freq=1 -) - -# plot loss -plt.plot(history.history['loss']) -plt.plot(history.history['reconstruction_loss']) -plt.plot(history.history['vqvae_loss']) -plt.title('Model Loss') -plt.ylabel('loss') -plt.xlabel('epoch') -plt.ylim((0, 5000)) -plt.legend(['total loss', 'reconstruction loss', 'vqvae loss']) -plt.savefig('losses') -plt.close() - -plt.plot(history.history['ssim']) -plt.plot(history.history['val_ssim']) -plt.title('Model SSIM') -plt.ylabel('ssim') -plt.xlabel('epoch') -plt.legend(['training set', 'validation set']) -plt.savefig('ssim') -plt.close() - - -vqvae_trainer.model.save('mymodel') - -def show_subplot(original, reconstructed): - plt.subplot(1, 2, 1) - plt.imshow(original.squeeze() + 0.5) - plt.title("Original") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(reconstructed.squeeze() + 0.5) - plt.title("Reconstructed") - plt.axis("off") - - plt.show() +x_train, x_test, x_validate = get_data() + +def train(x_train, x_test, x_validate): + data_variance = np.var(x_train) + + vqvae_trainer = VQVAETrainer(data_variance, LATENT_DIM, NUM_EMBEDDINGS) + 
vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) + history = vqvae_trainer.fit( + x=x_train, + epochs=5, + batch_size=BATCH_SIZE, + use_multiprocessing=True, + validation_data=(x_validate, x_validate), + shuffle=True, + validation_freq=1 + ) + + eval_results = vqvae_trainer.evaluate(x_test, x_test, batch_size=BATCH_SIZE) + print("Structured similarity score:", eval_results) + + + # plot loss + plt.plot(history.history['loss']) + plt.plot(history.history['reconstruction_loss']) + plt.plot(history.history['vqvae_loss']) + plt.title('Model Loss') + plt.ylabel('loss') + plt.xlabel('epoch') + plt.ylim((0, 2)) + plt.legend(['total loss', 'reconstruction loss', 'vqvae loss']) + plt.savefig('losses') + plt.close() + -trained_vqvae_model = vqvae_trainer.model -idx = np.random.choice(len(x_test), 10) -test_images = x_test[idx] -reconstructions_test = trained_vqvae_model.predict(test_images) - -for test_image, reconstructed_image in zip(test_images, reconstructions_test): - show_subplot(test_image, reconstructed_image) + plt.ylim((0, 1)) + plt.legend(['training set', 'validation set']) + plt.savefig('ssim') + plt.close() -encoder = vqvae_trainer.model.get_layer("encoder") -quantizer = vqvae_trainer.model.get_layer("vector_quantizer") -encoded_outputs = encoder.predict(test_images) -flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) -codebook_indices = quantizer.get_code_indices(flat_enc_outputs) -codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + vqvae_trainer.model.save('mymodel') -for i in range(len(test_images)): +def show_subplot(original, reconstructed, i): plt.subplot(1, 2, 1) - plt.imshow(test_images[i].squeeze() + 0.5) + plt.imshow(original.squeeze() + 0.5) plt.title("Original") plt.axis("off") plt.subplot(1, 2, 2) - plt.imshow(codebook_indices[i]) - plt.title("Code") - plt.axis("off") - plt.show() - -encoded_training = encoder.predict(x_train) -flat_enc_training = encoded_training.reshape(-1, encoded_training.shape[-1]) -codebook_indices_training = quantizer.get_code_indices(flat_enc_training) -codebook_indices_training = codebook_indices_training.numpy().reshape(encoded_training.shape[:-1]) - -encoded_validation = encoder.predict(x_validate) -flat_enc_validation = encoded_validation.reshape(-1, encoded_validation.shape[-1]) -codebook_indices_validation = quantizer.get_code_indices(flat_enc_validation) -codebook_indices_validation = codebook_indices_validation.numpy().reshape(encoded_validation.shape[:-1]) - - -pixelcnn = get_pixelcnn(num_embeddings=NUM_EMBEDDINGS) -pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) -pixelcnn.fit(x=codebook_indices_training, y=codebook_indices_training, batch_size=BATCH_SIZE, epochs=30, validation_data=(codebook_indices_validation, codebook_indices_validation)) - - -sampler = get_pixelcnn_sampler(pixelcnn) - -prior_batch_size = 10 -priors = np.zeros(shape=(prior_batch_size,) + pixelcnn.input_shape[1:]) -batch, rows, cols = priors.shape - -for row in range(rows): - for col in range(cols): - probs = sampler.predict(priors) - priors[:, row, col] = probs[:, row, col] - -pretrained_embeddings = quantizer.embeddings -prior_onehot = tf.one_hot(priors.astype("int32"), NUM_EMBEDDINGS).numpy() -quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True) -quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) - -# Generate novel images. 
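
Structural similarity (SSIM) is tracked alongside the losses here because it follows perceived reconstruction quality more closely than raw MSE. Since the images were scaled to [-1, 1], the dynamic range passed to tf.image.ssim is 2.0; a value of 1.0 means the reconstruction is identical to the input. A short self-contained example on synthetic tensors (shapes are illustrative):

import tensorflow as tf

images = tf.random.uniform((4, 256, 256, 1), minval=-1.0, maxval=1.0)
noisy = tf.clip_by_value(images + 0.1 * tf.random.normal(tf.shape(images)), -1.0, 1.0)

# One SSIM value per image in the batch; max_val is the data range, here 1 - (-1) = 2.
ssim_per_image = tf.image.ssim(images, noisy, max_val=2.0)
mean_ssim = tf.reduce_mean(ssim_per_image)
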
-decoder = vqvae_trainer.model.get_layer("decoder") -generated_samples = decoder.predict(quantized) - -for i in range(batch): - plt.subplot(1, 2, 1) - plt.imshow(priors[i]) - plt.title("Code") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(generated_samples[i].squeeze() + 0.5) - plt.title("Generated Sample") + plt.imshow(reconstructed.squeeze() + 0.5) + plt.title("Reconstructed") plt.axis("off") - plt.show() \ No newline at end of file + plt.savefig('fig'+str(i)) + plt.close() + +def demo_model(model, x_test): + idx = np.random.choice(len(x_test), 10) + test_images = x_test[idx] + reconstructions_test = model.predict(test_images) + + for i, (test_image, reconstructed_image) in enumerate(zip(test_images, reconstructions_test)): + show_subplot(test_image, reconstructed_image, i) + + encoder = model.get_layer("encoder") + quantizer = model.get_layer("vector_quantizer") + + encoded_outputs = encoder.predict(test_images) + flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) + codebook_indices = quantizer.get_code_indices(flat_enc_outputs) + codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + + for i in range(len(test_images)): + plt.subplot(1, 2, 1) + plt.imshow(test_images[i].squeeze() + 0.5) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i]) + plt.title("Code") + plt.axis("off") + plt.savefig('embedding'+str(i)) + plt.close() + +def pixelcnn_train(model, x_train, x_test, x_validate): + encoder = model.get_layer("encoder") + quantizer = model.get_layer("vector_quantizer") + decoder = model.get_layer("decoder") + + encoded_training = encoder.predict(x_train) + flat_enc_training = encoded_training.reshape(-1, encoded_training.shape[-1]) + codebook_indices_training = quantizer.get_code_indices(flat_enc_training) + codebook_indices_training = codebook_indices_training.numpy().reshape(encoded_training.shape[:-1]) + + encoded_validation = encoder.predict(x_validate) + flat_enc_validation = encoded_validation.reshape(-1, encoded_validation.shape[-1]) + codebook_indices_validation = quantizer.get_code_indices(flat_enc_validation) + codebook_indices_validation = codebook_indices_validation.numpy().reshape(encoded_validation.shape[:-1]) + + + pixelcnn = get_pixelcnn(num_embeddings=NUM_EMBEDDINGS) + pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) + pixelcnn.fit(x=codebook_indices_training, y=codebook_indices_training, batch_size=BATCH_SIZE, epochs=30, validation_data=(codebook_indices_validation, codebook_indices_validation)) + + + sampler = get_pixelcnn_sampler(pixelcnn) + + prior_batch_size = 10 + priors = np.zeros(shape=(prior_batch_size,) + pixelcnn.input_shape[1:]) + batch, rows, cols = priors.shape + + for row in range(rows): + for col in range(cols): + probs = sampler.predict(priors) + priors[:, row, col] = probs[:, row, col] + + pretrained_embeddings = quantizer.embeddings + prior_onehot = tf.one_hot(priors.astype("int32"), NUM_EMBEDDINGS).numpy() + quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True) + quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) + + # Generate novel images. 
+ generated_samples = decoder.predict(quantized) + + for i in range(batch): + plt.subplot(1, 2, 1) + plt.imshow(priors[i]) + plt.title("Code") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(generated_samples[i].squeeze() + 0.5) + plt.title("Generated Sample") + plt.axis("off") + plt.savefig('gen'+str(i)) + \ No newline at end of file From 949504eec59471aafc026e074cfe7aa476b71f5d Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 18:08:03 +1000 Subject: [PATCH 19/26] code refactoring --- recognition/45819061-VQVAE-OASIS/train.py | 1 - 1 file changed, 1 deletion(-) diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index a1bc24c7c1..43de5f1b29 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -63,7 +63,6 @@ def test_step(self, x): LATENT_DIM = 8 NUM_EMBEDDINGS = 16 -x_train, x_test, x_validate = get_data() def train(x_train, x_test, x_validate): data_variance = np.var(x_train) From b075b272d46e6d1fcaeee05118b42c7989d6ac72 Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 18:15:59 +1000 Subject: [PATCH 20/26] made driver script --- recognition/45819061-VQVAE-OASIS/driver.py | 11 +++++++++++ recognition/45819061-VQVAE-OASIS/train.py | 23 +++++++++++++++------- 2 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 recognition/45819061-VQVAE-OASIS/driver.py diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py new file mode 100644 index 0000000000..d571130c61 --- /dev/null +++ b/recognition/45819061-VQVAE-OASIS/driver.py @@ -0,0 +1,11 @@ +from matplotlib import pyplot as plt +import tensorflow as tf +import numpy as np +from dataset import BATCH_SIZE, get_data +from modules import VQVAE, get_pixelcnn, get_pixelcnn_sampler +from train import * + +x_train, x_test, x_validate = get_data() +model = train(x_train, x_test, x_validate) +demo_model(model, x_test) +pixelcnn = pixelcnn_train(model, x_train, x_test, x_validate) \ No newline at end of file diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 43de5f1b29..d0019dd95c 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -64,14 +64,14 @@ def test_step(self, x): LATENT_DIM = 8 NUM_EMBEDDINGS = 16 -def train(x_train, x_test, x_validate): +def train(x_train, x_test, x_validate, epochs=30): data_variance = np.var(x_train) vqvae_trainer = VQVAETrainer(data_variance, LATENT_DIM, NUM_EMBEDDINGS) vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) history = vqvae_trainer.fit( x=x_train, - epochs=5, + epochs=epochs, batch_size=BATCH_SIZE, use_multiprocessing=True, validation_data=(x_validate, x_validate), @@ -103,7 +103,8 @@ def train(x_train, x_test, x_validate): plt.close() - vqvae_trainer.model.save('mymodel') + vqvae_trainer.model.save('vqvae') + return vqvae_trainer.model def show_subplot(original, reconstructed, i): plt.subplot(1, 2, 1) @@ -147,7 +148,7 @@ def demo_model(model, x_test): plt.savefig('embedding'+str(i)) plt.close() -def pixelcnn_train(model, x_train, x_test, x_validate): +def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30): encoder = model.get_layer("encoder") quantizer = model.get_layer("vector_quantizer") decoder = model.get_layer("decoder") @@ -165,7 +166,13 @@ def pixelcnn_train(model, x_train, x_test, x_validate): pixelcnn = get_pixelcnn(num_embeddings=NUM_EMBEDDINGS) 
pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) - pixelcnn.fit(x=codebook_indices_training, y=codebook_indices_training, batch_size=BATCH_SIZE, epochs=30, validation_data=(codebook_indices_validation, codebook_indices_validation)) + pixelcnn.fit( + x=codebook_indices_training, + y=codebook_indices_training, + batch_size=BATCH_SIZE, + epochs=epochs, + validation_data=(codebook_indices_validation, codebook_indices_validation) + ) sampler = get_pixelcnn_sampler(pixelcnn) @@ -182,7 +189,7 @@ def pixelcnn_train(model, x_train, x_test, x_validate): pretrained_embeddings = quantizer.embeddings prior_onehot = tf.one_hot(priors.astype("int32"), NUM_EMBEDDINGS).numpy() quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True) - quantized = tf.reshape(quantized, (-1, *(encoded_outputs.shape[1:]))) + quantized = tf.reshape(quantized, (-1, *(encoded_training.shape[1:]))) # Generate novel images. generated_samples = decoder.predict(quantized) @@ -198,4 +205,6 @@ def pixelcnn_train(model, x_train, x_test, x_validate): plt.title("Generated Sample") plt.axis("off") plt.savefig('gen'+str(i)) - \ No newline at end of file + + pixelcnn.save('pixelcnn') + return pixelcnn \ No newline at end of file From 646eee0c55716b62f04922f67e966c44450f206e Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 16:27:08 +1100 Subject: [PATCH 21/26] refactoring making more modular and nonhardcoded --- recognition/45819061-VQVAE-OASIS/.gitignore | 3 +- recognition/45819061-VQVAE-OASIS/dataset.py | 14 +-- recognition/45819061-VQVAE-OASIS/driver.py | 40 ++++++- recognition/45819061-VQVAE-OASIS/modules.py | 19 ++-- recognition/45819061-VQVAE-OASIS/predict.py | 82 ++++++++++++++ recognition/45819061-VQVAE-OASIS/train.py | 118 +++++--------------- 6 files changed, 161 insertions(+), 115 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/.gitignore b/recognition/45819061-VQVAE-OASIS/.gitignore index f301c338a8..5d44de01d3 100644 --- a/recognition/45819061-VQVAE-OASIS/.gitignore +++ b/recognition/45819061-VQVAE-OASIS/.gitignore @@ -3,7 +3,8 @@ data/ # constructed model -mymodel/ +vqvae/ +pixelcnn/ # all images **.png \ No newline at end of file diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py index 9f4da6a8a8..0e616f7a8e 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -4,11 +4,7 @@ from tqdm import tqdm -BATCH_SIZE = 64 -DATA_DIR = 'data/keras_png_slices_data' -TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' -TEST_DATA = DATA_DIR + '/keras_png_slices_test' -VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate' + def reader(f): return tf.io.decode_png(tf.io.read_file(f), channels=1) @@ -29,10 +25,10 @@ def load(files, use_multiprocessing=False): optional argument use_multiprocessing defaults to false can specify and integer to spawn child processes to load faster on machines with sufficient capabilities """ -def get_data(use_multiprocessing=False): - files_train = [os.path.join(TRAIN_DATA, f) for f in os.listdir(TRAIN_DATA) if os.path.isfile(os.path.join(TRAIN_DATA, f))] - files_test = [os.path.join(TEST_DATA, f) for f in os.listdir(TEST_DATA) if os.path.isfile(os.path.join(TEST_DATA, f))] - files_validate = [os.path.join(VALIDATE_DATA, f) for f in os.listdir(VALIDATE_DATA) if os.path.isfile(os.path.join(VALIDATE_DATA, f))] +def get_data(train_dir, test_dir, 
validate_dir, use_multiprocessing=False): + files_train = [os.path.join(train_dir, f) for f in os.listdir(train_dir) if os.path.isfile(os.path.join(train_dir, f))] + files_test = [os.path.join(test_dir, f) for f in os.listdir(test_dir) if os.path.isfile(os.path.join(test_dir, f))] + files_validate = [os.path.join(validate_dir, f) for f in os.listdir(validate_dir) if os.path.isfile(os.path.join(validate_dir, f))] print("Loading data") x_train = load(files_train, use_multiprocessing) diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py index d571130c61..9ad37fd425 100644 --- a/recognition/45819061-VQVAE-OASIS/driver.py +++ b/recognition/45819061-VQVAE-OASIS/driver.py @@ -1,11 +1,39 @@ from matplotlib import pyplot as plt import tensorflow as tf import numpy as np -from dataset import BATCH_SIZE, get_data -from modules import VQVAE, get_pixelcnn, get_pixelcnn_sampler +from dataset import * +from modules import * +from predict import * from train import * -x_train, x_test, x_validate = get_data() -model = train(x_train, x_test, x_validate) -demo_model(model, x_test) -pixelcnn = pixelcnn_train(model, x_train, x_test, x_validate) \ No newline at end of file +VQVAE_DIR = "vqvae" +PIXELCNN_DIR = "pixelcnn" +LATENT_DIM = 32 +NUM_EMBEDDINGS = 64 +RESIDUAL_HIDDENS = 256 +EPOCHS = 50 +BATCH_SIZE = 64 +DATA_DIR = 'data/keras_png_slices_data' +TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' +TEST_DATA = DATA_DIR + '/keras_png_slices_test' +VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate' + +#model = tf.keras.models.load_model(VQVAE_DIR) +#pixelcnn = tf.keras.models.load_model(PIXELCNN_DIR) + + +x_train, x_test, x_validate = get_data(TRAIN_DATA, TEST_DATA, VALIDATE_DATA) +model = train(x_train, x_test, x_validate, + epochs=EPOCHS, batch_size=BATCH_SIZE, out_dir=VQVAE_DIR, + latent_dim=LATENT_DIM, + num_embeddings=NUM_EMBEDDINGS, + residual_hiddens=RESIDUAL_HIDDENS +) + +demo_vqvae(model, x_test) + +pixelcnn = pixelcnn_train(model, x_train, x_test, x_validate, + epochs=EPOCHS, batch_size=BATCH_SIZE, out_dir=PIXELCNN_DIR, + num_embeddings=NUM_EMBEDDINGS +) +sample_images(model, pixelcnn) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 25833a1ccf..6f24de51e4 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -70,9 +70,9 @@ def build(self, input_shape): kernel_shape = self.conv.kernel.get_shape() self.mask = np.zeros(shape=kernel_shape) self.mask[:kernel_shape[0]//2, ...] = 1.0 - self.mask[kernel_shape[0]//2, kernel_shape[1]//2, ...] = 1.0 + self.mask[kernel_shape[0]//2, :kernel_shape[1]//2, ...] = 1.0 if self.mask == 'B': - self.mask[kernel_shape[0]//2, kernel_shape[1]//2, kernel_shape[1]//2, ...] = 1.0 + self.mask[kernel_shape[0]//2, kernel_shape[1]//2, ...] 
= 1.0 def call(self, inputs): self.conv.kernel.assign(self.conv.kernel * self.mask) @@ -87,18 +87,18 @@ def __init__(self, filters): def call(self, inputs): x = self.conv1(inputs) - x = self.pixel_cnn(x) + x = self.pixelcnn(x) x = self.conv2(x) - return Add([inputs, x]) + return Add()([inputs, x]) -def get_pixelcnn(input_shape, num_embeddings, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs): - pixelcnn_inputs = Input(shape=input_shape) +def get_pixelcnn(input_shape, num_embeddings, filters=256, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs): + pixelcnn_inputs = Input(shape=input_shape, dtype=tf.int32) onehot = tf.one_hot(pixelcnn_inputs, num_embeddings) - x = PixelCNN(mask_type='A', filters=128, kernel_size=7, activation='leaky_relu', padding='same')(onehot) + x = PixelCNN(mask_type='A', filters=filters, kernel_size=7, activation='leaky_relu', padding='same')(onehot) for _ in range(num_residual_blocks): - x = ResidualBlock(filters=128) + x = ResidualBlock(filters=filters)(x) for _ in range(num_pixelcnn_layers): - x = PixelCNN(mask_type='B', filters=128, kernel_size=1, strides=1, activation='leaky_relu', padding='same') + x = PixelCNN(mask_type='B', filters=filters, kernel_size=1, strides=1, activation='leaky_relu', padding='same')(x) out = Conv2D(filters=num_embeddings, kernel_size=1, strides=1, padding="valid")(x) return tf.keras.Model(pixelcnn_inputs, out, name='pixelcnn') @@ -107,6 +107,7 @@ class VQVAE(tf.keras.Model): def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), residual_hiddens=64): super().__init__() self.latent_dim = latent_dim + self.num_embeddings = num_embeddings # Build encoder encoder_in = Input(shape=input_shape) diff --git a/recognition/45819061-VQVAE-OASIS/predict.py b/recognition/45819061-VQVAE-OASIS/predict.py index e69de29bb2..e9520cbeaa 100644 --- a/recognition/45819061-VQVAE-OASIS/predict.py +++ b/recognition/45819061-VQVAE-OASIS/predict.py @@ -0,0 +1,82 @@ +from modules import get_pixelcnn_sampler +import tensorflow as tf +import numpy as np +from matplotlib import pyplot as plt + +def show_subplot(original, reconstructed, i): + plt.subplot(1, 2, 1) + plt.imshow(original.squeeze() + 0.5) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(reconstructed.squeeze() + 0.5) + plt.title("Reconstructed") + plt.axis("off") + plt.savefig('fig'+str(i)) + plt.close() + +def demo_vqvae(model, x_test): + idx = np.random.choice(len(x_test), 10) + test_images = x_test[idx] + reconstructions_test = model.predict(test_images) + + for i, (test_image, reconstructed_image) in enumerate(zip(test_images, reconstructions_test)): + show_subplot(test_image, reconstructed_image, i) + + encoder = model.get_layer("encoder") + quantizer = model.get_layer("vector_quantizer") + + encoded_outputs = encoder.predict(test_images) + flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) + codebook_indices = quantizer.get_code_indices(flat_enc_outputs) + codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) + + for i in range(len(test_images)): + plt.subplot(1, 2, 1) + plt.imshow(test_images[i].squeeze() + 0.5) + plt.title("Original") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(codebook_indices[i]) + plt.title("Code") + plt.axis("off") + plt.savefig('embedding'+str(i)) + plt.close() + + +def sample_images(vqvae, pixelcnn): + decoder = vqvae.get_layer('decoder') + quantizer = vqvae.get_layer('vector_quantizer') + sampler = get_pixelcnn_sampler(pixelcnn) + + 
prior_batch_size = 10 + priors = np.zeros(shape=(prior_batch_size,) + pixelcnn.input_shape[1:]) + batch, rows, cols = priors.shape + + for row in range(rows): + for col in range(cols): + probs = sampler.predict(priors, verbose=0) + priors[:, row, col] = probs[:, row, col] + + pretrained_embeddings = quantizer.embeddings + prior_onehot = tf.one_hot(priors.astype("int32"), vqvae.num_embeddings).numpy() + quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True) + quantized = tf.reshape(quantized, (-1, *(vqvae.get_layer('encoder').compute_output_shape((1, 256, 256, 1))[1:]))) + + # Generate novel images. + generated_samples = decoder.predict(quantized) + + for i in range(batch): + plt.subplot(1, 2, 1) + plt.imshow(priors[i]) + plt.title("Code") + plt.axis("off") + + plt.subplot(1, 2, 2) + plt.imshow(generated_samples[i].squeeze() + 0.5) + plt.title("Generated Sample") + plt.axis("off") + plt.savefig('gen'+str(i)) + plt.close() \ No newline at end of file diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index d0019dd95c..3490d7ba8c 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -1,18 +1,17 @@ from matplotlib import pyplot as plt import tensorflow as tf import numpy as np -from dataset import BATCH_SIZE, get_data -from modules import VQVAE, get_pixelcnn, get_pixelcnn_sampler +from modules import VQVAE, get_pixelcnn class VQVAETrainer (tf.keras.models.Model): - def __init__(self, train_variance, latent_dim=32, num_embeddings=128): + def __init__(self, train_variance, latent_dim=32, num_embeddings=128, residual_hiddens=256): super(VQVAETrainer, self).__init__() self.train_variance = train_variance self.latent_dim = latent_dim self.num_embeddings = num_embeddings - self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1), residual_hiddens=16) + self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1), residual_hiddens=residual_hiddens) self.total_loss_tracker = tf.keras.metrics.Mean(name='total_loss') self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name='reconstruction_loss') self.vq_loss_tracker = tf.keras.metrics.Mean(name='vq_loss') @@ -61,25 +60,24 @@ def test_step(self, x): "ssim": self.ssim_tracker.result() } -LATENT_DIM = 8 -NUM_EMBEDDINGS = 16 -def train(x_train, x_test, x_validate, epochs=30): + +def train(x_train, x_test, x_validate, epochs=30, batch_size=16, out_dir='vqvae', **kwargs): data_variance = np.var(x_train) - vqvae_trainer = VQVAETrainer(data_variance, LATENT_DIM, NUM_EMBEDDINGS) + vqvae_trainer = VQVAETrainer(data_variance, **kwargs) vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) history = vqvae_trainer.fit( x=x_train, epochs=epochs, - batch_size=BATCH_SIZE, + batch_size=batch_size, use_multiprocessing=True, validation_data=(x_validate, x_validate), shuffle=True, validation_freq=1 ) - eval_results = vqvae_trainer.evaluate(x_test, x_test, batch_size=BATCH_SIZE) + eval_results = vqvae_trainer.evaluate(x_test, x_test, batch_size=batch_size) print("Structured similarity score:", eval_results) @@ -96,63 +94,25 @@ def train(x_train, x_test, x_validate, epochs=30): plt.close() - + plt.plot(history.history['ssim']) + plt.plot(history.history['val_ssim']) + plt.title('Model ssim') + plt.ylabel('ssim') + plt.xlabel('epoch') plt.ylim((0, 1)) plt.legend(['training set', 'validation set']) plt.savefig('ssim') plt.close() - - vqvae_trainer.model.save('vqvae') + vqvae_trainer.model.summary() + 
vqvae_trainer.model.save(out_dir) return vqvae_trainer.model -def show_subplot(original, reconstructed, i): - plt.subplot(1, 2, 1) - plt.imshow(original.squeeze() + 0.5) - plt.title("Original") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(reconstructed.squeeze() + 0.5) - plt.title("Reconstructed") - plt.axis("off") - plt.savefig('fig'+str(i)) - plt.close() - -def demo_model(model, x_test): - idx = np.random.choice(len(x_test), 10) - test_images = x_test[idx] - reconstructions_test = model.predict(test_images) - - for i, (test_image, reconstructed_image) in enumerate(zip(test_images, reconstructions_test)): - show_subplot(test_image, reconstructed_image, i) +def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30, batch_size=16, out_dir='pixelcnn', **kwargs): encoder = model.get_layer("encoder") quantizer = model.get_layer("vector_quantizer") - encoded_outputs = encoder.predict(test_images) - flat_enc_outputs = encoded_outputs.reshape(-1, encoded_outputs.shape[-1]) - codebook_indices = quantizer.get_code_indices(flat_enc_outputs) - codebook_indices = codebook_indices.numpy().reshape(encoded_outputs.shape[:-1]) - - for i in range(len(test_images)): - plt.subplot(1, 2, 1) - plt.imshow(test_images[i].squeeze() + 0.5) - plt.title("Original") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(codebook_indices[i]) - plt.title("Code") - plt.axis("off") - plt.savefig('embedding'+str(i)) - plt.close() - -def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30): - encoder = model.get_layer("encoder") - quantizer = model.get_layer("vector_quantizer") - decoder = model.get_layer("decoder") - encoded_training = encoder.predict(x_train) flat_enc_training = encoded_training.reshape(-1, encoded_training.shape[-1]) codebook_indices_training = quantizer.get_code_indices(flat_enc_training) @@ -164,47 +124,25 @@ def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30): codebook_indices_validation = codebook_indices_validation.numpy().reshape(encoded_validation.shape[:-1]) - pixelcnn = get_pixelcnn(num_embeddings=NUM_EMBEDDINGS) + pixelcnn = get_pixelcnn(encoded_training.shape[1:-1], **kwargs) pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) - pixelcnn.fit( + history = pixelcnn.fit( x=codebook_indices_training, y=codebook_indices_training, - batch_size=BATCH_SIZE, + batch_size=batch_size, epochs=epochs, validation_data=(codebook_indices_validation, codebook_indices_validation) ) - - sampler = get_pixelcnn_sampler(pixelcnn) - - prior_batch_size = 10 - priors = np.zeros(shape=(prior_batch_size,) + pixelcnn.input_shape[1:]) - batch, rows, cols = priors.shape - - for row in range(rows): - for col in range(cols): - probs = sampler.predict(priors) - priors[:, row, col] = probs[:, row, col] - - pretrained_embeddings = quantizer.embeddings - prior_onehot = tf.one_hot(priors.astype("int32"), NUM_EMBEDDINGS).numpy() - quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True) - quantized = tf.reshape(quantized, (-1, *(encoded_training.shape[1:]))) - - # Generate novel images. 
- generated_samples = decoder.predict(quantized) - - for i in range(batch): - plt.subplot(1, 2, 1) - plt.imshow(priors[i]) - plt.title("Code") - plt.axis("off") - - plt.subplot(1, 2, 2) - plt.imshow(generated_samples[i].squeeze() + 0.5) - plt.title("Generated Sample") - plt.axis("off") - plt.savefig('gen'+str(i)) + plt.plot(history.history['accuracy']) + plt.plot(history.history['val_accuracy']) + plt.title('Model accuracy') + plt.ylabel('accuracy') + plt.xlabel('epoch') + plt.ylim((0, 1)) + plt.legend(['training set', 'validation set']) + plt.savefig('pcnnacc') + plt.close() pixelcnn.save('pixelcnn') return pixelcnn \ No newline at end of file From 41cc6bf33d77aaf109c6bc054242613f3cca7135 Mon Sep 17 00:00:00 2001 From: jbart Date: Thu, 20 Oct 2022 21:43:08 +1100 Subject: [PATCH 22/26] code refactoring and hyperparamerts --- recognition/45819061-VQVAE-OASIS/driver.py | 9 +++--- recognition/45819061-VQVAE-OASIS/modules.py | 27 +++++++++--------- recognition/45819061-VQVAE-OASIS/train.py | 31 ++++++++++++--------- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py index 9ad37fd425..3700247df7 100644 --- a/recognition/45819061-VQVAE-OASIS/driver.py +++ b/recognition/45819061-VQVAE-OASIS/driver.py @@ -8,10 +8,10 @@ VQVAE_DIR = "vqvae" PIXELCNN_DIR = "pixelcnn" -LATENT_DIM = 32 -NUM_EMBEDDINGS = 64 -RESIDUAL_HIDDENS = 256 -EPOCHS = 50 +LATENT_DIM = 16 +NUM_EMBEDDINGS = 32 +RESIDUAL_HIDDENS = 64 +EPOCHS = 30 BATCH_SIZE = 64 DATA_DIR = 'data/keras_png_slices_data' TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' @@ -21,7 +21,6 @@ #model = tf.keras.models.load_model(VQVAE_DIR) #pixelcnn = tf.keras.models.load_model(PIXELCNN_DIR) - x_train, x_test, x_validate = get_data(TRAIN_DATA, TEST_DATA, VALIDATE_DATA) model = train(x_train, x_test, x_validate, epochs=EPOCHS, batch_size=BATCH_SIZE, out_dir=VQVAE_DIR, diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 6f24de51e4..493a54ed6c 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -6,8 +6,8 @@ import tensorflow_probability as tfp class VectorQuantizer(Layer): - def __init__(self, num_embeddings, embedding_dim, beta=0.25, name="VQ"): - super().__init__() + def __init__(self, num_embeddings, embedding_dim, beta=0.25, name="VQ", **kwargs): + super().__init__(**kwargs) self.num_embeddings = num_embeddings self.embedding_dim = embedding_dim self.beta = beta @@ -91,7 +91,7 @@ def call(self, inputs): x = self.conv2(x) return Add()([inputs, x]) -def get_pixelcnn(input_shape, num_embeddings, filters=256, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs): +def get_pixelcnn(input_shape, num_embeddings, filters=64, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs): pixelcnn_inputs = Input(shape=input_shape, dtype=tf.int32) onehot = tf.one_hot(pixelcnn_inputs, num_embeddings) x = PixelCNN(mask_type='A', filters=filters, kernel_size=7, activation='leaky_relu', padding='same')(onehot) @@ -111,19 +111,20 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), # Build encoder encoder_in = Input(shape=input_shape) - x1 = Conv2D(16, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) - x2 = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(x1) - x3 = resblock(x2, residual_hiddens) - x4 = resblock(x3, residual_hiddens) - encoder_out = Conv2D(latent_dim, 1, padding="same")(x4) + x = 
Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) + x = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x) + x = resblock(x, residual_hiddens) + x = resblock(x, residual_hiddens) + encoder_out = Conv2D(latent_dim, 1, padding="same")(x) self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') # Build decoder decoder_in = Input(shape=self.encoder.output.shape[1:]) - y1 = resblock(decoder_in, residual_hiddens) - y2 = resblock(y1, residual_hiddens) - y3 = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y2) - decoder_out = Conv2DTranspose(1, 4, strides=2, activation='leaky_relu', padding='same')(y3) + y = resblock(decoder_in, residual_hiddens) + y = resblock(y, residual_hiddens) + y = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y) + y = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y) + decoder_out = Conv2DTranspose(1, 3, strides=1, activation='leaky_relu', padding='same')(y) self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') # Add VQ layer @@ -132,7 +133,7 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), #self.summary() - def call(self, x, training=False): + def call(self, x): x = self.encoder(x) quantized = self.vq_layer(x) return self.decoder(quantized) diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 3490d7ba8c..1b00a03758 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -2,6 +2,7 @@ import tensorflow as tf import numpy as np from modules import VQVAE, get_pixelcnn +from tqdm import tqdm class VQVAETrainer (tf.keras.models.Model): @@ -22,8 +23,7 @@ def metrics(self): return [ self.total_loss_tracker, self.reconstruction_loss_tracker, - self.vq_loss_tracker, - self.ssim_tracker + self.vq_loss_tracker ] def train_step(self, x): @@ -66,12 +66,11 @@ def train(x_train, x_test, x_validate, epochs=30, batch_size=16, out_dir='vqvae' data_variance = np.var(x_train) vqvae_trainer = VQVAETrainer(data_variance, **kwargs) - vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) + vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam(3e-4)) history = vqvae_trainer.fit( x=x_train, epochs=epochs, batch_size=batch_size, - use_multiprocessing=True, validation_data=(x_validate, x_validate), shuffle=True, validation_freq=1 @@ -113,15 +112,21 @@ def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30, batch_size=16, encoder = model.get_layer("encoder") quantizer = model.get_layer("vector_quantizer") - encoded_training = encoder.predict(x_train) - flat_enc_training = encoded_training.reshape(-1, encoded_training.shape[-1]) - codebook_indices_training = quantizer.get_code_indices(flat_enc_training) - codebook_indices_training = codebook_indices_training.numpy().reshape(encoded_training.shape[:-1]) - - encoded_validation = encoder.predict(x_validate) - flat_enc_validation = encoded_validation.reshape(-1, encoded_validation.shape[-1]) - codebook_indices_validation = quantizer.get_code_indices(flat_enc_validation) - codebook_indices_validation = codebook_indices_validation.numpy().reshape(encoded_validation.shape[:-1]) + codebook_indices_training = [] + codebook_indices_validation = [] + for i in tqdm(range(x_train.shape[0]//batch_size)): + encoded_training = encoder.predict(x_train[i*batch_size : (i+1)*batch_size], verbose=0) + x = encoded_training.reshape(-1, encoded_training.shape[-1]) + 
x = quantizer.get_code_indices(x)
+        codebook_indices_training.extend(x.numpy().reshape(encoded_training.shape[:-1]))
+    for j in tqdm(range(x_validate.shape[0]//batch_size)):
+        encoded_validation = encoder.predict(x_validate[j*batch_size : (j+1)*batch_size], verbose=0)
+        x = encoded_validation.reshape(-1, encoded_validation.shape[-1])
+        x = quantizer.get_code_indices(x)
+        codebook_indices_validation.extend(x.numpy().reshape(encoded_validation.shape[:-1]))
+
+    codebook_indices_training = np.asarray(codebook_indices_training)
+    codebook_indices_validation = np.asarray(codebook_indices_validation)
 
     pixelcnn = get_pixelcnn(encoded_training.shape[1:-1], **kwargs)
From d7d195216c20fa71bc0b7380e9a3b42d72d1e7ca Mon Sep 17 00:00:00 2001
From: jbart
Date: Thu, 20 Oct 2022 22:18:48 +1100
Subject: [PATCH 23/26] made readme and edited model parameters

---
 recognition/45819061-VQVAE-OASIS/README.md  | 23 +++++++++++++++++++++
 recognition/45819061-VQVAE-OASIS/driver.py  |  8 +++----
 recognition/45819061-VQVAE-OASIS/modules.py |  4 ++--
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/recognition/45819061-VQVAE-OASIS/README.md b/recognition/45819061-VQVAE-OASIS/README.md
index e69de29bb2..4e18605b7e 100644
--- a/recognition/45819061-VQVAE-OASIS/README.md
+++ b/recognition/45819061-VQVAE-OASIS/README.md
@@ -0,0 +1,23 @@
+# Vector Quantized - Variational Autoencoder for Generation of OASIS Brain data
+
+
+Here we construct a Vector Quantized - Variational Autoencoder (VQ-VAE) model trained on the OASIS brain dataset to construct a generative model which can reproduce and generate brain scan images by sampling a discrete latent space much smaller than the desired images.
+
+# Problem
+Development in computer technology in recognising and classifying brain disease is a growing field which aims to develop effective computer models that can recognise and classify information in brain scans to identify problems and characteristics of a patient's brain. A limitation in the effectiveness of this technology currently stems from an insufficient amount of data to train these classification models and thus the models that are produced are undertrained and ineffective. We use a VQ-VAE as a way of learning the structure and characteristics of brain scans and encoding them into a smaller, compact latent space. We learn patterns and structures of this latent space and train a generative model that generates clear and new brain scans which can be used to train these classification models.
+
+# The Model
+The model we train is a VQ-VAE consisting of an encoder feeding into a vector quantizer layer whose output then feeds into the decoder. The encoder and decoder are both made of two convolutional blocks and two residual layers. The convolutional layers are 4x4 windows with stride 2 and reduce the image data by a factor of four before passing to the residual layers. We use filter sizes 32, 64. Next, the residual layers are two convolutions (3x3 and 1x1) with filter size 32 and leaky relu activations between. The output of the residual block is the sum of the output of this convolution with the original data. The Vector Quantizer layer consists of a codebook of embedding codes; the VQ layer takes the output of the encoder and computes the relative distance to these embeddings to find the image's place in the latent space. The VQ can be thought of as being given the identified key characteristics of the image by the encoder, and it then assigns the output the indices where such information is stored in the latent space. Finally a decoder takes a set of codewords from the latent space and, via 2 transposed convolutional layers and residual blocks, the image is rebuilt. During training the VQVAE attempts to maintain the integrity of its vector quantisation of the latent space and its reproduction of the image.
+For generation of images we train a PixelCNN on the latent space discovered by the VQVAE to sample the latent space and discover new codes to pass to the decoder to generate realistic brain scans.
+
+# Requirements
+Although versioning may not be strict this is what was used in this case.
+- tensorflow = 2.10.0
+- tensorflow-probability = 0.18.0
+- tqdm = 4.64.1
+- matplotlib = 3.6.1
+
+# Training
+We train the models with Adam optimizers tracking commitment loss, codebook loss and reconstruction loss in the case of the VQVAE, and categorical entropy in the case of the pixelcnn. Filter sizes for each convolutional layer in the system must be sufficiently large to avoid the model training to an unusable state as was the case below.
+
+![0.5](losses.png) ![](fig9.png)
diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py
index 3700247df7..a6176504a9 100644
--- a/recognition/45819061-VQVAE-OASIS/driver.py
+++ b/recognition/45819061-VQVAE-OASIS/driver.py
@@ -8,10 +8,10 @@
 VQVAE_DIR = "vqvae"
 PIXELCNN_DIR = "pixelcnn"
-LATENT_DIM = 16
-NUM_EMBEDDINGS = 32
-RESIDUAL_HIDDENS = 64
-EPOCHS = 30
+LATENT_DIM = 32
+NUM_EMBEDDINGS = 64
+RESIDUAL_HIDDENS = 256
+EPOCHS = 75
 BATCH_SIZE = 64
 DATA_DIR = 'data/keras_png_slices_data'
 TRAIN_DATA = DATA_DIR + '/keras_png_slices_train'
@@ -21,7 +21,6 @@
 #model = tf.keras.models.load_model(VQVAE_DIR)
 #pixelcnn = tf.keras.models.load_model(PIXELCNN_DIR)
-
 x_train, x_test, x_validate = get_data(TRAIN_DATA, TEST_DATA, VALIDATE_DATA)
 model = train(x_train, x_test, x_validate,
     epochs=EPOCHS, batch_size=BATCH_SIZE, out_dir=VQVAE_DIR,
diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py
index 493a54ed6c..258c612008 100644
--- a/recognition/45819061-VQVAE-OASIS/modules.py
+++ b/recognition/45819061-VQVAE-OASIS/modules.py
@@ -91,7 +91,7 @@ def call(self, inputs):
         x = self.conv2(x)
         return Add()([inputs, x])
 
-def get_pixelcnn(input_shape, num_embeddings, filters=64, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs):
+def get_pixelcnn(input_shape, num_embeddings, filters=256, num_residual_blocks=3, num_pixelcnn_layers=3, **kwargs):
     pixelcnn_inputs = Input(shape=input_shape, dtype=tf.int32)
     onehot = tf.one_hot(pixelcnn_inputs, num_embeddings)
     x = PixelCNN(mask_type='A', filters=filters, kernel_size=7, activation='leaky_relu', padding='same')(onehot)
@@ -124,7 +124,7 @@ def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1),
         y = resblock(y, residual_hiddens)
         y = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y)
         y = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y)
-        decoder_out = Conv2DTranspose(1, 3, strides=1, activation='leaky_relu', padding='same')(y)
+        decoder_out = Conv2DTranspose(1, 3, strides=1, activation='tanh', padding='same')(y)
         self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder')
 
         # Add VQ layer
From 18144d2aac4840a197f124d75b182dabed951223 Mon Sep 17 00:00:00 2001
From: jbart
Date: Fri, 21 Oct 2022 19:44:12 +1100
Subject: [PATCH 24/26] hyperparameters and vqvae residual block edit

---
 recognition/45819061-VQVAE-OASIS/dataset.py |  6 +-
 recognition/45819061-VQVAE-OASIS/driver.py  |  7 +-
 recognition/45819061-VQVAE-OASIS/modules.py | 97 ++++++++++-----------
 recognition/45819061-VQVAE-OASIS/train.py   | 29 +++---
 4 files changed, 70 insertions(+), 69 deletions(-)

diff --git a/recognition/45819061-VQVAE-OASIS/dataset.py b/recognition/45819061-VQVAE-OASIS/dataset.py
index 
0e616f7a8e..e739665e00 100644 --- a/recognition/45819061-VQVAE-OASIS/dataset.py +++ b/recognition/45819061-VQVAE-OASIS/dataset.py @@ -36,8 +36,8 @@ def get_data(train_dir, test_dir, validate_dir, use_multiprocessing=False): x_validate = load(files_validate, use_multiprocessing) # scale image data to [-1, 1] range - x_train = x_train/127.5 - 1.0 - x_test = x_test/127.5 - 1.0 - x_validate = x_validate/127.5 - 1.0 + x_train = x_train/255.0 - 0.5 + x_test = x_test/255.0 - 0.5 + x_validate = x_validate/255.0 - 0.5 return x_train, x_test, x_validate diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py index a6176504a9..c4c1b27e3e 100644 --- a/recognition/45819061-VQVAE-OASIS/driver.py +++ b/recognition/45819061-VQVAE-OASIS/driver.py @@ -8,16 +8,15 @@ VQVAE_DIR = "vqvae" PIXELCNN_DIR = "pixelcnn" -LATENT_DIM = 32 +LATENT_DIM = 16 NUM_EMBEDDINGS = 64 -RESIDUAL_HIDDENS = 256 -EPOCHS = 75 +RESIDUAL_HIDDENS = 128 +EPOCHS = 50 BATCH_SIZE = 64 DATA_DIR = 'data/keras_png_slices_data' TRAIN_DATA = DATA_DIR + '/keras_png_slices_train' TEST_DATA = DATA_DIR + '/keras_png_slices_test' VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate' - #model = tf.keras.models.load_model(VQVAE_DIR) #pixelcnn = tf.keras.models.load_model(PIXELCNN_DIR) diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py index 258c612008..22b96a33bd 100644 --- a/recognition/45819061-VQVAE-OASIS/modules.py +++ b/recognition/45819061-VQVAE-OASIS/modules.py @@ -1,4 +1,5 @@ from base64 import decode +import code from matplotlib.cbook import flatten import numpy as np import tensorflow as tf @@ -15,7 +16,10 @@ def __init__(self, num_embeddings, embedding_dim, beta=0.25, name="VQ", **kwargs # Initialise flattened embeddings w_init = tf.random_uniform_initializer() self.embeddings = tf.Variable( - initial_value=w_init(shape=(self.embedding_dim, self.num_embeddings), dtype='float32'), + initial_value=w_init( + shape=(self.embedding_dim, self.num_embeddings), + dtype='float32' + ), trainable=True, name=name ) @@ -28,11 +32,11 @@ def call(self, x): encoding_indices = self.get_code_indices(flattened) encodings = tf.one_hot(encoding_indices, self.num_embeddings) quantized = tf.matmul(encodings, self.embeddings, transpose_b=True) - quantized = tf.reshape(quantized, input_shape) + + commitment_loss = tf.nn.l2_loss(tf.stop_gradient(quantized) - x)**2 + codebook_loss = tf.nn.l2_loss(quantized - tf.stop_gradient(x))**2 - commitment_loss = tf.norm(tf.stop_gradient(quantized) - x)**2 - codebook_loss = tf.norm(tf.stop_gradient(x) - quantized)**2 self.add_loss(self.beta * commitment_loss + codebook_loss) quantized = x + tf.stop_gradient(quantized - x) @@ -43,7 +47,7 @@ def get_code_indices(self, flattened_inputs): distances = ( tf.reduce_sum(flattened_inputs**2, axis=1, keepdims=True) + tf.reduce_sum(self.embeddings**2, axis=0) - - 2 * similarity + - 2 * similarity ) encoding_indices = tf.argmin(distances, axis=1) @@ -51,13 +55,10 @@ def get_code_indices(self, flattened_inputs): def resblock(x, filters=256): - skip = Conv2D(filters, 1, strides=1, padding='same')(x) - x = Conv2D(filters, 3, strides=1, padding='same')(x) - x = ReLU()(x) - x = Conv2D(filters, 1, strides=1, padding='same')(x) - out = Add()([x, skip]) - return ReLU()(out) - + xconv = Conv2D(filters, 3, strides=1, activation='relu', padding='same')(x) + xconv = Conv2D(x.shape[-1], 1, strides=1, padding='same')(xconv) + out = Add()([x, xconv]) + return ReLU()(out) class PixelCNN(Layer): def 
__init__(self, mask_type, **kwargs): @@ -89,54 +90,50 @@ def call(self, inputs): x = self.conv1(inputs) x = self.pixelcnn(x) x = self.conv2(x) - return Add()([inputs, x]) + return tf.add(inputs, x) -def get_pixelcnn(input_shape, num_embeddings, filters=256, num_residual_blocks=3, num_pixelcnn_layers=3, **kwargs): +def get_pixelcnn(input_shape, num_embeddings, filters=128, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs): pixelcnn_inputs = Input(shape=input_shape, dtype=tf.int32) onehot = tf.one_hot(pixelcnn_inputs, num_embeddings) - x = PixelCNN(mask_type='A', filters=filters, kernel_size=7, activation='leaky_relu', padding='same')(onehot) + x = PixelCNN(mask_type='A', filters=filters, kernel_size=8, activation='leaky_relu', padding='same')(onehot) for _ in range(num_residual_blocks): x = ResidualBlock(filters=filters)(x) for _ in range(num_pixelcnn_layers): - x = PixelCNN(mask_type='B', filters=filters, kernel_size=1, strides=1, activation='leaky_relu', padding='same')(x) + x = PixelCNN(mask_type='B', filters=filters, kernel_size=1, strides=1, activation='leaky_relu', padding='valid')(x) out = Conv2D(filters=num_embeddings, kernel_size=1, strides=1, padding="valid")(x) return tf.keras.Model(pixelcnn_inputs, out, name='pixelcnn') -class VQVAE(tf.keras.Model): - def __init__(self, latent_dim=32, num_embeddings=64, input_shape=(256, 256, 1), residual_hiddens=64): - super().__init__() - self.latent_dim = latent_dim - self.num_embeddings = num_embeddings - - # Build encoder - encoder_in = Input(shape=input_shape) - x = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) - x = Conv2D(64, 4, strides=2, activation='leaky_relu', padding='same')(x) - x = resblock(x, residual_hiddens) - x = resblock(x, residual_hiddens) - encoder_out = Conv2D(latent_dim, 1, padding="same")(x) - self.encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') - - # Build decoder - decoder_in = Input(shape=self.encoder.output.shape[1:]) - y = resblock(decoder_in, residual_hiddens) - y = resblock(y, residual_hiddens) - y = Conv2DTranspose(64, 4, strides=2, activation='leaky_relu', padding='same')(y) - y = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(y) - decoder_out = Conv2DTranspose(1, 3, strides=1, activation='tanh', padding='same')(y) - self.decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') - - # Add VQ layer - self.vq_layer = VectorQuantizer(num_embeddings=num_embeddings, embedding_dim=latent_dim, name='vq') - - #self.summary() - - - def call(self, x): - x = self.encoder(x) - quantized = self.vq_layer(x) - return self.decoder(quantized) +def get_vqvae(latent_dim=16, num_embeddings=64, input_shape=(256, 256, 1), residual_hiddens=64): + latent_dim = latent_dim + num_embeddings = num_embeddings + + # Build encoder + encoder_in = Input(shape=input_shape) + x = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in) + x = Conv2D(residual_hiddens, 4, strides=2, activation='leaky_relu', padding='same')(x) + x = resblock(x, residual_hiddens) + x = resblock(x, residual_hiddens) + encoder_out = Conv2D(latent_dim, 1, padding="same")(x) + encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder') + + # Build decoder + decoder_in = Input(shape=encoder.output.shape[1:]) + y = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(decoder_in) + y = Conv2DTranspose(residual_hiddens, 4, strides=2, activation='leaky_relu', padding='same')(y) + y = resblock(y, residual_hiddens) + y = resblock(y, 
residual_hiddens) + decoder_out = Conv2DTranspose(1, 3, strides=1, activation='leaky_relu', padding='same')(y) + decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder') + + # Add VQ layer + vq_layer = VectorQuantizer(num_embeddings=num_embeddings, embedding_dim=latent_dim, name='vq') + + inputs = Input(shape=input_shape) + encoder_outputs = encoder(inputs) + quantized_latents = vq_layer(encoder_outputs) + reconstructions = decoder(quantized_latents) + return tf.keras.Model(inputs, reconstructions, name='vq-vae') def get_pixelcnn_sampler(pixelcnn): inputs = Input(shape=pixelcnn.input_shape[1:]) diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py index 1b00a03758..b46b05096d 100644 --- a/recognition/45819061-VQVAE-OASIS/train.py +++ b/recognition/45819061-VQVAE-OASIS/train.py @@ -1,7 +1,7 @@ from matplotlib import pyplot as plt import tensorflow as tf import numpy as np -from modules import VQVAE, get_pixelcnn +from modules import get_vqvae, get_pixelcnn from tqdm import tqdm @@ -12,7 +12,7 @@ def __init__(self, train_variance, latent_dim=32, num_embeddings=128, residual_h self.latent_dim = latent_dim self.num_embeddings = num_embeddings - self.model = VQVAE(self.latent_dim, self.num_embeddings, (256, 256, 1), residual_hiddens=residual_hiddens) + self.model = get_vqvae(self.latent_dim, self.num_embeddings, (256, 256, 1), residual_hiddens=residual_hiddens) self.total_loss_tracker = tf.keras.metrics.Mean(name='total_loss') self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name='reconstruction_loss') self.vq_loss_tracker = tf.keras.metrics.Mean(name='vq_loss') @@ -31,7 +31,7 @@ def train_step(self, x): with tf.GradientTape() as tape: reconstructions = self.model(x) - reconstruction_loss = (tf.reduce_mean((x - reconstructions)**2)/self.train_variance) + reconstruction_loss = tf.reduce_mean((x - reconstructions)**2)/self.train_variance total_loss = reconstruction_loss + sum(self.model.losses) grads = tape.gradient(total_loss, self.model.trainable_variables) @@ -41,7 +41,7 @@ def train_step(self, x): self.reconstruction_loss_tracker.update_state(reconstruction_loss) self.vq_loss_tracker.update_state(sum(self.model.losses)) - ssim = tf.image.ssim(x, reconstructions, max_val=2.0) + ssim = tf.image.ssim(x, reconstructions, max_val=1.0) self.ssim_tracker.update_state(tf.reduce_mean(ssim)) return { @@ -54,7 +54,7 @@ def train_step(self, x): def test_step(self, x): x, _ = x reconstructions = self.model(x, training=False) - ssim = tf.image.ssim(x, reconstructions, max_val=2.0) + ssim = tf.image.ssim(x, reconstructions, max_val=1.0) self.ssim_tracker.update_state(tf.reduce_mean(ssim)) return { "ssim": self.ssim_tracker.result() @@ -63,10 +63,10 @@ def test_step(self, x): def train(x_train, x_test, x_validate, epochs=30, batch_size=16, out_dir='vqvae', **kwargs): - data_variance = np.var(x_train) + data_variance = np.var(x_train+0.5) vqvae_trainer = VQVAETrainer(data_variance, **kwargs) - vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam(3e-4)) + vqvae_trainer.compile(optimizer=tf.keras.optimizers.Adam()) history = vqvae_trainer.fit( x=x_train, epochs=epochs, @@ -87,7 +87,7 @@ def train(x_train, x_test, x_validate, epochs=30, batch_size=16, out_dir='vqvae' plt.title('Model Loss') plt.ylabel('loss') plt.xlabel('epoch') - plt.ylim((0, 2)) + plt.ylim((0, 400)) plt.legend(['total loss', 'reconstruction loss', 'vqvae loss']) plt.savefig('losses') plt.close() @@ -114,12 +114,15 @@ def pixelcnn_train(model, x_train, x_test, x_validate, 
epochs=30, batch_size=16,
     codebook_indices_training = []
     codebook_indices_validation = []
-    for i in tqdm(range(x_train.shape[0]//batch_size)):
+
+    # create training data for pixelcnn model using trained vqvae
+    # use a loop to reduce memory load
+    for i in range(x_train.shape[0]//batch_size):
         encoded_training = encoder.predict(x_train[i*batch_size : (i+1)*batch_size], verbose=0)
         x = encoded_training.reshape(-1, encoded_training.shape[-1])
         x = quantizer.get_code_indices(x)
         codebook_indices_training.extend(x.numpy().reshape(encoded_training.shape[:-1]))
-    for j in tqdm(range(x_validate.shape[0]//batch_size)):
+    for j in range(x_validate.shape[0]//batch_size):
         encoded_validation = encoder.predict(x_validate[j*batch_size : (j+1)*batch_size], verbose=0)
         x = encoded_validation.reshape(-1, encoded_validation.shape[-1])
         x = quantizer.get_code_indices(x)
@@ -130,15 +133,17 @@ def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30, batch_size=16,
     pixelcnn = get_pixelcnn(encoded_training.shape[1:-1], **kwargs)
-    pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
+    pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(3e-4), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
     history = pixelcnn.fit(
         x=codebook_indices_training,
         y=codebook_indices_training,
-        batch_size=batch_size,
+        batch_size=batch_size*4,
         epochs=epochs,
         validation_data=(codebook_indices_validation, codebook_indices_validation)
     )
 
+    pixelcnn.summary()
+
     plt.plot(history.history['accuracy'])
     plt.plot(history.history['val_accuracy'])
     plt.title('Model accuracy')
From 543fa9deef720e5c4ef0bb5c7293c63da32cf5ca Mon Sep 17 00:00:00 2001
From: jbart
Date: Fri, 21 Oct 2022 20:38:00 +1100
Subject: [PATCH 25/26] final changes to readme and driver

---
 recognition/45819061-VQVAE-OASIS/README.md  | 13 ++++++++++---
 recognition/45819061-VQVAE-OASIS/driver.py  |  4 ++--
 recognition/45819061-VQVAE-OASIS/predict.py |  2 +-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/recognition/45819061-VQVAE-OASIS/README.md b/recognition/45819061-VQVAE-OASIS/README.md
index 4e18605b7e..b2e6018f2f 100644
--- a/recognition/45819061-VQVAE-OASIS/README.md
+++ b/recognition/45819061-VQVAE-OASIS/README.md
@@ -8,7 +8,8 @@ Development in computer technology in recognising and classifying brain disease
 
 # The Model
 The model we train is a VQ-VAE consisting of an encoder feeding into a vector quantizer layer whose output then feeds into the decoder. The encoder and decoder are both made of two convolutional blocks and two residual layers. The convolutional layers are 4x4 windows with stride 2 and reduce the image data by a factor of four before passing to the residual layers. We use filter sizes 32, 64. Next, the residual layers are two convolutions (3x3 and 1x1) with filter size 32 and leaky relu activations between. The output of the residual block is the sum of the output of this convolution with the original data. The Vector Quantizer layer consists of a codebook of embedding codes; the VQ layer takes the output of the encoder and computes the relative distance to these embeddings to find the image's place in the latent space. The VQ can be thought of as being given the identified key characteristics of the image by the encoder, and it then assigns the output the indices where such information is stored in the latent space. Finally a decoder takes a set of codewords from the latent space and, via 2 transposed convolutional layers and residual blocks, the image is rebuilt. During training the VQVAE attempts to maintain the integrity of its vector quantisation of the latent space and its reproduction of the image.
-For generation of images we train a PixelCNN on the latent space discovered by the VQVAE to sample the latent space and discover new codes to pass to the decoder to generate realistic brain scans.
+For generation of images we train a PixelCNN on the latent space discovered by the VQVAE to sample the latent space and discover new codes to pass to the decoder to generate realistic brain scans.
+The model we designed was based on that described in [Paper](https://arxiv.org/abs/1711.00937).
 
 # Requirements
 Although versioning may not be strict this is what was used in this case.
@@ -18,6 +19,12 @@ Although versioning may not be strict this is what was used in this case.
 - matplotlib = 3.6.1
 
 # Training
-We train the models with Adam optimizers tracking commitment loss, codebook loss and reconstruction loss in the case of the VQVAE, and categorical entropy in the case of the pixelcnn. Filter sizes for each convolutional layer in the system must be sufficiently large to avoid the model training to an unusable state as was the case below.
+We train the models with Adam optimizers tracking commitment loss, codebook loss and reconstruction loss in the case of the VQVAE, and categorical entropy in the case of the pixelcnn. The loss function for the VQ-VAE is described in [Paper](https://arxiv.org/abs/1711.00937) and is essentially the distance of the output of the model at various stages (after decode, after encode) to expected values at that point and is designed to improve the reconstruction clarity as well as keep the latent space meaningful and interpretable by the later PixelCNN.
+
+
+We train the model using the VQVAETrainer class which contains all the logic required for the training. In our experiment we trained the model over the entire training set given with the OASIS brain data for 50 epochs. Relevant parameters such as dimension of the embedding space and filter sizes for layers are given in the driver.py script, which trains a VQVAE model and a PixelCNN model and produces figures demonstrating training statistics and expected outputs of the final model. We include our findings below.
 
-![0.5](losses.png) ![](fig9.png)
+![](losses.png)
+![](ssim.png)
+# Data
+The data we used was this preprocessed OASIS brain data available here [Link](https://cloudstor.aarnet.edu.au/plus/s/tByzSZzvvVh0hZA). Since this data is already split into training, validation and testing sets we did not perform any dataset splitting. Before passing images to the model we normalised them by loading them as grayscale images and scaling all the values to be in the domain [-0.5, 0.5].
\ No newline at end of file
diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py
index c4c1b27e3e..b41f156b75 100644
--- a/recognition/45819061-VQVAE-OASIS/driver.py
+++ b/recognition/45819061-VQVAE-OASIS/driver.py
@@ -17,8 +17,8 @@ TRAIN_DATA = DATA_DIR + '/keras_png_slices_train'
 TEST_DATA = DATA_DIR + '/keras_png_slices_test'
 VALIDATE_DATA = DATA_DIR + '/keras_png_slices_validate'
-#model = tf.keras.models.load_model(VQVAE_DIR)
-#pixelcnn = tf.keras.models.load_model(PIXELCNN_DIR)
+#model = tf.keras.models.load_model(VQVAE_DIR, custom_objects={'VectorQuantizer': VectorQuantizer})
+#pixelcnn = tf.keras.models.load_model(PIXELCNN_DIR, custom_objects={'PixelCNN': PixelCNN, 'ResidualBlock': ResidualBlock})
 x_train, x_test, x_validate = get_data(TRAIN_DATA, TEST_DATA, VALIDATE_DATA)
 model = train(x_train, x_test, x_validate,
diff --git a/recognition/45819061-VQVAE-OASIS/predict.py b/recognition/45819061-VQVAE-OASIS/predict.py
index e9520cbeaa..efb84642c5 100644
--- a/recognition/45819061-VQVAE-OASIS/predict.py
+++ b/recognition/45819061-VQVAE-OASIS/predict.py
@@ -61,7 +61,7 @@ def sample_images(vqvae, pixelcnn):
             priors[:, row, col] = probs[:, row, col]
 
     pretrained_embeddings = quantizer.embeddings
-    prior_onehot = tf.one_hot(priors.astype("int32"), vqvae.num_embeddings).numpy()
+    prior_onehot = tf.one_hot(priors.astype("int32"), quantizer.num_embeddings).numpy()
     quantized = tf.matmul(prior_onehot.astype("float32"), pretrained_embeddings, transpose_b=True)
     quantized = tf.reshape(quantized, (-1, *(vqvae.get_layer('encoder').compute_output_shape((1, 256, 256, 1))[1:])))
From 84c2fb16131f0dcd8efcd7ecf85a0f54e59228f3 Mon Sep 17 00:00:00 2001
From: jbart
Date: Sun, 23 Oct 2022 18:30:17 +1100
Subject: [PATCH 26/26] something changed

---
 recognition/45819061-VQVAE-OASIS/README.md  | 12 ++++++++++++
 recognition/45819061-VQVAE-OASIS/driver.py  |  6 +++---
 recognition/45819061-VQVAE-OASIS/modules.py | 16 ++++++----------
 recognition/45819061-VQVAE-OASIS/train.py   |  6 +++---
 4 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/recognition/45819061-VQVAE-OASIS/README.md b/recognition/45819061-VQVAE-OASIS/README.md
index b2e6018f2f..75b16db0d4 100644
--- a/recognition/45819061-VQVAE-OASIS/README.md
+++ b/recognition/45819061-VQVAE-OASIS/README.md
@@ -26,5 +26,17 @@ We train the model using the VQVAETrainer class which contains all the logic req
 
 ![](losses.png)
 ![](ssim.png)
+
+Here we have some example input/output pairs for the autoencoder
+![](fig1.png)
+![](fig2.png)
+
+And their representation in the codebook space in the bottleneck of the autoencoder
+![](embedding1.png)
+![](embedding2.png)
+
+And the results of a PixelCNN given the following codebook data.
+![](gen1.png)
+![](gen2.png)
 # Data
 The data we used was this preprocessed OASIS brain data available here [Link](https://cloudstor.aarnet.edu.au/plus/s/tByzSZzvvVh0hZA). Since this data is already split into training, validation and testing sets we did not perform any dataset splitting. Before passing images to the model we normalised them by loading them as grayscale images and scaling all the values to be in the domain [-0.5, 0.5].
\ No newline at end of file
diff --git a/recognition/45819061-VQVAE-OASIS/driver.py b/recognition/45819061-VQVAE-OASIS/driver.py
index b41f156b75..ddf4241cea 100644
--- a/recognition/45819061-VQVAE-OASIS/driver.py
+++ b/recognition/45819061-VQVAE-OASIS/driver.py
@@ -8,9 +8,9 @@
 VQVAE_DIR = "vqvae"
 PIXELCNN_DIR = "pixelcnn"
 
-LATENT_DIM = 16
-NUM_EMBEDDINGS = 64
-RESIDUAL_HIDDENS = 128
+LATENT_DIM = 32
+NUM_EMBEDDINGS = 128
+RESIDUAL_HIDDENS = 32
 EPOCHS = 50
 BATCH_SIZE = 64
 DATA_DIR = 'data/keras_png_slices_data'
diff --git a/recognition/45819061-VQVAE-OASIS/modules.py b/recognition/45819061-VQVAE-OASIS/modules.py
index 22b96a33bd..48081aecad 100644
--- a/recognition/45819061-VQVAE-OASIS/modules.py
+++ b/recognition/45819061-VQVAE-OASIS/modules.py
@@ -90,12 +90,12 @@ def call(self, inputs):
         x = self.conv1(inputs)
         x = self.pixelcnn(x)
         x = self.conv2(x)
-        return tf.add(inputs, x)
+        return tf.keras.layers.add([inputs, x])
 
 def get_pixelcnn(input_shape, num_embeddings, filters=128, num_residual_blocks=2, num_pixelcnn_layers=2, **kwargs):
     pixelcnn_inputs = Input(shape=input_shape, dtype=tf.int32)
     onehot = tf.one_hot(pixelcnn_inputs, num_embeddings)
-    x = PixelCNN(mask_type='A', filters=filters, kernel_size=8, activation='leaky_relu', padding='same')(onehot)
+    x = PixelCNN(mask_type='A', filters=filters, kernel_size=32, activation='leaky_relu', padding='same')(onehot)
     for _ in range(num_residual_blocks):
         x = ResidualBlock(filters=filters)(x)
     for _ in range(num_pixelcnn_layers):
@@ -110,19 +110,15 @@ def get_vqvae(latent_dim=16, num_embeddings=64, input_shape=(256, 256, 1), resid
 
     # Build encoder
     encoder_in = Input(shape=input_shape)
-    x = Conv2D(32, 4, strides=2, activation='leaky_relu', padding='same')(encoder_in)
-    x = Conv2D(residual_hiddens, 4, strides=2, activation='leaky_relu', padding='same')(x)
-    x = resblock(x, residual_hiddens)
-    x = resblock(x, residual_hiddens)
+    x = Conv2D(32, 3, strides=2, activation='leaky_relu', padding='same')(encoder_in)
+    x = Conv2D(64, 3, strides=2, activation='leaky_relu', padding='same')(x)
     encoder_out = Conv2D(latent_dim, 1, padding="same")(x)
     encoder = tf.keras.Model(encoder_in, encoder_out, name='encoder')
 
     # Build decoder
    decoder_in = Input(shape=encoder.output.shape[1:])
-    y = Conv2DTranspose(32, 4, strides=2, activation='leaky_relu', padding='same')(decoder_in)
-    y = Conv2DTranspose(residual_hiddens, 4, strides=2, activation='leaky_relu', padding='same')(y)
-    y = resblock(y, residual_hiddens)
-    y = resblock(y, residual_hiddens)
+    y = Conv2DTranspose(64, 3, strides=2, activation='leaky_relu', padding='same')(decoder_in)
+    y = Conv2DTranspose(32, 3, strides=2, activation='leaky_relu', padding='same')(y)
     decoder_out = Conv2DTranspose(1, 3, strides=1, activation='leaky_relu', padding='same')(y)
     decoder = tf.keras.Model(decoder_in, decoder_out, name='decoder')
 
diff --git a/recognition/45819061-VQVAE-OASIS/train.py b/recognition/45819061-VQVAE-OASIS/train.py
index b46b05096d..cf13dad2df 100644
--- a/recognition/45819061-VQVAE-OASIS/train.py
+++ b/recognition/45819061-VQVAE-OASIS/train.py
@@ -133,12 +133,12 @@ def pixelcnn_train(model, x_train, x_test, x_validate, epochs=30, batch_size=16,
 
     pixelcnn = get_pixelcnn(encoded_training.shape[1:-1], **kwargs)
 
-    pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(3e-4), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
+    pixelcnn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
     history = pixelcnn.fit(
         x=codebook_indices_training,
         y=codebook_indices_training,
-        batch_size=batch_size*4,
-        epochs=epochs,
+        batch_size=batch_size*2,
+        epochs=epochs*5,
         validation_data=(codebook_indices_validation, codebook_indices_validation)
     )
 
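
The training text in the README above describes the VQ-VAE loss as the distance between the model's outputs at various stages and the expected values at those points. Written out in the form given in the cited paper, with z_e(x) the encoder output, e the nearest codebook vector, sg[.] the stop-gradient operator and beta the commitment weight (the reconstruction term is shown here as a mean squared error, which is an assumption about this implementation rather than something stated in the patches):

```latex
\mathcal{L}
  = \underbrace{\lVert x - \hat{x} \rVert_2^2}_{\text{reconstruction}}
  + \underbrace{\lVert \operatorname{sg}[z_e(x)] - e \rVert_2^2}_{\text{codebook}}
  + \beta \, \underbrace{\lVert z_e(x) - \operatorname{sg}[e] \rVert_2^2}_{\text{commitment}}
```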
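train.py is only partially visible in these patches, so the following is a minimal sketch of how a Keras VQVAETrainer of the kind the README describes is commonly structured; the class body, the metric names and the use of model.losses for the VQ terms are assumptions, not the repository's code.

```python
import tensorflow as tf

class VQVAETrainer(tf.keras.Model):
    """Sketch of a trainer that wraps a VQ-VAE and tracks its losses during fit()."""

    def __init__(self, vqvae, **kwargs):
        super().__init__(**kwargs)
        self.vqvae = vqvae
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.recon_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss")
        self.vq_loss_tracker = tf.keras.metrics.Mean(name="vq_loss")

    @property
    def metrics(self):
        return [self.total_loss_tracker, self.recon_loss_tracker, self.vq_loss_tracker]

    def train_step(self, x):
        with tf.GradientTape() as tape:
            reconstructions = self.vqvae(x, training=True)
            # Reconstruction term: mean squared error between input and output slices.
            recon_loss = tf.reduce_mean((x - reconstructions) ** 2)
            # The VQ layer contributes the codebook and commitment terms via add_loss().
            vq_loss = tf.add_n(self.vqvae.losses)
            total_loss = recon_loss + vq_loss
        grads = tape.gradient(total_loss, self.vqvae.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.vqvae.trainable_variables))
        self.total_loss_tracker.update_state(total_loss)
        self.recon_loss_tracker.update_state(recon_loss)
        self.vq_loss_tracker.update_state(vq_loss)
        return {m.name: m.result() for m in self.metrics}
```

Under these assumptions, driver.py-style training would be roughly `trainer.compile(optimizer=tf.keras.optimizers.Adam())` followed by `trainer.fit(x_train, epochs=EPOCHS, batch_size=BATCH_SIZE)`, matching the EPOCHS = 50 and BATCH_SIZE = 64 constants set in driver.py.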
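The # Data section states that slices are loaded as grayscale and scaled into [-0.5, 0.5]. Below is a minimal sketch of that preprocessing, assuming plain PNG decoding with tf.io; the repository's actual get_data in dataset.py is not shown in these patches, so the function name and directory handling here are illustrative.

```python
import glob
import numpy as np
import tensorflow as tf

def load_slices(slice_dir):
    """Load OASIS PNG slices as grayscale and scale pixel values into [-0.5, 0.5]."""
    images = []
    for path in sorted(glob.glob(slice_dir + "/*.png")):
        raw = tf.io.read_file(path)
        img = tf.io.decode_png(raw, channels=1)        # (256, 256, 1) uint8, grayscale
        img = tf.cast(img, tf.float32) / 255.0 - 0.5   # [0, 255] -> [-0.5, 0.5]
        images.append(img.numpy())
    return np.stack(images)

# e.g. x_train = load_slices('data/keras_png_slices_data/keras_png_slices_train')
```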
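predict.py's sample_images, partially shown in the diff above, fills a grid of priors autoregressively with the PixelCNN, maps the sampled indices back through the codebook and decodes them into brain scans. A condensed sketch of that generation loop follows; the use of tf.random.categorical, the logits layout and the 'decoder' layer lookup are assumptions pieced together from the visible fragments, not a verbatim copy of predict.py.

```python
import numpy as np
import tensorflow as tf

def generate_scans(vqvae, pixelcnn, quantizer, batch=4):
    """Sample codebook indices with the PixelCNN, then decode them into images."""
    rows, cols = pixelcnn.input_shape[1:3]              # spatial size of the latent grid
    priors = np.zeros((batch, rows, cols), dtype="int32")

    # Autoregressive raster-scan sampling: each position is drawn conditioned on
    # the positions already filled in above and to the left of it.
    for row in range(rows):
        for col in range(cols):
            logits = pixelcnn(priors, training=False)   # assumed (batch, rows, cols, num_embeddings)
            samples = tf.random.categorical(logits[:, row, col, :], 1)
            priors[:, row, col] = tf.squeeze(samples, axis=-1).numpy()

    # Map sampled indices back to codebook vectors, as in predict.py, then decode.
    onehot = tf.one_hot(priors, quantizer.num_embeddings)
    quantized = tf.matmul(onehot, quantizer.embeddings, transpose_b=True)
    return vqvae.get_layer('decoder')(quantized)
```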