Initial commit

Initial commit for Bytehoven
paulvangentcom · Dec 16, 2017 · e442b1e · e442b1e
1 parent 047680a
commit e442b1e
Show file tree

Hide file tree

Showing 10 changed files with 263 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Paul van Gent
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,13 @@
+# Bytehoven - Sheet music recognition with ResNet50
+
+![Bytehoven](images/Bytehoven.jpeg)
+
+This repository contains the resources used in the development of Bytehoven, a deep learning sheet music recognition model currently in development. [Please find the full tutorial here.](http://www.paulvangent.com/2017/12/07/deep-learning-music/) The current version recognises piano music from Bach, Beethoven, Brahms, Chopin, Grieg, Liszt, and Mozart.
+
+# Included files
+
+- datasets/Musicdata_Small.rar -- Dataset of small sized images (200*35px)
+- datasets/Musicdata_Medium.rar -- Dataset of medium sized images (400*70px)
+- model-weights/bytehoven-7-weights.hdf5 -- Model weights trained on medium sized set (full training log included)
+- ResNet50.py -- ResNet50 architecture implemented in Keras
+- run_ResNet50.py -- Example to initiate training run
diff --git a/ResNet50.py b/ResNet50.py
@@ -0,0 +1,146 @@
+'''
+Python file defining the ResNet50 architecture used in the project.
+
+2017 - Paul van Gent
+Adapted from https://github.com/fchollet/keras/blob/master/keras/applications/resnet50.py
+
+Licensed under the MIT License. Permission is hereby granted, free of charge, 
+to any person obtaining a copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation the rights to use, copy, modify,
+merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom 
+the Software is furnished to do so, subject to the following conditions:
+
+- The above copyright notice and this permission notice shall be included 
+  in all copies or substantial portions of the Software.
+'''
+
+import os
+import random
+
+#Disable GPU (out of memory errors)
+#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+
+import numpy as np
+from keras import layers
+from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
+from keras.models import Model
+import keras.backend as K
+
+def identity_block(input_tensor, kernel_size, filters, stage, block):
+    """Residual block whose shortcut is the unmodified input tensor.
+
+    The main path stacks three convolutions (1x1, `kernel_size`, 1x1), each
+    followed by batch normalisation, with ReLU after the first two. The
+    result is added to `input_tensor` and passed through a final ReLU.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: kernel size of the middle conv layer on the main path
+        filters: sequence of three integers, the filter counts of the
+            three conv layers on the main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+    # Returns
+        Output tensor for the block.
+    """
+    n_reduce, n_middle, n_expand = filters
+    # Batch normalisation is applied along the channel axis, whose index
+    # depends on the backend's image data format.
+    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
+    branch_label = str(stage) + block + '_branch'
+    conv_name_base = 'res' + branch_label
+    bn_name_base = 'bn' + branch_label
+
+    # First 1x1 convolution.
+    main = Conv2D(n_reduce, (1, 1), name=conv_name_base + '2a')(input_tensor)
+    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(main)
+    main = Activation('relu')(main)
+
+    # Middle convolution; 'same' padding keeps the spatial size unchanged.
+    main = Conv2D(n_middle, kernel_size, padding='same',
+                  name=conv_name_base + '2b')(main)
+    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(main)
+    main = Activation('relu')(main)
+
+    # Last 1x1 convolution; no activation until after the merge.
+    main = Conv2D(n_expand, (1, 1), name=conv_name_base + '2c')(main)
+    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(main)
+
+    merged = layers.add([main, input_tensor])
+    return Activation('relu')(merged)
+
+def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
+    """Residual block whose shortcut goes through its own conv layer.
+
+    The main path stacks three convolutions (1x1, `kernel_size`, 1x1), each
+    followed by batch normalisation, with ReLU after the first two. The
+    shortcut is a 1x1 convolution plus batch normalisation applied to
+    `input_tensor`. Both paths are added and passed through a final ReLU.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: kernel size of the middle conv layer on the main path
+        filters: sequence of three integers, the filter counts of the
+            three conv layers on the main path; the last one is also the
+            filter count of the shortcut conv layer
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+        strides: strides of the first main-path conv layer and of the
+            shortcut conv layer
+    # Returns
+        Output tensor for the block.
+    Note that from stage 3, the first conv layer at main path is with strides=(2,2)
+    And the shortcut should have strides=(2,2) as well
+    """
+    n_reduce, n_middle, n_expand = filters
+    # Batch normalisation is applied along the channel axis, whose index
+    # depends on the backend's image data format.
+    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
+    branch_label = str(stage) + block + '_branch'
+    conv_name_base = 'res' + branch_label
+    bn_name_base = 'bn' + branch_label
+
+    # Main path: strided 1x1 convolution first.
+    main = Conv2D(n_reduce, (1, 1), strides=strides,
+                  name=conv_name_base + '2a')(input_tensor)
+    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(main)
+    main = Activation('relu')(main)
+
+    main = Conv2D(n_middle, kernel_size, padding='same',
+                  name=conv_name_base + '2b')(main)
+    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(main)
+    main = Activation('relu')(main)
+
+    main = Conv2D(n_expand, (1, 1), name=conv_name_base + '2c')(main)
+    main = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(main)
+
+    # Shortcut path: same strides and output filter count as the main path
+    # so the two tensors can be added.
+    skip = Conv2D(n_expand, (1, 1), strides=strides,
+                  name=conv_name_base + '1')(input_tensor)
+    skip = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(skip)
+
+    merged = layers.add([main, skip])
+    return Activation('relu')(merged)
+
+
+def ResNet50(input_shape=None, classes=1000):
+    """Build the ResNet50 classification model.
+
+    The network consists of a 7x7 convolutional stem (conv, batch
+    normalisation, ReLU, max pooling), four stages of residual blocks
+    (3, 4, 6 and 3 blocks respectively), and a softmax classifier applied
+    to the flattened feature map.
+
+    # Arguments
+        input_shape: shape tuple handed to the Keras `Input` layer
+        classes: number of units in the final softmax layer
+    # Returns
+        A Keras `Model` instance named 'resnet50'.
+    """
+    # Same channel-axis selection as identity_block / conv_block.
+    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
+
+    x_input = Input(input_shape)
+    x = ZeroPadding2D((3, 3))(x_input)
+
+    x = Conv2D(
+        64, (7, 7), strides=(2, 2), padding='same', name='conv1')(x)
+    # Bug fix: the normalised tensor used to be bound to `X` (capital),
+    # so 'bn_conv1' was never connected to the graph and the ReLU below
+    # received the raw convolution output.
+    # NOTE(review): weights saved from the earlier graph do not contain
+    # 'bn_conv1' -- confirm existing checkpoints still load, or retrain.
+    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
+    x = Activation('relu')(x)
+    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
+
+    # (stage, filters, block labels, strides of the leading conv_block).
+    # The first label of each stage is a conv_block, the remaining ones are
+    # identity blocks. Stage 2 keeps strides (1, 1); later stages use (2, 2).
+    stage_specs = [
+        (2, [64, 64, 256], 'abc', (1, 1)),
+        (3, [128, 128, 512], 'abcd', (2, 2)),
+        (4, [256, 256, 1024], 'abcdef', (2, 2)),
+        (5, [512, 512, 2048], 'abc', (2, 2)),
+    ]
+    for stage, filters, block_labels, strides in stage_specs:
+        x = conv_block(x, 3, filters, stage=stage, block=block_labels[0],
+                       strides=strides)
+        for label in block_labels[1:]:
+            x = identity_block(x, 3, filters, stage=stage, block=label)
+
+    # The average pooling step is disabled in this variant; the feature
+    # map is flattened directly.
+    #x = AveragePooling2D((7, 7), name='avg_pool')(x)
+
+    x = Flatten()(x)
+    x = Dense(classes, activation='softmax', name='fc')(x)
+
+    # Create model.
+    model = Model(inputs = x_input, outputs = x, name='resnet50')
+
+    return model
diff --git a/datasets/Musicdata_Medium.rar b/datasets/Musicdata_Medium.rar
diff --git a/datasets/Musicdata_Small.rar b/datasets/Musicdata_Small.rar
diff --git a/images/Beethoven_Visualisation.jpg b/images/Beethoven_Visualisation.jpg
diff --git a/images/Bytehoven.jpeg b/images/Bytehoven.jpeg
diff --git a/images/Chopin_Visualisation.jpg b/images/Chopin_Visualisation.jpg
diff --git a/images/Filters.jpg b/images/Filters.jpg
diff --git a/run_ResNet50.py b/run_ResNet50.py
@@ -0,0 +1,83 @@
+import numpy as np
+from glob import glob
+from scipy import ndimage
+from keras import callbacks
+from keras.optimizers import Adamax, SGD, RMSprop
+
+import ResNet50
+
+def convert_to_one_hot(Y, C):
+    '''Builds a one-hot matrix from an array of integer class labels
+
+    Keyword Arguments:
+    Y -- numpy array containing label values; it is flattened before encoding
+    C -- number of classes, i.e. the size of the identity matrix used as lookup table
+
+    Returns:
+    Array with C rows and one column per label; each column holds a single 1
+    in the row given by its label
+    '''
+
+    lookup = np.eye(C)
+    flat_labels = Y.reshape(-1)
+    # Row i of the identity matrix is the one-hot vector of class i: pick one
+    # row per label, then transpose so the classes run along the first axis.
+    per_label_rows = lookup[flat_labels]
+    return per_label_rows.T
+
+def load_dataset(datapath, composers):
+    '''Loads dataset into memory
+
+    Every sub-folder of `datapath` is treated as one composer; all *.jpg
+    files inside it are read as greyscale images, scaled by 1/255 and given
+    a trailing channel axis of size 1.
+
+    Keyword Arguments:
+    datapath -- absolute or relative path to dataset location
+    composers -- list of composer names included in the dataset; the label of
+                 an image is the index of its folder name in this list
+
+    Returns:
+    Tuple (images, labels) of numpy arrays
+
+    Raises:
+    ValueError -- if a folder containing images is not named after an entry
+                  in `composers`
+    '''
+    # Local import keeps this function self-contained.
+    import os
+
+    X_train = []
+    Y_train = []
+
+    # os.path handles the separator of the running platform; the previous
+    # hard-coded '\\' only worked on Windows. Sorting makes the sample order
+    # reproducible across runs and file systems.
+    for folder in sorted(glob(os.path.join(datapath, '*'))):
+        if not os.path.isdir(folder):
+            continue
+        composer = os.path.basename(os.path.normpath(folder))
+        print('working on composer: %s' %composer)
+
+        files = sorted(glob(os.path.join(folder, '*.jpg')))
+        if not files:
+            # Nothing to label, so an unknown but empty folder is harmless.
+            continue
+
+        label = composers.index(composer)
+        for f in files:
+            # NOTE: ndimage.imread was deprecated in SciPy 1.0 and removed in
+            # SciPy 1.2, so this call needs an older SciPy release.
+            im = ndimage.imread(f, mode='L')
+            # Float literal avoids integer division under Python 2.
+            im = im / 255.0
+            im = im.reshape(im.shape[0], im.shape[1], 1)
+            X_train.append(im)
+            Y_train.append(label)
+
+    return np.asarray(X_train), np.asarray(Y_train)
+
+if __name__ == '__main__':
+    # Example training run: builds the model, loads the train and dev sets
+    # from disk, trains with per-epoch checkpoints and a CSV log, and saves
+    # the final model.
+    import os
+
+    # Label i corresponds to composers[i]; the list also fixes the number of
+    # output classes, so it is defined before the model is built.
+    composers = ['Bach', 'Beethoven', 'Brahms', 'Chopin', 'Grieg', 'Liszt', 'Mozart']
+    n_classes = len(composers)
+
+    print('setting model')
+    model = ResNet50.ResNet50(input_shape = (70, 400, 1), classes = n_classes)
+
+    epochs = 100
+    learning_rate = 0.001
+    lr_decay = 0.001/100
+
+    print('compiling model...')
+    # Alternative optimizers that can be swapped in:
+    #optimizer_instance = Adam(lr=learning_rate, decay=lr_decay)#lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=0.001)
+    #optimizer_instance = Adamax(lr=learning_rate, decay=lr_decay)
+    optimizer_instance = SGD(lr=learning_rate, decay=lr_decay)
+    #optimizer_instance = RMSprop(lr=learning_rate, decay=lr_decay)
+
+    model.compile(optimizer=optimizer_instance, loss='categorical_crossentropy', metrics=['acc'])
+
+    print('loading dataset......')
+    datapath = 'Dataset_Train_Medium/'
+    X_train, Y_train = load_dataset(datapath, composers)
+
+    datapath_val = 'Dataset_Dev_Medium/'
+    X_test, Y_test = load_dataset(datapath_val, composers)
+
+    print('applying one-hot-encoding')
+    # convert_to_one_hot returns one column per sample; transpose to get the
+    # one-row-per-sample layout passed to model.fit below.
+    Y_train = convert_to_one_hot(Y_train, n_classes).T
+    Y_test = convert_to_one_hot(Y_test, n_classes).T
+
+    print('setting up callbacks...')
+    # Create the output folders up front so that a missing directory cannot
+    # abort the run when the checkpoint or the CSV log is first written.
+    for out_dir in ('Models', 'model-weights'):
+        if not os.path.isdir(out_dir):
+            os.makedirs(out_dir)
+
+    nancheck = callbacks.TerminateOnNaN()
+    filepath = 'Models/weights-improvement-{epoch:02d}-{acc:.2f}.hdf5'
+    saver = callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, save_best_only=False, mode='max', period=1)
+    logger = callbacks.CSVLogger('model-weights/trainingresults.log')
+    callbacklist = [nancheck, saver, logger]
+
+    print('starting model fitting')
+    model.fit(X_train, Y_train, validation_data = (X_test, Y_test), epochs=epochs, batch_size=72, callbacks=callbacklist)
+
+    print('Saving model.........')
+    model.save('second_run.h5')