From c8cbaf5f3b6c26c159cc809720c61387cc88a9b2 Mon Sep 17 00:00:00 2001 From: trasse Date: Tue, 10 Mar 2020 12:45:55 +0100 Subject: [PATCH 1/2] solution to Issue 70 & new options --- n2v/internals/N2V_DataGenerator2.py | 266 ++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 n2v/internals/N2V_DataGenerator2.py diff --git a/n2v/internals/N2V_DataGenerator2.py b/n2v/internals/N2V_DataGenerator2.py new file mode 100644 index 0000000..c905daf --- /dev/null +++ b/n2v/internals/N2V_DataGenerator2.py @@ -0,0 +1,266 @@ +import numpy as np +from os.path import join +from glob import glob +import tifffile +from matplotlib import image +from csbdeep.utils import _raise + +class N2V_DataGenerator2(): + """ + The 'N2V_DataGenerator' enables training and validation data generation for Noise2Void. + """ + + def load_imgs(self, files, to32bit, dims='YX'): + """ + Helper to read a list of files. The images are not required to have same size, + but have to be of same dimensionality. + + Parameters + ---------- + files : list(String) + List of paths to tiff-files. + dims : String, optional(default='YX') + Dimensions of the images to read. Known dimensions are: 'TZYXC' + to32bit: makes conversion to 32 bit optional, if set to False the original datatype is used + + Returns + ------- + images : list(array(float)) + A list of the read tif-files. The images have dimensionality 'SZYXC' or 'SYXC' + """ + assert 'Y' in dims and 'X' in dims, "'dims' has to contain 'X' and 'Y'." + + tmp_dims = dims + for b in ['X', 'Y', 'Z', 'T', 'C']: + assert tmp_dims.count(b) <= 1, "'dims' has to contain {} at most once.".format(b) + tmp_dims = tmp_dims.replace(b, '') + + assert len(tmp_dims) == 0, "Unknown dimensions in 'dims'." + + if 'Z' in dims: + net_axes = 'ZYXC' + else: + net_axes = 'YXC' + + move_axis_from = () + move_axis_to = () + for d, b in enumerate(dims): + move_axis_from += tuple([d]) + if b == 'T': + move_axis_to += tuple([0]) + elif b == 'C': + move_axis_to += tuple([-1]) + elif b in 'XYZ': + if 'T' in dims: + move_axis_to += tuple([net_axes.index(b)+1]) + else: + move_axis_to += tuple([net_axes.index(b)]) + imgs = [] + for f in files: + if f.endswith('.tif') or f.endswith('.tiff'): + imread = tifffile.imread + elif f.endswith('.png'): + imread = image.imread + elif f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.JPEG') or f.endswith('.JPG'): + _raise(Exception("JPEG is not supported, because it is not loss-less and breaks the pixel-wise independence assumption.")) + else: + _raise("Filetype '{}' is not supported.".format(f)) + + if to32bit: + img = imread(f).astype(np.float32) + else: + img = imread(f) + assert len(img.shape) == len(dims), "Number of image dimensions doesn't match 'dims'." + + img = np.moveaxis(img, move_axis_from, move_axis_to) + + if not ('T' in dims): + img = img[np.newaxis] + + if not ('C' in dims): + img = img[..., np.newaxis] + + imgs.append(img) + + return imgs + + def load_imgs_from_directory(self, directory, filter='*.tif', dims='YX', names_back = False, to32bit = True): + """ + Helper to read all files which match 'filter' from a directory. The images are not required to have same size, + but have to be of same dimensionality. + + Parameters + ---------- + directory : String + Directory from which the data is loaded. + filter : String, optional(default='*.tif') + Filter to match the file names. + dims : String, optional(default='YX') + Dimensions of the images to read. Known dimensions are: 'TZYXC' + + names_back: if set to True, the function returns the names of the input files as list + + to32bit: makes conversion to 32 bit optional, if set to False the original datatype is used + + Returns + ------- + images : list(array(float)) + A list of the read tif-files. The images have dimensionality 'SZYXC' or 'SYXC' + (optional): files, names of the input files as list + """ + + files = glob(join(directory, filter)) + files.sort() + if names_back: + return files, self.load_imgs(files, to32bit, dims=dims) + else: + return self.load_imgs(files, to32bit, dims=dims) + + + def generate_patches_from_list(self, data, num_patches_per_img=None, shape=(256, 256), augment=True, shuffle=False): + """ + Extracts patches from 'list_data', which is a list of images, and returns them in a 'numpy-array'. The images + can have different dimensionality. + + Parameters + ---------- + data : list(array(float)) + List of images with dimensions 'SZYXC' or 'SYXC' + num_patches_per_img : int, optional(default=None) + Number of patches to extract per image. If 'None', as many patches as fit i nto the + dimensions are extracted. + shape : tuple(int), optional(default=(256, 256)) + Shape of the extracted patches. + augment : bool, optional(default=True) + Rotate the patches in XY-Plane and flip them along X-Axis. This only works if the patches are square in XY. + shuffle : bool, optional(default=False) + Shuffles extracted patches across all given images (data). + + Returns + ------- + patches : array(float) + Numpy-Array with the patches. The dimensions are 'SZYXC' or 'SYXC' + """ + patches = [] + for img in data: + for s in range(img.shape[0]): + p = self.generate_patches(img[s][np.newaxis], num_patches=num_patches_per_img, shape=shape, augment=augment) + patches.append(p) + + patches = np.concatenate(patches, axis=0) + + if shuffle: + np.random.shuffle(patches) + + return patches + + def generate_patches(self, data, num_patches=None, shape=(256, 256), augment=True, shuffle_patches = True): + """ + Extracts patches from 'data'. The patches can be augmented, which means they get rotated three times + in XY-Plane and flipped along the X-Axis. Augmentation leads to an eight-fold increase in training data. + + Parameters + ---------- + data : list(array(float)) + List of images with dimensions 'SZYXC' or 'SYXC' + num_patches : int, optional(default=None) + Number of patches to extract per image. If 'None', as many patches as fit i nto the + dimensions are extracted. + shape : tuple(int), optional(default=(256, 256)) + Shape of the extracted patches. + augment : bool, optional(default=True) + Rotate the patches in XY-Plane and flip them along X-Axis. This only works if the patches are square in XY. + + Returns + ------- + patches : array(float) + Numpy-Array containing all patches (randomly shuffled along S-dimension). + The dimensions are 'SZYXC' or 'SYXC' + """ + + patches = self.__extract_patches__(data, num_patches=num_patches, shape=shape, n_dims=len(data.shape)-2) + if shape[-2] == shape[-1]: + if augment: + patches = self.__augment_patches__(patches=patches) + else: + if augment: + print("XY-Plane is not square. Omit augmentation!") + + if shuffle_patches: + np.random.shuffle(patches) + #print('Generated patches:', patches.shape) + return patches + + def __extract_patches__(self, data, num_patches=None, shape=(256, 256), n_dims=2): + if num_patches == None: + patches = [] + if n_dims == 2: + if data.shape[1] >= shape[0] and data.shape[2] >= shape[1]: + for y in range(0, data.shape[1] - shape[0]+1, shape[0]): + for x in range(0, data.shape[2] - shape[1]+1, shape[1]): + patches.append(data[:, y:y + shape[0], x:x + shape[1]]) + + return np.concatenate(patches) + elif data.shape[1] == shape[0] and data.shape[2] == shape[1]: + return data + else: + print("'shape' is too big.") + elif n_dims == 3: + if data.shape[1] >= shape[0] and data.shape[2] >= shape[1] and data.shape[3] >= shape[2]: + for z in range(0, data.shape[1] - shape[0]+1, shape[0]): + for y in range(0, data.shape[2] - shape[1]+1, shape[1]): + for x in range(0, data.shape[3] - shape[2]+1, shape[2]): + patches.append(data[:, z:z + shape[0], y:y + shape[1], x:x + shape[2]]) + + return np.concatenate(patches) + elif data.shape[1] == shape[0] and data.shape[2] == shape[1] and data.shape[3] == shape[ + 2]: + return data + else: + print("'shape' is too big.") + else: + print('Not implemented for more than 4 dimensional (ZYXC) data.') + else: + patches = [] + if n_dims == 2: + for i in range(num_patches): + y, x = np.random.randint(0, data.shape[1] - shape[0] + 1), np.random.randint(0, + data.shape[ + 2] - shape[ + 1] + 1) + patches.append(data[0, y:y + shape[0], x:x + shape[1]]) + + if len(patches) > 1: + return np.stack(patches) + else: + return np.array(patches)[np.newaxis] + elif n_dims == 3: + for i in range(num_patches): + z, y, x = np.random.randint(0, data.shape[1] - shape[0] + 1), np.random.randint(0, + data.shape[ + 2] - shape[ + 1] + 1), np.random.randint( + 0, data.shape[3] - shape[2] + 1) + patches.append(data[0, z:z + shape[0], y:y + shape[1], x:x + shape[2]]) + + if len(patches) > 1: + return np.stack(patches) + else: + return np.array(patches)[np.newaxis] + else: + print('Not implemented for more than 4 dimensional (ZYXC) data.') + + def __augment_patches__(self, patches): + if len(patches.shape[1:-1]) == 2: + augmented = np.concatenate((patches, + np.rot90(patches, k=1, axes=(1, 2)), + np.rot90(patches, k=2, axes=(1, 2)), + np.rot90(patches, k=3, axes=(1, 2)))) + elif len(patches.shape[1:-1]) == 3: + augmented = np.concatenate((patches, + np.rot90(patches, k=1, axes=(2, 3)), + np.rot90(patches, k=2, axes=(2, 3)), + np.rot90(patches, k=3, axes=(2, 3)))) + + augmented = np.concatenate((augmented, np.flip(augmented, axis=-2))) + return augmented From 5d08eeb694fd1a795c028789c560dd13e90052aa Mon Sep 17 00:00:00 2001 From: Tobias Rasse Date: Tue, 10 Mar 2020 13:21:56 +0100 Subject: [PATCH 2/2] Changes in N2V_DataGenerator --- n2v/internals/N2V_DataGenerator.py | 43 +++-- n2v/internals/N2V_DataGenerator2.py | 266 ---------------------------- 2 files changed, 28 insertions(+), 281 deletions(-) delete mode 100644 n2v/internals/N2V_DataGenerator2.py diff --git a/n2v/internals/N2V_DataGenerator.py b/n2v/internals/N2V_DataGenerator.py index 33ca496..973a5ef 100644 --- a/n2v/internals/N2V_DataGenerator.py +++ b/n2v/internals/N2V_DataGenerator.py @@ -10,7 +10,7 @@ class N2V_DataGenerator(): The 'N2V_DataGenerator' enables training and validation data generation for Noise2Void. """ - def load_imgs(self, files, dims='YX'): + def load_imgs(self, files, to32bit, dims='YX'): """ Helper to read a list of files. The images are not required to have same size, but have to be of same dimensionality. @@ -21,7 +21,8 @@ def load_imgs(self, files, dims='YX'): List of paths to tiff-files. dims : String, optional(default='YX') Dimensions of the images to read. Known dimensions are: 'TZYXC' - + to32bit: makes conversion to 32 bit optional, if set to False the original datatype is used + Returns ------- images : list(array(float)) @@ -65,7 +66,10 @@ def load_imgs(self, files, dims='YX'): else: _raise("Filetype '{}' is not supported.".format(f)) - img = imread(f).astype(np.float32) + if to32bit: + img = imread(f).astype(np.float32) + else: + img = imread(f) assert len(img.shape) == len(dims), "Number of image dimensions doesn't match 'dims'." img = np.moveaxis(img, move_axis_from, move_axis_to) @@ -80,7 +84,7 @@ def load_imgs(self, files, dims='YX'): return imgs - def load_imgs_from_directory(self, directory, filter='*.tif', dims='YX'): + def load_imgs_from_directory(self, directory, filter='*.tif', dims='YX', names_back = False, to32bit = True): """ Helper to read all files which match 'filter' from a directory. The images are not required to have same size, but have to be of same dimensionality. @@ -94,15 +98,23 @@ def load_imgs_from_directory(self, directory, filter='*.tif', dims='YX'): dims : String, optional(default='YX') Dimensions of the images to read. Known dimensions are: 'TZYXC' + names_back: if set to True, the function returns the names of the input files as list + + to32bit: makes conversion to 32 bit optional, if set to False the original datatype is used + Returns ------- images : list(array(float)) A list of the read tif-files. The images have dimensionality 'SZYXC' or 'SYXC' + (optional): files, names of the input files as list """ files = glob(join(directory, filter)) files.sort() - return self.load_imgs(files, dims=dims) + if names_back: + return files, self.load_imgs(files, to32bit, dims=dims) + else: + return self.load_imgs(files, to32bit, dims=dims) def generate_patches_from_list(self, data, num_patches_per_img=None, shape=(256, 256), augment=True, shuffle=False): @@ -142,7 +154,7 @@ def generate_patches_from_list(self, data, num_patches_per_img=None, shape=(256, return patches - def generate_patches(self, data, num_patches=None, shape=(256, 256), augment=True): + def generate_patches(self, data, num_patches=None, shape=(256, 256), augment=True, shuffle_patches = True): """ Extracts patches from 'data'. The patches can be augmented, which means they get rotated three times in XY-Plane and flipped along the X-Axis. Augmentation leads to an eight-fold increase in training data. @@ -174,17 +186,18 @@ def generate_patches(self, data, num_patches=None, shape=(256, 256), augment=Tru if augment: print("XY-Plane is not square. Omit augmentation!") - np.random.shuffle(patches) - print('Generated patches:', patches.shape) + if shuffle_patches: + np.random.shuffle(patches) + #print('Generated patches:', patches.shape) return patches def __extract_patches__(self, data, num_patches=None, shape=(256, 256), n_dims=2): if num_patches == None: patches = [] if n_dims == 2: - if data.shape[1] > shape[0] and data.shape[2] > shape[1]: - for y in range(0, data.shape[1] - shape[0], shape[0]): - for x in range(0, data.shape[2] - shape[1], shape[1]): + if data.shape[1] >= shape[0] and data.shape[2] >= shape[1]: + for y in range(0, data.shape[1] - shape[0]+1, shape[0]): + for x in range(0, data.shape[2] - shape[1]+1, shape[1]): patches.append(data[:, y:y + shape[0], x:x + shape[1]]) return np.concatenate(patches) @@ -193,10 +206,10 @@ def __extract_patches__(self, data, num_patches=None, shape=(256, 256), n_dims=2 else: print("'shape' is too big.") elif n_dims == 3: - if data.shape[1] > shape[0] and data.shape[2] > shape[1] and data.shape[3] > shape[2]: - for z in range(0, data.shape[1] - shape[0], shape[0]): - for y in range(0, data.shape[2] - shape[1], shape[1]): - for x in range(0, data.shape[3] - shape[2], shape[2]): + if data.shape[1] >= shape[0] and data.shape[2] >= shape[1] and data.shape[3] >= shape[2]: + for z in range(0, data.shape[1] - shape[0]+1, shape[0]): + for y in range(0, data.shape[2] - shape[1]+1, shape[1]): + for x in range(0, data.shape[3] - shape[2]+1, shape[2]): patches.append(data[:, z:z + shape[0], y:y + shape[1], x:x + shape[2]]) return np.concatenate(patches) diff --git a/n2v/internals/N2V_DataGenerator2.py b/n2v/internals/N2V_DataGenerator2.py deleted file mode 100644 index c905daf..0000000 --- a/n2v/internals/N2V_DataGenerator2.py +++ /dev/null @@ -1,266 +0,0 @@ -import numpy as np -from os.path import join -from glob import glob -import tifffile -from matplotlib import image -from csbdeep.utils import _raise - -class N2V_DataGenerator2(): - """ - The 'N2V_DataGenerator' enables training and validation data generation for Noise2Void. - """ - - def load_imgs(self, files, to32bit, dims='YX'): - """ - Helper to read a list of files. The images are not required to have same size, - but have to be of same dimensionality. - - Parameters - ---------- - files : list(String) - List of paths to tiff-files. - dims : String, optional(default='YX') - Dimensions of the images to read. Known dimensions are: 'TZYXC' - to32bit: makes conversion to 32 bit optional, if set to False the original datatype is used - - Returns - ------- - images : list(array(float)) - A list of the read tif-files. The images have dimensionality 'SZYXC' or 'SYXC' - """ - assert 'Y' in dims and 'X' in dims, "'dims' has to contain 'X' and 'Y'." - - tmp_dims = dims - for b in ['X', 'Y', 'Z', 'T', 'C']: - assert tmp_dims.count(b) <= 1, "'dims' has to contain {} at most once.".format(b) - tmp_dims = tmp_dims.replace(b, '') - - assert len(tmp_dims) == 0, "Unknown dimensions in 'dims'." - - if 'Z' in dims: - net_axes = 'ZYXC' - else: - net_axes = 'YXC' - - move_axis_from = () - move_axis_to = () - for d, b in enumerate(dims): - move_axis_from += tuple([d]) - if b == 'T': - move_axis_to += tuple([0]) - elif b == 'C': - move_axis_to += tuple([-1]) - elif b in 'XYZ': - if 'T' in dims: - move_axis_to += tuple([net_axes.index(b)+1]) - else: - move_axis_to += tuple([net_axes.index(b)]) - imgs = [] - for f in files: - if f.endswith('.tif') or f.endswith('.tiff'): - imread = tifffile.imread - elif f.endswith('.png'): - imread = image.imread - elif f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.JPEG') or f.endswith('.JPG'): - _raise(Exception("JPEG is not supported, because it is not loss-less and breaks the pixel-wise independence assumption.")) - else: - _raise("Filetype '{}' is not supported.".format(f)) - - if to32bit: - img = imread(f).astype(np.float32) - else: - img = imread(f) - assert len(img.shape) == len(dims), "Number of image dimensions doesn't match 'dims'." - - img = np.moveaxis(img, move_axis_from, move_axis_to) - - if not ('T' in dims): - img = img[np.newaxis] - - if not ('C' in dims): - img = img[..., np.newaxis] - - imgs.append(img) - - return imgs - - def load_imgs_from_directory(self, directory, filter='*.tif', dims='YX', names_back = False, to32bit = True): - """ - Helper to read all files which match 'filter' from a directory. The images are not required to have same size, - but have to be of same dimensionality. - - Parameters - ---------- - directory : String - Directory from which the data is loaded. - filter : String, optional(default='*.tif') - Filter to match the file names. - dims : String, optional(default='YX') - Dimensions of the images to read. Known dimensions are: 'TZYXC' - - names_back: if set to True, the function returns the names of the input files as list - - to32bit: makes conversion to 32 bit optional, if set to False the original datatype is used - - Returns - ------- - images : list(array(float)) - A list of the read tif-files. The images have dimensionality 'SZYXC' or 'SYXC' - (optional): files, names of the input files as list - """ - - files = glob(join(directory, filter)) - files.sort() - if names_back: - return files, self.load_imgs(files, to32bit, dims=dims) - else: - return self.load_imgs(files, to32bit, dims=dims) - - - def generate_patches_from_list(self, data, num_patches_per_img=None, shape=(256, 256), augment=True, shuffle=False): - """ - Extracts patches from 'list_data', which is a list of images, and returns them in a 'numpy-array'. The images - can have different dimensionality. - - Parameters - ---------- - data : list(array(float)) - List of images with dimensions 'SZYXC' or 'SYXC' - num_patches_per_img : int, optional(default=None) - Number of patches to extract per image. If 'None', as many patches as fit i nto the - dimensions are extracted. - shape : tuple(int), optional(default=(256, 256)) - Shape of the extracted patches. - augment : bool, optional(default=True) - Rotate the patches in XY-Plane and flip them along X-Axis. This only works if the patches are square in XY. - shuffle : bool, optional(default=False) - Shuffles extracted patches across all given images (data). - - Returns - ------- - patches : array(float) - Numpy-Array with the patches. The dimensions are 'SZYXC' or 'SYXC' - """ - patches = [] - for img in data: - for s in range(img.shape[0]): - p = self.generate_patches(img[s][np.newaxis], num_patches=num_patches_per_img, shape=shape, augment=augment) - patches.append(p) - - patches = np.concatenate(patches, axis=0) - - if shuffle: - np.random.shuffle(patches) - - return patches - - def generate_patches(self, data, num_patches=None, shape=(256, 256), augment=True, shuffle_patches = True): - """ - Extracts patches from 'data'. The patches can be augmented, which means they get rotated three times - in XY-Plane and flipped along the X-Axis. Augmentation leads to an eight-fold increase in training data. - - Parameters - ---------- - data : list(array(float)) - List of images with dimensions 'SZYXC' or 'SYXC' - num_patches : int, optional(default=None) - Number of patches to extract per image. If 'None', as many patches as fit i nto the - dimensions are extracted. - shape : tuple(int), optional(default=(256, 256)) - Shape of the extracted patches. - augment : bool, optional(default=True) - Rotate the patches in XY-Plane and flip them along X-Axis. This only works if the patches are square in XY. - - Returns - ------- - patches : array(float) - Numpy-Array containing all patches (randomly shuffled along S-dimension). - The dimensions are 'SZYXC' or 'SYXC' - """ - - patches = self.__extract_patches__(data, num_patches=num_patches, shape=shape, n_dims=len(data.shape)-2) - if shape[-2] == shape[-1]: - if augment: - patches = self.__augment_patches__(patches=patches) - else: - if augment: - print("XY-Plane is not square. Omit augmentation!") - - if shuffle_patches: - np.random.shuffle(patches) - #print('Generated patches:', patches.shape) - return patches - - def __extract_patches__(self, data, num_patches=None, shape=(256, 256), n_dims=2): - if num_patches == None: - patches = [] - if n_dims == 2: - if data.shape[1] >= shape[0] and data.shape[2] >= shape[1]: - for y in range(0, data.shape[1] - shape[0]+1, shape[0]): - for x in range(0, data.shape[2] - shape[1]+1, shape[1]): - patches.append(data[:, y:y + shape[0], x:x + shape[1]]) - - return np.concatenate(patches) - elif data.shape[1] == shape[0] and data.shape[2] == shape[1]: - return data - else: - print("'shape' is too big.") - elif n_dims == 3: - if data.shape[1] >= shape[0] and data.shape[2] >= shape[1] and data.shape[3] >= shape[2]: - for z in range(0, data.shape[1] - shape[0]+1, shape[0]): - for y in range(0, data.shape[2] - shape[1]+1, shape[1]): - for x in range(0, data.shape[3] - shape[2]+1, shape[2]): - patches.append(data[:, z:z + shape[0], y:y + shape[1], x:x + shape[2]]) - - return np.concatenate(patches) - elif data.shape[1] == shape[0] and data.shape[2] == shape[1] and data.shape[3] == shape[ - 2]: - return data - else: - print("'shape' is too big.") - else: - print('Not implemented for more than 4 dimensional (ZYXC) data.') - else: - patches = [] - if n_dims == 2: - for i in range(num_patches): - y, x = np.random.randint(0, data.shape[1] - shape[0] + 1), np.random.randint(0, - data.shape[ - 2] - shape[ - 1] + 1) - patches.append(data[0, y:y + shape[0], x:x + shape[1]]) - - if len(patches) > 1: - return np.stack(patches) - else: - return np.array(patches)[np.newaxis] - elif n_dims == 3: - for i in range(num_patches): - z, y, x = np.random.randint(0, data.shape[1] - shape[0] + 1), np.random.randint(0, - data.shape[ - 2] - shape[ - 1] + 1), np.random.randint( - 0, data.shape[3] - shape[2] + 1) - patches.append(data[0, z:z + shape[0], y:y + shape[1], x:x + shape[2]]) - - if len(patches) > 1: - return np.stack(patches) - else: - return np.array(patches)[np.newaxis] - else: - print('Not implemented for more than 4 dimensional (ZYXC) data.') - - def __augment_patches__(self, patches): - if len(patches.shape[1:-1]) == 2: - augmented = np.concatenate((patches, - np.rot90(patches, k=1, axes=(1, 2)), - np.rot90(patches, k=2, axes=(1, 2)), - np.rot90(patches, k=3, axes=(1, 2)))) - elif len(patches.shape[1:-1]) == 3: - augmented = np.concatenate((patches, - np.rot90(patches, k=1, axes=(2, 3)), - np.rot90(patches, k=2, axes=(2, 3)), - np.rot90(patches, k=3, axes=(2, 3)))) - - augmented = np.concatenate((augmented, np.flip(augmented, axis=-2))) - return augmented