diff --git a/blm/blm_cluster.py b/blm/blm_cluster.py
index c54b207..c443502 100644
--- a/blm/blm_cluster.py
+++ b/blm/blm_cluster.py
@@ -13,36 +13,35 @@ def _main(argv=None):
 
 
 
-    
+
     # --------------------------------------------------------------------------------
     # Check inputs
     # --------------------------------------------------------------------------------
     # Create the parser and add argument
     parser = argparse.ArgumentParser(description="BLM cluster script")
     parser.add_argument('inputs_yml', type=str, nargs='?', default=os.path.join(
-                        os.path.dirname(os.path.realpath(__file__)),
-                        'blm_config.yml'),
-                        help='Path to inputs yaml file')
+        os.path.dirname(os.path.realpath(__file__)),
+        'blm_config.yml'),
+        help='Path to inputs yaml file')
 
     # Parse the arguments
     args = parser.parse_args()
-    
-    # If the argument is just a filename without a directory, 
+
+    # If the argument is just a filename without a directory,
     # prepend the current working directory
     if os.path.dirname(args.inputs_yml) == '':
         args.inputs_yml = os.path.join(os.getcwd(), args.inputs_yml)
     inputs_yml = args.inputs_yml
-    
+
     # Load the inputs yaml file
     with open(inputs_yml, 'r') as stream:
-        inputs = yaml.load(stream,Loader=yaml.FullLoader)
+        inputs = yaml.load(stream, Loader=yaml.FullLoader)
 
-
     # --------------------------------------------------------------------------------
     # Read Output directory, work out number of batches
     # --------------------------------------------------------------------------------
     OutDir = inputs['outdir']
-    
+
     # Get number of nodes
     numNodes = inputs['numNodes']
 
@@ -112,18 +111,19 @@ def _main(argv=None):
 
         # Raise a value error if none of the above
         else:
-            raise ValueError('The cluster type, ' + inputs['clusterType'] + ', is not recognized.')
+            raise ValueError('The cluster type, ' +
+                             inputs['clusterType'] + ', is not recognized.')
 
     else:
         # Raise a value error if the cluster type was not specified
         raise ValueError('Please specify "clusterType" in the inputs yaml.')
-    
+
     # --------------------------------------------------------------------------------
     # Connect to client
     # --------------------------------------------------------------------------------
-    
+
     # Connect to cluster
-    client = Client(cluster) 
+    client = Client(cluster)
 
     # --------------------------------------------------------------------------------
     # Run Setup
@@ -140,7 +140,7 @@ def _main(argv=None):
     # tries to rerun it every time you call the result function again, e.g. after each
     # stage of the pipeline).
     del future_s
-    
+
     # --------------------------------------------------------------------------------
     # Run Batch Jobs
     # --------------------------------------------------------------------------------
@@ -152,12 +152,13 @@ def _main(argv=None):
     futures = []
 
     # Submit jobs
-    for i in np.arange(1,nb+1):
+    for i in np.arange(1, nb+1):
 
         # Run the jobNum^{th} job.
-        future_b = client.submit(compute_product_forms, i, inputs_yml, pure=False)
+        future_b = client.submit(
+            compute_product_forms, i, inputs_yml, pure=False)
 
-        # Append to list 
+        # Append to list
         futures.append(future_b)
 
     # Completed jobs
@@ -181,7 +182,7 @@ def _main(argv=None):
     fileGroups = np.array_split(np.arange(nb)+1, numNodes)
 
     # Check for empty filegroups
-    fileGroups = [i for i in fileGroups if i.size!=0]
+    fileGroups = [i for i in fileGroups if i.size != 0]
 
     # Number of file groups
     numFileGroups = len(fileGroups)
@@ -190,19 +191,21 @@ def _main(argv=None):
     futures = []
 
     # Loop through nodes
-    for node in np.arange(1,numFileGroups + 1):
+    for node in np.arange(1, numFileGroups + 1):
 
         # Run the jobNum^{th} job.
-        future_c = client.submit(combine_batch_designs, 'XtX', OutDir, fileGroups[node-1], pure=False)
+        future_c = client.submit(
+            combine_batch_designs, 'XtX', OutDir, fileGroups[node-1], pure=False)
 
-        # Append to list 
+        # Append to list
         futures.append(future_c)
 
     # Loop through nodes
-    for node in np.arange(1,numNodes + 1):
+    for node in np.arange(1, numNodes + 1):
 
         # Give the i^{th} node the i^{th} partition of the data
-        future_b = client.submit(combine_batch_masking, nb, node, numNodes, maskJob, inputs_yml, pure=False)
+        future_b = client.submit(
+            combine_batch_masking, nb, node, numNodes, maskJob, inputs_yml, pure=False)
 
         # Append to list
         futures.append(future_b)
@@ -220,7 +223,8 @@ def _main(argv=None):
     maskJob = True
 
     # The first job does the analysis mask (this is why the 3rd argument is set to true)
-    future_b_first = client.submit(combine_batch_masking, nb, numNodes + 1, numNodes, maskJob, inputs_yml, pure=False)
+    future_b_first = client.submit(
+        combine_batch_masking, nb, numNodes + 1, numNodes, maskJob, inputs_yml, pure=False)
 
     res = future_b_first.result()
     del future_b_first, res
@@ -239,7 +243,8 @@ def _main(argv=None):
     for jobNum in np.arange(pnvb):
 
         # Run the jobNum^{th} job.
-        future_c = client.submit(output_results, jobNum, pnvb, nb, inputs_yml, pure=False)
+        future_c = client.submit(
+            output_results, jobNum, pnvb, nb, inputs_yml, pure=False)
 
         # Append to list
         futures.append(future_c)
@@ -272,5 +277,6 @@ def _main(argv=None):
     client.close()
     client.shutdown()
 
+
 if __name__ == "__main__":
     _main(sys.argv[1:])
diff --git a/blm/lib/blm_batch.py b/blm/lib/blm_batch.py
index 915db7c..4601bbb 100644
--- a/blm/lib/blm_batch.py
+++ b/blm/lib/blm_batch.py
@@ -1,35 +1,36 @@
+from blm.lib.fileio import *
+import yaml
+import os
+import sys
+import nibabel as nib
+from numpy.lib.format import open_memmap
+import numpy as np
 import warnings as w
 # These warnings are caused by numpy updates and should not be
 # output.
-w.simplefilter(action = 'ignore', category = FutureWarning) -import numpy as np -from numpy.lib.format import open_memmap -import nibabel as nib -import sys -import os -import yaml +w.simplefilter(action='ignore', category=FutureWarning) np.set_printoptions(threshold=sys.maxsize) -from blm.lib.fileio import * + def compute_product_forms(*args): # Change to blm directory - os.chdir(os.path.dirname(os.path.realpath(__file__))) + os.chdir(os.path.dirname(os.path.realpath(__file__))) # Obtain batch number batchNo = args[0] # Work out if which file to look at for inputs - if len(args)==1 or (not args[1]): + if len(args) == 1 or (not args[1]): # Load in inputs - with open(os.path.join(os.getcwd(),'..','blm_config.yml'), 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) + with open(os.path.join(os.getcwd(), '..', 'blm_config.yml'), 'r') as stream: + inputs = yaml.load(stream, Loader=yaml.FullLoader) else: if type(args[1]) is str: # In this case inputs file is first argument with open(os.path.join(args[1]), 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) - else: + inputs = yaml.load(stream, Loader=yaml.FullLoader) + else: # In this case inputs structure is first argument. inputs = args[1] @@ -81,16 +82,16 @@ def compute_product_forms(*args): # Number of non-zero voxles v_am = np.prod(Y0.shape) - # Get the maximum memory a NIFTI could take in storage. - NIFTImem = sys.getsizeof(np.zeros([v_am,1],dtype='uint64')) + # Get the maximum memory a NIFTI could take in storage. + NIFTImem = sys.getsizeof(np.zeros([v_am, 1], dtype='uint64')) # Similar to blksize in SwE, we divide by 8 times the size of a nifti # to work out how many blocks we use. - blksize = int(np.floor(MAXMEM/8/NIFTImem/(p**2))); + blksize = int(np.floor(MAXMEM/8/NIFTImem/(p**2))) # Reduce X to X for this block. X = loadFile(inputs['X']) - X = X[(blksize*(batchNo-1)):min((blksize*batchNo),len(Y_files))] + X = X[(blksize*(batchNo-1)):min((blksize*batchNo), len(Y_files))] # Mask volumes (if they are given) if 'data_mask_files' in inputs: @@ -108,7 +109,7 @@ def compute_product_forms(*args): if len(M_files) == len(Y_files): # In this case we have a mask per Y volume - M_files = M_files[(blksize*(batchNo-1)):min((blksize*batchNo),len(M_files))] + M_files = M_files[(blksize*(batchNo-1)):min((blksize*batchNo), len(M_files))] else: @@ -140,7 +141,7 @@ def compute_product_forms(*args): # Load the file and check it's shape is 3d (as oppose to 4d with a 4th dimension # of 1) M_a = np.asarray(loadFile(inputs['analysis_mask']).dataobj) - M_a = M_a.reshape((M_a.shape[0],M_a.shape[1],M_a.shape[2])) + M_a = M_a.reshape((M_a.shape[0], M_a.shape[1], M_a.shape[2])) else: @@ -148,8 +149,8 @@ def compute_product_forms(*args): M_a = None # Reduce Y_files to only Y files for this block - Y_files = Y_files[(blksize*(batchNo-1)):min((blksize*batchNo),len(Y_files))] - + Y_files = Y_files[(blksize*(batchNo-1)):min((blksize*batchNo), len(Y_files))] + # Verify input verifyInput(Y_files, M_files, Y0) @@ -160,48 +161,50 @@ def compute_product_forms(*args): # Work out voxel specific design MX = applyMask(X, M) - # Get X'Y and Y'Y. + # Get X'Y and Y'Y. # ------------------------------------------------------------------ # Developer note: For these product matrices we do not need to worry # about missing rows in X. This is as the corresponding elements in - # Y should already be set to 0 and, as such, won't have any affect + # Y should already be set to 0 and, as such, won't have any affect # on these products. 
# ------------------------------------------------------------------ - # We are careful how we compute X'Y and Y'Y, in case either p is + # We are careful how we compute X'Y and Y'Y, in case either p is # large. We save these "chunk by chunk" as memory map objects just # in case they don't fit in working memory (this is only usually an # issue for very large designs). - memorySafeAtB(X.reshape(1,X.shape[0],X.shape[1]),Y,MAXMEM,os.path.join(OutDir,"tmp","XtY.npy")) - memorySafeAtB(Y,Y,MAXMEM,os.path.join(OutDir,"tmp","YtY.npy")) + memorySafeAtB(X.reshape(1, X.shape[0], X.shape[1]), Y, MAXMEM, os.path.join( + OutDir, "tmp", "XtY.npy")) + memorySafeAtB(Y, Y, MAXMEM, os.path.join(OutDir, "tmp", "YtY.npy")) # In a spatially varying design XtX has dimensions n by p by p. We # reshape to n by p^2 so that we can save as a csv. - XtX = MX.transpose(0,2,1) @ MX + XtX = MX.transpose(0, 2, 1) @ MX XtX = XtX.reshape([XtX.shape[0], XtX.shape[1]*XtX.shape[2]]) # Record product matrix X'X. - np.save(os.path.join(OutDir,"tmp","XtX" + str(batchNo)), - XtX) + np.save(os.path.join(OutDir, "tmp", "XtX" + str(batchNo)), + XtX) # Get map of number of observations at voxel. n_sv = nib.Nifti1Image(n_sv, Y0.affine, header=Y0.header) - nib.save(n_sv, os.path.join(OutDir,'tmp', - 'blm_vox_n_batch'+ str(batchNo) + '.nii')) + nib.save(n_sv, os.path.join(OutDir, 'tmp', + 'blm_vox_n_batch' + str(batchNo) + '.nii')) # Get Mmap, indicating which design each voxel must use for analysis, - # using an integer representing the order in which X'X, Z'X and Z'Z + # using an integer representing the order in which X'X, Z'X and Z'Z # appear in the `XtX.npy`, `ZtX.npy` and `ZtZ.npy` files respectively. Mmap = nib.Nifti1Image(Mmap, Y0.affine, header=Y0.header) - nib.save(Mmap, os.path.join(OutDir,'tmp', - 'blm_vox_uniqueM_batch'+ str(batchNo) + '.nii')) + nib.save(Mmap, os.path.join(OutDir, 'tmp', + 'blm_vox_uniqueM_batch' + str(batchNo) + '.nii')) w.resetwarnings() + def verifyInput(Y_files, M_files, Y0): # Obtain information about zero-th scan @@ -255,11 +258,11 @@ def verifyInput(Y_files, M_files, Y0): # ============================================================================ -# +# # The below function reads in the input files and thresholds and returns; Y # (as a numpy array), the overall mask (as a 3D numpy array), the spatially -# varying number of observationss (as a 3D numpy array), the array Y!=0 -# (resized appropriately for later computation) and a uniqueness map +# varying number of observationss (as a 3D numpy array), the array Y!=0 +# (resized appropriately for later computation) and a uniqueness map # representing which voxel has which design. # # ---------------------------------------------------------------------------- @@ -292,7 +295,7 @@ def obtainY(Y_files, M_files, M_t, M_a): # Load in one nifti to check NIFTI size Y0 = loadFile(Y_files[0]) d = np.asarray(Y0.dataobj, dtype=np.float64) - + # Get number of voxels. v = np.prod(d.shape) @@ -319,47 +322,47 @@ def obtainY(Y_files, M_files, M_t, M_a): # Mask Y if necesary if M_files: - + # Apply mask - M_indiv = np.asarray(loadFile(M_files[i]).dataobj) + M_indiv = np.asarray(loadFile(M_files[i]).dataobj) d = np.multiply( np.asarray(Y_indiv.dataobj, dtype=np.float64), M_indiv) - else: - #Just load in Y + else: + # Just load in Y d = np.asarray(Y_indiv.dataobj, dtype=np.float64) # If theres an initial threshold for the data apply it. 
if M_t is not None: - d[d0)[0]] = 1 - + Mask_am[np.where(np.count_nonzero(Y, axis=0) > 0)[0]] = 1 + # Apply full mask to Y - Y_fm = Y[:, np.where(np.count_nonzero(Y, axis=0)>0)[0]] + Y_fm = Y[:, np.where(np.count_nonzero(Y, axis=0) > 0)[0]] # Work out the mask. - M = (Y_fm!=0) + M = (Y_fm != 0) # Number of voxels in analysis mask if M_a is not None: @@ -383,18 +386,18 @@ def obtainY(Y_files, M_files, M_t, M_a): # the original ordering, as unique_id_nifti is now based on said # ordering) _, idx = np.unique(M, axis=1, return_index=True) - M = M[:,np.sort(idx)] + M = M[:, np.sort(idx)] # Reshape Y - Y = Y.reshape(Y.shape[0], Y.shape[1], 1).transpose((1,0,2)) + Y = Y.reshape(Y.shape[0], Y.shape[1], 1).transpose((1, 0, 2)) # Return results return Y, n_sv, M, Mmap # ============================================================================ -# -# The below function takes in a (2D) array, X, and applies a mask to it, +# +# The below function takes in a (2D) array, X, and applies a mask to it, # resulting in a 3D array, MX, where whenever data was missing at voxel v for # observations i1, i2,... etc, MX[v,:,:] is X but with rows i1, i2,... i3 # replaced with zeros. @@ -418,16 +421,16 @@ def obtainY(Y_files, M_files, M_t, M_a): # - `MX`: The (3D) "Masked" version of X. # # ============================================================================ -def applyMask(X,M): +def applyMask(X, M): # Get M in a form where each voxel's mask is mutliplied # by X M = M.transpose().reshape([M.shape[1], 1, M.shape[0]]) - Xt=X.transpose() + Xt = X.transpose() # Obtain X for each voxel MXt = np.multiply(M, Xt) - MX = MXt.transpose(0,2,1) + MX = MXt.transpose(0, 2, 1) return MX @@ -436,7 +439,7 @@ def applyMask(X,M): # # Given two 3D numpy arrays, A and B, of shape (1, k1, k2) and (v, k1, k3) # respectively, the below function calculates the (v, k2, k3) matrix A'B -# and outputs it to a file in a "memory safe" way, ensuring that the +# and outputs it to a file in a "memory safe" way, ensuring that the # (v, k2, k3) matrix is calculated and output in managable chunks, one at a # time. # @@ -455,14 +458,14 @@ def applyMask(X,M): # - `filename`: The name of the file. # # ============================================================================ -def memorySafeAtB(A,B,MAXMEM,filename): +def memorySafeAtB(A, B, MAXMEM, filename): # Check if file is in use fileLocked = True while fileLocked: try: # Create lock file, so other jobs know we are writing to this file - os.open(filename + ".lock", os.O_CREAT|os.O_EXCL|os.O_RDWR) + os.open(filename + ".lock", os.O_CREAT | os.O_EXCL | os.O_RDWR) fileLocked = False except FileExistsError: fileLocked = True @@ -475,7 +478,8 @@ def memorySafeAtB(A,B,MAXMEM,filename): if not os.path.isfile(filename): # Create a memory-mapped .npy file with the dimensions and dtype we want - M = open_memmap(filename, mode='w+', dtype='float64', shape=(v,pORone)) + M = open_memmap(filename, mode='w+', + dtype='float64', shape=(v, pORone)) # Work out the number of voxels we can save at a time. 
# (8 bytes per numpy float exponent multiplied by 10 @@ -483,18 +487,20 @@ def memorySafeAtB(A,B,MAXMEM,filename): vPerBlock = MAXMEM/(10*8*pORone) # Work out the indices for each group of voxels - voxelGroups = np.array_split(np.arange(v, dtype='int32'), v//vPerBlock+1) + voxelGroups = np.array_split( + np.arange(v, dtype='int32'), v//vPerBlock+1) # Loop through each group of voxels saving A'B for those voxels for vb in range(int(v//vPerBlock+1)): - M[voxelGroups[vb],:]=(A.transpose(0,2,1) @ B[voxelGroups[vb],:,:]).reshape(len(voxelGroups[vb]),pORone) - + M[voxelGroups[vb], :] = (A.transpose( + 0, 2, 1) @ B[voxelGroups[vb], :, :]).reshape(len(voxelGroups[vb]), pORone) + # Otherwise we add to the memory map that does exist else: # Load in the file but in memory map mode - M = np.load(filename,mmap_mode='r+') - M = M.reshape((v,pORone)) + M = np.load(filename, mmap_mode='r+') + M = M.reshape((v, pORone)) # Work out the number of voxels we can save at a time. # (8 bytes per numpy float exponent multiplied by 10 @@ -502,11 +508,14 @@ def memorySafeAtB(A,B,MAXMEM,filename): vPerBlock = MAXMEM/(10*8*pORone) # Work out the indices for each group of voxels - voxelGroups = np.array_split(np.arange(v, dtype='int32'), v//vPerBlock+1) - + voxelGroups = np.array_split( + np.arange(v, dtype='int32'), v//vPerBlock+1) + # Loop through each group of voxels saving A'B for those voxels for vb in range(int(v//vPerBlock+1)): - M[voxelGroups[vb],:]=M[voxelGroups[vb],:]+(A.transpose(0,2,1) @ B[voxelGroups[vb],:,:]).reshape(len(voxelGroups[vb]),pORone) + M[voxelGroups[vb], :] = M[voxelGroups[vb], :] + \ + (A.transpose(0, 2, 1) @ + B[voxelGroups[vb], :, :]).reshape(len(voxelGroups[vb]), pORone) # Delete M from memory (important!) del M @@ -515,5 +524,6 @@ def memorySafeAtB(A,B,MAXMEM,filename): # file os.remove(filename + ".lock") + if __name__ == "__main__": main() diff --git a/blm/lib/blm_concat.py b/blm/lib/blm_concat.py index b5e838b..54e710f 100644 --- a/blm/lib/blm_concat.py +++ b/blm/lib/blm_concat.py @@ -1,14 +1,14 @@ +from blm.lib.fileio import * +import yaml +import os +import sys +import nibabel as nib +import numpy as np import warnings as w # This warning is caused by numpy updates and should # be ignored for now. -w.simplefilter(action = 'ignore', category = FutureWarning) -import numpy as np -import nibabel as nib -import sys -import os -import yaml +w.simplefilter(action='ignore', category=FutureWarning) np.set_printoptions(threshold=sys.maxsize) -from blm.lib.fileio import * # Developer notes: # -------------------------------------------------------------------------- @@ -19,18 +19,19 @@ # decided by the user specified thresholds. These voxels will typically # be on the edge of the brain and look like a "ring" around the brain, # hence "_r" for ring. -# -# _i - This means that this is an array of values corresponding to voxels +# +# _i - This means that this is an array of values corresponding to voxels # which are present in all n studies. These will usually look like -# a smaller mask place inside the whole study mask. Hence "_i" for +# a smaller mask place inside the whole study mask. Hence "_i" for # inner. # # _sv - This means this variable is spatially varying (There is a reading -# per voxel). +# per voxel). 
# # -------------------------------------------------------------------------- # Author: Tom Maullin (04/02/2019) + def combine_batch_masking(*args): # Work out number of batchs @@ -46,19 +47,19 @@ def combine_batch_masking(*args): # ---------------------------------------------------------------------- # Check inputs # ---------------------------------------------------------------------- - if len(args)==4 or (not args[4]): + if len(args) == 4 or (not args[4]): # Load in inputs with open(os.path.join( - os.path.dirname(os.path.realpath(__file__)), - '..', - 'blm_config.yml'), 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) + os.path.dirname(os.path.realpath(__file__)), + '..', + 'blm_config.yml'), 'r') as stream: + inputs = yaml.load(stream, Loader=yaml.FullLoader) else: if type(args[4]) is str: # In this case inputs file is first argument with open(os.path.join(args[4]), 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) - else: + inputs = yaml.load(stream, Loader=yaml.FullLoader) + else: # In this case inputs structure is first argument. inputs = args[4] @@ -72,7 +73,7 @@ def combine_batch_masking(*args): L1 = np.array(L1) p = L1.shape[0] del L1 - + # Read in the nifti size and work out number of voxels. with open(inputs['Y_files']) as a: nifti_path = a.readline().replace('\n', '') @@ -81,12 +82,11 @@ def combine_batch_masking(*args): NIFTIsize = nifti.shape v = int(np.prod(NIFTIsize)) - # Number of observations X = loadFile(inputs['X']) n = X.shape[0] - # Check if the maximum memory is saved. + # Check if the maximum memory is saved. if 'MAXMEM' in inputs: MAXMEM = eval(inputs['MAXMEM']) else: @@ -103,35 +103,35 @@ def combine_batch_masking(*args): # observations) # -------------------------------------------------------------------------------- - # Number of images to look at for each node + # Number of images to look at for each node n_images = n_b//numNodes+1 if ((1 + node*n_images) >= (n_b + 1)) and ((1+(node-1)*n_images) <= (n_b + 1)): - + # Work out loop range - loopRange = range(1+(node-1)*n_images,(n_b+1)) - + loopRange = range(1+(node-1)*n_images, (n_b+1)) + # This is the last node lastNode = True elif ((1+(node-1)*n_images) <= (n_b + 1)): - + # Work out loop range - loopRange = range(1+(node-1)*n_images,1+node*n_images) - + loopRange = range(1+(node-1)*n_images, 1+node*n_images) + # This is not the last node lastNode = False else: # Empty loop range - loopRange = range(0,0) + loopRange = range(0, 0) # This is not the last node (this one's redundant) lastNode = False # Empty loop - emptyLoop = (len(loopRange)==0) + emptyLoop = (len(loopRange) == 0) # Check if this is the first image we're looking at firstImage = True @@ -142,9 +142,8 @@ def combine_batch_masking(*args): if firstImage: # Read in n (spatially varying) - n_sv = np.asarray(loadFile(os.path.join(OutDir,"tmp", - "blm_vox_n_batch" + str(batchNo) + ".nii")).dataobj, dtype=np.int64) - + n_sv = np.asarray(loadFile(os.path.join(OutDir, "tmp", + "blm_vox_n_batch" + str(batchNo) + ".nii")).dataobj, dtype=np.int64) # No longer looking at the first image firstImage = False @@ -152,17 +151,17 @@ def combine_batch_masking(*args): else: # Obtain the full nmap. 
- n_sv = n_sv + np.asarray(loadFile(os.path.join(OutDir,"tmp", - "blm_vox_n_batch" + str(batchNo) + ".nii")).dataobj, dtype=np.int64) + n_sv = n_sv + np.asarray(loadFile(os.path.join(OutDir, "tmp", + "blm_vox_n_batch" + str(batchNo) + ".nii")).dataobj, dtype=np.int64) # Remove file we just read - #os.remove(os.path.join(OutDir,"tmp", "blm_vox_n_batch" + str(batchNo) + ".nii")) + # os.remove(os.path.join(OutDir,"tmp", "blm_vox_n_batch" + str(batchNo) + ".nii")) # Filename for nmap - n_fname = os.path.join(OutDir,'blm_vox_n.nii') + n_fname = os.path.join(OutDir, 'blm_vox_n.nii') # Filename for dfmap - df_fname = os.path.join(OutDir,'blm_vox_edf.nii') + df_fname = os.path.join(OutDir, 'blm_vox_edf.nii') if not emptyLoop: @@ -171,7 +170,8 @@ def combine_batch_masking(*args): while fileLocked: try: # Create lock file, so other jobs know we are writing to this file - f=os.open(os.path.join(OutDir,"config_write.lock"), os.O_CREAT|os.O_EXCL|os.O_RDWR) + f = os.open(os.path.join(OutDir, "config_write.lock"), + os.O_CREAT | os.O_EXCL | os.O_RDWR) fileLocked = False except FileExistsError: fileLocked = True @@ -182,7 +182,8 @@ def combine_batch_masking(*args): if os.path.exists(df_fname): # df_sv = n_sv + loadFile(df_fname).get_fdata() - df_sv = n_sv + np.asarray(loadFile(df_fname).dataobj, dtype=np.int64) + df_sv = n_sv + \ + np.asarray(loadFile(df_fname).dataobj, dtype=np.int64) os.remove(df_fname) else: df_sv = np.array(n_sv) @@ -206,16 +207,16 @@ def combine_batch_masking(*args): else: dfmap = nib.Nifti1Image(df_sv-p, nifti.affine, - header=nifti.header) + header=nifti.header) nib.save(dfmap, df_fname) del dfmap # Delete lock file, so other jobs know they can now write to the # file - os.remove(os.path.join(OutDir,"config_write.lock")) + os.remove(os.path.join(OutDir, "config_write.lock")) os.close(f) - + # -------------------------------------------------------------------------------- # Create Mask # -------------------------------------------------------------------------------- @@ -223,23 +224,25 @@ def combine_batch_masking(*args): if maskJob: # Read in degrees of freedom - df_sv = np.asarray(loadFile(os.path.join(OutDir,'blm_vox_edf.nii')).dataobj, dtype=np.int64) + df_sv = np.asarray(loadFile(os.path.join( + OutDir, 'blm_vox_edf.nii')).dataobj, dtype=np.int64) # Remove non-zero voxels - df_sv = np.maximum(df_sv,0) + df_sv = np.maximum(df_sv, 0) # Write to file dfmap = nib.Nifti1Image(df_sv, nifti.affine, - header=nifti.header) + header=nifti.header) nib.save(dfmap, df_fname) del dfmap # Read in n (spatially varying) - n_sv = np.asarray(loadFile(os.path.join(OutDir,'blm_vox_n.nii')).dataobj, dtype=np.int64) + n_sv = np.asarray(loadFile(os.path.join( + OutDir, 'blm_vox_n.nii')).dataobj, dtype=np.int64) Mask = np.ones([v, 1]) - n_sv = n_sv.reshape(v, 1) + n_sv = n_sv.reshape(v, 1) # Check for user specified missingness thresholds. if 'Missingness' in inputs: @@ -266,7 +269,7 @@ def combine_batch_masking(*args): '0 < ' + str(rmThresh) + ' < 1 violation') # Mask based on threshold. 
- Mask[n_sv0 and <0 to avoid underflow) pc_r = np.zeros(np.shape(tStatc_r)) - pc_r[tStatc_r < 0] = -np.log10(1-stats.t.cdf(tStatc_r[tStatc_r < 0], df_r[tStatc_r < 0])) - pc_r[tStatc_r >= 0] = -np.log10(stats.t.cdf(-tStatc_r[tStatc_r >= 0], df_r[tStatc_r >= 0])) + pc_r[tStatc_r < 0] = - \ + np.log10( + 1-stats.t.cdf(tStatc_r[tStatc_r < 0], df_r[tStatc_r < 0])) + pc_r[tStatc_r >= 0] = - \ + np.log10( + stats.t.cdf(-tStatc_r[tStatc_r >= 0], df_r[tStatc_r >= 0])) # Remove infs if "minlog" in inputs: - pc_r[np.logical_and(np.isinf(pc_r), pc_r<0)]=inputs['minlog'] + pc_r[np.logical_and( + np.isinf(pc_r), pc_r < 0)] = inputs['minlog'] else: - pc_r[np.logical_and(np.isinf(pc_r), pc_r<0)]=-323.3062153431158 + pc_r[np.logical_and( + np.isinf(pc_r), pc_r < 0)] = -323.3062153431158 # Add conTlp to lists fnames.append(os.path.join(OutDir, 'blm_vox_conTlp.nii')) @@ -670,7 +690,7 @@ def output_results(*args): # Calculate T stat tStatc_i = Lbeta_i.reshape(v_i)/np.sqrt(covLbeta_i) - + # Add conT to lists fnames.append(os.path.join(OutDir, 'blm_vox_conT.nii')) blocks.append(np.array(tStatc_i)) @@ -682,14 +702,18 @@ def output_results(*args): # Calculate p (seperately for >0 and <0 to avoid underflow) pc_i = np.zeros(np.shape(tStatc_i)) - pc_i[tStatc_i < 0] = -np.log10(1-stats.t.cdf(tStatc_i[tStatc_i < 0], df_i)) - pc_i[tStatc_i >= 0] = -np.log10(stats.t.cdf(-tStatc_i[tStatc_i >= 0], df_i)) + pc_i[tStatc_i < 0] = - \ + np.log10(1-stats.t.cdf(tStatc_i[tStatc_i < 0], df_i)) + pc_i[tStatc_i >= 0] = - \ + np.log10(stats.t.cdf(-tStatc_i[tStatc_i >= 0], df_i)) # Remove infs if "minlog" in inputs: - pc_i[np.logical_and(np.isinf(pc_i), pc_i<0)]=inputs['minlog'] + pc_i[np.logical_and( + np.isinf(pc_i), pc_i < 0)] = inputs['minlog'] else: - pc_i[np.logical_and(np.isinf(pc_i), pc_i<0)]=-323.3062153431158 + pc_i[np.logical_and( + np.isinf(pc_i), pc_i < 0)] = -323.3062153431158 # Add conTlp to lists fnames.append(os.path.join(OutDir, 'blm_vox_conTlp.nii')) @@ -707,7 +731,7 @@ def output_results(*args): # Get dimension of Lvector q = Lvec.shape[0] - + # Calculate L'(X'X)^(-1)L # (Note L is read in the other way around for F) if v_r: @@ -718,13 +742,15 @@ def output_results(*args): # Lbeta needs to be nvox by 1 by npar for stacked # multiply. 
- Lbetat_r = Lbeta_r.transpose(0,2,1) + Lbetat_r = Lbeta_r.transpose(0, 2, 1) # Calculate masked (L'(X'X)^(-1)L)^(-1) values for ring - iLvectiXtXLvec_r = np.linalg.pinv(LvectiXtXLvec_r).reshape([v_r, q*q]) + iLvectiXtXLvec_r = np.linalg.pinv( + LvectiXtXLvec_r).reshape([v_r, q*q]) # Calculate the numerator of the F statistic for the ring - Fnumerator_r = np.matmul(Lbetat_r, np.linalg.solve(LvectiXtXLvec_r, Lbeta_r)) + Fnumerator_r = np.matmul( + Lbetat_r, np.linalg.solve(LvectiXtXLvec_r, Lbeta_r)) Fnumerator_r = Fnumerator_r.reshape(Fnumerator_r.shape[0]) # Calculate the denominator of the F statistic for ring @@ -743,7 +769,7 @@ def output_results(*args): hdrs.append(nifti.header) # Degrees of freedom - df_r = n_sv[R_inds,:] - p + df_r = n_sv[R_inds, :] - p df_r = df_r.reshape(df_r.shape[0]) # Work out p for this contrast @@ -751,9 +777,11 @@ def output_results(*args): # Remove infs if "minlog" in inputs: - pc_r[np.logical_and(np.isinf(pc_r), pc_r<0)]=inputs['minlog'] + pc_r[np.logical_and( + np.isinf(pc_r), pc_r < 0)] = inputs['minlog'] else: - pc_r[np.logical_and(np.isinf(pc_r), pc_r<0)]=-323.3062153431158 + pc_r[np.logical_and( + np.isinf(pc_r), pc_r < 0)] = -323.3062153431158 # Add conFlp to lists fnames.append(os.path.join(OutDir, 'blm_vox_conFlp.nii')) @@ -787,13 +815,15 @@ def output_results(*args): # Lbeta needs to be nvox by 1 by npar for stacked # multiply. - Lbetat_i = Lbeta_i.transpose(0,2,1) + Lbetat_i = Lbeta_i.transpose(0, 2, 1) # Calculate masked (L'(X'X)^(-1)L)^(-1) values for inner - iLvectiXtXLvec_i = np.linalg.pinv(LvectiXtXLvec_i).reshape([1, q*q]) + iLvectiXtXLvec_i = np.linalg.pinv( + LvectiXtXLvec_i).reshape([1, q*q]) # Calculate the numerator of the F statistic for the ring - Fnumerator_i = np.matmul(Lbetat_i, np.linalg.solve(LvectiXtXLvec_i, Lbeta_i)) + Fnumerator_i = np.matmul( + Lbetat_i, np.linalg.solve(LvectiXtXLvec_i, Lbeta_i)) Fnumerator_i = Fnumerator_i.reshape(Fnumerator_i.shape[0]) # Calculate the denominator of the F statistic for inner @@ -812,7 +842,7 @@ def output_results(*args): hdrs.append(nifti.header) # Degrees of freedom - df_r = n_sv[R_inds,:] - p + df_r = n_sv[R_inds, :] - p df_r = df_r.reshape(df_r.shape[0]) # Work out p for this contrast @@ -820,9 +850,11 @@ def output_results(*args): # Remove infs if "minlog" in inputs: - pc_i[np.logical_and(np.isinf(pc_i), pc_i<0)]=inputs['minlog'] + pc_i[np.logical_and( + np.isinf(pc_i), pc_i < 0)] = inputs['minlog'] else: - pc_i[np.logical_and(np.isinf(pc_i), pc_i<0)]=-323.3062153431158 + pc_i[np.logical_and( + np.isinf(pc_i), pc_i < 0)] = -323.3062153431158 # Add conFlp to lists fnames.append(os.path.join(OutDir, 'blm_vox_conFlp.nii')) @@ -847,8 +879,8 @@ def output_results(*args): # Add the blocks to the niftis if blocks: - addBlocksToNiftis(fnames, blocks, blockIndexes,dims,volInds,affs,hdrs) - + addBlocksToNiftis(fnames, blocks, blockIndexes, + dims, volInds, affs, hdrs) w.resetwarnings() @@ -869,15 +901,15 @@ def blm_inverse(A, ouflow=False): if ouflow: # Make D to be filled with diagonal elements - D = np.broadcast_to(np.eye(d_r), (n_r,d_r,d_r)).copy() + D = np.broadcast_to(np.eye(d_r), (n_r, d_r, d_r)).copy() # Obtain 1/sqrt(diagA) - diagA = 1/np.sqrt(A.diagonal(0,1,2)) + diagA = 1/np.sqrt(A.diagonal(0, 1, 2)) diagA = diagA.reshape(n_r, d_r) # Make this back into diagonal matrices diaginds = np.diag_indices(d_r) - D[:, diaginds[0], diaginds[1]] = diagA + D[:, diaginds[0], diaginds[1]] = diagA # Precondition A. 
A = np.matmul(np.matmul(D, A), D) @@ -886,20 +918,21 @@ def blm_inverse(A, ouflow=False): if np.ndim(A) == 1: iA = 1/A else: - iA = np.linalg.solve(A, np.eye(d_r).reshape(1,d_r,d_r)) + iA = np.linalg.solve(A, np.eye(d_r).reshape(1, d_r, d_r)) if ouflow: # Undo preconditioning. iA = np.matmul(np.matmul(D, iA), D) - return(iA) + return (iA) # This function calculates the determinant of matrix A/ # stack of matrices A, with special handling accounting -# for over/under flow. -def blm_det(A): +# for over/under flow. + +def blm_det(A): # Precondition A. # Work out number of matrices and dimension of @@ -909,15 +942,15 @@ def blm_det(A): d_r = A.shape[1] # Make D to be filled with diagonal elements - D = np.broadcast_to(np.eye(d_r), (n_r,d_r,d_r)).copy() + D = np.broadcast_to(np.eye(d_r), (n_r, d_r, d_r)).copy() # Obtain 1/sqrt(diagA) - diagA = 1/np.sqrt(A.diagonal(0,1,2)) + diagA = 1/np.sqrt(A.diagonal(0, 1, 2)) diagA = diagA.reshape(n_r, d_r) # Make this back into diagonal matrices diaginds = np.diag_indices(d_r) - D[:, diaginds[0], diaginds[1]] = diagA + D[:, diaginds[0], diaginds[1]] = diagA # Calculate DAD. DAD = np.matmul(np.matmul(D, A), D) @@ -925,19 +958,18 @@ def blm_det(A): # Calculate determinants. detDAD = np.linalg.det(DAD) detDD = np.prod(diagA, axis=1) - + # Calculate determinant of A detA = detDAD/detDD - return(detA) - + return (detA) # ============================================================================ # -# For a specified set of voxels, the below function reads in the unique -# product matrices A'B from each batch job, works out which voxel had which -# product matrix, sums the batch product matrices and returns the sum, i.e. +# For a specified set of voxels, the below function reads in the unique +# product matrices A'B from each batch job, works out which voxel had which +# product matrix, sums the batch product matrices and returns the sum, i.e. # the product matrix for the entire analysis, at each voxel. # # Note: This function is only really designed for the product matrix X'X in @@ -950,10 +982,10 @@ def blm_det(A): # # ---------------------------------------------------------------------------- # -# - `AtBstr`: A string representing which product matrix we are looking at. +# - `AtBstr`: A string representing which product matrix we are looking at. # e.g. "XtX" for X'X. # - `OutDir`: Output directory. -# - `vinds`: Voxel indices; (flattened) indices representing which voxels we +# - `vinds`: Voxel indices; (flattened) indices representing which voxels we # are interested in looking at. # - `n_b`: The number of batches run during the batch stage. # - `sv`: Spatial varying boolean value. This tells us if we expect the @@ -966,8 +998,8 @@ def blm_det(A): # # ---------------------------------------------------------------------------- # -# - `AtB`: The product matrix (flattened); If we had wanted X'X (which is -# dimension p by p) for v voxels, the output would here would have +# - `AtB`: The product matrix (flattened); If we had wanted X'X (which is +# dimension p by p) for v voxels, the output would here would have # dimension (1 by p**2). If sv was True, we will have one matrix for # each voxel. If sv was false we will have one matrix for all voxels. 
# @@ -975,13 +1007,13 @@ def blm_det(A): def readAndSumUniqueAtB(AtBstr, OutDir, vinds, n_b, sv, jobNum): # Work out the uniqueness mask for the spatially varying designs - uniquenessMask = loadFile(os.path.join(OutDir,"tmp", - "blm_vox_uniqueM_batch1.nii")).get_fdata() + uniquenessMask = loadFile(os.path.join(OutDir, "tmp", + "blm_vox_uniqueM_batch1.nii")).get_fdata() v = np.prod(uniquenessMask.shape) vcurrent = np.prod(vinds.shape) - uniquenessMask=uniquenessMask.reshape(v) + uniquenessMask = uniquenessMask.reshape(v) # Work out how many unique matrices there were maxM = np.int32(np.amax(uniquenessMask)) @@ -991,80 +1023,80 @@ def readAndSumUniqueAtB(AtBstr, OutDir, vinds, n_b, sv, jobNum): uniquenessMask = uniquenessMask[vinds] else: # Work out the uniqueness mask value inside the inner part of the brain - uniquenessMask = uniquenessMask[vinds[0]] - + uniquenessMask = uniquenessMask[vinds[0]] # read in XtX AtB_batch_unique = np.load( - os.path.join(OutDir,"tmp",AtBstr+"1.npy")) + os.path.join(OutDir, "tmp", AtBstr+"1.npy")) # Make zeros for outer ring of brain XtX (remember A'B is still flattened) if sv: AtB = np.zeros((vcurrent, AtB_batch_unique.shape[1])) # Fill with unique maskings - for m in range(1,maxM+1): + for m in range(1, maxM+1): if sv: # Work out X'X for the ring - AtB[np.where(uniquenessMask==m),:] = AtB_batch_unique[(m-1),:] + AtB[np.where(uniquenessMask == m), :] = AtB_batch_unique[(m-1), :] # Work out X'X for the inner else: if uniquenessMask == m: - AtB = AtB_batch_unique[(m-1),:] + AtB = AtB_batch_unique[(m-1), :] # Cycle through batches and add together results. - for batchNo in range(2,(n_b+1)): + for batchNo in range(2, (n_b+1)): # Read in uniqueness Mask file - uniquenessMask = loadFile(os.path.join(OutDir,"tmp", - "blm_vox_uniqueM_batch" + str(batchNo) + ".nii")).get_fdata().reshape(v) + uniquenessMask = loadFile(os.path.join(OutDir, "tmp", + "blm_vox_uniqueM_batch" + str(batchNo) + ".nii")).get_fdata().reshape(v) maxM = np.int32(np.amax(uniquenessMask)) if sv: # Work out the uniqueness mask inside the ring around the brain - uniquenessMask = uniquenessMask[vinds] + uniquenessMask = uniquenessMask[vinds] else: # Work out the uniqueness mask value inside the inner part of the brain - uniquenessMask = uniquenessMask[vinds[0]] - + uniquenessMask = uniquenessMask[vinds[0]] # read in XtX AtB_batch_unique = np.load( - os.path.join(OutDir,"tmp",AtBstr + str(batchNo) + ".npy")) + os.path.join(OutDir, "tmp", AtBstr + str(batchNo) + ".npy")) # Make zeros for whole nifti XtX if sv: AtB_batch = np.zeros((vcurrent, AtB_batch_unique.shape[1])) # Fill with unique maskings - for m in range(1,maxM+1): + for m in range(1, maxM+1): if sv: - AtB_batch[np.where(uniquenessMask==m),:] = AtB_batch_unique[(m-1),:] + AtB_batch[np.where(uniquenessMask == m), + :] = AtB_batch_unique[(m-1), :] else: # Work out X'X for the inner if uniquenessMask == m: - AtB_batch = AtB_batch_unique[(m-1),:] + AtB_batch = AtB_batch_unique[(m-1), :] # Add to running total AtB = AtB + AtB_batch - return(AtB) + return (AtB) + def readUniqueAtB(AtBstr, OutDir, vinds, sv, uniquenessMask): # # Work out the uniqueness mask for the spatially varying designs - # uniquenessMask = loadFile(os.path.join(OutDir,"tmp", + # uniquenessMask = loadFile(os.path.join(OutDir,"tmp", # "blm_vox_uniqueM.nii")).get_fdata() v = np.prod(uniquenessMask.shape) vcurrent = np.prod(vinds.shape) - uniquenessMask=uniquenessMask.reshape(v) + uniquenessMask = uniquenessMask.reshape(v) # Work out how many unique matrices there were maxM = 
np.int32(np.amax(uniquenessMask)) @@ -1074,34 +1106,33 @@ def readUniqueAtB(AtBstr, OutDir, vinds, sv, uniquenessMask): uniquenessMask = uniquenessMask[vinds] else: # Work out the uniqueness mask value inside the inner part of the brain - uniquenessMask = uniquenessMask[vinds[0]] + uniquenessMask = uniquenessMask[vinds[0]] # Get the unique values for these voxels uniqueVals = np.unique(uniquenessMask).astype(int) # Read in the unique lines - AtB_unique = readLinesFromNPY(os.path.join(OutDir,"tmp",AtBstr+".npy"), uniqueVals) + AtB_unique = readLinesFromNPY(os.path.join( + OutDir, "tmp", AtBstr+".npy"), uniqueVals) # Make zeros for outer ring of brain XtX (remember A'B is still flattened) if sv: AtB = np.zeros((vcurrent, AtB_unique.shape[1])) - # Fill with unique maskings for i, m in enumerate(uniqueVals): if sv: # Work out X'X for the ring - AtB[np.where(uniquenessMask==m),:] = AtB_unique[i,:], + AtB[np.where(uniquenessMask == m), :] = AtB_unique[i, :], # Work out X'X for the inner else: if uniquenessMask == m: - AtB = AtB_unique[i,:] - - return(AtB) + AtB = AtB_unique[i, :] + return (AtB) if __name__ == "__rain__": - main() \ No newline at end of file + main() diff --git a/blm/lib/blm_setup.py b/blm/lib/blm_setup.py index 5bef7ea..2aae6a3 100644 --- a/blm/lib/blm_setup.py +++ b/blm/lib/blm_setup.py @@ -1,13 +1,13 @@ +from blm.lib.fileio import * +import yaml +import shutil +import os +import sys +import numpy as np import warnings as w # These warnings are caused by numpy updates and should not be # output. -w.simplefilter(action = 'ignore', category = FutureWarning) -import numpy as np -import sys -import os -import shutil -import yaml -from blm.lib.fileio import * +w.simplefilter(action='ignore', category=FutureWarning) # Main takes in two arguments at most: @@ -22,11 +22,11 @@ def setup(*args): pwd = os.getcwd() os.chdir(os.path.dirname(os.path.realpath(__file__))) - if len(args)==0 or (not args[0]): + if len(args) == 0 or (not args[0]): # Load in inputs - ipath = os.path.abspath(os.path.join('..','blm_config.yml')) + ipath = os.path.abspath(os.path.join('..', 'blm_config.yml')) with open(ipath, 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) + inputs = yaml.load(stream, Loader=yaml.FullLoader) retnb = False else: if type(args[0]) is str: @@ -36,21 +36,21 @@ def setup(*args): ipath = os.path.abspath(os.path.join(pwd, args[0])) # In this case inputs file is first argument with open(ipath, 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) - # Work out whether to return nb or save it in a + inputs = yaml.load(stream, Loader=yaml.FullLoader) + # Work out whether to return nb or save it in a # file - if len(args)>1: + if len(args) > 1: retnb = args[1] else: retnb = False - else: + else: # In this case inputs structure is first argument. 
inputs = args[0] ipath = '' retnb = True # Save absolute filepaths in place of relative filepaths - if ipath: + if ipath: # Y files if not os.path.isabs(inputs['Y_files']): @@ -65,16 +65,18 @@ def setup(*args): if not os.path.isabs(inputs['data_mask_files']): # Change M in inputs - inputs['data_mask_files'] = os.path.join(pwd, inputs['data_mask_files']) + inputs['data_mask_files'] = os.path.join( + pwd, inputs['data_mask_files']) - # If analysis mask file specified, + # If analysis mask file specified, if 'analysis_mask' in inputs: # M_files if not os.path.isabs(inputs['analysis_mask']): # Change M in inputs - inputs['analysis_mask'] = os.path.join(pwd, inputs['analysis_mask']) + inputs['analysis_mask'] = os.path.join( + pwd, inputs['analysis_mask']) # If X is specified if not os.path.isabs(inputs['X']): @@ -91,7 +93,7 @@ def setup(*args): with open(ipath, 'w') as outfile: yaml.dump(inputs, outfile, default_flow_style=False) - # Change paths to absoluate if they aren't already + # Change paths to absoluate if they aren't already if 'MAXMEM' in inputs: MAXMEM = eval(inputs['MAXMEM']) else: @@ -108,15 +110,13 @@ def setup(*args): 'blm_vox_conSE.nii', 'blm_vox_con.nii', 'blm_vox_conF.nii', 'blm_vox_conFlp.nii', 'blm_vox_conR2.nii'] - for file in files: if os.path.exists(os.path.join(OutDir, file)): os.remove(os.path.join(OutDir, file)) - if os.path.exists(os.path.join(OutDir, 'tmp')): + if os.path.exists(os.path.join(OutDir, 'tmp')): shutil.rmtree(os.path.join(OutDir, 'tmp')) - # Get number of parameters c1 = str2vec(inputs['contrasts'][0]['c' + str(1)]['vector']) c1 = np.array(c1) @@ -162,43 +162,45 @@ def setup(*args): v_am = np.prod(Y0.shape) # Size of non-zero voxels in NIFTI - NIFTIsize = sys.getsizeof(np.zeros([v_am,1],dtype='uint64')) + NIFTIsize = sys.getsizeof(np.zeros([v_am, 1], dtype='uint64')) if NIFTIsize > MAXMEM: raise ValueError('The NIFTI "' + Y_files[0] + '"is too large') # Similar to blksize in SwE, we divide by 8 times the size of a nifti # to work out how many blocks we use. We divide NIFTIsize by 3 - # as approximately a third of the volume is actually non-zero/brain + # as approximately a third of the volume is actually non-zero/brain # and then also divide though everything by the number of parameters # in the analysis. blksize = np.floor(MAXMEM/8/NIFTIsize/(n_p**2)) if blksize == 0: raise ValueError('Blocksize too small.') - # Check F contrast ranks + # Check F contrast ranks n_c = len(inputs['contrasts']) - for i in range(0,n_c): + for i in range(0, n_c): # Read in contrast vector cvec = str2vec(inputs['contrasts'][i]['c' + str(i+1)]['vector']) cvec = np.array(cvec) - if cvec.ndim>1: - + if cvec.ndim > 1: + # Get dimension of cvector q = cvec.shape[0] - if np.linalg.matrix_rank(cvec)1 and data.shape[1]>1: + if data.shape[0] > 1 and data.shape[1] > 1: # Checking for column headers. - if isinstance(data[0,0], str) and isinstance(data[0,1], str): + if isinstance(data[0, 0], str) and isinstance(data[0, 1], str): # Then checking for row headers aswell - if isinstance(data[1,0], str): + if isinstance(data[1, 0], str): # Check if we have numbers in the first column, - # if not remove the first column because it must be + # if not remove the first column because it must be # a header. 
try: - float(data[1,0]) + float(data[1, 0]) data = pd.io.parsers.read_csv(filepath).values except: data = pd.io.parsers.read_csv( - filepath,usecols=range(1,data.shape[1])).values + filepath, usecols=range(1, data.shape[1])).values else: data = pd.io.parsers.read_csv( filepath).values - elif np.isnan(data[0,0]) and isinstance(data[0,1], str): + elif np.isnan(data[0, 0]) and isinstance(data[0, 1], str): # Then checking for row headers aswell - if isinstance(data[1,0], str): + if isinstance(data[1, 0], str): # Check if we have numbers in the first column, - # if not remove the first column because it must be + # if not remove the first column because it must be # a header. try: - float(data[1,0]) + float(data[1, 0]) data = pd.io.parsers.read_csv(filepath).values except: data = pd.io.parsers.read_csv( - filepath,usecols=range(1,data.shape[1])).values + filepath, usecols=range(1, data.shape[1])).values else: data = pd.io.parsers.read_csv( filepath).values - # Checking for row headers instead. - elif isinstance(data[1,0], str): + elif isinstance(data[1, 0], str): # Check if we have numbers in the first column, - # if not remove the first column because it must be + # if not remove the first column because it must be # a header. try: - float(data[1,0]) + float(data[1, 0]) except: data = pd.io.parsers.read_csv( - filepath,usecols=range(1,data.shape[1])).values + filepath, usecols=range(1, data.shape[1])).values # If we have a nan but numeric headers for both remove the column header by default - elif np.isnan(data[0,0]): + elif np.isnan(data[0, 0]): data = pd.io.parsers.read_csv( - filepath).values + filepath).values # If we have more than one row but only one column, check for a column header - elif data.shape[0]>1: - if isinstance(data[0,0], str): + elif data.shape[0] > 1: + if isinstance(data[0, 0], str): data = pd.io.parsers.read_csv(filepath, header=None).values - elif np.isnan(data[0,0]): + elif np.isnan(data[0, 0]): data = pd.io.parsers.read_csv(filepath, header=None).values # If we have more than one column but only one row, check for a row header - elif data.shape[1]>1: - if isinstance(data[0,0], str): + elif data.shape[1] > 1: + if isinstance(data[0, 0], str): data = pd.io.parsers.read_csv( - filepath,usecols=range(1,data.shape[1])).values - elif np.isnan(data[0,0]): + filepath, usecols=range(1, data.shape[1])).values + elif np.isnan(data[0, 0]): data = pd.io.parsers.read_csv( - filepath,usecols=range(1,data.shape[1])).values + filepath, usecols=range(1, data.shape[1])).values # If the file is a brain image in the form of nii, nii.gz, img or img.gz else: @@ -125,7 +124,7 @@ def loadFile(filepath): # ============================================================================ # -# The below function takes in a string representing a contrast vector and +# The below function takes in a string representing a contrast vector and # returns the contrast vector as an array. # # ============================================================================ @@ -137,13 +136,13 @@ def str2vec(c): c = c.replace('[ [', '[[').replace('] ]', ']]') cs = c.split(' ') cf = '' - for i in range(0,len(cs)): - cs[i]=cs[i].replace(',', '') - cf=cf + cs[i] + for i in range(0, len(cs)): + cs[i] = cs[i].replace(',', '') + cf = cf + cs[i] if i < (len(cs)-1): cf = cf + ', ' - - return(eval(cf)) + + return (eval(cf)) # ============================================================================ @@ -159,29 +158,29 @@ def str2vec(c): # # - `fname`: An absolute path to the Nifti file. 
# - `block`: The block of values to write to the NIFTI. -# - `blockInds`: The indices representing the 3D coordinates `block` should be +# - `blockInds`: The indices representing the 3D coordinates `block` should be # written to in the NIFTI. (Note: It is assumed if the NIFTI is # 4D we assume that the indices we want to write to in each 3D # volume/slice are the same across all 3D volumes/slices). -# - `dim` (optional): If creating the NIFTI image for the first time, the +# - `dim` (optional): If creating the NIFTI image for the first time, the # dimensions of the NIFTI image must be specified. # - `volInd` (optional): If we only want to write to one 3D volume/slice, # within a 4D file, this specifies the index of the # volume of interest. -# - `aff` (optional): If creating the NIFTI image for the first time, the +# - `aff` (optional): If creating the NIFTI image for the first time, the # affine of the NIFTI image must be specified. -# - `hdr` (optional): If creating the NIFTI image for the first time, the +# - `hdr` (optional): If creating the NIFTI image for the first time, the # header of the NIFTI image must be specified. # # ============================================================================ -def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=None): +def addBlockToNifti(fname, block, blockInds, dim=None, volInd=None, aff=None, hdr=None): # Check if file is in use fileLocked = True while fileLocked: try: # Create lock file, so other jobs know we are writing to this file - f=os.open(fname + ".lock", os.O_CREAT|os.O_EXCL|os.O_RDWR) + f = os.open(fname + ".lock", os.O_CREAT | os.O_EXCL | os.O_RDWR) fileLocked = False except FileExistsError: fileLocked = True @@ -201,16 +200,16 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No dim = img.shape # Work out data - data = np.asarray(img.dataobj, dtype=np.float64) + data = np.asarray(img.dataobj, dtype=np.float64) # Work out affine affine = img.affine - + else: # If we know how, make the NIFTI if dim is not None: - + # Make data data = np.zeros(dim) @@ -225,12 +224,12 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No # Throw an error because we don't know what to do raise Exception('NIFTI does not exist and dimensions not given') - # Work out the number of output volumes inside the nifti - if len(dim)==3: + # Work out the number of output volumes inside the nifti + if len(dim) == 3: # We only have one volume in this case n_vol = 1 - dim = np.array([dim[0],dim[1],dim[2],1]) + dim = np.array([dim[0], dim[1], dim[2], 1]) else: @@ -243,40 +242,39 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No # Work out the number of voxels n_vox = np.prod(dim[:3]) - # Reshape + # Reshape data = data.reshape([n_vox, n_vol]) # Add all the volumes if volInd is None: # Add block - data[blockInds,:] = block.reshape(data[blockInds,:].shape) - + data[blockInds, :] = block.reshape(data[blockInds, :].shape) + # Cycle through volumes, reshaping. 
- for k in range(0,data.shape[1]): + for k in range(0, data.shape[1]): - data_out[:,:,:,k] = data[:,k].reshape(int(dim[0]), - int(dim[1]), - int(dim[2])) + data_out[:, :, :, k] = data[:, k].reshape(int(dim[0]), + int(dim[1]), + int(dim[2])) # Add the one volume in the correct place else: # We're only looking at this volume - data = data[:,volInd].reshape((n_vox,1)) + data = data[:, volInd].reshape((n_vox, 1)) # Add block - data[blockInds,:] = block.reshape(data[blockInds,:].shape) - - # Put in the volume - data_out[:,:,:,volInd] = data[:,0].reshape(int(dim[0]), - int(dim[1]), - int(dim[2])) + data[blockInds, :] = block.reshape(data[blockInds, :].shape) + # Put in the volume + data_out[:, :, :, volInd] = data[:, 0].reshape(int(dim[0]), + int(dim[1]), + int(dim[2])) # Make NIFTI nifti = nib.Nifti1Image(data_out, affine, header=hdr) - + # Save NIFTI nib.save(nifti, fname) @@ -301,23 +299,22 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No # # - `fname`: An absolute path to the Nifti file. # - `block`: The block of values to write to the NIFTI. -# - `blockInds`: The indices representing the 3D coordinates `block` should be +# - `blockInds`: The indices representing the 3D coordinates `block` should be # written to in the NIFTI. (Note: It is assumed if the NIFTI is # 4D we assume that the indices we want to write to in each 3D # volume/slice are the same across all 3D volumes/slices). -# - `dim` (optional): If creating the NIFTI image for the first time, the +# - `dim` (optional): If creating the NIFTI image for the first time, the # dimensions of the NIFTI image must be specified. # - `volInd` (optional): If we only want to write to one 3D volume/slice, # within a 4D file, this specifies the index of the # volume of interest. -# - `aff` (optional): If creating the NIFTI image for the first time, the +# - `aff` (optional): If creating the NIFTI image for the first time, the # affine of the NIFTI image must be specified. -# - `hdr` (optional): If creating the NIFTI image for the first time, the +# - `hdr` (optional): If creating the NIFTI image for the first time, the # header of the NIFTI image must be specified. 
# # ============================================================================ -def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=None,hdrs=None): - +def addBlocksToNiftis(fnames, blocks, blockIndexes, dims=None, volInds=None, affs=None, hdrs=None): # Get number of blocks we're adding nBlocks = len(fnames) @@ -343,15 +340,16 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N # Check if file is in use try: # Create lock file, so other jobs know we are writing to this file - f=os.open(fname + ".lock", os.O_CREAT|os.O_EXCL|os.O_RDWR) + f = os.open(fname + ".lock", os.O_CREAT | + os.O_EXCL | os.O_RDWR) fileLocked = False except FileExistsError: fileLocked = True # If the files not in use we can add to it - if not fileLocked: + if not fileLocked: - dim = dims[i] + dim = dims[i] aff = affs[i] hdr = hdrs[i] @@ -365,16 +363,16 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N dim = img.shape # Work out data - data = np.asarray(img.dataobj, dtype=np.float64) + data = np.asarray(img.dataobj, dtype=np.float64) # Work out affine affine = img.affine - + else: # If we know how, make the NIFTI if dim is not None: - + # Make data data = np.zeros(dim) @@ -387,14 +385,15 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N else: # Throw an error because we don't know what to do - raise Exception('NIFTI does not exist and dimensions not given') + raise Exception( + 'NIFTI does not exist and dimensions not given') - # Work out the number of output volumes inside the nifti - if len(dim)==3: + # Work out the number of output volumes inside the nifti + if len(dim) == 3: # We only have one volume in this case n_vol = 1 - dim = np.array([dim[0],dim[1],dim[2],1]) + dim = np.array([dim[0], dim[1], dim[2], 1]) else: @@ -407,14 +406,14 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N # Work out the number of voxels n_vox = np.prod(dim[:3]) - # Reshape + # Reshape data = data.reshape([n_vox, n_vol]) # Loop through all blocks for j in np.arange(nBlocks): # Check if we are looking at the same file - if fnames[j]==fnames[i]: + if fnames[j] == fnames[i]: # If it's going to the same file, add this block block = blocks[j] @@ -425,14 +424,15 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N if volInd is None: # Add block - data[blockInds,:] = block.reshape(data[blockInds,:].shape) - + data[blockInds, :] = block.reshape( + data[blockInds, :].shape) + # Cycle through volumes, reshaping. 
- for k in range(0,data.shape[1]): + for k in range(0, data.shape[1]): - data_out[:,:,:,k] = data[:,k].reshape(int(dim[0]), - int(dim[1]), - int(dim[2])) + data_out[:, :, :, k] = data[:, k].reshape(int(dim[0]), + int(dim[1]), + int(dim[2])) # Add the one volume in the correct place else: @@ -444,20 +444,20 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N # data_vol = data[:,].reshape((n_vox,1)) # Add block - data[blockInds,volInd] = block.reshape(data[blockInds,volInd].shape) + data[blockInds, volInd] = block.reshape( + data[blockInds, volInd].shape) # Put in the volume - data_out[:,:,:,volInd] = data[:,volInd].reshape(int(dim[0]), - int(dim[1]), - int(dim[2])) + data_out[:, :, :, volInd] = data[:, volInd].reshape(int(dim[0]), + int(dim[1]), + int(dim[2])) # Record that we've added this block blocksAdded[j] = 1 - # Make NIFTI nifti = nib.Nifti1Image(data_out, affine, header=hdr) - + # Save NIFTI nib.save(nifti, fname) @@ -473,7 +473,7 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N # ============================================================================ # -# The below function reads in a numpy file as a memory map and returns the +# The below function reads in a numpy file as a memory map and returns the # specified lines from the file. The benefit of this is that the entire file # is not read into memory and we retrieve only the parts of the file we need. # @@ -492,21 +492,23 @@ def addBlocksToNiftis(fnames, blocks, blockIndexes,dims=None,volInds=None,affs=N # # ---------------------------------------------------------------------------- # -# - `data_lines`: The lines from the file we wanted to retrieve. +# - `data_lines`: The lines from the file we wanted to retrieve. # # ============================================================================ + + def readLinesFromNPY(filename, lines): # Load in the file but in memory map mode - data_memmap = np.load(filename,mmap_mode='r') + data_memmap = np.load(filename, mmap_mode='r') # Read in the desired lines - data_lines = np.array(data_memmap[lines,:]) + data_lines = np.array(data_memmap[lines, :]) # We don't want this file hanging around del data_memmap - return(data_lines) + return (data_lines) # ============================================================================ @@ -538,20 +540,20 @@ def readLinesFromNPY(filename, lines): # ============================================================================ def get_amInds(am, vb=None, nvb=None): - # Reshape the analysis mask - am = am.reshape([np.prod(am.shape),1]) + # Reshape the analysis mask + am = am.reshape([np.prod(am.shape), 1]) - # Work out analysis mask indices. - amInds=np.where(am==1)[0] + # Work out analysis mask indices. + amInds = np.where(am == 1)[0] - # Get vb^th block of voxels - if vb is not None and vb>=0: + # Get vb^th block of voxels + if vb is not None and vb >= 0: - # Split am into equal nvb "equally" (ish) sized blocks and take - # the vb^th block. - amInds = np.array_split(amInds, nvb)[vb] + # Split am into equal nvb "equally" (ish) sized blocks and take + # the vb^th block. 
+ amInds = np.array_split(amInds, nvb)[vb]
- return(amInds)
+ return (amInds)

# ============================================================================

@@ -579,67 +581,67 @@ def get_amInds(am, vb=None, nvb=None):
# ============================================================================
def numVoxelBlocks(inputs):
- # ----------------------------------------------------------------
- # Design matrix
- # ----------------------------------------------------------------
- X = loadFile(inputs['X'])
-
- # Number of parameters
- p = X.shape[1]
+ # ----------------------------------------------------------------
+ # Design matrix
+ # ----------------------------------------------------------------
+ X = loadFile(inputs['X'])
- # Check if the maximum memory is saved.
- if 'MAXMEM' in inputs:
- MAXMEM = eval(inputs['MAXMEM'])
- else:
- MAXMEM = 2**32
+ # Number of parameters
+ p = X.shape[1]
- # ----------------------------------------------------------------
- # Work out number of voxels we'd ideally want in a block
- # ----------------------------------------------------------------
- # This is done by taking the maximum memory (in bytes), divided
- # by roughly the amount of memory a float in numpy takes (8),
- # divided by 10 (allowing us to have up to 10 variables of
- # allowed size at any one time), divided by q^2 (the number of
- # random effects squared/the largest matrix size we would
- # look at).
- vPerBlock = MAXMEM/(10*8*(p**2))
-
- # Read in analysis mask (if present)
- if 'analysis_mask' in inputs:
- am = loadFile(inputs['analysis_mask'])
- am = am.get_fdata()
- else:
+ # Check if the maximum memory is saved.
+ if 'MAXMEM' in inputs:
+ MAXMEM = eval(inputs['MAXMEM'])
+ else:
+ MAXMEM = 2**32
+
+ # ----------------------------------------------------------------
+ # Work out number of voxels we'd ideally want in a block
+ # ----------------------------------------------------------------
+ # This is done by taking the maximum memory (in bytes), divided
+ # by roughly the amount of memory a float in numpy takes (8),
+ # divided by 10 (allowing us to have up to 10 variables of
+ # allowed size at any one time), divided by p^2 (the number of
+ # parameters squared/the largest matrix size we would
+ # look at). 
+ vPerBlock = MAXMEM/(10*8*(p**2))
+
+ # Read in analysis mask (if present)
+ if 'analysis_mask' in inputs:
+ am = loadFile(inputs['analysis_mask'])
+ am = am.get_fdata()
+ else:
- # --------------------------------------------------------------
- # Get one Y volume to make full Nifti mask
- # --------------------------------------------------------------
+ # --------------------------------------------------------------
+ # Get one Y volume to make full Nifti mask
+ # --------------------------------------------------------------
- # Y volumes
- with open(inputs['Y_files']) as a:
+ # Y volumes
+ with open(inputs['Y_files']) as a:
- Y_files = []
- i = 0
- for line in a.readlines():
+ Y_files = []
+ i = 0
+ for line in a.readlines():
- Y_files.append(line.replace('\n', ''))
+ Y_files.append(line.replace('\n', ''))
- # Load in one nifti to check NIFTI size
- try:
- Y0 = loadFile(Y_files[0])
- except Exception as error:
- raise ValueError('The NIFTI "' + Y_files[0] + '"does not exist')
+ # Load in one nifti to check NIFTI size
+ try:
+ Y0 = loadFile(Y_files[0])
+ except Exception as error:
+ raise ValueError('The NIFTI "' + Y_files[0] + '" does not exist')
- # Get mask of ones
- am = np.ones(Y0.shape)
+ # Get mask of ones
+ am = np.ones(Y0.shape)
- # Work out number of non-zero voxels in analysis mask
- v = np.sum(am!=0)
+ # Work out number of non-zero voxels in analysis mask
+ v = np.sum(am != 0)
- # Work out number of voxel blocks we would need.
- nvb = v//vPerBlock+1
+ # Work out number of voxel blocks we would need.
+ nvb = v//vPerBlock+1
- # Return number of voxel blocks
- return(nvb)
+ # Return number of voxel blocks
+ return (nvb)

# ============================================================================

@@ -667,15 +669,15 @@ def numVoxelBlocks(inputs):
# ============================================================================
def pracNumVoxelBlocks(inputs):
- # Check if maximum number of voxel blocks specified,
- # otherwise, default to 60
- if 'maxnvb' in inputs:
- maxnvb = inputs['maxnvb']
- else:
- maxnvb = 60
+ # Check if maximum number of voxel blocks specified,
+ # otherwise, default to 60
+ if 'maxnvb' in inputs:
+ maxnvb = inputs['maxnvb']
+ else:
+ maxnvb = 60
- # Work out number of voxel blocks we should use.
- nvb = np.min([numVoxelBlocks(inputs), maxnvb])
+ # Work out number of voxel blocks we should use. 
+ nvb = np.min([numVoxelBlocks(inputs), maxnvb]) - # Return number of voxel blocks - return(nvb) + # Return number of voxel blocks + return (nvb) diff --git a/setup.py b/setup.py index f0982ba..4901155 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - install_requires=requirements, + install_requires=requirements, entry_points={ 'console_scripts': [ 'blm=blm.blm_cluster:_main', diff --git a/test/blm_cluster_test.py b/test/blm_cluster_test.py index 5fd43f7..941170a 100644 --- a/test/blm_cluster_test.py +++ b/test/blm_cluster_test.py @@ -1,3 +1,8 @@ +import numpy as np +import argparse +from blm.blm_cluster import _main as blm +from cleanup import cleanup +from generate_test_data import * import os import numpy import pandas @@ -7,7 +12,7 @@ from dask.distributed import Client, as_completed from dask.distributed import performance_report import nibabel as nib -import sys +import sys # Get the directory containing the script test_dir = os.path.dirname(__file__) @@ -17,14 +22,8 @@ sys.path.insert(0, os.path.dirname(test_dir)) # Import test generation and cleanup -from generate_test_data import * -from cleanup import cleanup -from blm.blm_cluster import _main as blm -import argparse -import numpy as np - def _main(argv=None): """ Main function for running the BLM-lm test pipeline. @@ -38,12 +37,15 @@ def _main(argv=None): # Parsing command line arguments parser = argparse.ArgumentParser(description="BLM-lm test pipeline") - parser.add_argument("--sim_ind", type=int, required=True, help="Simulation index to determine n, p, and num_voxel_batches values.") - parser.add_argument("--out_dir", type=str, required=True, help="Output directory path.") - parser.add_argument("--num_nodes", type=int, default=100, help="Number of nodes for Dask setup.") - parser.add_argument("--cluster_type", type=str, default="slurm", help="Cluster type for Dask setup. (default: slurm)") + parser.add_argument("--sim_ind", type=int, required=True, + help="Simulation index to determine n, p, and num_voxel_batches values.") + parser.add_argument("--out_dir", type=str, required=True, + help="Output directory path.") + parser.add_argument("--num_nodes", type=int, default=100, + help="Number of nodes for Dask setup.") + parser.add_argument("--cluster_type", type=str, default="slurm", + help="Cluster type for Dask setup. (default: slurm)") - # Get arguments args = parser.parse_args(argv) @@ -74,7 +76,8 @@ def _main(argv=None): p = 8 num_voxel_batches = 500 else: - raise ValueError("Invalid sim_ind value. Please provide a value between 1 and 4.") + raise ValueError( + "Invalid sim_ind value. Please provide a value between 1 and 4.") # ----------------------------------------------------------------- # Create folders for simulation @@ -90,36 +93,40 @@ def _main(argv=None): os.mkdir(sim_dir) # Make new data directory. - if not os.path.exists(os.path.join(sim_dir,"data")): - os.mkdir(os.path.join(sim_dir,"data")) - + if not os.path.exists(os.path.join(sim_dir, "data")): + os.mkdir(os.path.join(sim_dir, "data")) + # Make new log directory. 
- if not os.path.exists(os.path.join(sim_dir,"simlog")): - os.mkdir(os.path.join(sim_dir,"simlog")) + if not os.path.exists(os.path.join(sim_dir, "simlog")): + os.mkdir(os.path.join(sim_dir, "simlog")) # ----------------------------------------------------------------- # Generate data # ----------------------------------------------------------------- - print('Test Update for Simulation ' + str(sim_ind) + ': Generating test data...') + print('Test Update for Simulation ' + + str(sim_ind) + ': Generating test data...') generate_data(n, p, dim, out_dir, sim_ind) - print('Test Update for Simulation ' + str(sim_ind) + ': Test data generated.') + print('Test Update for Simulation ' + + str(sim_ind) + ': Test data generated.') # ----------------------------------------------------------------- # Run BLM # ----------------------------------------------------------------- - print('Test Update for Simulation ' + str(sim_ind) + ': Running BLM analysis...') + print('Test Update for Simulation ' + + str(sim_ind) + ': Running BLM analysis...') # Import inputs file - inputs_yml = os.path.join(out_dir, 'sim'+ str(sim_ind), 'inputs.yml') + inputs_yml = os.path.join(out_dir, 'sim' + str(sim_ind), 'inputs.yml') # Run blm_cluster blm([inputs_yml]) - print('Test Update for Simulation ' + str(sim_ind) + ': BLM analysis complete.') + print('Test Update for Simulation ' + + str(sim_ind) + ': BLM analysis complete.') # ----------------------------------------------------------------- # Set up cluster @@ -183,20 +190,22 @@ def _main(argv=None): # Raise a value error if none of the above else: - raise ValueError('The cluster type, ' + cluster_type + ', is not recognized.') + raise ValueError('The cluster type, ' + + cluster_type + ', is not recognized.') # -------------------------------------------------------------------------------- # Connect to client # -------------------------------------------------------------------------------- # Connect to cluster - client = Client(cluster) + client = Client(cluster) # -------------------------------------------------------------------------------- # Run R Jobs # -------------------------------------------------------------------------------- - print('Test Update for Simulation ' + str(sim_ind) + ': Now running R simulations...') + print('Test Update for Simulation ' + + str(sim_ind) + ': Now running R simulations...') # Ask for num_nodes nodes for BLM batch cluster.scale(num_nodes) @@ -206,14 +215,14 @@ def _main(argv=None): # Submit jobs for i in np.arange(num_voxel_batches): - + # Run the jobNum^{th} job. 
future_r = client.submit(run_voxel_batch_in_R, sim_ind, dim, i, num_voxel_batches, out_dir, test_dir, pure=False)
- # Append to list
+ # Append to list
futures.append(future_r)
-
+
# Completed jobs
completed = as_completed(futures)
@@ -230,7 +239,7 @@ def _main(argv=None):
# --------------------------------------------------------------------------------
# Cleanup and results function
- cleanup(out_dir,sim_ind)
+ cleanup(out_dir, sim_ind)
# Close the client
client.close()
@@ -254,7 +263,7 @@ def run_voxel_batch_in_R(sim_ind, dim, batch_no, num_voxel_batches, out_dir, tes
"""
import subprocess
- import sys
+ import sys
sys.path.insert(0, test_dir)
sys.path.insert(0, os.path.dirname(test_dir))
@@ -263,7 +272,7 @@ def run_voxel_batch_in_R(sim_ind, dim, batch_no, num_voxel_batches, out_dir, tes
# Preprocess this batch
Rpreproc(out_dir, sim_ind, dim, num_voxel_batches, batch_no)
-
+
# Load R and run parameter estimation in a single command
r_path = "/apps/well/R/3.4.3/bin/R"
@@ -278,11 +287,10 @@ def run_voxel_batch_in_R(sim_ind, dim, batch_no, num_voxel_batches, out_dir, tes
# Run the job
subprocess.run(r_cmd, shell=True)
-
+
# Cleanup job in R to combine files
Rcleanup(out_dir, sim_ind, num_voxel_batches, batch_no)
-
if __name__ == "__main__":
- _main(sys.argv[1:])
\ No newline at end of file
+ _main(sys.argv[1:])
diff --git a/test/cleanup.py b/test/cleanup.py
index c16680a..7cc053c 100644
--- a/test/cleanup.py
+++ b/test/cleanup.py
@@ -1,16 +1,17 @@
+from blm.lib.fileio import *
+import pandas as pd
+import yaml
+import shutil
+import os
+import nibabel as nib
+import numpy as np
import warnings as w
# This warning is caused by numpy updates and should
# be ignored for now.
-w.simplefilter(action = 'ignore', category = FutureWarning)
-import numpy as np
-import nibabel as nib
-import os
-import shutil
-import yaml
-import pandas as pd
-from blm.lib.fileio import *
+w.simplefilter(action='ignore', category=FutureWarning)
+
-def cleanup(out_dir,sim_ind):
+def cleanup(out_dir, sim_ind):
# -----------------------------------------------------------------------
# Get simulation directory
@@ -23,7 +24,7 @@ def cleanup(out_dir,sim_ind):
# -----------------------------------------------------------------------
if os.path.exists(os.path.join(sim_dir, 'data')):
shutil.rmtree(os.path.join(sim_dir, 'data'))
-
+
# -----------------------------------------------------------------------
# N map
# -----------------------------------------------------------------------
@@ -35,33 +36,33 @@ def cleanup(out_dir,sim_ind):
n = np.amax(n_sv)
# Work out which voxels had readings for all subjects
- loc_sv = (n_sv>n//2)&(n_sv<n)
- loc_nsv = (n_sv==n)
+ loc_sv = (n_sv > n//2) & (n_sv < n)
+ loc_nsv = (n_sv == n)
# Work out number of spatially varying and non-spatially varying voxels
v_sv = np.sum(loc_sv)
v_nsv = np.sum(loc_nsv)
-
+
# -----------------------------------------------------------------------
# Files to compare
# -----------------------------------------------------------------------
-
+
filenames = ['blm_vox_beta.nii', 'blm_vox_conT.nii',
'blm_vox_conTlp.nii', 'blm_vox_llh.nii', 'blm_vox_resms.nii']
# -----------------------------------------------------------------------
# Compare to lm
# -----------------------------------------------------------------------
-
+
# Loop through files
for blm_filename in filenames:
-
+
# Get the equivalent lm file
lm_filename = blm_filename.replace('blm', 'lm')
-
+
# Make the filenames full
blm_filename = os.path.join(sim_dir, 'BLM', blm_filename)
lm_filename = os.path.join(sim_dir, 'lm', lm_filename)
-
+
# 
------------------------------------------------------------------- # Read in files # ------------------------------------------------------------------- @@ -73,16 +74,16 @@ def cleanup(out_dir,sim_ind): lm = nib.load(lm_filename).get_fdata() # Remove zero values (spatially varying) - blm_sv = blm[(lm!=0) & loc_sv] - lm_sv = lm[(lm!=0) & loc_sv] + blm_sv = blm[(lm != 0) & loc_sv] + lm_sv = lm[(lm != 0) & loc_sv] # Remove zero values (non spatially varying) - blm_nsv = blm[(lm!=0) & loc_nsv] - lm_nsv = lm[(lm!=0) & loc_nsv] + blm_nsv = blm[(lm != 0) & loc_nsv] + lm_nsv = lm[(lm != 0) & loc_nsv] # Remove zero values (both) - blm = blm[lm!=0] - lm = lm[lm!=0] + blm = blm[lm != 0] + lm = lm[lm != 0] # Get MAE MAE = np.mean(np.abs(blm-lm)) @@ -106,20 +107,25 @@ def cleanup(out_dir,sim_ind): # Print results print('----------------------------------------------------------------------------------------------') - print('Test Results for Simulation ' + str(sim_ind) + ': ' + str(blm_filename)) + print('Test Results for Simulation ' + + str(sim_ind) + ': ' + str(blm_filename)) print('----------------------------------------------------------------------------------------------') print(' ') print('Mean Absolute Errors: ') print(' All voxels: ' + repr(MAE) + ', Result: ' + result_MAE) - print(' Spatially varying voxels: ' + repr(MAE_sv) + ', Result: ' + result_MAE_sv) - print(' Non-spatially varying voxels: ' + repr(MAE_nsv) + ', Result: ' + result_MAE_nsv) + print(' Spatially varying voxels: ' + + repr(MAE_sv) + ', Result: ' + result_MAE_sv) + print(' Non-spatially varying voxels: ' + + repr(MAE_nsv) + ', Result: ' + result_MAE_nsv) print(' ') print('Mean Relative Differences: ') print(' All voxels: ' + repr(MRD) + ', Result: ' + result_MRD) - print(' Spatially varying voxels: ' + repr(MRD_sv) + ', Result: ' + result_MRD_sv) - print(' Non-spatially varying voxels: ' + repr(MRD_nsv) + ', Result: ' + result_MRD_nsv) + print(' Spatially varying voxels: ' + + repr(MRD_sv) + ', Result: ' + result_MRD_sv) + print(' Non-spatially varying voxels: ' + + repr(MRD_nsv) + ', Result: ' + result_MRD_nsv) print(' ') - + print('----------------------------------------------------------------------------------------------') # ----------------------------------------------------------------------- @@ -140,13 +146,14 @@ def Rcleanup(OutDir, simNo, nvg, cv): # exists for using this format). 
# ----------------------------------------------------------------------- # There should be an inputs file in each simulation directory - with open(os.path.join(simDir,'inputs.yml'), 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) + with open(os.path.join(simDir, 'inputs.yml'), 'r') as stream: + inputs = yaml.load(stream, Loader=yaml.FullLoader) # ----------------------------------------------------------------------- # Get number of observations and fixed effects # ----------------------------------------------------------------------- - X = pd.io.parsers.read_csv(os.path.join(simDir,"data","X.csv"), header=None).values + X = pd.io.parsers.read_csv(os.path.join( + simDir, "data", "X.csv"), header=None).values n = X.shape[0] p = X.shape[1] @@ -154,7 +161,7 @@ def Rcleanup(OutDir, simNo, nvg, cv): # Get number voxels and dimensions # ----------------------------------------------------------------------- - # nmap location + # nmap location nmap = os.path.join(simDir, "data", "Y0.nii") # Work out dim if we don't already have it @@ -168,7 +175,7 @@ def Rcleanup(OutDir, simNo, nvg, cv): # Delete nmap del nmap - + # ------------------------------------------------------------------- # Voxels of interest # ------------------------------------------------------------------- @@ -187,13 +194,15 @@ def Rcleanup(OutDir, simNo, nvg, cv): # ------------------------------------------------------------------- # Read in file - beta_current = pd.io.parsers.read_csv(os.path.join(simDir, 'lm', 'beta_' + str(cv) + '.csv')).values + beta_current = pd.io.parsers.read_csv(os.path.join( + simDir, 'lm', 'beta_' + str(cv) + '.csv')).values # Loop through parameters adding them one voxel at a time for param in np.arange(p): # Add back to a NIFTI file - addBlockToNifti(os.path.join(simDir,"lm","lm_vox_beta.nii"), beta_current[:,param], inds_cv, volInd=param,dim=(*dim,int(p))) + addBlockToNifti(os.path.join(simDir, "lm", "lm_vox_beta.nii"), + beta_current[:, param], inds_cv, volInd=param, dim=(*dim, int(p))) # Remove file os.remove(os.path.join(simDir, 'lm', 'beta_' + str(cv) + '.csv')) @@ -203,10 +212,12 @@ def Rcleanup(OutDir, simNo, nvg, cv): # ------------------------------------------------------------------- # Read in file - sigma2_current = pd.io.parsers.read_csv(os.path.join(simDir, 'lm', 'sigma2_' + str(cv) + '.csv')).values + sigma2_current = pd.io.parsers.read_csv(os.path.join( + simDir, 'lm', 'sigma2_' + str(cv) + '.csv')).values # Add back to a NIFTI file - addBlockToNifti(os.path.join(simDir,"lm","lm_vox_resms.nii"), sigma2_current, inds_cv, volInd=0,dim=(*dim,1)) + addBlockToNifti(os.path.join(simDir, "lm", "lm_vox_resms.nii"), + sigma2_current, inds_cv, volInd=0, dim=(*dim, 1)) # Remove file os.remove(os.path.join(simDir, 'lm', 'sigma2_' + str(cv) + '.csv')) @@ -216,10 +227,12 @@ def Rcleanup(OutDir, simNo, nvg, cv): # ------------------------------------------------------------------- # Read in file - llh_current = pd.io.parsers.read_csv(os.path.join(simDir, 'lm', 'llh_' + str(cv) + '.csv')).values + llh_current = pd.io.parsers.read_csv(os.path.join( + simDir, 'lm', 'llh_' + str(cv) + '.csv')).values # Add back to a NIFTI file - addBlockToNifti(os.path.join(simDir,"lm","lm_vox_llh.nii"), llh_current, inds_cv, volInd=0,dim=(*dim,1)) + addBlockToNifti(os.path.join(simDir, "lm", "lm_vox_llh.nii"), + llh_current, inds_cv, volInd=0, dim=(*dim, 1)) # Remove file os.remove(os.path.join(simDir, 'lm', 'llh_' + str(cv) + '.csv')) @@ -229,10 +242,12 @@ def Rcleanup(OutDir, simNo, nvg, cv): # 
------------------------------------------------------------------- # Read in file - Tstat_current = pd.io.parsers.read_csv(os.path.join(simDir, 'lm', 'Tstat_' + str(cv) + '.csv')).values + Tstat_current = pd.io.parsers.read_csv(os.path.join( + simDir, 'lm', 'Tstat_' + str(cv) + '.csv')).values # Add back to a NIFTI file - addBlockToNifti(os.path.join(simDir,"lm","lm_vox_conT.nii"), Tstat_current, inds_cv, volInd=0,dim=(*dim,1)) + addBlockToNifti(os.path.join(simDir, "lm", "lm_vox_conT.nii"), + Tstat_current, inds_cv, volInd=0, dim=(*dim, 1)) # Remove file os.remove(os.path.join(simDir, 'lm', 'Tstat_' + str(cv) + '.csv')) @@ -242,13 +257,16 @@ def Rcleanup(OutDir, simNo, nvg, cv): # ------------------------------------------------------------------- # Read in file - Pval_current = pd.io.parsers.read_csv(os.path.join(simDir, 'lm', 'Pval_' + str(cv) + '.csv')).values + Pval_current = pd.io.parsers.read_csv(os.path.join( + simDir, 'lm', 'Pval_' + str(cv) + '.csv')).values # Change to log scale - Pval_current[Pval_current!=0]=-np.log10(Pval_current[Pval_current!=0]) + Pval_current[Pval_current != 0] = - \ + np.log10(Pval_current[Pval_current != 0]) # Add back to a NIFTI file - addBlockToNifti(os.path.join(simDir,"lm","lm_vox_conTlp.nii"), Pval_current, inds_cv, volInd=0,dim=(*dim,1)) + addBlockToNifti(os.path.join(simDir, "lm", "lm_vox_conTlp.nii"), + Pval_current, inds_cv, volInd=0, dim=(*dim, 1)) # Remove file os.remove(os.path.join(simDir, 'lm', 'Pval_' + str(cv) + '.csv')) @@ -263,7 +281,7 @@ def addLineToCSV(fname, line): while fileLocked: try: # Create lock file, so other jobs know we are writing to this file - f = os.open(fname + ".lock", os.O_CREAT|os.O_EXCL|os.O_RDWR) + f = os.open(fname + ".lock", os.O_CREAT | os.O_EXCL | os.O_RDWR) fileLocked = False except FileExistsError: fileLocked = True @@ -272,10 +290,11 @@ def addLineToCSV(fname, line): if os.path.isfile(fname): # Read in data - data = pd.io.parsers.read_csv(fname, header=None, index_col=None).values + data = pd.io.parsers.read_csv( + fname, header=None, index_col=None).values # Append line to data - data = np.concatenate((data, line),axis=0) + data = np.concatenate((data, line), axis=0) else: @@ -290,4 +309,4 @@ def addLineToCSV(fname, line): os.remove(fname + ".lock") os.close(f) - del fname \ No newline at end of file + del fname diff --git a/test/generate_test_data.py b/test/generate_test_data.py index a235c6e..fc80350 100644 --- a/test/generate_test_data.py +++ b/test/generate_test_data.py @@ -1,23 +1,24 @@ +from scipy import ndimage +import pandas as pd +import time +from blm.lib.fileio import * +import yaml +import shutil +import glob +import os +import sys +import nibabel as nib +import numpy as np import warnings as w # This warning is caused by numpy updates and should # be ignored for now. -w.simplefilter(action = 'ignore', category = FutureWarning) -import numpy as np -import nibabel as nib -import sys -import os -import glob -import shutil -import yaml -from blm.lib.fileio import * -import time -import pandas as pd -from scipy import ndimage +w.simplefilter(action='ignore', category=FutureWarning) + def generate_data(n, p, dim, OutDir, simNo): """ Generates simulated data with the specified dimensions and other parameters. 
- + Parameters ---------- n : int @@ -63,7 +64,7 @@ def generate_data(n, p, dim, OutDir, simNo): # Fixed effects design matrix X = get_X(n, p) - + # ------------------------------------------------- # Obtain beta parameter vector # ------------------------------------------------- @@ -79,10 +80,10 @@ def generate_data(n, p, dim, OutDir, simNo): Xbeta = X @ beta # Loop through subjects generating nifti images - for i in np.arange(n): + for i in np.arange(n): # Initialize Yi to Xi times beta - Yi = Xbeta[0,i,0] + Yi = Xbeta[0, i, 0] # Get epsiloni epsiloni = get_epsilon(v, 1).reshape(dim) @@ -91,7 +92,8 @@ def generate_data(n, p, dim, OutDir, simNo): Yi = Yi + epsiloni # Smooth Y_i - Yi = smooth_data(Yi, 3, [fwhm]*3, trunc=6, scaling='kernel').reshape(dim) + Yi = smooth_data(Yi, 3, [fwhm]*3, trunc=6, + scaling='kernel').reshape(dim) # Obtain mask mask = get_random_mask(dim).reshape(Yi.shape) @@ -100,17 +102,19 @@ def generate_data(n, p, dim, OutDir, simNo): Yi = Yi*mask # Truncate off (handles smoothing edge effects) - Yi = Yi[10:(dim[0]-10),10:(dim[1]-10),10:(dim[2]-10)] + Yi = Yi[10:(dim[0]-10), 10:(dim[1]-10), 10:(dim[2]-10)] # Output Yi - addBlockToNifti(os.path.join(simDir,"data","Y"+str(i)+".nii"), Yi, np.arange(np.prod(origdim)), volInd=0,dim=origdim) + addBlockToNifti(os.path.join(simDir, "data", "Y"+str(i)+".nii"), + Yi, np.arange(np.prod(origdim)), volInd=0, dim=origdim) # ----------------------------------------------------- # Save X # ----------------------------------------------------- # Write out Z in full to a csv file - pd.DataFrame(X.reshape(n,p)).to_csv(os.path.join(simDir,"data","X.csv"), header=None, index=None) + pd.DataFrame(X.reshape(n, p)).to_csv(os.path.join( + simDir, "data", "X.csv"), header=None, index=None) # ----------------------------------------------------- # Contrast vector @@ -127,14 +131,15 @@ def generate_data(n, p, dim, OutDir, simNo): # ----------------------------------------------------- # Write to an inputs file - with open(os.path.join(simDir,'inputs.yml'), 'a') as f: + with open(os.path.join(simDir, 'inputs.yml'), 'a') as f: # X, Y, Z and Masks - f.write("Y_files: " + os.path.join(simDir,"data","Yfiles.txt") + os.linesep) - f.write("X: " + os.path.join(simDir,"data","X.csv") + os.linesep) + f.write("Y_files: " + os.path.join(simDir, + "data", "Yfiles.txt") + os.linesep) + f.write("X: " + os.path.join(simDir, "data", "X.csv") + os.linesep) # Output directory - f.write("outdir: " + os.path.join(simDir,"BLM") + os.linesep) + f.write("outdir: " + os.path.join(simDir, "BLM") + os.linesep) # Missingness percentage f.write("Missingness: " + os.linesep) @@ -161,27 +166,28 @@ def generate_data(n, p, dim, OutDir, simNo): # Log directory and simulation mode (backdoor options) f.write("sim: 1" + os.linesep) - f.write("logdir: " + os.path.join(simDir,"simlog") + os.linesep) + f.write("logdir: " + os.path.join(simDir, "simlog") + os.linesep) # ----------------------------------------------------- # Yfiles.txt # ----------------------------------------------------- - with open(os.path.join(simDir,"data",'Yfiles.txt'), 'a') as f: + with open(os.path.join(simDir, "data", 'Yfiles.txt'), 'a') as f: # Loop through listing mask files in text file for i in np.arange(n): # Write filename to text file if i < n-1: - f.write(os.path.join(simDir,"data","Y"+str(i)+".nii") + os.linesep) + f.write(os.path.join(simDir, "data", + "Y"+str(i)+".nii") + os.linesep) else: - f.write(os.path.join(simDir,"data","Y"+str(i)+".nii")) + f.write(os.path.join(simDir, "data", 
"Y"+str(i)+".nii")) # ----------------------------------------------------- # Version of data which can be fed into R # ----------------------------------------------------- # - i.e. seperate Y out into thousands of csv files - # each containing number of subjects by 1000 + # each containing number of subjects by 1000 # voxel arrays. # ----------------------------------------------------- @@ -191,14 +197,13 @@ def generate_data(n, p, dim, OutDir, simNo): # Work out number of groups we have to split indices into. nvg = int(np.prod(origdim)//nvb) - # Write out the number of voxel groups we split the data into with open(os.path.join(simDir, "data", "nb.txt"), 'w') as f: print(int(nvg), file=f) # R preprocessing -def Rpreproc(OutDir,simNo,dim,nvg,cv): +def Rpreproc(OutDir, simNo, dim, nvg, cv): # Get simulation directory simDir = os.path.join(OutDir, 'sim' + str(simNo)) @@ -210,11 +215,12 @@ def Rpreproc(OutDir,simNo,dim,nvg,cv): v = np.prod(dim) # There should be an inputs file in each simulation directory - with open(os.path.join(simDir,'inputs.yml'), 'r') as stream: - inputs = yaml.load(stream,Loader=yaml.FullLoader) + with open(os.path.join(simDir, 'inputs.yml'), 'r') as stream: + inputs = yaml.load(stream, Loader=yaml.FullLoader) # Number of observations - X = pd.io.parsers.read_csv(os.path.join(simDir,"data","X.csv"), header=None).values + X = pd.io.parsers.read_csv(os.path.join( + simDir, "data", "X.csv"), header=None).values n = X.shape[0] # Relative masking threshold @@ -229,12 +235,13 @@ def Rpreproc(OutDir,simNo,dim,nvg,cv): # Number of voxels currently (should be ~1000) v_current = len(inds_cv) - # Loop through each subject reading in Y and reducing to just the voxels + # Loop through each subject reading in Y and reducing to just the voxels # needed for i in np.arange(n): # Load in the Y volume - Yi = nib.load(os.path.join(simDir,"data","Y"+str(i)+".nii")).get_fdata() + Yi = nib.load(os.path.join(simDir, "data", + "Y"+str(i)+".nii")).get_fdata() # Flatten Yi Yi = Yi.reshape(v) @@ -243,23 +250,24 @@ def Rpreproc(OutDir,simNo,dim,nvg,cv): Yi = Yi[inds_cv].reshape(1, v_current) # Concatenate - if i==0: + if i == 0: Y_concat = Yi else: Y_concat = np.concatenate((Y_concat, Yi), axis=0) - + # Loop through voxels checking missingness for vox in np.arange(v_current): # Threshold out the voxels which have too much missingness - if np.count_nonzero(Y_concat[:,vox], axis=0)/n < rmThresh: + if np.count_nonzero(Y_concat[:, vox], axis=0)/n < rmThresh: - # If we don't have enough data lets replace that voxel + # If we don't have enough data lets replace that voxel # with zeros - Y_concat[:,vox] = np.zeros(Y_concat[:,vox].shape) + Y_concat[:, vox] = np.zeros(Y_concat[:, vox].shape) # Write out Z in full to a csv file - pd.DataFrame(Y_concat.reshape(n,v_current)).to_csv(os.path.join(simDir,"data","Y_Rversion_" + str(cv) + ".csv"), header=None, index=None) + pd.DataFrame(Y_concat.reshape(n, v_current)).to_csv(os.path.join( + simDir, "data", "Y_Rversion_" + str(cv) + ".csv"), header=None, index=None) def get_random_mask(dim): @@ -268,7 +276,8 @@ def get_random_mask(dim): fwhm = 10 # Load analysis mask - mu = nib.load(os.path.join(os.path.dirname(__file__),'mask.nii')).get_fdata() + mu = nib.load(os.path.join(os.path.dirname( + __file__), 'mask.nii')).get_fdata() # Add some noise and smooth mu = smooth_data(mu + 8*np.random.randn(*(mu.shape)), 3, [fwhm]*3) @@ -276,26 +285,26 @@ def get_random_mask(dim): # Re-threshold (this has induced a bit of randomness in the mask shape) mu = 1*(mu > 0.6) - 
return(mu) + return (mu) -def get_X(n,p): +def get_X(n, p): # Generate random X. - X = np.random.uniform(low=-0.5,high=0.5,size=(n,p)) - + X = np.random.uniform(low=-0.5, high=0.5, size=(n, p)) + # Make the first column an intercept - X[:,0]=1 + X[:, 0] = 1 # Reshape to dimensions for broadcasting X = X.reshape(1, n, p) # Return X - return(X) + return (X) def get_beta(p): - + # Make beta (we just have beta = [p-1,p-2,...,0]) beta = p-1-np.arange(p) @@ -303,29 +312,31 @@ def get_beta(p): beta = beta.reshape(1, p, 1) # Return beta - return(beta) + return (beta) def get_sigma2(v): # Make sigma2 (for now just set to one across all voxels) - sigma2 = 10#np.ones(v).reshape(v,1) + sigma2 = 10 # np.ones(v).reshape(v,1) # Return sigma - return(sigma2) + return (sigma2) + -def get_epsilon(v,n): +def get_epsilon(v, n): # Get sigma2 sigma2 = get_sigma2(v) # Make epsilon. - epsilon = sigma2*np.random.randn(v,n) + epsilon = sigma2*np.random.randn(v, n) # Reshape to dimensions for broadcasting epsilon = epsilon.reshape(v, n, 1) - return(epsilon) + return (epsilon) + def get_Y(X, beta, epsilon): @@ -333,8 +344,7 @@ def get_Y(X, beta, epsilon): Y = X @ beta + epsilon # Return Y - return(Y) - + return (Y) # ============================================================================ @@ -350,29 +360,29 @@ def get_Y(X, beta, epsilon): # # - `fname`: An absolute path to the Nifti file. # - `block`: The block of values to write to the NIFTI. -# - `blockInds`: The indices representing the 3D coordinates `block` should be +# - `blockInds`: The indices representing the 3D coordinates `block` should be # written to in the NIFTI. (Note: It is assumed if the NIFTI is # 4D we assume that the indices we want to write to in each 3D # volume/slice are the same across all 3D volumes/slices). -# - `dim` (optional): If creating the NIFTI image for the first time, the +# - `dim` (optional): If creating the NIFTI image for the first time, the # dimensions of the NIFTI image must be specified. # - `volInd` (optional): If we only want to write to one 3D volume/slice, # within a 4D file, this specifies the index of the # volume of interest. -# - `aff` (optional): If creating the NIFTI image for the first time, the +# - `aff` (optional): If creating the NIFTI image for the first time, the # affine of the NIFTI image must be specified. -# - `hdr` (optional): If creating the NIFTI image for the first time, the +# - `hdr` (optional): If creating the NIFTI image for the first time, the # header of the NIFTI image must be specified. 
# # ============================================================================ -def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=None): +def addBlockToNifti(fname, block, blockInds, dim=None, volInd=None, aff=None, hdr=None): # Check if file is in use fileLocked = True while fileLocked: try: # Create lock file, so other jobs know we are writing to this file - f = os.open(fname + ".lock", os.O_CREAT|os.O_EXCL|os.O_RDWR) + f = os.open(fname + ".lock", os.O_CREAT | os.O_EXCL | os.O_RDWR) fileLocked = False except FileExistsError: fileLocked = True @@ -389,16 +399,17 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No dim = nib.Nifti1Image.from_filename(fname, mmap=False).shape # Work out data - data = nib.Nifti1Image.from_filename(fname, mmap=False).get_fdata().copy() + data = nib.Nifti1Image.from_filename( + fname, mmap=False).get_fdata().copy() # Work out affine affine = nib.Nifti1Image.from_filename(fname, mmap=False).affine.copy() - + else: # If we know how, make the NIFTI if dim is not None: - + # Make data data = np.zeros(dim) @@ -413,12 +424,12 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No # Throw an error because we don't know what to do raise Exception('NIFTI does not exist and dimensions not given') - # Work out the number of output volumes inside the nifti - if len(dim)==3: + # Work out the number of output volumes inside the nifti + if len(dim) == 3: # We only have one volume in this case n_vol = 1 - dim = np.array([dim[0],dim[1],dim[2],1]) + dim = np.array([dim[0], dim[1], dim[2], 1]) else: @@ -431,36 +442,36 @@ def addBlockToNifti(fname, block, blockInds,dim=None,volInd=None,aff=None,hdr=No # Work out the number of voxels n_vox = np.prod(dim[:3]) - # Reshape + # Reshape data = data.reshape([n_vox, n_vol]) # Add all the volumes if volInd is None: # Add block - data[blockInds,:] = block.reshape(data[blockInds,:].shape) - + data[blockInds, :] = block.reshape(data[blockInds, :].shape) + # Cycle through volumes, reshaping. - for k in range(0,data.shape[1]): + for k in range(0, data.shape[1]): - data_out[:,:,:,k] = data[:,k].reshape(int(dim[0]), - int(dim[1]), - int(dim[2])) + data_out[:, :, :, k] = data[:, k].reshape(int(dim[0]), + int(dim[1]), + int(dim[2])) # Add the one volume in the correct place else: # We're only looking at this volume - data = data[:,volInd].reshape((n_vox,1)) + data = data[:, volInd].reshape((n_vox, 1)) # Add block - data[blockInds,:] = block.reshape(data[blockInds,:].shape) - + data[blockInds, :] = block.reshape(data[blockInds, :].shape) + # Put in the volume - data_out[:,:,:,volInd] = data[:,0].reshape(int(dim[0]), - int(dim[1]), - int(dim[2])) - + data_out[:, :, :, volInd] = data[:, 0].reshape(int(dim[0]), + int(dim[1]), + int(dim[2])) + # Save NIFTI nib.save(nib.Nifti1Image(data_out, affine, header=hdr), fname) @@ -484,7 +495,7 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): fwhm = np.asarray([0. 
if elem is None else elem for elem in fwhm]) # Non-zero dimensions - D_nz = np.sum(fwhm>0) + D_nz = np.sum(fwhm > 0) # Convert fwhm to sigma values sigma = fwhm / np.sqrt(8 * np.log(2)) @@ -493,7 +504,7 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): # Perform smoothing (this code is based on `_smooth_array` from the # nilearn package) # ----------------------------------------------------------------------- - + # Loop through each dimension and smooth for n, s in enumerate(sigma): @@ -501,14 +512,14 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): if s > 0.0: # Perform smoothing in nth dimension - ndimage.gaussian_filter1d(data, s, output=data, mode='constant', axis=n, truncate=trunc) - + ndimage.gaussian_filter1d( + data, s, output=data, mode='constant', axis=n, truncate=trunc) # ----------------------------------------------------------------------- # Rescale # ----------------------------------------------------------------------- - if scaling=='kernel': - + if scaling == 'kernel': + # ----------------------------------------------------------------------- # Rescale smoothed data to standard deviation 1 (this code is based on # `_gaussian_kernel1d` from the `scipy.ndimage` package). @@ -520,9 +531,9 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): # Calculate kernel radii radii = np.int16(trunc*sigma + 0.5) - # Initialize array for phi values (has to be object as dimensions can + # Initialize array for phi values (has to be object as dimensions can # vary in length) - phis = np.empty(shape=(D_nz),dtype=object) + phis = np.empty(shape=(D_nz), dtype=object) # Index for non-zero dimensions j = 0 @@ -531,11 +542,11 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): for k in np.arange(D): # Skip the non-smoothed dimensions - if fwhm[k]!=0: + if fwhm[k] != 0: # Get range of values for this dimension r = np.arange(-radii[k], radii[k]+1) - + # Get the kernel for this dimension phi = np.exp(-0.5 / sigma2[k] * r ** 2) @@ -543,13 +554,13 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): phi = phi / phi.sum() # Add phi to dictionary - phis[j]= phi[::-1] + phis[j] = phi[::-1] # Increment j j = j + 1 - + # Create the D_nz dimensional grid - grids = np.meshgrid(*phis); + grids = np.meshgrid(*phis) # Initialize normalizing constant ss = 1 @@ -574,9 +585,9 @@ def smooth_data(data, D, fwhm, trunc=6, scaling='kernel'): # Rescale noise data = data/np.sqrt(ss) - elif scaling=='max': + elif scaling == 'max': # Rescale noise by dividing by maximum value data = data/np.max(data) - return(data) \ No newline at end of file + return (data)
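
Note on the locking used throughout these files: addBlockToNifti, addBlocksToNiftis and addLineToCSV all guard concurrent writes from Dask workers with the same lock-file pattern. os.open(fname + ".lock", os.O_CREAT | os.O_EXCL | os.O_RDWR) either creates the lock atomically or raises FileExistsError, and a worker keeps retrying until it wins the lock, removing the lock file once its write is finished. A minimal sketch of that pattern, under the assumption that a short sleep between retries is acceptable, is given below; the helper name locked_append and the sleep are illustrative additions and are not part of the package.

import os
import time


def locked_append(fname, text):
    # Spin until we create the lock file; O_CREAT | O_EXCL makes the
    # creation atomic, so only one worker can hold the lock at a time.
    while True:
        try:
            f = os.open(fname + ".lock", os.O_CREAT | os.O_EXCL | os.O_RDWR)
            break
        except FileExistsError:
            time.sleep(0.1)

    try:
        # Critical section: read/modify/write the shared file.
        with open(fname, "a") as out:
            out.write(text)
    finally:
        # Release the lock so other workers can proceed.
        os.remove(fname + ".lock")
        os.close(f)

Removing the lock before closing the descriptor mirrors the order used in addLineToCSV and addBlockToNifti; on POSIX systems deleting a file that is still open is safe.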