From 2ea523c9568ce32d6293a6a3cdfedff38b0f7d26 Mon Sep 17 00:00:00 2001 From: Arnab Date: Tue, 18 Jul 2023 12:54:23 -0700 Subject: [PATCH] MAINT: Typos and misc bugs fixed for load_from_cache --- pylids/select_frames.py | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/pylids/select_frames.py b/pylids/select_frames.py index 326a610..791ef05 100644 --- a/pylids/select_frames.py +++ b/pylids/select_frames.py @@ -67,12 +67,11 @@ def select_augmentations(trn_fls, tst_fls, aug_fls, Returns: TYPE: Description """ - # expects .png folders for test and train files - # returns a list with path to frames to label - - print('This code has a run time of about 40 mins, go make yourself some tea! \n') - print('Run time and RAM required scales with dataset size! \n') - print('Make sure you have enough RAM for this process, else, downsample your data using k-means clustering... \n') + + print('This can take some time to run, go make yourself some tea!') + print('\n Run time and RAM required scales with dataset size!') + print('\n Track memory usage and make sure you have enough RAM for this process.') + print('\n else, downsample your data using k-means clustering within participants...') model = ResNet50(weights="imagenet", include_top=False) if not os.path.exists(cache_loc): os.makedirs(cache_loc) @@ -215,17 +214,16 @@ def select_frames_to_label(trn_fls=None, tst_fls=None, num_frames (int, optional): Description kmeans_batch_size (int, optional): Description kmeans_type (str, optional): default, batch - load_from_cache (bool, optional): set to True if you want to load trn_fls or tst_fls from cache + load_from_cache (bool, optional): set to True if you want to load/save + trn_fls and tst_fls from cache Returns: TYPE: Description """ - # expects .png folders for test and train files - # returns a list with path to frames to label - - print('This code has a run time of about 40 mins, go make yourself some tea!') + print('This can take some time to run, go make yourself some tea!') print('\n Run time and RAM required scales with dataset size!') - print('\n Make sure you have enough RAM for this process else, downsample your data using k-means clustering within participants...') + print('\n Track memory usage and make sure you have enough RAM for this process.') + print('\n else, downsample your data using k-means clustering within participants...') model = ResNet50(weights="imagenet", include_top=False) if not os.path.exists(cache_loc) and load_from_cache: @@ -245,7 +243,7 @@ def select_frames_to_label(trn_fls=None, tst_fls=None, np.save(os.path.join(cache_loc, trn_data+'.npy'), trn_rnfs) else: trn_rnfs = get_rnfs_from_list(trn_fls, model) - np.save(os.path.join(cache_loc, trn_data+'.npy'), trn_rnfs) + if return_min_rand_frames: av_trn_fs = np.mean(trn_rnfs,axis=0) @@ -253,14 +251,20 @@ def select_frames_to_label(trn_fls=None, tst_fls=None, assert trn_fls is not None, 'Test files should be provided' else: # extracting resnet features for test dataset images - tst_data = input("Enter the name of the test dataset:\n") - if os.path.isfile(os.path.join(cache_loc,tst_data+'.npy')): - print('Loading test features from cache') - tst_rnfs = np.load(os.path.join(cache_loc,tst_data+'.npy')) + if load_from_cache: + print('Files in cache: \n') + print(glob(cache_loc+'*.npy')) + + tst_data = input("Enter the name of the test dataset:\n") + if os.path.isfile(os.path.join(cache_loc,tst_data+'.npy')): + print('Loading test features from cache') + tst_rnfs = np.load(os.path.join(cache_loc,tst_data+'.npy')) + else: + tst_rnfs = get_rnfs_from_list(tst_fls, model) + np.save(os.path.join(cache_loc,tst_data+'.npy'), tst_rnfs) else: tst_rnfs = get_rnfs_from_list(tst_fls, model) - np.save(os.path.join(cache_loc,tst_data+'.npy'), tst_rnfs) - + # Iterative k means which keeps running till we find a given number of frames to label # from the test dataset / set of augmented images n_clusters = n_frames