From 3ea62b8dead3bd85ea9af6e3e2fd7a7524590906 Mon Sep 17 00:00:00 2001 From: RSKothari Date: Thu, 8 Apr 2021 06:50:44 +0400 Subject: [PATCH] improved chunking. will now add the ability to select chunk dimension --- args_maker.py | 2 +- benchmark.py | 7 ++++--- converter.py | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/args_maker.py b/args_maker.py index 73e2f76..91bb5b7 100644 --- a/args_maker.py +++ b/args_maker.py @@ -31,7 +31,7 @@ def make_args(): default='D:/Datasets/Gaze360/imgs') required_args.add_argument('--path_output', required=False, help='abs path to output H5 file', - default='D:/exp.h5') + default='D:/exp.h5') #'C:/Users/Rudra/Downloads/exp.h5' args = parser.parse_args() pprint(vars(args)) diff --git a/benchmark.py b/benchmark.py index 5a4b2d9..7ef675b 100644 --- a/benchmark.py +++ b/benchmark.py @@ -42,7 +42,8 @@ def __getitem__(self, idx): # Step #2: Create a H5 object within the __getitem__ call # This creates a H5 reader object for each worker. 
if not hasattr(self, 'h5_obj'): - self.h5_obj = h5py.File(self.path_h5, mode='r', swmr=True) + self.h5_obj = h5py.File(self.path_h5, mode='r', swmr=True, + rdcc_nbytes=10485760) # Reading a datum from the H5 file datum = self.h5_obj[entry_str][:] @@ -66,11 +67,11 @@ def __del__(self, ): args = vars(make_args()) - bench_obj = benchmark(args['path_output'], args['path_images']) + bench_obj = benchmark(args['path_output'], args['path_images'], mode='H5') loader = torch.utils.data.DataLoader(bench_obj, shuffle=True, batch_size=48, - num_workers=0) + num_workers=4) for epoch in range(3): time_elapsed = [] diff --git a/converter.py b/converter.py index 502b1a0..e6c30c8 100644 --- a/converter.py +++ b/converter.py @@ -58,10 +58,10 @@ def prune_files(self, files): if any(files): if self.args_dict['custom_prune_func']: return [fi for fi in files - if self.default_prune(fi, self.args_dict['ext'])] + if my_prune(fi, self.args_dict['ext'])] else: return [fi for fi in files - if my_prune(fi, self.args_dict['ext'])] + if self.default_prune(fi, self.args_dict['ext'])] else: return [] @@ -71,7 +71,7 @@ def log_sample(self, h5_obj, datum): data.shape, data=data, dtype=str(data.dtype), - chunks=True, + chunks=data.shape, compression='lzf') def read_write(self, ): @@ -144,7 +144,7 @@ def default_prune(self, filename_str, ext_str): from my_functions import my_prune # %% Delete and create a new H5 file - h5_obj = h5py.File(args.path_output, 'w') + h5_obj = h5py.File(args['path_output'], 'w') h5_obj.close() # %% Begin reading and writing to H5