From 3ea62b8dead3bd85ea9af6e3e2fd7a7524590906 Mon Sep 17 00:00:00 2001
From: RSKothari <rsk3900@rit.edu>
Date: Thu, 8 Apr 2021 06:50:44 +0400
Subject: [PATCH] improved chunking. will now add the ability to select chunk
 dimension

---
 args_maker.py | 2 +-
 benchmark.py  | 7 ++++---
 converter.py  | 8 ++++----
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/args_maker.py b/args_maker.py
index 73e2f76..91bb5b7 100644
--- a/args_maker.py
+++ b/args_maker.py
@@ -31,7 +31,7 @@ def make_args():
                                default='D:/Datasets/Gaze360/imgs')
     required_args.add_argument('--path_output', required=False,
                                help='abs path to output H5 file',
-                               default='D:/exp.h5')
+                               default='D:/exp.h5') #'C:/Users/Rudra/Downloads/exp.h5'
     args = parser.parse_args()
     pprint(vars(args))
 
diff --git a/benchmark.py b/benchmark.py
index 5a4b2d9..7ef675b 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -42,7 +42,8 @@ def __getitem__(self, idx):
             # Step #2: Create a H5 object within the __getitem__ call
             # This creates a H5 reader object for each worker.
             if not hasattr(self, 'h5_obj'):
-                self.h5_obj = h5py.File(self.path_h5, mode='r', swmr=True)
+                self.h5_obj = h5py.File(self.path_h5, mode='r', swmr=True,
+                                        rdcc_nbytes=10485760)
                 
             # Reading a datum from the H5 file
             datum = self.h5_obj[entry_str][:]
@@ -66,11 +67,11 @@ def __del__(self, ):
 
     args = vars(make_args())
 
-    bench_obj = benchmark(args['path_output'], args['path_images'])
+    bench_obj = benchmark(args['path_output'], args['path_images'], mode='H5')
     loader = torch.utils.data.DataLoader(bench_obj,
                                          shuffle=True,
                                          batch_size=48,
-                                         num_workers=0)
+                                         num_workers=4)
 
     for epoch in range(3):
         time_elapsed = []
diff --git a/converter.py b/converter.py
index 502b1a0..e6c30c8 100644
--- a/converter.py
+++ b/converter.py
@@ -58,10 +58,10 @@ def prune_files(self, files):
         if any(files):
             if self.args_dict['custom_prune_func']:
                 return [fi for fi in files
-                        if self.default_prune(fi, self.args_dict['ext'])]
+                        if my_prune(fi, self.args_dict['ext'])]
             else:
                 return [fi for fi in files
-                        if my_prune(fi, self.args_dict['ext'])]
+                        if self.default_prune(fi, self.args_dict['ext'])]
         else:
             return []
 
@@ -71,7 +71,7 @@ def log_sample(self, h5_obj, datum):
                               data.shape,
                               data=data,
                               dtype=str(data.dtype),
-                              chunks=True,
+                              chunks=data.shape,
                               compression='lzf')
 
     def read_write(self, ):
@@ -144,7 +144,7 @@ def default_prune(self, filename_str, ext_str):
         from my_functions import my_prune
 
     # %% Delete and create a new H5 file
-    h5_obj = h5py.File(args.path_output, 'w')
+    h5_obj = h5py.File(args['path_output'], 'w')
     h5_obj.close()
 
     # %% Begin reading and writing to H5