diff --git a/af_benchmark/data_access/loader.py b/af_benchmark/data_access/loader.py index 1001ad8..8329da7 100644 --- a/af_benchmark/data_access/loader.py +++ b/af_benchmark/data_access/loader.py @@ -4,31 +4,39 @@ def get_file_list(cls): mode = cls.config.get('data-access.mode', 'local') - if mode == 'local': + if mode == 'explicit-files': file_list = cls.config.get('data-access.files', []) - elif mode == 'local_dirs': + elif mode == 'explicit-dirs': dirs = cls.config.get('data-access.directories', []) file_list = [] for dir in dirs: file_list.extend(glob.glob(dir+"/**/*.root", recursive = True)) - elif mode == 'dbs_dataset': - dbsdataset = cls.config.get('data-access.dataset', "") + elif mode == 'dbs-dataset': + dbsdatasets = cls.config.get('data-access.datasets', []) xrootdserver = cls.config.get('data-access.xrootdserver', 'eos.cms.rcac.purdue.edu:1094') dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader') - file_list = ["root://"+xrootdserver+"/"+file['logical_file_name'] for file in dbs.listFiles(dataset=dbsdataset)] - elif mode == 'dbs_block': - dbsblock = cls.config.get('data-access.block', "") + file_list = [ + "root://"+xrootdserver+"/"+file['logical_file_name'] + for dataset in dbsdatasets + for file in dbs.listFiles(dataset=dataset) + ] + elif mode == 'dbs-block': + dbsblocks = cls.config.get('data-access.blocks', []) xrootdserver = cls.config.get('data-access.xrootdserver', 'eos.cms.rcac.purdue.edu:1094') dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader') - file_list = ["root://"+xrootdserver+"/"+file['logical_file_name'] for file in dbs.listFiles(block_name=dbsblock)] - elif mode == 'dbs_file': - dbsfile = cls.config.get('data-access.file', "") + file_list = [ + "root://"+xrootdserver+"/"+file['logical_file_name'] + for block in dbsblocks + for file in dbs.listFiles(block_name=block) + ] + elif mode == 'dbs-files': + dbsfiles = cls.config.get('data-access.files', []) xrootdserver = cls.config.get('data-access.xrootdserver', 'cms-xcache.rcac.purdue.edu:1094') dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader') - file_list = ["root://"+xrootdserver+"/"+dbsfile] + file_list = ["root://"+xrootdserver+"/"+file for file in dbsfiles] else: raise NotImplementedError( - f"Data access modes other than 'local' and 'local_dir' are not yet implemented" + f"Data access mode {mode} not implemented" ) cls.n_files = len(file_list) diff --git a/af_benchmark/example-configs/example-config-1.yaml b/af_benchmark/example-configs/example-config-1.yaml deleted file mode 100644 index 6e967d1..0000000 --- a/af_benchmark/example-configs/example-config-1.yaml +++ /dev/null @@ -1,11 +0,0 @@ -executor: - backend: futures -data-access: - mode: local_dirs - directories: - - /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/ -processor: - parallelize_over: files - columns: - - event - operation: sum diff --git a/af_benchmark/example-configs/example-config-dbs-block.yaml b/af_benchmark/example-configs/example-config-dbs-blocks.yaml similarity index 67% rename from af_benchmark/example-configs/example-config-dbs-block.yaml rename to af_benchmark/example-configs/example-config-dbs-blocks.yaml index 3c66c7c..f5459db 100644 --- a/af_benchmark/example-configs/example-config-dbs-block.yaml +++ b/af_benchmark/example-configs/example-config-dbs-blocks.yaml @@ -1,11 +1,12 @@ executor: backend: sequential data-access: - mode: dbs_block + mode: dbs-block xrootdserver: eos.cms.rcac.purdue.edu:1094 # xrootdserver: cms-xcache.rcac.purdue.edu:1094 # xrootdserver: cmsxrootd.hep.wisc.edu:1094 - block: /SingleMuon/Run2017C-02Apr2020-v1/NANOAOD#44236284-ff8d-4b95-b971-dcec15b5130f + blocks: + - /SingleMuon/Run2017C-02Apr2020-v1/NANOAOD#44236284-ff8d-4b95-b971-dcec15b5130f processor: parallelize_over: files columns: 5 diff --git a/af_benchmark/example-configs/example-config-dbs-dataset.yaml b/af_benchmark/example-configs/example-config-dbs-datasets.yaml similarity index 52% rename from af_benchmark/example-configs/example-config-dbs-dataset.yaml rename to af_benchmark/example-configs/example-config-dbs-datasets.yaml index a5bc6fc..6203a33 100644 --- a/af_benchmark/example-configs/example-config-dbs-dataset.yaml +++ b/af_benchmark/example-configs/example-config-dbs-datasets.yaml @@ -1,11 +1,12 @@ executor: - backend: sequential + backend: futures data-access: - mode: dbs_dataset + mode: dbs-dataset # xrootdserver: eos.cms.rcac.purdue.edu:1094 xrootdserver: cms-xcache.rcac.purdue.edu:1094 - dataset: /SingleMuon/Run2016B-02Apr2020_ver2-v1/NANOAOD + datasets: + - /SingleMuon/Run2016B-02Apr2020_ver2-v1/NANOAOD processor: parallelize_over: files - columns: 5 - operation: sum + columns: 1 + operation: nothing diff --git a/af_benchmark/example-configs/example-config-dbs-file.yaml b/af_benchmark/example-configs/example-config-dbs-files.yaml similarity index 78% rename from af_benchmark/example-configs/example-config-dbs-file.yaml rename to af_benchmark/example-configs/example-config-dbs-files.yaml index b1d5eef..de8375b 100644 --- a/af_benchmark/example-configs/example-config-dbs-file.yaml +++ b/af_benchmark/example-configs/example-config-dbs-files.yaml @@ -1,11 +1,12 @@ executor: backend: sequential data-access: - mode: dbs_file + mode: dbs-files # xrootdserver: eos.cms.rcac.purdue.edu:1094 xrootdserver: cms-xcache.rcac.purdue.edu:1094 # xrootdserver: cmsxrootd.hep.wisc.edu:1094 - file: /store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/90322FC2-4027-0E47-92E4-22307EC8EAD2.root + files: + - /store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/90322FC2-4027-0E47-92E4-22307EC8EAD2.root processor: parallelize_over: columns # parallelize_over: files diff --git a/af_benchmark/example-configs/example-config-local-dirs.yaml b/af_benchmark/example-configs/example-config-explicit-dirs.yaml similarity index 90% rename from af_benchmark/example-configs/example-config-local-dirs.yaml rename to af_benchmark/example-configs/example-config-explicit-dirs.yaml index bf45c5a..6bed6ff 100644 --- a/af_benchmark/example-configs/example-config-local-dirs.yaml +++ b/af_benchmark/example-configs/example-config-explicit-dirs.yaml @@ -1,7 +1,7 @@ executor: backend: sequential data-access: - mode: local_dirs + mode: explicit-dirs directories: - /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/ processor: diff --git a/af_benchmark/example-config.yaml b/af_benchmark/example-configs/example-config.yaml similarity index 94% rename from af_benchmark/example-config.yaml rename to af_benchmark/example-configs/example-config.yaml index 77ed588..31e0987 100644 --- a/af_benchmark/example-config.yaml +++ b/af_benchmark/example-configs/example-config.yaml @@ -4,7 +4,7 @@ executor: # backend: dask-local # backend: dask-gateway data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root # mode: local_dirs diff --git a/af_benchmark/processor/uproot_processor.py b/af_benchmark/processor/uproot_processor.py index 0be4b04..c5306db 100644 --- a/af_benchmark/processor/uproot_processor.py +++ b/af_benchmark/processor/uproot_processor.py @@ -74,7 +74,7 @@ def process_column(self, file, column, **kwargs): def run_operation(self, column_data, **kwargs): operation = self.config.get('processor.operation', None) - if not operation: + if (not operation) or (operation=='nothing'): return data_in_memory = np.array([]) diff --git a/tests/test-dask-local.yaml b/tests/test-dask-local.yaml index 483112c..2419858 100644 --- a/tests/test-dask-local.yaml +++ b/tests/test-dask-local.yaml @@ -1,7 +1,7 @@ executor: backend: dask-local data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root processor: diff --git a/tests/test-default.yaml b/tests/test-default.yaml index 8cfd4a9..6a6d14a 100644 --- a/tests/test-default.yaml +++ b/tests/test-default.yaml @@ -1,7 +1,7 @@ executor: backend: futures data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root processor: diff --git a/tests/test-explicit-columns.yaml b/tests/test-explicit-columns.yaml index 6a31c39..cbfb12e 100644 --- a/tests/test-explicit-columns.yaml +++ b/tests/test-explicit-columns.yaml @@ -1,7 +1,7 @@ executor: backend: sequential data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root processor: diff --git a/tests/test-futures.yaml b/tests/test-futures.yaml index 8cfd4a9..6a6d14a 100644 --- a/tests/test-futures.yaml +++ b/tests/test-futures.yaml @@ -1,7 +1,7 @@ executor: backend: futures data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root processor: diff --git a/tests/test-read-by-column.yaml b/tests/test-read-by-column.yaml index 8cfd4a9..6a6d14a 100644 --- a/tests/test-read-by-column.yaml +++ b/tests/test-read-by-column.yaml @@ -1,7 +1,7 @@ executor: backend: futures data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root processor: diff --git a/tests/test-read-by-file.yaml b/tests/test-read-by-file.yaml index 8cfd4a9..6a6d14a 100644 --- a/tests/test-read-by-file.yaml +++ b/tests/test-read-by-file.yaml @@ -1,7 +1,7 @@ executor: backend: futures data-access: - mode: local + mode: explicit-files files: - tests/data/nano_dimuon.root processor: diff --git a/tests/test-read-from-dir.yaml b/tests/test-read-from-dir.yaml index 2f9ba6b..a2547c5 100644 --- a/tests/test-read-from-dir.yaml +++ b/tests/test-read-from-dir.yaml @@ -1,7 +1,7 @@ executor: backend: futures data-access: - mode: local_dirs + mode: explicit-dirs directories: - tests/data/ processor: