Skip to content

Commit

Permalink
update data loading options
Browse files (browse the repository at this point in the history)
  • Loading branch information
kondratyevd committed Mar 4, 2024
1 parent 4cdfa68 commit 4ecb6d1
Show file tree
Hide file tree
Showing 15 changed files with 42 additions and 42 deletions.
32 changes: 20 additions & 12 deletions af_benchmark/data_access/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,39 @@

def get_file_list(cls):
mode = cls.config.get('data-access.mode', 'local')
if mode == 'local':
if mode == 'explicit-files':
file_list = cls.config.get('data-access.files', [])
elif mode == 'local_dirs':
elif mode == 'explicit-dirs':
dirs = cls.config.get('data-access.directories', [])
file_list = []
for dir in dirs:
file_list.extend(glob.glob(dir+"/**/*.root", recursive = True))
elif mode == 'dbs_dataset':
dbsdataset = cls.config.get('data-access.dataset', "")
elif mode == 'dbs-dataset':
dbsdatasets = cls.config.get('data-access.datasets', [])
xrootdserver = cls.config.get('data-access.xrootdserver', 'eos.cms.rcac.purdue.edu:1094')
dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
file_list = ["root://"+xrootdserver+"/"+file['logical_file_name'] for file in dbs.listFiles(dataset=dbsdataset)]
elif mode == 'dbs_block':
dbsblock = cls.config.get('data-access.block', "")
file_list = [
"root://"+xrootdserver+"/"+file['logical_file_name']
for dataset in dbsdatasets
for file in dbs.listFiles(dataset=dataset)
]
elif mode == 'dbs-block':
dbsblocks = cls.config.get('data-access.blocks', [])
xrootdserver = cls.config.get('data-access.xrootdserver', 'eos.cms.rcac.purdue.edu:1094')
dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
file_list = ["root://"+xrootdserver+"/"+file['logical_file_name'] for file in dbs.listFiles(block_name=dbsblock)]
elif mode == 'dbs_file':
dbsfile = cls.config.get('data-access.file', "")
file_list = [
"root://"+xrootdserver+"/"+file['logical_file_name']
for block in dbsblocks
for file in dbs.listFiles(block_name=block)
]
elif mode == 'dbs-files':
dbsfiles = cls.config.get('data-access.files', [])
xrootdserver = cls.config.get('data-access.xrootdserver', 'cms-xcache.rcac.purdue.edu:1094')
dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
file_list = ["root://"+xrootdserver+"/"+dbsfile]
file_list = ["root://"+xrootdserver+"/"+file for file in dbsfiles]
else:
raise NotImplementedError(
f"Data access modes other than 'local' and 'local_dir' are not yet implemented"
f"Data access mode {mode} not implemented"
)

cls.n_files = len(file_list)
Expand Down
11 changes: 0 additions & 11 deletions af_benchmark/example-configs/example-config-1.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
executor:
backend: sequential
data-access:
mode: dbs_block
mode: dbs-block
xrootdserver: eos.cms.rcac.purdue.edu:1094
# xrootdserver: cms-xcache.rcac.purdue.edu:1094
# xrootdserver: cmsxrootd.hep.wisc.edu:1094
block: /SingleMuon/Run2017C-02Apr2020-v1/NANOAOD#44236284-ff8d-4b95-b971-dcec15b5130f
blocks:
- /SingleMuon/Run2017C-02Apr2020-v1/NANOAOD#44236284-ff8d-4b95-b971-dcec15b5130f
processor:
parallelize_over: files
columns: 5
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
executor:
backend: sequential
backend: futures
data-access:
mode: dbs_dataset
mode: dbs-dataset
# xrootdserver: eos.cms.rcac.purdue.edu:1094
xrootdserver: cms-xcache.rcac.purdue.edu:1094
dataset: /SingleMuon/Run2016B-02Apr2020_ver2-v1/NANOAOD
datasets:
- /SingleMuon/Run2016B-02Apr2020_ver2-v1/NANOAOD
processor:
parallelize_over: files
columns: 5
operation: sum
columns: 1
operation: nothing
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
executor:
backend: sequential
data-access:
mode: dbs_file
mode: dbs-files
# xrootdserver: eos.cms.rcac.purdue.edu:1094
xrootdserver: cms-xcache.rcac.purdue.edu:1094
# xrootdserver: cmsxrootd.hep.wisc.edu:1094
file: /store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/90322FC2-4027-0E47-92E4-22307EC8EAD2.root
files:
- /store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/90322FC2-4027-0E47-92E4-22307EC8EAD2.root
processor:
parallelize_over: columns
# parallelize_over: files
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: sequential
data-access:
mode: local_dirs
mode: explicit-dirs
directories:
- /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/
processor:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ executor:
# backend: dask-local
# backend: dask-gateway
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
# mode: local_dirs
Expand Down
2 changes: 1 addition & 1 deletion af_benchmark/processor/uproot_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def process_column(self, file, column, **kwargs):
def run_operation(self, column_data, **kwargs):
operation = self.config.get('processor.operation', None)

if not operation:
if (not operation) or (operation=='nothing'):
return

data_in_memory = np.array([])
Expand Down
2 changes: 1 addition & 1 deletion tests/test-dask-local.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: dask-local
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
processor:
Expand Down
2 changes: 1 addition & 1 deletion tests/test-default.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: futures
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
processor:
Expand Down
2 changes: 1 addition & 1 deletion tests/test-explicit-columns.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: sequential
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
processor:
Expand Down
2 changes: 1 addition & 1 deletion tests/test-futures.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: futures
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
processor:
Expand Down
2 changes: 1 addition & 1 deletion tests/test-read-by-column.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: futures
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
processor:
Expand Down
2 changes: 1 addition & 1 deletion tests/test-read-by-file.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: futures
data-access:
mode: local
mode: explicit-files
files:
- tests/data/nano_dimuon.root
processor:
Expand Down
2 changes: 1 addition & 1 deletion tests/test-read-from-dir.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
backend: futures
data-access:
mode: local_dirs
mode: explicit-dirs
directories:
- tests/data/
processor:
Expand Down

0 comments on commit 4ecb6d1

Please sign in to comment.