From 4d75e725e01bac4842de51cf533b2b380e0f0005 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 13 Feb 2024 21:12:39 +0100 Subject: [PATCH] rename handler -> processor --- af_benchmark/benchmark.py | 27 ++++++++++--------- af_benchmark/example-config.yaml | 12 ++++----- .../example-configs/example-config-1.yaml | 2 +- .../example-configs/example-config-2.yaml | 2 +- .../{processing => processor}/__init__.py | 0 .../handler.py => processor/processor.py} | 22 +++++++-------- tests/config_nanoevents_dask_local.yaml | 2 +- tests/config_nanoevents_futures.yaml | 2 +- tests/config_nanoevents_sequential.yaml | 2 +- tests/config_uproot_dask_local.yaml | 2 +- tests/config_uproot_futures.yaml | 2 +- tests/config_uproot_sequential.yaml | 2 +- 12 files changed, 39 insertions(+), 38 deletions(-) rename af_benchmark/{processing => processor}/__init__.py (100%) rename af_benchmark/{processing/handler.py => processor/processor.py} (85%) diff --git a/af_benchmark/benchmark.py b/af_benchmark/benchmark.py index 8db06db..796c32a 100644 --- a/af_benchmark/benchmark.py +++ b/af_benchmark/benchmark.py @@ -6,7 +6,7 @@ from profiling.timing import time_profiler as tp from data_access.loader import get_file_list -from processing.handler import handlers +from processor.processor import processors from executor.sequential import SequentialExecutor from executor.futures import FuturesExecutor @@ -40,11 +40,12 @@ def __init__(self, config_path=None): "dataset", "n_files", "n_columns_read", + "bytes_read", "n_workers", "total_time", "operation", "executor", - "col_handler", + "processor", ] ) if config_path: @@ -64,13 +65,13 @@ def reinitialize(self, config_path): f"Invalid backend: {self.backend}. Allowed values are: {executors.keys()}" ) - # Select file handler method - self.method = self.config.get('processing.method') - if self.method in handlers: - self.handler = handlers[self.method](self.config) + # Select processor method + self.method = self.config.get('processor.method') + if self.method in processors: + self.processor = processors[self.method](self.config) else: raise NotImplementedError( - f"Invalid method: {self.method}. Allowed values are: {handlers.keys()}" + f"Invalid method: {self.method}. Allowed values are: {processors.keys()}" ) @@ -81,15 +82,15 @@ def run(self): self.n_files = len(files) trees = self.executor.execute( - self.handler.open_nanoaod, files + self.processor.open_nanoaod, files ) columns_by_file = self.executor.execute( - self.handler.read_columns, trees + self.processor.read_columns, trees ) outputs = self.executor.execute( - self.handler.run_operation, columns_by_file + self.processor.run_operation, columns_by_file ) return outputs @@ -106,12 +107,12 @@ def update_report(self): pd.DataFrame([{ "dataset": "", "n_files": self.n_files, - "n_columns_read": len(self.config.get('processing.columns')), + "n_columns_read": len(self.config.get('processor.columns')), "n_workers": self.executor.get_n_workers(), "total_time": run_time, - "operation": self.config.get('processing.operation'), + "operation": self.config.get('processor.operation'), "executor": self.backend, - "col_handler": self.method, + "processor": self.method, }]) ]) diff --git a/af_benchmark/example-config.yaml b/af_benchmark/example-config.yaml index 76437b4..d6c7b4f 100644 --- a/af_benchmark/example-config.yaml +++ b/af_benchmark/example-config.yaml @@ -4,12 +4,12 @@ executor: # backend: dask-local # backend: dask-gateway data-access: - # mode: local - # files: - # - tests/data/nano_dimuon.root - mode: local_dir - files_dir: /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/ -processing: + mode: local + files: + - tests/data/nano_dimuon.root + # mode: local_dir + # files_dir: /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/ +processor: method: uproot # method: nanoevents columns: diff --git a/af_benchmark/example-configs/example-config-1.yaml b/af_benchmark/example-configs/example-config-1.yaml index 76437b4..87415fa 100644 --- a/af_benchmark/example-configs/example-config-1.yaml +++ b/af_benchmark/example-configs/example-config-1.yaml @@ -9,7 +9,7 @@ data-access: # - tests/data/nano_dimuon.root mode: local_dir files_dir: /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/ -processing: +processor: method: uproot # method: nanoevents columns: diff --git a/af_benchmark/example-configs/example-config-2.yaml b/af_benchmark/example-configs/example-config-2.yaml index 600aa9d..db81fe9 100644 --- a/af_benchmark/example-configs/example-config-2.yaml +++ b/af_benchmark/example-configs/example-config-2.yaml @@ -3,7 +3,7 @@ executor: data-access: mode: local_dir files_dir: /eos/purdue/store/data/Run2016B/SingleMuon/NANOAOD/02Apr2020_ver2-v1/20000/ -processing: +processor: method: uproot columns: - event diff --git a/af_benchmark/processing/__init__.py b/af_benchmark/processor/__init__.py similarity index 100% rename from af_benchmark/processing/__init__.py rename to af_benchmark/processor/__init__.py diff --git a/af_benchmark/processing/handler.py b/af_benchmark/processor/processor.py similarity index 85% rename from af_benchmark/processing/handler.py rename to af_benchmark/processor/processor.py index e4db652..4e01f75 100644 --- a/af_benchmark/processing/handler.py +++ b/af_benchmark/processor/processor.py @@ -6,8 +6,8 @@ from coffea.nanoevents import NanoEventsFactory, NanoAODSchema -class BaseColumnHandler(ABC): - """A base column handler class +class BaseProcessor(ABC): + """A base processor class """ def __init__(self, config): self.config=config @@ -29,7 +29,7 @@ def run_operation(self, columns, **kwargs): return -class UprootColumnHandler(BaseColumnHandler): +class UprootProcessor(BaseProcessor): def __init__(self, config): self.config = config self.col_stats = pd.DataFrame( @@ -48,7 +48,7 @@ def open_nanoaod(self, file_path, **kwargs): @tp.enable def read_columns(self, tree, **kwargs): - columns_to_read = self.config.get('processing.columns') + columns_to_read = self.config.get('processor.columns') column_data = {} for column in columns_to_read: if column in tree.keys(): @@ -66,7 +66,7 @@ def read_columns(self, tree, **kwargs): return column_data def run_operation(self, columns, **kwargs): - operation = self.config.get('processing.operation') + operation = self.config.get('processor.operation') results = {} for name, data in columns.items(): if operation == 'array': @@ -78,7 +78,7 @@ def run_operation(self, columns, **kwargs): -class NanoEventsColumnHandler(BaseColumnHandler): +class NanoEventsProcessor(BaseProcessor): def open_nanoaod(self, file_path, **kwargs): tree = NanoEventsFactory.from_root( file_path, @@ -88,7 +88,7 @@ def open_nanoaod(self, file_path, **kwargs): return tree def read_columns(self, tree, **kwargs): - columns_to_read = self.config.get('processing.columns') + columns_to_read = self.config.get('processor.columns') column_data = {} for column in columns_to_read: if column in tree.fields: @@ -101,7 +101,7 @@ def read_columns(self, tree, **kwargs): return column_data def run_operation(self, columns, **kwargs): - operation = self.config.get('processing.operation') + operation = self.config.get('processor.operation') results = {} for name, data in columns.items(): if operation == 'mean': @@ -109,9 +109,9 @@ def run_operation(self, columns, **kwargs): return results -handlers = { - 'uproot': UprootColumnHandler, - 'nanoevents': NanoEventsColumnHandler +processors = { + 'uproot': UprootProcessor, + 'nanoevents': NanoEventsProcessor } \ No newline at end of file diff --git a/tests/config_nanoevents_dask_local.yaml b/tests/config_nanoevents_dask_local.yaml index dd4d686..f8dddfd 100644 --- a/tests/config_nanoevents_dask_local.yaml +++ b/tests/config_nanoevents_dask_local.yaml @@ -4,7 +4,7 @@ data-access: mode: local files: - tests/data/nano_dimuon.root -processing: +processor: method: nanoevents columns: - event diff --git a/tests/config_nanoevents_futures.yaml b/tests/config_nanoevents_futures.yaml index 8225737..a6da4d0 100644 --- a/tests/config_nanoevents_futures.yaml +++ b/tests/config_nanoevents_futures.yaml @@ -4,7 +4,7 @@ data-access: mode: local files: - tests/data/nano_dimuon.root -processing: +processor: method: nanoevents columns: - event diff --git a/tests/config_nanoevents_sequential.yaml b/tests/config_nanoevents_sequential.yaml index 5551e53..d376b6d 100644 --- a/tests/config_nanoevents_sequential.yaml +++ b/tests/config_nanoevents_sequential.yaml @@ -4,7 +4,7 @@ data-access: mode: local files: - tests/data/nano_dimuon.root -processing: +processor: method: nanoevents columns: - event diff --git a/tests/config_uproot_dask_local.yaml b/tests/config_uproot_dask_local.yaml index 7987808..49a3e93 100644 --- a/tests/config_uproot_dask_local.yaml +++ b/tests/config_uproot_dask_local.yaml @@ -4,7 +4,7 @@ data-access: mode: local files: - tests/data/nano_dimuon.root -processing: +processor: method: uproot columns: - event diff --git a/tests/config_uproot_futures.yaml b/tests/config_uproot_futures.yaml index e250573..e05724a 100644 --- a/tests/config_uproot_futures.yaml +++ b/tests/config_uproot_futures.yaml @@ -4,7 +4,7 @@ data-access: mode: local files: - tests/data/nano_dimuon.root -processing: +processor: method: uproot columns: - event diff --git a/tests/config_uproot_sequential.yaml b/tests/config_uproot_sequential.yaml index 27b6e06..4a5bd0e 100644 --- a/tests/config_uproot_sequential.yaml +++ b/tests/config_uproot_sequential.yaml @@ -4,7 +4,7 @@ data-access: mode: local files: - tests/data/nano_dimuon.root -processing: +processor: method: uproot columns: - event