From ad0ea30f35185fed5b5b03aadbaf89080d9f7d0e Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Wed, 4 Sep 2024 20:57:06 +0000 Subject: [PATCH] changed log level field to output section --- dlio_benchmark/common/enumerations.py | 6 +++--- dlio_benchmark/utils/config.py | 9 ++++----- dlio_benchmark/utils/statscounter.py | 6 +++--- docs/source/config.rst | 16 ++++++++-------- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/dlio_benchmark/common/enumerations.py b/dlio_benchmark/common/enumerations.py index 46ffa54c..cc34fb80 100644 --- a/dlio_benchmark/common/enumerations.py +++ b/dlio_benchmark/common/enumerations.py @@ -54,10 +54,10 @@ class LogLevel(Enum): """ Different levels of logging """ - ERROR = "error" - WARN = "warn" - INFO = "info" DEBUG = "debug" + INFO = "info" + WARNING = "warning" + ERROR = "error" def __str__(self): return self.value diff --git a/dlio_benchmark/utils/config.py b/dlio_benchmark/utils/config.py index cbce06b3..7f975060 100644 --- a/dlio_benchmark/utils/config.py +++ b/dlio_benchmark/utils/config.py @@ -174,8 +174,8 @@ def configure_dlio_logging(self, is_child=False): if self.log_level == LogLevel.DEBUG: log_level = logging.DEBUG log_format = log_format_verbose - elif self.log_level == LogLevel.WARN: - log_level = logging.WARN + elif self.log_level == LogLevel.WARNING: + log_level = logging.WARNING elif self.log_level == LogLevel.ERROR: log_level = logging.ERROR else: @@ -553,7 +553,8 @@ def LoadConfig(args, config): args.output_folder = config['output']['folder'] if 'log_file' in config['output']: args.log_file = config['output']['log_file'] - + if 'log_level' in config['output']: + args.log_level = LogLevel(config['output']['log_level']) if args.output_folder is None: try: hydra_cfg = hydra.core.hydra_config.HydraConfig.get() @@ -569,8 +570,6 @@ def LoadConfig(args, config): args.generate_only = True else: args.generate_only = False - if 'log_level' in config['workflow']: - args.log_level = 
LogLevel(config['workflow']['log_level']) if 'evaluation' in config['workflow']: args.do_eval = config['workflow']['evaluation'] if 'checkpoint' in config['workflow']: diff --git a/dlio_benchmark/utils/statscounter.py b/dlio_benchmark/utils/statscounter.py index 29da48d1..4bb47868 100644 --- a/dlio_benchmark/utils/statscounter.py +++ b/dlio_benchmark/utils/statscounter.py @@ -182,7 +182,7 @@ def end_run(self): metric = metric + f"[METRIC] Eval Throughput (MB/second): {np.mean(eval_throughput)*self.record_size/1024/1024:.6f} ({np.std(eval_throughput)*self.record_size/1024/1024:.6f})\n" metric = metric + f"[METRIC] eval_au_meet_expectation: {self.summary['metric']['eval_au_meet_expectation']}\n" metric+="[METRIC] ==========================================================\n" - print(metric) + logging.info(metric) def start_train(self, epoch): if self.my_rank == 0: ts = utcnow() @@ -282,8 +282,8 @@ def end_block(self, epoch, block, steps_taken): logging.info(f"{ts} Ending block {block} - {steps_taken} steps completed in {duration} s") self.per_epoch_stats[epoch][f'block{block}']['end'] = ts self.per_epoch_stats[epoch][f'block{block}']['duration'] = duration - print(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Accelerator Utilization [AU] (%): {self.output[epoch]['au'][f'block{block}']:.4f}") - print(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Throughput (samples/second): {self.output[epoch]['throughput'][f'block{block}']*self.comm_size:.4f}") + logging.info(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Accelerator Utilization [AU] (%): {self.output[epoch]['au'][f'block{block}']:.4f}") + logging.info(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Throughput (samples/second): {self.output[epoch]['throughput'][f'block{block}']*self.comm_size:.4f}") def start_ckpt(self, epoch, block, steps_taken): if self.my_rank == 0: diff --git a/docs/source/config.rst b/docs/source/config.rst index b16f6ed9..0a06eb06 100644 --- a/docs/source/config.rst 
+++ b/docs/source/config.rst @@ -100,9 +100,6 @@ workflow * - profiling - False - whether to perform profiling - * - log_level - - "info" - - select the logging level [error|warn|info|debug|] .. note:: @@ -110,11 +107,6 @@ workflow Even though ``generate_data`` and ``train`` can be performed together in one job, we suggest to perform them seperately to eliminate potential caching effect. One can generate the data first by running DLIO with ```generate_data=True``` and ```train=False```, and then run training benchmark with ```generate_data=False``` and ```train=True```. -.. note:: - - ``log_level=debug`` will output detailed logging info per steps; whereas ``log_level=info`` only output log at the end of each epoch. - For performance mode, we would suggest using error mode to suppress unnecessory logs. - dataset ------------------ .. list-table:: @@ -365,11 +357,19 @@ output * - log_file - dlio.log - log file name + * - log_level + - "info" + - select the logging level [error|warning|info|debug] .. note:: If ``folder`` is not set (None), the output folder will be ```hydra_log/unet3d/$DATE-$TIME```. +.. note:: + + ``log_level=debug`` will output detailed logging info per steps; whereas ``log_level=info`` only output log at the end of each epoch. + For performance mode, we would suggest using error mode to suppress unnecessary logs. + profiling ------------------ .. list-table::