Skip to content

Commit

Permalink
changed log level field to output section
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghh04 committed Sep 4, 2024
1 parent b9bc737 commit ad0ea30
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 19 deletions.
6 changes: 3 additions & 3 deletions dlio_benchmark/common/enumerations.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ class LogLevel(Enum):
"""
Different levels of logging
"""
ERROR = "error"
WARN = "warn"
INFO = "info"
DEBUG = "debug"
INFO = "info"
WARNING = "warning"
ERROR = "error"
def __str__(self):
return self.value

Expand Down
9 changes: 4 additions & 5 deletions dlio_benchmark/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def configure_dlio_logging(self, is_child=False):
if self.log_level == LogLevel.DEBUG:
log_level = logging.DEBUG
log_format = log_format_verbose
elif self.log_level == LogLevel.WARN:
log_level = logging.WARN
elif self.log_level == LogLevel.WARNING:
log_level = logging.WARNING
elif self.log_level == LogLevel.ERROR:
log_level = logging.ERROR
else:
Expand Down Expand Up @@ -553,7 +553,8 @@ def LoadConfig(args, config):
args.output_folder = config['output']['folder']
if 'log_file' in config['output']:
args.log_file = config['output']['log_file']

if 'log_level' in config['output']:
args.log_level = LogLevel(config['output']['log_level'])
if args.output_folder is None:
try:
hydra_cfg = hydra.core.hydra_config.HydraConfig.get()
Expand All @@ -569,8 +570,6 @@ def LoadConfig(args, config):
args.generate_only = True
else:
args.generate_only = False
if 'log_level' in config['workflow']:
args.log_level = LogLevel(config['workflow']['log_level'])
if 'evaluation' in config['workflow']:
args.do_eval = config['workflow']['evaluation']
if 'checkpoint' in config['workflow']:
Expand Down
6 changes: 3 additions & 3 deletions dlio_benchmark/utils/statscounter.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def end_run(self):
metric = metric + f"[METRIC] Eval Throughput (MB/second): {np.mean(eval_throughput)*self.record_size/1024/1024:.6f} ({np.std(eval_throughput)*self.record_size/1024/1024:.6f})\n"
metric = metric + f"[METRIC] eval_au_meet_expectation: {self.summary['metric']['eval_au_meet_expectation']}\n"
metric+="[METRIC] ==========================================================\n"
print(metric)
logging.info(metric)
def start_train(self, epoch):
if self.my_rank == 0:
ts = utcnow()
Expand Down Expand Up @@ -282,8 +282,8 @@ def end_block(self, epoch, block, steps_taken):
logging.info(f"{ts} Ending block {block} - {steps_taken} steps completed in {duration} s")
self.per_epoch_stats[epoch][f'block{block}']['end'] = ts
self.per_epoch_stats[epoch][f'block{block}']['duration'] = duration
print(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Accelerator Utilization [AU] (%): {self.output[epoch]['au'][f'block{block}']:.4f}")
print(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Throughput (samples/second): {self.output[epoch]['throughput'][f'block{block}']*self.comm_size:.4f}")
logging.info(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Accelerator Utilization [AU] (%): {self.output[epoch]['au'][f'block{block}']:.4f}")
logging.info(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Throughput (samples/second): {self.output[epoch]['throughput'][f'block{block}']*self.comm_size:.4f}")

def start_ckpt(self, epoch, block, steps_taken):
if self.my_rank == 0:
Expand Down
16 changes: 8 additions & 8 deletions docs/source/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,13 @@ workflow
* - profiling
- False
- whether to perform profiling
* - log_level
- "info"
- select the logging level [error|warn|info|debug|]

.. note::

``evaluation``, ``checkpoint``, and ``profiling`` have a dependency on ``train``. If ``train`` is set to be ```False```, ``evaluation``, ``checkpoint``, ``profiling`` will be reset to ```False``` automatically.

Even though ``generate_data`` and ``train`` can be performed together in one job, we suggest to perform them separately to eliminate potential caching effects. One can generate the data first by running DLIO with ```generate_data=True``` and ```train=False```, and then run the training benchmark with ```generate_data=False``` and ```train=True```.

.. note::

``log_level=debug`` will output detailed logging info per steps; whereas ``log_level=info`` only output log at the end of each epoch.
For performance mode, we would suggest using error mode to suppress unnecessary logs.

dataset
------------------
.. list-table::
Expand Down Expand Up @@ -365,11 +357,19 @@ output
* - log_file
- dlio.log
- log file name
* - log_level
- "info"
- select the logging level [error|warning|info|debug]

.. note::

If ``folder`` is not set (None), the output folder will be ```hydra_log/unet3d/$DATE-$TIME```.

.. note::

``log_level=debug`` will output detailed logging info per steps; whereas ``log_level=info`` only output log at the end of each epoch.
For performance mode, we would suggest using error mode to suppress unnecessary logs.

profiling
------------------
.. list-table::
Expand Down

0 comments on commit ad0ea30

Please sign in to comment.