Skip to content

Commit

Permalink
changed log level field to output section
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghh04 committed Sep 4, 2024
1 parent b9bc737 commit ad0ea30
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 19 deletions.
6 changes: 3 additions & 3 deletions dlio_benchmark/common/enumerations.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ class LogLevel(Enum):
"""
Different levels of logging
"""
ERROR = "error"
WARN = "warn"
INFO = "info"
DEBUG = "debug"
INFO = "info"
WARNING = "warning"
ERROR = "error"
def __str__(self):
return self.value

Expand Down
9 changes: 4 additions & 5 deletions dlio_benchmark/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def configure_dlio_logging(self, is_child=False):
if self.log_level == LogLevel.DEBUG:
log_level = logging.DEBUG
log_format = log_format_verbose
elif self.log_level == LogLevel.WARN:
log_level = logging.WARN
elif self.log_level == LogLevel.WARNING:
log_level = logging.WARNING
elif self.log_level == LogLevel.ERROR:
log_level = logging.ERROR
else:
Expand Down Expand Up @@ -553,7 +553,8 @@ def LoadConfig(args, config):
args.output_folder = config['output']['folder']
if 'log_file' in config['output']:
args.log_file = config['output']['log_file']

if 'log_level' in config['output']:
args.log_level = LogLevel(config['output']['log_level'])
if args.output_folder is None:
try:
hydra_cfg = hydra.core.hydra_config.HydraConfig.get()
Expand All @@ -569,8 +570,6 @@ def LoadConfig(args, config):
args.generate_only = True
else:
args.generate_only = False
if 'log_level' in config['workflow']:
args.log_level = LogLevel(config['workflow']['log_level'])
if 'evaluation' in config['workflow']:
args.do_eval = config['workflow']['evaluation']
if 'checkpoint' in config['workflow']:
Expand Down
6 changes: 3 additions & 3 deletions dlio_benchmark/utils/statscounter.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def end_run(self):
metric = metric + f"[METRIC] Eval Throughput (MB/second): {np.mean(eval_throughput)*self.record_size/1024/1024:.6f} ({np.std(eval_throughput)*self.record_size/1024/1024:.6f})\n"
metric = metric + f"[METRIC] eval_au_meet_expectation: {self.summary['metric']['eval_au_meet_expectation']}\n"
metric+="[METRIC] ==========================================================\n"
print(metric)
logging.info(metric)
def start_train(self, epoch):
if self.my_rank == 0:
ts = utcnow()
Expand Down Expand Up @@ -282,8 +282,8 @@ def end_block(self, epoch, block, steps_taken):
logging.info(f"{ts} Ending block {block} - {steps_taken} steps completed in {duration} s")
self.per_epoch_stats[epoch][f'block{block}']['end'] = ts
self.per_epoch_stats[epoch][f'block{block}']['duration'] = duration
print(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Accelerator Utilization [AU] (%): {self.output[epoch]['au'][f'block{block}']:.4f}")
print(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Throughput (samples/second): {self.output[epoch]['throughput'][f'block{block}']*self.comm_size:.4f}")
logging.info(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Accelerator Utilization [AU] (%): {self.output[epoch]['au'][f'block{block}']:.4f}")
logging.info(f"{utcnow()} Epoch {epoch} - Block {block} [Training] Throughput (samples/second): {self.output[epoch]['throughput'][f'block{block}']*self.comm_size:.4f}")

def start_ckpt(self, epoch, block, steps_taken):
if self.my_rank == 0:
Expand Down
16 changes: 8 additions & 8 deletions docs/source/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,13 @@ workflow
* - profiling
- False
- whether to perform profiling
* - log_level
- "info"
- select the logging level [error|warn|info|debug|]

.. note::

``evaluation``, ``checkpoint``, and ``profiling`` have a dependency on ``train``. If ``train`` is set to be ```False```, ``evaluation``, ``checkpoint``, ``profiling`` will be reset to ```False``` automatically.

Even though ``generate_data`` and ``train`` can be performed together in one job, we suggest to perform them separately to eliminate potential caching effects. One can generate the data first by running DLIO with ```generate_data=True``` and ```train=False```, and then run the training benchmark with ```generate_data=False``` and ```train=True```.

.. note::

``log_level=debug`` will output detailed logging info per steps; whereas ``log_level=info`` only output log at the end of each epoch.
For performance mode, we would suggest using error mode to suppress unnecessary logs.

dataset
------------------
.. list-table::
Expand Down Expand Up @@ -365,11 +357,19 @@ output
* - log_file
- dlio.log
- log file name
* - log_level
- "info"
- select the logging level [error|warning|info|debug]

.. note::

If ``folder`` is not set (None), the output folder will be ```hydra_log/unet3d/$DATE-$TIME```.

.. note::

``log_level=debug`` will output detailed logging info per steps; whereas ``log_level=info`` only output log at the end of each epoch.
For performance mode, we would suggest using error mode to suppress unnecessary logs.

profiling
------------------
.. list-table::
Expand Down

0 comments on commit ad0ea30

Please sign in to comment.