From a308bfbf3e9f21d3e115155ec75584a5c48019aa Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 9 Apr 2024 11:22:31 -0400 Subject: [PATCH] use locale agnostic separator to make large nums easier to read --- examples/llama-2/qlora-fsdp.yml | 2 ++ src/axolotl/utils/trainer.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml index 30916ed45a..274bd7312d 100644 --- a/examples/llama-2/qlora-fsdp.yml +++ b/examples/llama-2/qlora-fsdp.yml @@ -65,6 +65,7 @@ deepspeed: weight_decay: 0.0 fsdp: - full_shard + - auto_wrap fsdp_config: fsdp_limit_all_gathers: true fsdp_sync_module_states: true @@ -73,4 +74,5 @@ fsdp_config: fsdp_cpu_ram_efficient_loading: true fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer fsdp_state_dict_type: SHARDED_STATE_DICT + fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP special_tokens: diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index 2de2c54cce..6625080755 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -198,7 +198,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True): .apply(lambda x: len(x)) # pylint: disable=unnecessary-lambda .values ) - LOG.debug(f"total_num_tokens: {total_num_tokens}", main_process_only=True) + LOG.debug(f"total_num_tokens: {total_num_tokens:_}", main_process_only=True) if update: cfg.total_num_tokens = total_num_tokens @@ -212,7 +212,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True): .sum() ) LOG.debug( - f"`total_supervised_tokens: {total_supervised_tokens}`", + f"`total_supervised_tokens: {total_supervised_tokens:_}`", main_process_only=True, ) if update: @@ -239,7 +239,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True): * cfg.num_epochs ) LOG.debug( - f"total_num_tokens: {cfg.total_num_tokens}, total_num_steps: {total_num_steps}", + f"total_num_tokens: {cfg.total_num_tokens:_}, total_num_steps: {total_num_steps:_}", 
main_process_only=True, ) else: