From 75e35703ebea3f9c0a674edbc335ee7718e2406e Mon Sep 17 00:00:00 2001
From: FlyHorse24
Date: Tue, 30 Jul 2024 17:08:01 +0800
Subject: [PATCH] check last

---
 .../meta_llama/Meta_Llama_3_1_8B/config.py |  4 ++-
 .../meta_llama/Meta_Llama_3_1_8B/train.py  | 27 +++++++++++++------
 examples/meta_llama/__init__.py            |  0
 3 files changed, 22 insertions(+), 9 deletions(-)
 create mode 100644 examples/meta_llama/__init__.py

diff --git a/examples/meta_llama/Meta_Llama_3_1_8B/config.py b/examples/meta_llama/Meta_Llama_3_1_8B/config.py
index 363b105..3d10d25 100644
--- a/examples/meta_llama/Meta_Llama_3_1_8B/config.py
+++ b/examples/meta_llama/Meta_Llama_3_1_8B/config.py
@@ -24,7 +24,9 @@
     # 2. the 'content' means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
     # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
     #    and "hf_model".
-    load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
+    load_ckpt_info=dict(
+        path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"
+    ),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
     # with an automatic restart mechanism upon training reboot.
diff --git a/examples/meta_llama/Meta_Llama_3_1_8B/train.py b/examples/meta_llama/Meta_Llama_3_1_8B/train.py
index a94f9c0..1fe2b93 100644
--- a/examples/meta_llama/Meta_Llama_3_1_8B/train.py
+++ b/examples/meta_llama/Meta_Llama_3_1_8B/train.py
@@ -3,18 +3,22 @@
 from internlm.core.context import global_context as gpc
 from internlm.core.trainer_builder import TrainerBuilder
-from internlm.data import (build_train_loader_with_data_type,
-                           build_valid_loader_with_data_type)
+from internlm.data import (
+    build_train_loader_with_data_type,
+    build_valid_loader_with_data_type,
+)
 from internlm.initialize import initialize_distributed_env
 from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
 
-from huggingface_model.meta_llama.Meta_Llama_3_1_8B.configuration_llama import \
-    LlamaConfig
-from huggingface_model.meta_llama.Meta_Llama_3_1_8B.modeling_llama import \
-    LlamaForCausalLM
+from huggingface_model.meta_llama.Meta_Llama_3_1_8B.configuration_llama import (
+    LlamaConfig,
+)
+from huggingface_model.meta_llama.Meta_Llama_3_1_8B.modeling_llama import (
+    LlamaForCausalLM,
+)
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
 def main(args):
@@ -23,7 +27,9 @@ def main(args):
     model_initializer.register_module(gpc.config.model_type, LlamaForCausalLM)
     hf_config_initializer.register_module(gpc.config.model_type, LlamaConfig)
     if gpc.config.model_type == "hf":
-        hf_config_builder = hf_config_initializer.get_module(module_name=gpc.config.model_type)
+        hf_config_builder = hf_config_initializer.get_module(
+            module_name=gpc.config.model_type
+        )
         hf_cfg = hf_config_builder(return_dict=False)
         gpc.config.model.num_layers = hf_cfg.num_hidden_layers
         gpc.config.model.hidden_size = hf_cfg.hidden_size
@@ -54,7 +60,12 @@ def main(args):
     args = parse_args()
 
     # Initialize distributed environment
-    initialize_distributed_env(config=args.config, launcher=args.launcher, master_port=args.port, seed=args.seed)
+    initialize_distributed_env(
+        config=args.config,
+        launcher=args.launcher,
+        master_port=args.port,
+        seed=args.seed,
+    )
     assert hasattr(gpc, "config") and gpc.config is not None
 
     # Run the main function with parsed arguments
diff --git a/examples/meta_llama/__init__.py b/examples/meta_llama/__init__.py
new file mode 100644
index 0000000..e69de29
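
Note on the config.py hunk: the reflowed load_ckpt_info keeps the same three fields the surrounding comments document ('path', 'content', 'ckpt_type'). A minimal sketch of two common variants, assuming MODEL_ONLY_FOLDER and SAVE_CKPT_FOLDER are folder constants defined earlier in config.py, as in the stock InternEvo example configs:

    # Load only the model weights from a HuggingFace-format checkpoint
    # (the variant this patch reformats):
    load_ckpt_info = dict(
        path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"
    )

    # Resume a full run, restoring every supported state together
    # ("model", "sampler", "optimizer", "scheduler"):
    load_ckpt_info = dict(
        path=SAVE_CKPT_FOLDER, content=("all",), ckpt_type="internevo"
    )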
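Note on the train.py hunk: main() wires the local HuggingFace port into InternEvo through two registries, then mirrors the HF config's shape fields (num_hidden_layers, hidden_size, ...) into gpc.config.model so the trainer sizes its parallelism consistently. The same pattern should carry over to other ports; a hypothetical sketch, where My_Model, MyModelConfig, and MyModelForCausalLM are placeholder names rather than anything in this patch:

    from internlm.core.context import global_context as gpc
    from internlm.model.registry import hf_config_initializer, model_initializer

    # Placeholder import paths for another HF-style port living under
    # huggingface_model/, mirroring the Meta_Llama_3_1_8B layout:
    from huggingface_model.my_org.My_Model.configuration_my_model import MyModelConfig
    from huggingface_model.my_org.My_Model.modeling_my_model import MyModelForCausalLM

    # Register both the model class and its config class under the model
    # type named in the training config, exactly as this patch does for Llama:
    model_initializer.register_module(gpc.config.model_type, MyModelForCausalLM)
    hf_config_initializer.register_module(gpc.config.model_type, MyModelConfig)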