Commit
fix: improve llama2 scripts and add fine-tuning results
cindyli committed Mar 4, 2024
1 parent bdbb913 commit a8e35ef
Showing 8 changed files with 5,388 additions and 70 deletions.
3,870 changes: 3,870 additions & 0 deletions jobs/Llama2/finetune/bliss.json

Large diffs are not rendered by default.

129 changes: 95 additions & 34 deletions jobs/Llama2/finetune/finetune_7b_hf.py
@@ -8,10 +8,11 @@
pipeline
)
from peft import LoraConfig
# from peft import PeftModel
from trl import SFTTrainer

# The local directory with the model and the tokenizer
model_dir = "/home/cindyli/llama2/Llama-2-7b-hf"
model_dir = "/home/cindyli/projects/ctb-whkchun/s2_bliss_LLMs/Llama-2-7b-hf"

# The instruction dataset to use
dataset_name = "/home/cindyli/llama2/finetune/bliss.json"
@@ -100,7 +101,7 @@
group_by_length = True

# Save checkpoint every X update steps
save_steps = 0
save_steps = 25

# Log every X update steps
logging_steps = 25
@@ -119,31 +120,15 @@
device_map = {"": 0}


def create_prompt_formats(sample):
"""
Creates a formatted prompt template for an entry in the dataset
:param sample: one dictionary from the dataset
"""

# Create a formatted prompt template for an entry in the dataset
def format_prompt(sample):
# Initialize static strings for the prompt template
INTRO_BLURB = "Below is an example that converts an English sentence to a structure in the Bliss language."
INPUT_KEY = "Original English sentence:"
RESPONSE_KEY = "Sentence in the Bliss language structure:"

# Combine a prompt with the static strings
blurb = f"{INTRO_BLURB}"
input_context = f"{INPUT_KEY}\n{sample['original']}" if sample["original"] else None
response = f"{RESPONSE_KEY}\n{sample['bliss']}"

# Create a list of prompt template elements
parts = [part for part in [blurb, input_context, response] if part]

# Join prompt template elements into a single string to create the prompt template
formatted_prompt = "\n\n".join(parts)
instruction = "### Instruction: \nConvert the input English sentence to a Bliss sentence.\n\n"
input_key = "### Input:\n"
response_key = "### Response:\n"

# Store the formatted prompt template in a new key "text"
sample["text"] = formatted_prompt
# Format the sample
sample["text"] = f"{instruction}{input_key}{sample['original']}\n\n{response_key}{sample['bliss']}\n"

return sample
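
For reference, the new template collapses each entry into a single "text" field. A quick illustration of the result (the sample entry below is made up, not taken from bliss.json):

# Illustration only: a hypothetical entry and the prompt text
# that format_prompt produces for it.
sample = {
    "original": "The girl ran in the park.",
    "bliss": "past:The girl run in the park.",
}
print(format_prompt(sample)["text"])
# ### Instruction:
# Convert the input English sentence to a Bliss sentence.
#
# ### Input:
# The girl ran in the park.
#
# ### Response:
# past:The girl run in the park.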

@@ -188,7 +173,7 @@ def create_prompt_formats(sample):
print(f"Column names are: {dataset.column_names}")

# Convert the data into prompts using the instructional template
dataset = dataset.map(create_prompt_formats)
dataset = dataset.map(format_prompt)

print(dataset)
print(dataset[0])
@@ -205,7 +190,7 @@ def create_prompt_formats(sample):

# Set training parameters
training_arguments = TrainingArguments(
output_dir=output_dir,
output_dir=f"{output_dir}-{num_train_epochs}epochs",
num_train_epochs=num_train_epochs,
per_device_train_batch_size=per_device_train_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
@@ -220,7 +205,8 @@ def create_prompt_formats(sample):
max_steps=max_steps,
warmup_ratio=warmup_ratio,
group_by_length=group_by_length,
lr_scheduler_type=lr_scheduler_type
lr_scheduler_type=lr_scheduler_type,
report_to="tensorboard"
)

# Set supervised fine-tuning parameters
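
The collapsed lines between here and trainer.train() presumably construct the SFTTrainer. A minimal sketch of that call, following the standard trl recipe for LoRA fine-tuning; the peft_config and max_seq_length values are assumed, not verbatim from this file:

# Sketch only (not verbatim from the collapsed region): wire the model,
# formatted dataset, LoRA config, and training arguments into SFTTrainer.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,        # LoraConfig defined earlier
    dataset_text_field="text",      # the field added by format_prompt
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)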
@@ -239,12 +225,87 @@ def create_prompt_formats(sample):
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)
trainer.model.save_pretrained(f"{new_model}-{num_train_epochs}epochs")

print("Done with the fine-tuning.")

# Run text generation pipeline with our next model
prompt = "Converts this sentence to a structure in the Bliss language: I am a software developer."
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"{prompt}")
print(result[0]['generated_text'])
# Evaluate the new model


# Inference
def generate_text(instruction, input, model, tokenizer):
input_key = "### Input:\n"
response_key = "### Response:\n"
prompt = f"{instruction}{input_key}{input}\n\n{response_key}\n"
input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
outputs = model.generate(input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.9)
print(f"Instruction: {instruction}\n")
print(f"Prompt: {input}\n")
print(f"Generated instruction: {tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]}\n\n")
# pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=400)
# pipe_return = pipe(f"<s>[INST] {prompt} [/INST]")
# print(f"## Prompt: {prompt}\n## Response:\n{pipe_return[0]['generated_text']}\n\n")


# Word predictions
def predict_words(prompt, model, tokenizer):
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
predictions = pipe(prompt, max_length=20, num_return_sequences=3)

# write results into the result file
print(f"## Prompt: {prompt}\n## Predictions:\n")
for prediction in predictions:
print(f"- {prediction['generated_text']}\n")
print("\n\n")


print("1. Inference\n")
instruction = "### Instruction: \nConvert the input English sentence to a Bliss sentence.\n\n"
input = "I am a programmer."
generate_text(instruction, input, model, tokenizer)

input = "Joe will explore the picturesque landscapes of a charming countryside village tomorrow."
generate_text(instruction, input, model, tokenizer)

input = "I had the pleasure of watching a captivating movie that thoroughly engaged my senses and emotions, providing a delightful escape into the realm of cinematic storytelling."
generate_text(instruction, input, model, tokenizer)

instruction = "### Instruction: \nConvert the input Bliss sentence to a English sentence.\n\n"
input = "past:The girl run in the park."
generate_text(instruction, input, model, tokenizer)

input = "future:month next, I embark on an journey exciting to explore the cultures vibrant and landscapes breathtaking of Southeast Asia."
generate_text(instruction, input, model, tokenizer)

print("2. Word Prediction\n\n")
prompt = "present: Joe be in hospital. He"
predict_words(prompt, model, tokenizer)

prompt = "Tomorrow will be a beautiful day. Running"
predict_words(prompt, model, tokenizer)

# Empty VRAM
del model
del trainer

# # Merge the adapter weights into the base model then save.
# # See https://www.philschmid.de/instruction-tune-llama-2
# # Reload model in FP16 and merge it with LoRA weights
# base_model = AutoModelForCausalLM.from_pretrained(
# model_dir,
# local_files_only=True,
# low_cpu_mem_usage=True,
# return_dict=True,
# torch_dtype=torch.float16,
# device_map=device_map,
# )
# model = PeftModel.from_pretrained(base_model, new_model)
# model = model.merge_and_unload()

# # Reload tokenizer to save it
# tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True, trust_remote_code=True)
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = "right"

# model.save_pretrained(f"./{new_model}-{num_train_epochs}epochs")
# tokenizer.save_pretrained(f"./{new_model}-{num_train_epochs}epochs")
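
Besides the commented-out merge path above, the saved adapter can also be reloaded on top of the base model without merging. A minimal sketch, assuming peft's PeftModel API and the save paths used earlier in this script:

# Sketch only: reload the saved LoRA adapter for later inference.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    local_files_only=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, f"{new_model}-{num_train_epochs}epochs")
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)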
14 changes: 5 additions & 9 deletions jobs/Llama2/finetune/job_finetune_7b_hf.sh
@@ -1,30 +1,26 @@
#!/bin/bash
#SBATCH --job-name=llama2-finetune-7b-hf
#SBATCH --time 10-00:00
#SBATCH --time 2-00:00
#SBATCH --nodes=1
#SBATCH --gpus-per-node=v100l:1
#SBATCH --mem=128G
#SBATCH --mem=64G
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=4
#SBATCH --account=ctb-whkchun
#SBATCH --account=def-whkchun
#SBATCH --output=%x.o%j

pip install --upgrade pip
module load python/3.11.5
python -V

virtualenv --no-download $SLURM_TMPDIR/env
source $SLURM_TMPDIR/env/bin/activate

pip install --upgrade pip

module load StdEnv/2023 rust/1.70.0 arrow/14.0.1 gcc/12.3
pip install --no-index transformers==4.36.2 accelerate==0.25.0 peft==0.5.0 bitsandbytes==0.40.2
pip install --no-index transformers==4.36.2 accelerate==0.25.0 peft==0.5.0 bitsandbytes==0.42.0 tensorboard
pip install datasets==2.17.0 trl
pip install -r /home/cindyli/llama2/requirements-llama2.txt

python -V
pip list

echo "Fine-tuning Llama2 from job $SLURM_JOB_ID on nodes $SLURM_JOB_NODELIST."
echo "=== Fine-tuning Llama2 from job $SLURM_JOB_ID on nodes $SLURM_JOB_NODELIST."
python /home/cindyli/llama2/finetune/finetune_7b_hf.py