Commit: no prompt

Geun Han Chung authored and committed on Nov 24, 2024
1 parent 562b70d commit 804e925
Showing 2 changed files with 31 additions and 10 deletions.
13 changes: 9 additions & 4 deletions backend/generate_answer.py
@@ -59,11 +59,16 @@ def generate_initial_note(page_content, model, tokenizer):
         max_length=2048,
         pad_token_id=tokenizer.eos_token_id,
         num_return_sequences=1,
-        temperature=0.7
+        temperature=0.7,
+        output_scores=False,  # Exclude unnecessary scores
+        return_dict_in_generate=True,  # Return generation metadata
     )
-    final_output = ""
-    for output in outputs:
-        final_output += tokenizer.decode(output, skip_special_tokens=True)
+
+    # Extract the generated tokens beyond the input tokens
+    generated_tokens = outputs.sequences[0][inputs['input_ids'].shape[-1]:]
+
+    # Decode the generated tokens
+    final_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)
     return final_output
 
 def generate_note(page_content, note_content, model, tokenizer):
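For readers skimming the diff: with return_dict_in_generate=True, generate returns an output object whose sequences tensor still begins with the prompt tokens, which is why the new code slices from inputs['input_ids'].shape[-1] before decoding. A minimal standalone sketch of the same pattern; "gpt2" is a stand-in model, not the one this repo uses:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tokenizer("User: What is LoRA?", return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=64,
            pad_token_id=tokenizer.eos_token_id,
            return_dict_in_generate=True,  # yields outputs.sequences instead of a bare tensor
        )

    # sequences[0] holds the prompt tokens followed by the new tokens; drop the prompt echo.
    generated_tokens = outputs.sequences[0][inputs["input_ids"].shape[-1]:]
    print(tokenizer.decode(generated_tokens, skip_special_tokens=True))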
28 changes: 22 additions & 6 deletions backend/lora.py
@@ -10,14 +10,17 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights
     Fine-tunes the model using the given dataset and saves the LoRA weights.
     """
     dataset = Dataset.from_list(data)
 
+    # Use GPU if available
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.float16,
         bnb_4bit_use_double_quant=False
     )
 
+    # Load tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right", truncation_side="right")
     tokenizer.pad_token = tokenizer.eos_token
 
@@ -26,10 +29,11 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights
         device_map="auto",
         quantization_config=bnb_config,
         torch_dtype=torch.float16
-    )
+    ).to(device)
     model.config.pad_token_id = tokenizer.pad_token_id
     model.config.use_cache = False
 
+    # Preprocessing function
     def preprocess_function(examples):
         inputs = [f"User: {i} Bot: {o}" for i, o in zip(examples["input"], examples["output"])]
         labels = ["positive" if f == "like" else "negative" for f in examples["feedback"]]
@@ -41,21 +45,26 @@ def preprocess_function(examples):
             padding="max_length",
             return_tensors="pt"
         )
+        tokenized_inputs = {key: val.to(device) for key, val in tokenized_inputs.items()}
 
         tokenized_labels = tokenizer(
             labels,
             truncation=True,
             max_length=128,
             padding="max_length",
             return_tensors="pt"
         )
+        tokenized_labels = {key: val.to(device) for key, val in tokenized_labels.items()}
 
         tokenized_inputs["labels"] = tokenized_labels["input_ids"]
         return tokenized_inputs
 
     tokenized_dataset = dataset.map(preprocess_function, batched=True)
 
+    # Enable input gradients before creating PEFT model
+    model.enable_input_require_grads()
 
+    # Configure LoRA
     lora_config = LoraConfig(
         r=8,
         lora_alpha=32,
@@ -66,8 +75,10 @@ def preprocess_function(examples):
         inference_mode=False
     )
 
+    # Wrap the model with LoRA
     model = get_peft_model(model, lora_config)
 
+    # Training arguments
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=num_train_epochs,
@@ -84,6 +95,7 @@ def preprocess_function(examples):
         optim="paged_adamw_32bit"
     )
 
+    # Trainer
     trainer = SFTTrainer(
         model=model,
         train_dataset=tokenized_dataset,
@@ -93,9 +105,9 @@ def preprocess_function(examples):
         dataset_text_field="input"
     )
 
+    # Train and save the LoRA weights
     model.train()
     trainer.train()
-
     model.save_pretrained(output_dir)
     print(f"LoRA weights have been saved to {output_dir}")
 
@@ -104,22 +116,25 @@ def apply_lora_weights_to_model(base_model_name, lora_weights_dir):
     """
     Loads the base model and applies the saved LoRA weights.
     """
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.float16,
         bnb_4bit_use_double_quant=False
     )
 
-    tokenizer = AutoTokenizer.from_pretrained(base_model_name, max_new_tokens=8096)
+    # Load tokenizer and base model
+    tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side="right", truncation_side="right")
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         device_map="auto",
         quantization_config=bnb_config,
         torch_dtype=torch.float16,
-    )
+    ).to(device)
 
-    model = PeftModel.from_pretrained(base_model, lora_weights_dir)
+    # Apply LoRA weights
+    model = PeftModel.from_pretrained(base_model, lora_weights_dir).to(device)
     model.eval()
     print(f"LoRA weights from {lora_weights_dir} have been successfully applied to the base model.")
 
@@ -159,6 +174,7 @@ def main():
     # Test the model
     input_text = "What is the capital of Germany?"
     inputs = tokenizer(f"User: {input_text}", return_tensors="pt")
+    inputs = {key: val.to("cuda" if torch.cuda.is_available() else "cpu") for key, val in inputs.items()}
     outputs = model.generate(**inputs)
     print("Generated Response:", tokenizer.decode(outputs[0], skip_special_tokens=True))
 
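For orientation, a hedged usage sketch of the two functions this commit touches. The record keys ("input", "output", "feedback") mirror what preprocess_function reads; the import path and the "gpt2" base model are placeholders, and apply_lora_weights_to_model is called only for its side effects since the diff does not show its return value:

    # Hypothetical driver script; module path and model name are assumptions.
    from backend.lora import fine_tune_and_save_lora_weights, apply_lora_weights_to_model

    # Feedback records in the shape preprocess_function expects.
    data = [
        {"input": "What is the capital of Germany?", "output": "Berlin.", "feedback": "like"},
        {"input": "What is 2 + 2?", "output": "5", "feedback": "dislike"},
    ]

    # Fine-tune on the feedback data, then reload the base model with the saved adapter.
    fine_tune_and_save_lora_weights("gpt2", data, output_dir="./lora_weights")
    apply_lora_weights_to_model("gpt2", "./lora_weights")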
