diff --git a/backend/generate_answer.py b/backend/generate_answer.py
index 030402d..2dde683 100644
--- a/backend/generate_answer.py
+++ b/backend/generate_answer.py
@@ -59,11 +59,16 @@ def generate_initial_note(page_content, model, tokenizer):
         max_length=2048,
         pad_token_id=tokenizer.eos_token_id,
         num_return_sequences=1,
-        temperature=0.7
+        temperature=0.7,
+        output_scores=False,  # Exclude unnecessary scores
+        return_dict_in_generate=True,  # Return generation metadata
     )
-    final_output = ""
-    for output in outputs:
-        final_output += tokenizer.decode(output, skip_special_tokens=True)
+
+    # Extract the generated tokens beyond the input tokens
+    generated_tokens = outputs.sequences[0][inputs['input_ids'].shape[-1]:]
+
+    # Decode the generated tokens
+    final_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)
     return final_output
 
 def generate_note(page_content, note_content, model, tokenizer):
diff --git a/backend/lora.py b/backend/lora.py
index 791210d..04fdd59 100644
--- a/backend/lora.py
+++ b/backend/lora.py
@@ -10,7 +10,9 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights
     Fine-tunes the model using the given dataset and saves the LoRA weights.
     """
     dataset = Dataset.from_list(data)
-
+
+    # Use GPU if available
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -18,6 +20,7 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights
         bnb_4bit_use_double_quant=False
     )
 
+    # Load tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right", truncation_side="right")
     tokenizer.pad_token = tokenizer.eos_token
 
@@ -26,10 +29,11 @@ def fine_tune_and_save_lora_weights(model_name, data, output_dir="./lora_weights
         device_map="auto",
         quantization_config=bnb_config,
         torch_dtype=torch.float16
     )
     model.config.pad_token_id = tokenizer.pad_token_id
     model.config.use_cache = False
 
+    # Preprocessing function
     def preprocess_function(examples):
         inputs = [f"User: {i} Bot: {o}" for i, o in zip(examples["input"], examples["output"])]
         labels = ["positive" if f == "like" else "negative" for f in examples["feedback"]]
@@ -41,6 +45,8 @@ def preprocess_function(examples):
             padding="max_length",
             return_tensors="pt"
         )
+        tokenized_inputs = {key: val.to(device) for key, val in tokenized_inputs.items()}
+
         tokenized_labels = tokenizer(
             labels,
             truncation=True,
@@ -48,14 +54,17 @@ def preprocess_function(examples):
             padding="max_length",
             return_tensors="pt"
         )
+        tokenized_labels = {key: val.to(device) for key, val in tokenized_labels.items()}
 
         tokenized_inputs["labels"] = tokenized_labels["input_ids"]
         return tokenized_inputs
 
     tokenized_dataset = dataset.map(preprocess_function, batched=True)
 
+    # Enable input gradients before creating PEFT model
     model.enable_input_require_grads()
 
+    # Configure LoRA
     lora_config = LoraConfig(
         r=8,
         lora_alpha=32,
@@ -66,8 +75,10 @@ def preprocess_function(examples):
         inference_mode=False
     )
 
+    # Wrap the model with LoRA
     model = get_peft_model(model, lora_config)
 
+    # Training arguments
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=num_train_epochs,
@@ -84,6 +95,7 @@ def preprocess_function(examples):
         optim="paged_adamw_32bit"
     )
 
+    # Trainer
     trainer = SFTTrainer(
         model=model,
         train_dataset=tokenized_dataset,
@@ -93,9 +105,9 @@ def preprocess_function(examples):
         dataset_text_field="input"
     )
 
+    # Train and save the LoRA weights
     model.train()
     trainer.train()
-
     model.save_pretrained(output_dir)
     print(f"LoRA weights have been saved to {output_dir}")
 
@@ -104,6 +116,7 @@ def apply_lora_weights_to_model(base_model_name, lora_weights_dir):
     """
     Loads the base model and applies the saved LoRA weights.
     """
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -111,15 +124,17 @@ def apply_lora_weights_to_model(base_model_name, lora_weights_dir):
         bnb_4bit_use_double_quant=False
     )
 
-    tokenizer = AutoTokenizer.from_pretrained(base_model_name, max_new_tokens=8096)
+    # Load tokenizer and base model
+    tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side="right", truncation_side="right")
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         device_map="auto",
         quantization_config=bnb_config,
         torch_dtype=torch.float16,
     )
 
+    # Apply LoRA weights
     model = PeftModel.from_pretrained(base_model, lora_weights_dir)
     model.eval()
 
     print(f"LoRA weights from {lora_weights_dir} have been successfully applied to the base model.")
@@ -159,6 +174,7 @@ def main():
 
     # Test the model
     input_text = "What is the capital of Germany?"
     inputs = tokenizer(f"User: {input_text}", return_tensors="pt")
+    inputs = {key: val.to("cuda" if torch.cuda.is_available() else "cpu") for key, val in inputs.items()}
     outputs = model.generate(**inputs)
     print("Generated Response:", tokenizer.decode(outputs[0], skip_special_tokens=True))
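
For reviewers, a minimal sanity sketch of the new decode path in generate_answer.py: with return_dict_in_generate=True, generate() returns an object whose .sequences still contains the prompt tokens, so slicing at input_ids.shape[-1] leaves only the newly generated text. The "gpt2" checkpoint below is a stand-in assumption for illustration, not the model this backend actually loads.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder checkpoint; the backend's real model is configured elsewhere
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tokenizer("User: What is the capital of Germany?", return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_length=64,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,  # gives outputs.sequences instead of a bare tensor
    )

    # sequences[0] = prompt tokens + new tokens; drop the prompt before decoding
    generated_tokens = outputs.sequences[0][inputs["input_ids"].shape[-1]:]
    print(tokenizer.decode(generated_tokens, skip_special_tokens=True))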
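
And a sketch of how the two lora.py entry points are meant to be chained. The record fields (input/output/feedback, with "like" mapped to a positive label) follow preprocess_function above; the import path, the model checkpoint, and the assumption that apply_lora_weights_to_model returns the PEFT-wrapped model are illustrative only, since the diff does not pin them down.

    from backend.lora import fine_tune_and_save_lora_weights, apply_lora_weights_to_model  # assumed import path

    # Feedback records in the schema preprocess_function expects
    data = [
        {"input": "What is the capital of Germany?", "output": "Berlin.", "feedback": "like"},
        {"input": "Summarize this page.", "output": "It is a page.", "feedback": "dislike"},
    ]

    base_model_name = "meta-llama/Llama-2-7b-hf"  # placeholder checkpoint

    fine_tune_and_save_lora_weights(base_model_name, data, output_dir="./lora_weights")

    # Assumes the function returns the PEFT-wrapped model; the return statement
    # sits outside the hunks shown in this diff
    model = apply_lora_weights_to_model(base_model_name, "./lora_weights")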