Commit 1d87be6
remove max new tokens
juancwu committed Nov 24, 2024
1 parent 5af4c90 commit 1d87be6
Showing 2 changed files with 1 addition and 3 deletions.
backend/generate_answer.py (1 addition, 2 deletions)

@@ -14,8 +14,7 @@ def load_base_model():
 
     # Load model in 8-bit to reduce memory usage
     base_model = AutoModelForCausalLM.from_pretrained(Config.MODEL_NAME,
-                                                      token=Config.HUGGINGFACE_ACCESS_TOKEN,
-                                                      max_new_tokens=8096)
+                                                      token=Config.HUGGINGFACE_ACCESS_TOKEN)
 
     return base_model, tokenizer
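
For context (not part of the commit): max_new_tokens is a generation parameter rather than a loading parameter, so it belongs in the generate() call or a GenerationConfig, not in from_pretrained(). If the 8096-token cap is still wanted after this change, a minimal sketch of where it would typically go is below. The helper name answer_prompt and the prompt argument are hypothetical; load_base_model() is the function shown in the hunk above.

    # Sketch only, not code from this repository.
    def answer_prompt(prompt):
        base_model, tokenizer = load_base_model()
        inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
        # The token budget moves to the generate() call instead of from_pretrained().
        output_ids = base_model.generate(**inputs, max_new_tokens=8096)
        # Drop the prompt tokens and decode only the newly generated text.
        new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True)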
backend/lora.py (0 additions, 1 deletion)

@@ -117,7 +117,6 @@ def apply_lora_weights_to_model(base_model_name, lora_weights_dir):
         device_map="auto",
         quantization_config=bnb_config,
         torch_dtype=torch.float16,
-        max_new_tokens=8096
     )
 
     model = PeftModel.from_pretrained(base_model, lora_weights_dir)
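
Likewise in backend/lora.py, the generation-only argument is dropped from the quantized base-model load. If a default cap is still desired for the LoRA-wrapped model, one option (an assumption, not necessarily this repository's approach) is to set it on the base model's generation_config before wrapping with PeftModel:

    # Sketch only: assumes base_model was loaded as in the hunk above.
    base_model.generation_config.max_new_tokens = 8096  # default cap for generate()

    model = PeftModel.from_pretrained(base_model, lora_weights_dir)
    # model.generate(**inputs) now defaults to at most 8096 new tokens unless the
    # caller overrides max_new_tokens explicitly.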
