From 97992802f3e6f9f317313f8afc15e2a37fc2722b Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Wed, 14 Aug 2024 08:27:29 +0800 Subject: [PATCH] [CI/Build]Reduce the time consumption for LoRA tests (#7396) --- tests/lora/test_layer_variation.py | 106 ---------------------------- tests/lora/test_punica_sizes.py | 2 +- tests/lora/test_punica_variation.py | 23 +----- 3 files changed, 3 insertions(+), 128 deletions(-) delete mode 100644 tests/lora/test_layer_variation.py diff --git a/tests/lora/test_layer_variation.py b/tests/lora/test_layer_variation.py deleted file mode 100644 index ec9776b77df76..0000000000000 --- a/tests/lora/test_layer_variation.py +++ /dev/null @@ -1,106 +0,0 @@ -import tempfile -from random import sample -from typing import List, Optional - -import peft -import pytest -from transformers import AutoModelForCausalLM - -import vllm -from vllm.lora.request import LoRARequest - -from .conftest import cleanup - -MODEL_PATH = "Felladrin/Llama-68M-Chat-v1" -PROMPTS = [ - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", # noqa: E501 - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", # noqa: E501 - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. [/user] [assistant]", # noqa: E501 -] - - -def get_lora_model(model_id: str, target_modules: List[str], rank: int): - model = AutoModelForCausalLM.from_pretrained(model_id) - lora_config = peft.tuners.lora.LoraConfig(target_modules, rank) - lora_model = peft.PeftModel(model, lora_config) - return lora_model - - -def do_sample(llm: vllm.LLM, - lora_path: Optional[str] = None, - lora_id: Optional[int] = None, - logprobs: int = 0, - n_tokens: int = 256): - prompts = PROMPTS - sampling_params = vllm.SamplingParams(temperature=0, - max_tokens=n_tokens, - logprobs=logprobs, - stop=["[/assistant]"]) - outputs = llm.generate( - prompts, - sampling_params, - lora_request=LoRARequest(str(lora_id), lora_id, lora_path) - if lora_id else None) - # Print the outputs. - generated_texts: List[str] = [] - generated_logprobs: List[List[List[int]]] = [] - for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - generated_texts.append(generated_text) - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - generated_logprobs.append([ - list(logprob.keys()) for out in output.outputs - for logprob in out.logprobs - ]) - return generated_logprobs if logprobs else generated_texts - - -SUPPORTED_MODULES = [ - "qkv_proj", "o_proj", "gate_up_proj", "down_proj", "embed_tokens", - "lm_head" -] -TARGET_MODULES_LIST = [] -for length in range(2, 6): - TARGET_MODULES_LIST.extend( - [sample(SUPPORTED_MODULES, length) for _ in range(3)]) - - -# Test the correctness when layer and rank are varied -# step 1: init a base model and serve with LoRA to get the reference results -# step 2: merge the same LoRA to the base model, serve the merged model -# step 3: compare the results from step 1 and step 2 -@pytest.mark.parametrize("tp_size", [1]) -@pytest.mark.parametrize("target_modules", TARGET_MODULES_LIST) -@pytest.mark.parametrize("rank", [8, 16, 32, 64]) -def test_layer_variation_correctness(tp_size, target_modules, rank): - llm = vllm.LLM(MODEL_PATH, - enable_lora=True, - max_num_seqs=16, - max_loras=4, - tensor_parallel_size=tp_size, - worker_use_ray=True) - model = get_lora_model(MODEL_PATH, target_modules, rank) - with tempfile.TemporaryDirectory() as tmpdir: - model.save_pretrained(tmpdir) - merged_probs = do_sample(llm, tmpdir, 1, logprobs=5, n_tokens=32) - del llm - cleanup() - reference_id_sets = [set(prob[0]) for prob in merged_probs] - - model = get_lora_model(MODEL_PATH, target_modules, rank) - with tempfile.TemporaryDirectory() as tmpdir: - merged_model = model.merge_and_unload() - merged_model.save_pretrained(tmpdir) - llm = vllm.LLM(tmpdir, - tokenizer=MODEL_PATH, - enable_lora=False, - max_num_seqs=16, - tensor_parallel_size=tp_size, - worker_use_ray=True) - probs = do_sample(llm, logprobs=5, n_tokens=32) - del llm - cleanup() - # verify the top-5 tokens are identical for each token - id_sets = [set(prob[0]) for prob in probs] - assert id_sets == reference_id_sets diff --git a/tests/lora/test_punica_sizes.py b/tests/lora/test_punica_sizes.py index c052568dc2e33..c36fb3afb0cc3 100644 --- a/tests/lora/test_punica_sizes.py +++ b/tests/lora/test_punica_sizes.py @@ -98,7 +98,7 @@ 128256, ] #The size of TP -divisibility = [1, 2, 4, 8, 16, 32, 64] +divisibility = [1, 2, 8, 16, 64] all_hidden_size = [] for div in divisibility: diff --git a/tests/lora/test_punica_variation.py b/tests/lora/test_punica_variation.py index 5bf3f72e7d97b..d026e34878e04 100644 --- a/tests/lora/test_punica_variation.py +++ b/tests/lora/test_punica_variation.py @@ -20,10 +20,10 @@ from .utils import (generate_data, generate_data_for_expand_nslices, ref_torch_groupgemm) -HIDDEN_SIZES = [3424, 4096, 4097] +HIDDEN_SIZES = [4097] BATCHES = [1, 4, 16, 32] -NUM_LORA = [1, 4, 8, 16, 32, 64, 128] +NUM_LORA = [1, 8, 32, 128] DTYPES = [torch.float16, torch.bfloat16] MAX_RANKS = [1, 4, 8, 16, 32, 64, 128, 256] SCALES = [0.5] @@ -321,22 +321,3 @@ def test_punica_expand_nslices( slice_offset += hidden_size assert_close(our_outputs, ref_outputs) - - -if __name__ == "__main__": - from itertools import product - - lst = list( - product( - BATCHES, - NUM_LORA, - MAX_RANKS, - [1.0], - [torch.float16], - ["expand"], - SEED, - CUDA_DEVICES, - )) - for ele in lst: - test_punica_bgmv(*ele) - print(f"{ele},pass")