From 849505c794d8c8b37c20b554a9df1f810f3716f4 Mon Sep 17 00:00:00 2001
From: Viswa
Date: Thu, 22 Feb 2024 07:41:24 +0000
Subject: [PATCH 1/2] add gemma model

---
 config.json.example |   4 ++
 llm.py              |   5 ++
 llms/gemma_model.py | 117 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)
 create mode 100644 llms/gemma_model.py

diff --git a/config.json.example b/config.json.example
index 4c65502b..c3858451 100644
--- a/config.json.example
+++ b/config.json.example
@@ -16,6 +16,10 @@
         "cohere": {
             "api_key": "TODO"
         },
+        "gemma": {
+            "KAGGLE_USERNAME": "UPDATE",
+            "KAGGLE_KEY": "UPDATE"
+        },
         "anthropic": {
             "api_key": "TODO"
         }
diff --git a/llm.py b/llm.py
index 5e5978ed..8f3bea6d 100644
--- a/llm.py
+++ b/llm.py
@@ -26,6 +26,7 @@
 from llms.mistral_model import MistralModel
 from llms.vertexai_model import VertexAIModel
 from llms.cohere_model import CohereModel
+from llms.gemma_model import GemmaModel
 
 class LLM:
     def __init__(self, name="gpt-3.5-turbo", use_cache=True, override_hparams={}):
@@ -42,6 +43,8 @@ def __init__(self, name="gpt-3.5-turbo", use_cache=True, override_hparams={}):
             self.model = AnthropicModel(name)
         elif 'command' in name:
             self.model = CohereModel(name)
+        elif 'gemma' in name:
+            self.model = GemmaModel(name)
         else:
             raise
         self.model.hparams.update(override_hparams)
@@ -92,12 +95,14 @@ def __call__(self, conversation, add_image=None, max_tokens=None, skip_cache=Fal
 
 #llm = LLM("command")
 #llm = LLM("gpt-3.5-turbo")
+# llm = LLM("gemma:2b-it")
 llm = LLM("gpt-4-1106-preview")
 #llm = LLM("claude-instant-1.2")
 #llm = LLM("mistral-tiny")
 #llm = LLM("gemini-pro", override_hparams={'temperature': 0.3}, use_cache=False)
 
 #eval_llm = LLM("gpt-4-1106-preview")
+# eval_llm = LLM("gemma:2b-it")
 eval_llm = LLM("gpt-4-0125-preview", override_hparams={'temperature': 0.1})
 #eval_llm = LLM("gpt-3.5-turbo", override_hparams={'temperature': 0.1})
 
diff --git a/llms/gemma_model.py b/llms/gemma_model.py
new file mode 100644
index 00000000..ec9c55e9
--- /dev/null
+++ b/llms/gemma_model.py
@@ -0,0 +1,117 @@
+import kagglehub
+import os
+import torch
+import json
+
+# !git clone https://github.com/google/gemma_pytorch.git
+# !pip install -q -U torch immutabledict sentencepiece
+import sys
+
+sys.path.append("gemma_pytorch")  # @TODO make it cleaner
+from gemma_pytorch.gemma.config import get_config_for_7b, get_config_for_2b
+from gemma_pytorch.gemma.model import GemmaForCausalLM
+
+
+class GemmaModel:
+    def __init__(self, variant, machine_type="cuda"):
+        """
+        Request model access at
+        https://www.kaggle.com/models/google/gemma/frameworks/pyTorch and
+        generate a Kaggle API token.
+
+        Run `git clone https://github.com/google/gemma_pytorch.git` first; this
+        is required for now. Tested on Colab, where these tests succeeded:
+            !PYTHONPATH='.' python tests/print_hello.py
+            !PYTHONPATH='.' python tests/explain_code_prime.py
+        Unlike other models, Gemma doesn't require a paid account or any other
+        setup, which makes it much easier to add and run new test cases.
+        """
+        # variant format: 'gemma:2b-it', 'gemma:7b-it'
+        self.variant = variant.split(":")[-1]
+        self.machine_type = machine_type
+        self.weights_dir = None
+        self.tokenizer_path = None
+        self.ckpt_path = None
+        self.model = None
+        self.login()
+        self.choose_variant_and_machine()
+        self.load_model()
+        config = json.load(open("config.json"))
+        self.hparams = config["hparams"]
+        self.hparams.update(config["llms"]["gemma"].get("hparams") or {})
+
+    def login(self):
+        config = json.load(open("config.json"))
+        os.environ["KAGGLE_USERNAME"] = config["llms"]["gemma"][
+            "KAGGLE_USERNAME"
+        ].strip()
+        os.environ["KAGGLE_KEY"] = config["llms"]["gemma"]["KAGGLE_KEY"].strip()
+
+    def choose_variant_and_machine(self):
+        self.weights_dir = kagglehub.model_download(
+            f"google/gemma/pyTorch/{self.variant}"
+        )
+        self.tokenizer_path = os.path.join(self.weights_dir, "tokenizer.model")
+        assert os.path.isfile(self.tokenizer_path), "Tokenizer not found!"
+        self.ckpt_path = os.path.join(self.weights_dir, f"gemma-{self.variant}.ckpt")
+        assert os.path.isfile(self.ckpt_path), "PyTorch checkpoint not found!"
+
+    def load_model(self):
+        assert (
+            self.weights_dir is not None
+        ), "Weights directory is not set. Call choose_variant_and_machine() first."
+        model_config = (
+            get_config_for_2b() if "2b" in self.variant else get_config_for_7b()
+        )
+        model_config.tokenizer = self.tokenizer_path
+        model_config.quant = "quant" in self.variant
+        torch.set_default_dtype(model_config.get_dtype())
+        device = torch.device(self.machine_type)
+        self.model = GemmaForCausalLM(model_config)
+        self.model.load_weights(self.ckpt_path)
+        self.model = self.model.to(device).eval()
+
+    def generate_sample(self, prompt, output_len=60):
+        assert self.model is not None, "Model is not loaded. Call load_model() first."
+        return self.model.generate(
+            prompt, device=torch.device(self.machine_type), output_len=output_len
+        )
+
+    def make_request(self, conversation, add_image=None, max_tokens=None):
+        # Assign alternating user/assistant roles to the conversation turns
+        conversation = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": content}
+            for i, content in enumerate(conversation)
+        ]
+
+        # Gemma chat templates
+        USER_CHAT_TEMPLATE = "<start_of_turn>user\n{prompt}<end_of_turn>\n"
+        MODEL_CHAT_TEMPLATE = "<start_of_turn>model\n{prompt}<end_of_turn>\n"
+
+        # Create a formatted prompt from the updated conversation
+        formatted_prompt = ""
+        for turn in conversation:
+            if turn["role"] == "user":
+                formatted_prompt += USER_CHAT_TEMPLATE.format(prompt=turn["content"])
+            else:
+                formatted_prompt += MODEL_CHAT_TEMPLATE.format(prompt=turn["content"])
+
+        # Add a final model turn so generation continues the conversation
+        formatted_prompt += "<start_of_turn>model\n"
+        conversation = formatted_prompt
+
+        assert self.model is not None, "Model is not loaded. Call load_model() first."
+
+        out = self.model.generate(
+            conversation, device=torch.device(self.machine_type)
+        )  # output_len=60)
+        return out
+
+
+if __name__ == "__main__":
+    # Example usage:
+    gemma_instance = GemmaModel(variant="gemma:2b-it", machine_type="cuda")
+    generated_sample = gemma_instance.generate_sample(
+        "Write a poem about an llm writing a poem.", output_len=60
+    )
+    print(generated_sample)
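
For reference, the chat formatting that GemmaModel.make_request assembles can be reproduced standalone. The sketch below reuses the two template strings and the final "<start_of_turn>model" line from the patch; the conversation turns themselves are made-up examples:

    # Standalone sketch of the prompt string built by GemmaModel.make_request.
    # Only the templates and the trailing "<start_of_turn>model\n" mirror the
    # patch; the conversation contents are hypothetical.
    USER_CHAT_TEMPLATE = "<start_of_turn>user\n{prompt}<end_of_turn>\n"
    MODEL_CHAT_TEMPLATE = "<start_of_turn>model\n{prompt}<end_of_turn>\n"

    conversation = [
        "Write a python function that checks if a number is prime.",  # user turn
        "def is_prime(n): ...",                                       # model turn
        "Now explain how the function works.",                        # user turn
    ]

    formatted_prompt = ""
    for i, content in enumerate(conversation):
        template = USER_CHAT_TEMPLATE if i % 2 == 0 else MODEL_CHAT_TEMPLATE
        formatted_prompt += template.format(prompt=content)
    formatted_prompt += "<start_of_turn>model\n"  # open the turn the model should complete

    print(formatted_prompt)
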
From 755ba6ea51b4ebb697fa2587b17186aa67334082 Mon Sep 17 00:00:00 2001
From: Viswa
Date: Thu, 22 Feb 2024 01:51:16 -0600
Subject: [PATCH 2/2] add colab link to readme.

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 910a4d9d..3371da0c 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
 # Yet Another Applied LLM Benchmark
 
+Run a simple test case [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/carlini/yet-another-applied-llm-benchmark/blob/master/run_a_simple_testcase.ipynb)
+
+
 This is a benchmark I made, for me, to test how well language models perform
 on tasks I care about. I know I care about them because each test is directly
 derived from something I've asked a LLM to perform for me in the last year.
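
With the first patch applied, a minimal smoke test of the new backend might look like the sketch below. It assumes the gemma entry in config.json holds real Kaggle credentials, that gemma_pytorch has been cloned into the working directory, and that a CUDA device is available; the same model is also reachable through the commented-out LLM("gemma:2b-it") lines in llm.py.

    # Smoke-test sketch for the new Gemma backend (assumptions listed above).
    from llms.gemma_model import GemmaModel

    model = GemmaModel(variant="gemma:2b-it", machine_type="cuda")

    # make_request expects a list of alternating user/model turns.
    print(model.make_request(["Write a hello world program in python."]))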