add gemma model #5

Open · wants to merge 3 commits into main
README.md (3 additions, 0 deletions)
@@ -1,5 +1,8 @@
# Yet Another Applied LLM Benchmark

Run a simple test case [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/carlini/yet-another-applied-llm-benchmark/blob/master/run_a_simple_testcase.ipynb)


This is a benchmark I made, for me, to test how well language models perform
on tasks I care about. I know I care about them because each test is directly
derived from something I've asked a LLM to perform for me in the last year.
config.json.example (4 additions, 0 deletions)
@@ -16,6 +16,10 @@
"cohere": {
"api_key": "TODO"
},
"gemma" : {
"KAGGLE_USERNAME":"UPDATE",
"KAGGLE_KEY":"UPDATE"
},
"anthropic": {
"api_key": "TODO"
},
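
These two keys are the only configuration the new backend needs. A minimal sketch of how they are consumed, assuming the `gemma` block sits under the top-level `"llms"` section that `llms/gemma_model.py` reads (see `GemmaModel.login()` below):

```python
# Sketch of GemmaModel.login(): the Kaggle credentials from config.json are
# exported as the environment variables that kagglehub expects.
import json
import os

config = json.load(open("config.json"))
os.environ["KAGGLE_USERNAME"] = config["llms"]["gemma"]["KAGGLE_USERNAME"].strip()
os.environ["KAGGLE_KEY"] = config["llms"]["gemma"]["KAGGLE_KEY"].strip()
```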
llm.py (5 additions, 0 deletions)
@@ -27,6 +27,7 @@
from llms.vertexai_model import VertexAIModel
from llms.cohere_model import CohereModel
from llms.moonshot_model import MoonshotAIModel
from llms.gemma_model import GemmaModel

class LLM:
    def __init__(self, name="gpt-3.5-turbo", use_cache=True, override_hparams={}):
@@ -45,6 +46,8 @@ def __init__(self, name="gpt-3.5-turbo", use_cache=True, override_hparams={}):
            self.model = MoonshotAIModel(name)
        elif 'command' in name:
            self.model = CohereModel(name)
        elif 'gemma' in name:
            self.model = GemmaModel(name)
        else:
            raise
        self.model.hparams.update(override_hparams)
@@ -95,12 +98,14 @@ def __call__(self, conversation, add_image=None, max_tokens=None, skip_cache=Fal

#llm = LLM("command")
#llm = LLM("gpt-3.5-turbo")
#llm = LLM("gemma:2b-it")
llm = LLM("gpt-4-1106-preview")
#llm = LLM("claude-instant-1.2")
#llm = LLM("mistral-tiny")
#llm = LLM("gemini-pro", override_hparams={'temperature': 0.3}, use_cache=False)

#eval_llm = LLM("gpt-4-1106-preview")
#eval_llm = LLM("gemma:2b-it")
eval_llm = LLM("gpt-4-0125-preview", override_hparams={'temperature': 0.1})
#eval_llm = LLM("gpt-3.5-turbo", override_hparams={'temperature': 0.1})

llms/gemma_model.py (117 additions, 0 deletions)
@@ -0,0 +1,117 @@
import kagglehub
import os
import torch
import json

# !git clone https://github.com/google/gemma_pytorch.git
# !pip install -q -U torch immutabledict sentencepiece
import sys

sys.path.append("gemma_pytorch")  # TODO: replace this path hack with a proper dependency on the cloned gemma_pytorch repo
from gemma_pytorch.gemma.config import get_config_for_7b, get_config_for_2b
from gemma_pytorch.gemma.model import GemmaForCausalLM


class GemmaModel:
    def __init__(self, variant, machine_type="cuda"):
        """
        Setup:
          1. Request model access at https://www.kaggle.com/models/google/gemma/frameworks/pyTorch
          2. Generate a Kaggle API token and add the credentials to config.json.
          3. Run `git clone https://github.com/google/gemma_pytorch.git`. This is required for now.

        Tested on Colab, where the following tests passed:
            !PYTHONPATH='.' python tests/print_hello.py
            !PYTHONPATH='.' python tests/explain_code_prime.py

        Unlike the other models, Gemma requires neither a paid account nor any
        other setup, which makes it easier to add and run new test cases.
        """
        # Variant format: 'gemma:2b-it' or 'gemma:7b-it'
        self.variant = variant.split(":")[-1]
        self.machine_type = machine_type
        self.weights_dir = None
        self.tokenizer_path = None
        self.ckpt_path = None
        self.model = None
        self.login()
        self.choose_variant_and_machine()
        self.load_model()
        config = json.load(open("config.json"))
        self.hparams = config["hparams"]
        self.hparams.update(config["llms"]["gemma"].get("hparams") or {})

    def login(self):
        config = json.load(open("config.json"))
        os.environ["KAGGLE_USERNAME"] = config["llms"]["gemma"][
            "KAGGLE_USERNAME"
        ].strip()
        os.environ["KAGGLE_KEY"] = config["llms"]["gemma"]["KAGGLE_KEY"].strip()

    def choose_variant_and_machine(self):
        self.weights_dir = kagglehub.model_download(
            f"google/gemma/pyTorch/{self.variant}"
        )
        self.tokenizer_path = os.path.join(self.weights_dir, "tokenizer.model")
        assert os.path.isfile(self.tokenizer_path), "Tokenizer not found!"
        self.ckpt_path = os.path.join(self.weights_dir, f"gemma-{self.variant}.ckpt")
        assert os.path.isfile(self.ckpt_path), "PyTorch checkpoint not found!"

    def load_model(self):
        assert (
            self.weights_dir is not None
        ), "Weights directory is not set. Call choose_variant_and_machine() first."
        model_config = (
            get_config_for_2b() if "2b" in self.variant else get_config_for_7b()
        )
        model_config.tokenizer = self.tokenizer_path
        model_config.quant = "quant" in self.variant
        torch.set_default_dtype(model_config.get_dtype())
        device = torch.device(self.machine_type)
        self.model = GemmaForCausalLM(model_config)
        self.model.load_weights(self.ckpt_path)
        self.model = self.model.to(device).eval()

    def generate_sample(self, prompt, output_len=60):
        assert self.model is not None, "Model is not loaded. Call load_model() first."
        return self.model.generate(
            prompt, device=torch.device(self.machine_type), output_len=output_len
        )

    def make_request(self, conversation, add_image=None, max_tokens=None):
        # The benchmark passes alternating turns: even indices are user turns,
        # odd indices are earlier model replies.
        conversation = [
            {"role": "user" if i % 2 == 0 else "assistant", "content": content}
            for i, content in enumerate(conversation)
        ]

        # Gemma chat templates, e.g. "<start_of_turn>user\nHello<end_of_turn>\n"
        USER_CHAT_TEMPLATE = "<start_of_turn>user\n{prompt}<end_of_turn>\n"
        MODEL_CHAT_TEMPLATE = "<start_of_turn>model\n{prompt}<end_of_turn>\n"

        # Render the conversation into a single prompt string.
        formatted_prompt = ""
        for turn in conversation:
            if turn["role"] == "user":
                formatted_prompt += USER_CHAT_TEMPLATE.format(prompt=turn["content"])
            else:
                formatted_prompt += MODEL_CHAT_TEMPLATE.format(prompt=turn["content"])

        # Open a final model turn so Gemma generates the next reply.
        formatted_prompt += "<start_of_turn>model\n"

        assert self.model is not None, "Model is not loaded. Call load_model() first."

        kwargs = {"output_len": max_tokens} if max_tokens is not None else {}
        out = self.model.generate(
            formatted_prompt, device=torch.device(self.machine_type), **kwargs
        )
        return out


if __name__ == "__main__":
    # Example usage:
    gemma_instance = GemmaModel(variant="gemma:2b-it", machine_type="cuda")
    generated_sample = gemma_instance.generate_sample(
        "Write a poem about an llm writing a poem.", output_len=60
    )
    print(generated_sample)
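
The `__main__` block above only exercises `generate_sample`. The benchmark itself calls `make_request` with alternating user/model turns, so a hedged multi-turn smoke test (not part of this PR; it assumes Kaggle credentials are configured in `config.json` and a CUDA device is available) could look like:

```python
# Hypothetical smoke test for GemmaModel.make_request: even-indexed entries are
# user turns, odd-indexed entries are earlier model replies.
from llms.gemma_model import GemmaModel

gemma = GemmaModel(variant="gemma:2b-it", machine_type="cuda")
reply = gemma.make_request(
    [
        "What does print(sum(range(10))) output?",
        "It prints 45.",
        "Explain why in one sentence.",
    ],
    max_tokens=60,
)
print(reply)
```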