Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Amazon Bedrock provider support for MNIAH testing #35

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

A simple 'needle in a haystack' analysis to test in-context retrieval ability of long context LLMs.

Supported model providers: OpenAI, Anthropic, Cohere
Supported model providers: OpenAI, Anthropic, Cohere, Amazon Bedrock

Get the behind the scenes on the [overview video](https://youtu.be/KwRRuiCCdmc).

Expand Down Expand Up @@ -48,7 +48,7 @@ Start using the package by calling the entry point `needlehaystack.run_test` fro

You can then run the analysis on OpenAI, Anthropic, Cohere, or Amazon Bedrock models with the following command line arguments:

- `provider` - The provider of the model, available options are `openai`, `anthropic`, and `cohere`. Defaults to `openai`
- `provider` - The provider of the model, available options are `openai`, `anthropic`, `cohere`, and `bedrock`. Defaults to `openai`
- `evaluator` - The evaluator, which can either be a `model` or `LangSmith`. See more on `LangSmith` below. If using a `model`, only `openai` is currently supported. Defaults to `openai`.
- `model_name` - Model name of the language model accessible by the provider. Defaults to `gpt-3.5-turbo-0125`
- `evaluator_model_name` - Model name of the language model accessible by the evaluator. Defaults to `gpt-3.5-turbo-0125`
Expand Down
1 change: 1 addition & 0 deletions needlehaystack/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .cohere import Cohere
from .model import ModelProvider
from .openai import OpenAI
from .bedrock import Bedrock
102 changes: 102 additions & 0 deletions needlehaystack/providers/bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import pkg_resources

from operator import itemgetter
from typing import Optional

from anthropic import Anthropic as AnthropicModel
from langchain_community.chat_models import BedrockChat
from langchain.prompts import PromptTemplate

from .model import ModelProvider

class Bedrock(ModelProvider):
    """
    ModelProvider implementation backed by Amazon Bedrock chat models.

    Supports the Anthropic (Claude) and Meta model families hosted on
    Bedrock. Token encoding/decoding is delegated to the Anthropic tokenizer,
    which is exact for Claude models but presumably only an approximation for
    Meta models — TODO confirm this is acceptable for Meta model runs.
    """

    # Default generation parameters forwarded to the Bedrock model.
    DEFAULT_MODEL_KWARGS: dict = dict(max_tokens = 300,
                                      temperature = 0)

    def __init__(self,
                 model_name: str = "anthropic.claude-3-sonnet-20240229-v1:0",
                 model_kwargs: dict | None = None):
        """
        :param model_name: The Bedrock model ID.
            Default is 'anthropic.claude-3-sonnet-20240229-v1:0'.
        :param model_kwargs: Model configuration.
            Default is {max_tokens: 300, temperature: 0}.
        :raises NotImplementedError: if the model family is not supported.
        """
        # Only the Anthropic and Meta model families are wired up below.
        if "anthropic" not in model_name and \
           "meta" not in model_name:
            raise NotImplementedError(
                "Bedrock provider supports only 'anthropic' and 'meta' "
                f"model families, got: {model_name}")

        self.model_name = model_name
        # Copy the class-level default so later mutation of
        # self.model_kwargs cannot corrupt the shared DEFAULT_MODEL_KWARGS
        # dict for other instances (mutable shared-default pitfall).
        self.model_kwargs = dict(self.DEFAULT_MODEL_KWARGS) \
            if model_kwargs is None else model_kwargs

        # Anthropic's tokenizer is used for token counting regardless of the
        # chosen Bedrock model family.
        self.tokenizer = AnthropicModel().get_tokenizer()

        # Load the packaged prompt template shared with the Anthropic provider.
        resource_path = pkg_resources.resource_filename('needlehaystack', 'providers/Anthropic_prompt.txt')
        with open(resource_path, 'r') as file:
            self.prompt_structure = file.read()

    async def evaluate_model(self, prompt: str) -> str:
        """Direct (non-LangChain) model evaluation is not implemented for the
        Bedrock provider.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError(
            "evaluate_model is not implemented for the Bedrock provider")

    def generate_prompt(self, context: str, retrieval_question: str) -> str | list[dict[str, str]]:
        """Fill the packaged prompt template with the haystack context and the
        needle retrieval question."""
        return self.prompt_structure.format(
            retrieval_question=retrieval_question,
            context=context)

    def encode_text_to_tokens(self, text: str) -> list[int]:
        """Encode ``text`` into a list of token IDs using the Anthropic tokenizer."""
        return self.tokenizer.encode(text).ids

    def decode_tokens(self, tokens: list[int], context_length: Optional[int] = None) -> str:
        """Decode ``tokens`` back to text, keeping only the first
        ``context_length`` tokens (``None`` decodes everything)."""
        return self.tokenizer.decode(tokens[:context_length])

    def get_langchain_runnable(self, context: str):
        """
        Creates a LangChain runnable that constructs a prompt based on a given
        context and a question, queries the model via Bedrock, and returns the
        model's response.

        Args:
            context (str): The context or background information relevant to the
                user's question, baked into the chain so only the question needs
                to be supplied at invocation time.

        Returns:
            A LangChain runnable (prompt | model chain) that can be invoked with
            ``{"question": ...}`` to obtain the model's response.

        Example:
            To use the runnable:
            - Define the context and question.
            - Execute the runnable with these parameters to get the model's response.
        """
        # NOTE: fixed a stray unmatched double quote that previously appeared
        # after "direct" in this prompt template.
        template = """Human: You are a helpful AI bot that answers questions for a user. Keep your response short and direct \n
<document_content>
{context}
</document_content>
Here is the user question:
<question>
{question}
</question>
Don't give information outside the document or repeat your findings.
Assistant: Here is the most relevant information in the documents:"""

        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )
        # Create a LangChain runnable: the context is captured in a closure so
        # callers only provide the question.
        model = BedrockChat(
            model_id=self.model_name,
            model_kwargs=self.model_kwargs,
        )
        chain = ( {"context": lambda x: context,
                   "question": itemgetter("question")}
                 | prompt
                 | model
                 )
        return chain
7 changes: 5 additions & 2 deletions needlehaystack/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from . import LLMNeedleHaystackTester, LLMMultiNeedleHaystackTester
from .evaluators import Evaluator, LangSmithEvaluator, OpenAIEvaluator
from .providers import Anthropic, ModelProvider, OpenAI, Cohere
from .providers import Anthropic, ModelProvider, OpenAI, Cohere, Bedrock

load_dotenv()

Expand All @@ -15,6 +15,7 @@ class CommandArgs():
provider: str = "openai"
evaluator: str = "openai"
model_name: str = "gpt-3.5-turbo-0125"
model_kwargs: dict = field(default_factory=lambda: dict(max_tokens = 300, temperature = 0))
evaluator_model_name: Optional[str] = "gpt-3.5-turbo-0125"
needle: Optional[str] = "\nThe best thing to do in San Francisco is eat a sandwich and sit in Dolores Park on a sunny day.\n"
haystack_dir: Optional[str] = "PaulGrahamEssays"
Expand Down Expand Up @@ -60,11 +61,13 @@ def get_model_to_test(args: CommandArgs) -> ModelProvider:
"""
match args.provider.lower():
case "openai":
return OpenAI(model_name=args.model_name)
return OpenAI(model_name=args.model_name, model_kwargs=args.model_kwargs)
case "anthropic":
return Anthropic(model_name=args.model_name)
case "cohere":
return Cohere(model_name=args.model_name)
case "bedrock":
return Bedrock(model_name=args.model_name, model_kwargs=args.model_kwargs)
case _:
raise ValueError(f"Invalid provider: {args.provider}")

Expand Down
8 changes: 5 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ annotated-types==0.6.0
anthropic>=0.7.5
anyio==3.7.1
attrs==23.1.0
boto3==1.34.61
botocore>=1.34.0
certifi==2023.11.17
charset-normalizer==3.3.2
cohere>=5.1.2
Expand All @@ -20,10 +22,10 @@ idna==3.6
jsonargparse==4.27.5
jsonpatch==1.33
jsonpointer==2.4
langchain==0.1.9
langchain==0.1.12
langchain-community>=0.0.24
langchain-core>=0.1.26
langsmith>=0.1.8
langchain-core>=0.1.31
langsmith>=0.1.25
langchain_openai
langchain_anthropic
langchain_cohere
Expand Down