Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Amazon Bedrock provider support for MNIAH testing #35

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

A simple 'needle in a haystack' analysis to test in-context retrieval ability of long context LLMs.

Supported model providers: OpenAI, Anthropic, Cohere
Supported model providers: OpenAI, Anthropic, Cohere, Amazon Bedrock

Get the behind the scenes on the [overview video](https://youtu.be/KwRRuiCCdmc).

Expand Down Expand Up @@ -48,7 +48,7 @@ Start using the package by calling the entry point `needlehaystack.run_test` fro

You can then run the analysis on OpenAI, Anthropic, Cohere, or Amazon Bedrock models with the following command line arguments:

- `provider` - The provider of the model, available options are `openai`, `anthropic`, and `cohere`. Defaults to `openai`
- `provider` - The provider of the model, available options are `openai`, `anthropic`, `cohere`, and `bedrock`. Defaults to `openai`
- `evaluator` - The evaluator, which can either be a `model` or `LangSmith`. See more on `LangSmith` below. If using a `model`, only `openai` is currently supported. Defaults to `openai`.
- `model_name` - Model name of the language model accessible by the provider. Defaults to `gpt-3.5-turbo-0125`
- `evaluator_model_name` - Model name of the language model accessible by the evaluator. Defaults to `gpt-3.5-turbo-0125`
Expand Down
1 change: 1 addition & 0 deletions needlehaystack/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .cohere import Cohere
from .model import ModelProvider
from .openai import OpenAI
from .bedrock import Bedrock
102 changes: 102 additions & 0 deletions needlehaystack/providers/bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import pkg_resources

from operator import itemgetter
from typing import Optional

from anthropic import Anthropic as AnthropicModel
from langchain_community.chat_models import BedrockChat
from langchain.prompts import PromptTemplate

from .model import ModelProvider

class Bedrock(ModelProvider):
    """
    ModelProvider implementation backed by Amazon Bedrock chat models.

    Supports the Anthropic (Claude) and Meta model families hosted on
    Bedrock. Token encoding/decoding is delegated to the Anthropic tokenizer,
    which is exact for Claude models but presumably only an approximation for
    Meta models — TODO confirm this is acceptable for Meta model runs.
    """

    # Default generation parameters forwarded to the Bedrock model.
    DEFAULT_MODEL_KWARGS: dict = dict(max_tokens = 300,
                                      temperature = 0)

    def __init__(self,
                 model_name: str = "anthropic.claude-3-sonnet-20240229-v1:0",
                 model_kwargs: dict | None = None):
        """
        :param model_name: The Bedrock model ID.
            Default is 'anthropic.claude-3-sonnet-20240229-v1:0'.
        :param model_kwargs: Model configuration.
            Default is {max_tokens: 300, temperature: 0}.
        :raises NotImplementedError: if the model family is not supported.
        """
        # Only the Anthropic and Meta model families are wired up below.
        if "anthropic" not in model_name and \
           "meta" not in model_name:
            raise NotImplementedError(
                "Bedrock provider supports only 'anthropic' and 'meta' "
                f"model families, got: {model_name}")

        self.model_name = model_name
        # Copy the class-level default so later mutation of
        # self.model_kwargs cannot corrupt the shared DEFAULT_MODEL_KWARGS
        # dict for other instances (mutable shared-default pitfall).
        self.model_kwargs = dict(self.DEFAULT_MODEL_KWARGS) \
            if model_kwargs is None else model_kwargs

        # Anthropic's tokenizer is used for token counting regardless of the
        # chosen Bedrock model family.
        self.tokenizer = AnthropicModel().get_tokenizer()

        # Load the packaged prompt template shared with the Anthropic provider.
        resource_path = pkg_resources.resource_filename('needlehaystack', 'providers/Anthropic_prompt.txt')
        with open(resource_path, 'r') as file:
            self.prompt_structure = file.read()

    async def evaluate_model(self, prompt: str) -> str:
        """Direct (non-LangChain) model evaluation is not implemented for the
        Bedrock provider.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError(
            "evaluate_model is not implemented for the Bedrock provider")

    def generate_prompt(self, context: str, retrieval_question: str) -> str | list[dict[str, str]]:
        """Fill the packaged prompt template with the haystack context and the
        needle retrieval question."""
        return self.prompt_structure.format(
            retrieval_question=retrieval_question,
            context=context)

    def encode_text_to_tokens(self, text: str) -> list[int]:
        """Encode ``text`` into a list of token IDs using the Anthropic tokenizer."""
        return self.tokenizer.encode(text).ids

    def decode_tokens(self, tokens: list[int], context_length: Optional[int] = None) -> str:
        """Decode ``tokens`` back to text, keeping only the first
        ``context_length`` tokens (``None`` decodes everything)."""
        return self.tokenizer.decode(tokens[:context_length])

    def get_langchain_runnable(self, context: str):
        """
        Creates a LangChain runnable that constructs a prompt based on a given
        context and a question, queries the model via Bedrock, and returns the
        model's response.

        Args:
            context (str): The context or background information relevant to the
                user's question, baked into the chain so only the question needs
                to be supplied at invocation time.

        Returns:
            A LangChain runnable (prompt | model chain) that can be invoked with
            ``{"question": ...}`` to obtain the model's response.

        Example:
            To use the runnable:
            - Define the context and question.
            - Execute the runnable with these parameters to get the model's response.
        """
        # NOTE: fixed a stray unmatched double quote that previously appeared
        # after "direct" in this prompt template.
        template = """Human: You are a helpful AI bot that answers questions for a user. Keep your response short and direct \n
<document_content>
{context}
</document_content>
Here is the user question:
<question>
{question}
</question>
Don't give information outside the document or repeat your findings.
Assistant: Here is the most relevant information in the documents:"""

        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )
        # Create a LangChain runnable: the context is captured in a closure so
        # callers only provide the question.
        model = BedrockChat(
            model_id=self.model_name,
            model_kwargs=self.model_kwargs,
        )
        chain = ( {"context": lambda x: context,
                   "question": itemgetter("question")}
                 | prompt
                 | model
                 )
        return chain
7 changes: 5 additions & 2 deletions needlehaystack/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from . import LLMNeedleHaystackTester, LLMMultiNeedleHaystackTester
from .evaluators import Evaluator, LangSmithEvaluator, OpenAIEvaluator
from .providers import Anthropic, ModelProvider, OpenAI, Cohere
from .providers import Anthropic, ModelProvider, OpenAI, Cohere, Bedrock

load_dotenv()

Expand All @@ -15,6 +15,7 @@ class CommandArgs():
provider: str = "openai"
evaluator: str = "openai"
model_name: str = "gpt-3.5-turbo-0125"
model_kwargs: dict = field(default_factory=lambda: dict(max_tokens = 300, temperature = 0))
evaluator_model_name: Optional[str] = "gpt-3.5-turbo-0125"
needle: Optional[str] = "\nThe best thing to do in San Francisco is eat a sandwich and sit in Dolores Park on a sunny day.\n"
haystack_dir: Optional[str] = "PaulGrahamEssays"
Expand Down Expand Up @@ -60,11 +61,13 @@ def get_model_to_test(args: CommandArgs) -> ModelProvider:
"""
match args.provider.lower():
case "openai":
return OpenAI(model_name=args.model_name)
return OpenAI(model_name=args.model_name, model_kwargs=args.model_kwargs)
case "anthropic":
return Anthropic(model_name=args.model_name)
case "cohere":
return Cohere(model_name=args.model_name)
case "bedrock":
return Bedrock(model_name=args.model_name, model_kwargs=args.model_kwargs)
case _:
raise ValueError(f"Invalid provider: {args.provider}")

Expand Down
8 changes: 5 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ annotated-types==0.6.0
anthropic>=0.7.5
anyio==3.7.1
attrs==23.1.0
boto3==1.34.61
botocore>=1.34.0
certifi==2023.11.17
charset-normalizer==3.3.2
cohere>=5.1.2
Expand All @@ -20,10 +22,10 @@ idna==3.6
jsonargparse==4.27.5
jsonpatch==1.33
jsonpointer==2.4
langchain==0.1.9
langchain==0.1.12
langchain-community>=0.0.24
langchain-core>=0.1.26
langsmith>=0.1.8
langchain-core>=0.1.31
langsmith>=0.1.25
langchain_openai
langchain_anthropic
langchain_cohere
Expand Down