diff --git a/config/settings.py b/config/settings.py
new file mode 100644
index 0000000..910b853
--- /dev/null
+++ b/config/settings.py
@@ -0,0 +1,43 @@
+import os
+
+from dotenv import find_dotenv, load_dotenv
+
+_ = load_dotenv(find_dotenv())
+
+LOG_LEVEL = os.environ.get("LOG_LEVEL", "DEBUG")
+
+PROJECT_PATH = os.environ.get("PROJECT_PATH")
+OPENSEARCH_URL = os.environ.get("OPENSEARCH_URL")
+OPENSEARCH_BATCH_SIZE = int(os.environ.get("OPENSEARCH_BATCH_SIZE", 500))
+OPENSEARCH_ENDPOINT_NAME = os.environ.get("OPENSEARCH_ENDPOINT_NAME", "vstore")
+OPENSEARCH_INDEX_NAME = os.environ.get("OPENSEARCH_INDEX_NAME", "")
+EMBEDDING_ENDPOINT_NAME = os.environ.get(
+    "EMBEDDING_ENDPOINT_NAME", "huggingface-sentencesimilarity"
+)
+
+S3_LOADER_BUCKET = os.environ.get("S3_LOADER_BUCKET", "")
+S3_LOADER_FILE_NAME = os.environ.get("S3_LOADER_FILE_NAME", "")
+AWS_REGION = os.environ.get("AWS_REGION", "eu-west-2")
+ENV = os.environ.get("ENV", "prod")
+
+
+LOADER_CONFIG = os.environ.get("LOADER_CONFIG", "s3_loader")  # "file_loader"
+VECTOR_STORE_CONFIG = os.environ.get(
+    "VECTOR_STORE_CONFIG", "opensearch"
+)  # "opensearch"
+LLM_MODEL = os.environ.get("LLM_MODEL", "hosted_llm")  # "local_llm"
+
+AWS_SERVICE_ROLE_ARN = os.environ.get("AWS_SERVICE_ROLE_ARN", "arn:aws:iam::")
+AWS_SAGEMAKER_ENDPOINT = os.environ.get(
+    "AWS_SAGEMAKER_ENDPOINT", "llama-meta-textgeneration"
+)
+
+MODEL = "claude-v3-sonnet"
+SUMMARISE_API = os.environ.get("SUMMARISE_API")
+SUMMARISE_URL = os.environ.get("SUMMARISE_URL")
+FACTCHECK_API = os.environ.get("FACTCHECK_API")
+FACTCHECK_URL = os.environ.get("FACTCHECK_URL")
+GLOSSERY_API = os.environ.get("GLOSSERY_API")
+GLOSSERY_URL = os.environ.get("GLOSSERY_URL")
+CONVERSATION_API = os.environ.get("CONVERSATION_API")
+CONVERSATION_URL = os.environ.get("CONVERSATION_URL")
diff --git a/hackathon/constants/constants.py b/hackathon/constants/constants.py
new file mode 100644
index 0000000..10cba26
--- /dev/null
+++ b/hackathon/constants/constants.py
@@ -0,0 +1,2 @@
+METADATA_COLUMNS = []
+CONTENT_COLUMNS = [""]
diff --git a/hackathon/llm/chain_config.py b/hackathon/llm/chain_config.py
new file mode 100644
index 0000000..c1ecae3
--- /dev/null
+++ b/hackathon/llm/chain_config.py
@@ -0,0 +1,75 @@
+from dataclasses import dataclass
+from operator import itemgetter
+from typing import List
+
+from langchain.schema.output_parser import StrOutputParser
+from langchain_core.output_parsers.transform import BaseTransformOutputParser
+
+from hackathon.llm.prompts.core import PromptTemplate
+
+
+@dataclass
+class ChainType:
+    """
+    A chain type that a ChainConfig can be set to; this determines which LLM chain the config is set up with.
+    """
+
+    identifier: str
+    description: str
+
+
+SINGLE_CHAIN = ChainType(
+    identifier="SINGLE_CHAIN",
+    description="A simple chain which takes var_input for the prompt, a prompt, an llm and an output parser",
+)
+
+
+@dataclass
+class ChainConfig:
+    """
+    Configuration for an LLM chain.
+    The class uses a custom initialisation method to set up various configuration parameters.
+
+    Attributes:
+        name (str): The name of the LLM chain.
+        prompt (PromptTemplate): prompt template for the chain to use.
+        input_values (List[str]): input values the prompt takes; stored as var_input, mapping each key to itemgetter(key).
+        input_format (str): format of the input for the input values; defaults to "Dict" (currently not functional).
+        out_parser (BaseTransformOutputParser): output parser for the LLM chain to use.
+        chain_type (ChainType): the kind of chain this config builds; defaults to SINGLE_CHAIN.
+    """
+
+    name: str
+    prompt: PromptTemplate
+    input_values: List[str]
+    input_format: str = "Dict"
+    # context:Optional[str] = None
+    out_parser: BaseTransformOutputParser = StrOutputParser
+    chain_type: ChainType = SINGLE_CHAIN
+
+    def __init__(
+        self,
+        name: str,
+        prompt: PromptTemplate,
+        chain_type: ChainType = SINGLE_CHAIN,
+        input_values: List[str] = ["input"],
+        input_format: str = "Dict",
+        # context: Optional[str] = None,
+        out_parser=StrOutputParser,
+    ):
+        self.name = name
+        self.prompt = prompt
+        self.input_format = input_format
+        self.var_input = {key: itemgetter(key) for key in input_values}
+        self.out_parser = out_parser
+        self.chain_type = chain_type
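+
+
+# Usage sketch (illustrative; `some_prompt` and `llm` below are hypothetical
+# stand-ins, not names defined in this module): a ChainConfig with chain_type
+# SINGLE_CHAIN is intended to be assembled into a LangChain LCEL pipeline,
+# roughly:
+#
+#   config = ChainConfig(name="advice", prompt=some_prompt, input_values=["input"])
+#   chain = config.var_input | config.prompt | llm | config.out_parser()
+#   chain.invoke({"input": "..."})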
+ """ + + name: str + prompt: PromptTemplate + input_values: List[str] + input_format: str = "Dict" + # context:Optional[str] = None + out_parser: BaseTransformOutputParser = StrOutputParser + chain_type: ChainType = SINGLE_CHAIN + + def __init__( + self, + name: str, + prompt: PromptTemplate, + chain_type: ChainType = SINGLE_CHAIN, + input_values: List[str] = ["input"], + input_format: str = "Dict", + # context: Optional[str] = None, + out_parser=StrOutputParser, + ): + self.name = name + self.prompt = prompt + self.input_format = input_format + self.var_input = {key: itemgetter(key) for key in input_values} + self.out_parser = out_parser + self.chain_type = chain_type diff --git a/hackathon/llm/prompts/core.py b/hackathon/llm/prompts/core.py new file mode 100644 index 0000000..60e72eb --- /dev/null +++ b/hackathon/llm/prompts/core.py @@ -0,0 +1,41 @@ +from langchain.prompts.prompt import PromptTemplate + +B_INST, E_INST = "[INST]", "[/INST]" +B_SYS, E_SYS = "<>\n", "\n<>\n\n" + +_advice_system_prompt = """ + +""" +_advice_instruction = """ +=== === +{} +=== === +{context} +======= +Question: , {}. +======= +Answer: +""" + + +_advice_prompt = ( + B_INST + B_SYS + _advice_system_prompt + E_SYS + _advice_instruction + E_INST +) + +ADVICE_PROMPT = PromptTemplate.from_template(_advice_prompt) + + +SUMMARISE_PROMPT = PromptTemplate.from_template( + """ +Summarise the + +: {} +---------------------- +ANSWER: +""" +) + +document_retrieval_prompt = PromptTemplate.from_template( + """ +""" +)