Skip to content

Commit

Permalink
Reran pre-commit hooks
Browse files: browse the repository at this point in the history
  • Loading branch information
James-Osmond committed Apr 16, 2024
1 parent cbd37d0 commit 3d11dc9
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 55 deletions.
4 changes: 1 addition & 3 deletions app/home.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@

from config.logging import setup_logging
from config.settings import ENV
from hackathon.streamlit.utils import (
check_password,
)
from hackathon.streamlit.utils import check_password

get_logger = setup_logging()
logger = get_logger(__name__)
Expand Down
4 changes: 1 addition & 3 deletions hackathon/llm/chain_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
from langchain.schema.output_parser import StrOutputParser
from langchain_core.output_parsers.transform import BaseTransformOutputParser

from hackathon.llm.prompts.core import (
PromptTemplate,
)
from hackathon.llm.prompts.core import PromptTemplate


@dataclass
Expand Down
4 changes: 1 addition & 3 deletions hackathon/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@ def __init__(
self, llm_model_path: str, stop_sequences: List[str] = ["ANSWER:"]
) -> None:
self.llm_model_path = llm_model_path
self.callback_manager = CallbackManager(
[StreamingStdOutCallbackHandler()]
)
self.callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
self.stop_sequences = stop_sequences
self.initialise_llm()

Expand Down
5 changes: 1 addition & 4 deletions hackathon/llm/llm_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@ def __init__(self, config: ChainConfig, llm: LLM):
llm (LLM): to attach to the LLMChain
"""
self.chain: RunnableSequence = (
config.var_input
| config.prompt
| llm.get_llm()
| config.out_parser()
config.var_input | config.prompt | llm.get_llm() | config.out_parser()
)

def invoke_query(self, query: Dict):
Expand Down
7 changes: 1 addition & 6 deletions hackathon/llm/prompts/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@


_advice_prompt = (
B_INST
+ B_SYS
+ _advice_system_prompt
+ E_SYS
+ _advice_instruction
+ E_INST
B_INST + B_SYS + _advice_system_prompt + E_SYS + _advice_instruction + E_INST
)

ADVICE_PROMPT = PromptTemplate.from_template(_advice_prompt)
Expand Down
20 changes: 5 additions & 15 deletions hackathon/loader/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,10 @@ def _dataframe_process(self, df) -> List[Document]:
else:
df["metadata"] = {}

df["metadata"] = df["metadata"].apply(
lambda x: None if pd.isna(x) else x
)
df["metadata"] = df["metadata"].apply(lambda x: None if pd.isna(x) else x)

docs = df.apply(
lambda row: Document(
page_content=row["content"], metadata=row["metadata"]
),
lambda row: Document(page_content=row["content"], metadata=row["metadata"]),
axis=1,
).tolist()

Expand Down Expand Up @@ -127,14 +123,8 @@ def get_processor(
"""
extension = file_path.split(".")[-1].lower()
if extension == "csv":
return CSVProcessor(
source_column, metadata_columns, content_columns
)
return CSVProcessor(source_column, metadata_columns, content_columns)
elif extension == "parquet":
return ParquetProcessor(
source_column, metadata_columns, content_columns
)
return ParquetProcessor(source_column, metadata_columns, content_columns)
else:
ValueError(
f"File type {extension} does not have a supported processor"
)
ValueError(f"File type {extension} does not have a supported processor")
12 changes: 3 additions & 9 deletions hackathon/vectorstore/opensearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def __init__(self, index_name, endpoint_name=None, region=None) -> None:
self.client = None
self.endpoint_name = endpoint_name
self.index_name = index_name
self.opensearch_endpoint = self._get_opensearch_endpoint(
endpoint_name, region
)
self.opensearch_endpoint = self._get_opensearch_endpoint(endpoint_name, region)
self.client = self.get_client()

def get_client(self):
Expand Down Expand Up @@ -70,9 +68,7 @@ def _get_opensearch_endpoint(self, endpoint_name, region=None):
self.http_auth = ("admin", "admin")
return OPENSEARCH_URL
client = boto3.client("es", region_name=region)
response = client.describe_elasticsearch_domain(
DomainName=endpoint_name
)
response = client.describe_elasticsearch_domain(DomainName=endpoint_name)
return response["DomainStatus"]["Endpoints"]["vpc"]

def _put_bulk_in_opensearch(self, docs):
Expand Down Expand Up @@ -111,9 +107,7 @@ def _create_index(self, index_name=None):
if not index_name:
index_name = self.index_name
settings = {
"settings": {
"index": {"knn": True, "knn.space_type": "cosinesimil"}
}
"settings": {"index": {"knn": True, "knn.space_type": "cosinesimil"}}
}
response = self.client.indices.create(index=index_name, body=settings)
return bool(response["acknowledged"])
Expand Down
16 changes: 4 additions & 12 deletions hackathon/vectorstore/vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ def get_documents(self, limit: Optional[int] = None):
return self.vectorstore.get(limit=limit)

def search_with_score(self, query: str):
return self.vectorstore.similarity_search_with_relevance_scores(
query, k=100
)
return self.vectorstore.similarity_search_with_relevance_scores(query, k=100)

def similarity_search_with_filter(
self, query: str, filter: Optional[Dict[str, str]]
Expand Down Expand Up @@ -159,9 +157,7 @@ def get_documents(self, limit: Optional[int] = None):
return self.vectorstore.get(limit=limit)

def retrieve_data_with_relevance_scores(self, query):
return self.vectorstore.similarity_search_with_relevance_scores(
query, k=10
)
return self.vectorstore.similarity_search_with_relevance_scores(query, k=10)


class VectorStoreClient(ABC):
Expand Down Expand Up @@ -200,9 +196,7 @@ def __init__(self, embedding_function: Embeddings, name: str = "") -> None:
def store_data(self, documents):
logger.info("storing docs %s in Chroma", len(documents))
response = self.client.add_documents(documents)
logger.info(
"Put docs %s in Chroma with response %s", len(documents), response
)
logger.info("Put docs %s in Chroma with response %s", len(documents), response)
self.client.persist()
return response

Expand Down Expand Up @@ -277,9 +271,7 @@ def store_data(self, documents: List[Document]):
Args:
documents (list[Document]): list of documents to store in OpenSearch.
"""
logger.info(
f"Starting Opensearch ingestion of {len(documents)} documents"
)
logger.info(f"Starting Opensearch ingestion of {len(documents)} documents")
for batch in self._split_into_batches(documents):
try:
response = self.vectorstore.add_documents(batch)
Expand Down

0 comments on commit 3d11dc9

Please sign in to comment.