Skip to content

Commit

Permalink
chore: merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
batdevis committed Oct 11, 2024
2 parents d96a9f9 + 757c91c commit 9526c17
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 77 deletions.
1 change: 0 additions & 1 deletion apps/chatbot/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ CHB_AWS_ACCESS_KEY_ID=...
CHB_AWS_SECRET_ACCESS_KEY=...
CHB_AWS_DEFAULT_REGION=eu-south-1
CHB_AWS_BEDROCK_REGION=eu-west-3
CHB_AWS_S3_BUCKET=...
CHB_AWS_GUARDRAIL_ID=...
CHB_AWS_GUARDRAIL_VERSION=...
CHB_REDIS_URL=...
Expand Down
79 changes: 3 additions & 76 deletions apps/chatbot/src/modules/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from typing import List, Tuple
from chromedriver_py import binary_path

import s3fs
from bs4 import BeautifulSoup
from selenium import webdriver
import html2text

from llama_index.core import (
Settings,
Expand Down Expand Up @@ -284,81 +286,6 @@ def build_automerging_index_redis(
return automerging_index


def load_url_hash_table(
s3_bucket_name: str | None,
) -> dict:

if s3_bucket_name:
logging.info("[vector_database.py] Getting URLs hash table from S3 bucket...")
with FS.open(f"{s3_bucket_name}/hash_table.json", "r") as f:
hash_table = json.load(f)

else:
logging.info("[vector_database.py] Getting URLs hash table from local...")
with open("hash_table.json", "r") as f:
hash_table = json.load(f)

logging.info("[vector_database.py] Loaded URLs hash table successfully.")
return hash_table


def load_automerging_index_s3(
    llm: BaseLLM,
    embed_model: BaseEmbedding,
    save_dir: str,
    s3_bucket_name: str,
    chunk_sizes: List[int],
    chunk_overlap: int,
) -> VectorStoreIndex:
    """Load a persisted auto-merging vector index from an S3 bucket.

    Args:
        llm: Language model registered on the global ``Settings``.
        embed_model: Embedding model registered on the global ``Settings``.
        save_dir: Directory name (inside the bucket) where the index persists.
        s3_bucket_name: S3 bucket that contains ``save_dir``.
        chunk_sizes: Hierarchical chunk sizes for the node parser.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The loaded ``VectorStoreIndex``.
    """
    # Register models and the hierarchical parser globally before reading
    # the persisted storage, so the index is rebuilt with matching settings.
    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.node_parser = HierarchicalNodeParser.from_defaults(
        chunk_sizes=chunk_sizes,
        chunk_overlap=chunk_overlap,
    )

    logging.info(f"[vector_database.py] {save_dir} directory exists! Loading vector index...")

    # FS is the module-level s3fs filesystem; persistence lives under
    # "<bucket>/<save_dir>".
    storage_context = StorageContext.from_defaults(
        persist_dir=f"{s3_bucket_name}/{save_dir}",
        fs=FS,
    )
    index = load_index_from_storage(storage_context)

    logging.info("[vector_database.py] Loaded vector index successfully!")
    return index


def load_automerging_index(
    llm: BaseLLM,
    embed_model: BaseEmbedding,
    save_dir: str,
    chunk_sizes: List[int],
    chunk_overlap: int,
) -> VectorStoreIndex:
    """Load a persisted auto-merging vector index from a local directory.

    Args:
        llm: Language model registered on the global ``Settings``.
        embed_model: Embedding model registered on the global ``Settings``.
        save_dir: Local directory where the index was persisted.
        chunk_sizes: Hierarchical chunk sizes for the node parser.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The loaded ``VectorStoreIndex``.
    """
    # Configure the global Settings first so the index is reconstructed
    # with the same models and parser it was built with.
    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.node_parser = HierarchicalNodeParser.from_defaults(
        chunk_sizes=chunk_sizes,
        chunk_overlap=chunk_overlap,
    )

    logging.info(f"[vector_database.py] {save_dir} directory exists! Loading vector index...")

    storage_context = StorageContext.from_defaults(persist_dir=save_dir)
    index = load_index_from_storage(storage_context)

    logging.info("[vector_database.py] Loaded vector index successfully!")
    return index


def load_automerging_index_redis(
llm: BaseLLM,
embed_model: BaseEmbedding,
Expand Down

0 comments on commit 9526c17

Please sign in to comment.