Skip to content

Commit

Permalink
chore: merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
batdevis committed Oct 11, 2024
2 parents d96a9f9 + 757c91c commit 9526c17
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 77 deletions.
1 change: 0 additions & 1 deletion apps/chatbot/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ CHB_AWS_ACCESS_KEY_ID=...
CHB_AWS_SECRET_ACCESS_KEY=...
CHB_AWS_DEFAULT_REGION=eu-south-1
CHB_AWS_BEDROCK_REGION=eu-west-3
CHB_AWS_S3_BUCKET=...
CHB_AWS_GUARDRAIL_ID=...
CHB_AWS_GUARDRAIL_VERSION=...
CHB_REDIS_URL=...
Expand Down
79 changes: 3 additions & 76 deletions apps/chatbot/src/modules/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from typing import List, Tuple
from chromedriver_py import binary_path

import s3fs
from bs4 import BeautifulSoup
from selenium import webdriver
import html2text

from llama_index.core import (
Settings,
Expand Down Expand Up @@ -284,81 +286,6 @@ def build_automerging_index_redis(
return automerging_index


def load_url_hash_table(
s3_bucket_name: str | None,
) -> dict:

if s3_bucket_name:
logging.info("[vector_database.py] Getting URLs hash table from S3 bucket...")
with FS.open(f"{s3_bucket_name}/hash_table.json", "r") as f:
hash_table = json.load(f)

else:
logging.info("[vector_database.py] Getting URLs hash table from local...")
with open("hash_table.json", "r") as f:
hash_table = json.load(f)

logging.info("[vector_database.py] Loaded URLs hash table successfully.")
return hash_table


def load_automerging_index_s3(
    llm: BaseLLM,
    embed_model: BaseEmbedding,
    save_dir: str,
    s3_bucket_name: str,
    chunk_sizes: List[int],
    chunk_overlap: int,
) -> VectorStoreIndex:
    """Load a persisted auto-merging vector index from an S3 bucket.

    Args:
        llm: Language model registered on the global ``Settings``.
        embed_model: Embedding model registered on the global ``Settings``.
        save_dir: Directory name (inside the bucket) where the index persists.
        s3_bucket_name: S3 bucket that contains ``save_dir``.
        chunk_sizes: Hierarchical chunk sizes for the node parser.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The loaded ``VectorStoreIndex``.
    """
    # Register models and the hierarchical parser globally before reading
    # the persisted storage, so the index is rebuilt with matching settings.
    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.node_parser = HierarchicalNodeParser.from_defaults(
        chunk_sizes=chunk_sizes,
        chunk_overlap=chunk_overlap,
    )

    logging.info(f"[vector_database.py] {save_dir} directory exists! Loading vector index...")

    # FS is the module-level s3fs filesystem; persistence lives under
    # "<bucket>/<save_dir>".
    storage_context = StorageContext.from_defaults(
        persist_dir=f"{s3_bucket_name}/{save_dir}",
        fs=FS,
    )
    index = load_index_from_storage(storage_context)

    logging.info("[vector_database.py] Loaded vector index successfully!")
    return index


def load_automerging_index(
    llm: BaseLLM,
    embed_model: BaseEmbedding,
    save_dir: str,
    chunk_sizes: List[int],
    chunk_overlap: int,
) -> VectorStoreIndex:
    """Load a persisted auto-merging vector index from a local directory.

    Args:
        llm: Language model registered on the global ``Settings``.
        embed_model: Embedding model registered on the global ``Settings``.
        save_dir: Local directory where the index was persisted.
        chunk_sizes: Hierarchical chunk sizes for the node parser.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The loaded ``VectorStoreIndex``.
    """
    # Configure the global Settings first so the index is reconstructed
    # with the same models and parser it was built with.
    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.node_parser = HierarchicalNodeParser.from_defaults(
        chunk_sizes=chunk_sizes,
        chunk_overlap=chunk_overlap,
    )

    logging.info(f"[vector_database.py] {save_dir} directory exists! Loading vector index...")

    storage_context = StorageContext.from_defaults(persist_dir=save_dir)
    index = load_index_from_storage(storage_context)

    logging.info("[vector_database.py] Loaded vector index successfully!")
    return index


def load_automerging_index_redis(
llm: BaseLLM,
embed_model: BaseEmbedding,
Expand Down

0 comments on commit 9526c17

Please sign in to comment.