Skip to content

Commit

Permalink
clean up project
Browse files Browse the repository at this point in the history
  • Loading branch information
a-gleeson committed Apr 21, 2024
1 parent 997112a commit 1821559
Show file tree
Hide file tree
Showing 11 changed files with 131 additions and 268 deletions.
12 changes: 9 additions & 3 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
export ENV="dev"
export PROJECT_PATH="/Users/<xx>/"
export OPENSEARCH_URL="https://localhost:9200"
export S3_URL="http://localhost:9000"

export LOADER_CONFIG="file_loader" # defaults to s3_loader
export VECTOR_STORE_CONFIG="opensearch" # defaults to opensearch, chroma is another option
export VECTOR_STORE_CONFIG="opensearch" # defaults to opensearch
export LLM_MODEL="local_llm" # defaults to hosted_llm

# export S3_LOADER_FILE_NAME="all_data_null_test_3.parquet"
export SUMMARISE_API="xxxxxxx"
export SUMMARISE_URL="https://xxxx.amazonaws.com/api"
export FACTCHECK_API="xxxxxxx"
export FACTCHECK_URL="https://xxxx.amazonaws.com/api"
export GLOSSERY_API="xxxxxxx"
export GLOSSERY_URL="https://xxxx.amazonaws.com/api"
export CONVERSATION_API="xxxxxxx"
export CONVERSATION_URL="https://xxxx.amazonaws.com/api"
15 changes: 2 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# hackathon



## Workflow

## How it works (for the hackathon)
Expand All @@ -12,11 +11,6 @@

# Set up

https://python.langchain.com/docs/integrations/chat/llama2_chat
https://python.langchain.com/docs/templates/llama2-functions
https://huggingface.co/blog/llama2#how-to-prompt-llama-2
https://python.langchain.com/docs/integrations/llms/llamacpp#grammars

## 1. pyenv

Install here: [https://github.com/pyenv/pyenv#homebrew-on-macos]
Expand Down Expand Up @@ -207,9 +201,7 @@ docker-compose down

check opensearch by visiting http://localhost:5601/app/login? or running `curl https://localhost:9200 -ku 'admin:admin'`

## Sagemaker setup
- Launch a SageMaker Notebook from SageMaker > Notebook > Notebook instances > Create notebook instance
- Select `ml.g4dn.xlarge` instance type (see [https://aws.amazon.com/sagemaker/pricing/] for pricing)
## Conda setup

### Install Python dependencies

Expand All @@ -219,11 +211,8 @@ Create a new terminal and run the following:
# Switch to a bash shell
bash

# Change to the repo root
cd ~/SageMaker/hackathon

# Activate a Python 3.10 environment pre-configured with PyTorch
conda create -n hackathon python=3.10.13
# Activate a Python 3.10 environment
conda create -n hackathon python=$(cat .python-version)
conda activate hackathon

Expand Down
7 changes: 0 additions & 7 deletions app/home.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,12 @@
from PIL import Image

from config.logging import setup_logging
from config.settings import ENV
from hackathon.streamlit.utils import check_password

get_logger = setup_logging()
logger = get_logger(__name__)

st.set_page_config(page_title="QuickQuill", page_icon="memo", layout="wide")

# Password protection of pages
if ENV.upper() == "PROD" and not check_password():
st.stop() # Do not continue if check_password is not True.


# Image loading
def image_to_base64(image):
buffered = io.BytesIO()
Expand Down
8 changes: 0 additions & 8 deletions app/pages/2_Transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,15 @@
import os

import streamlit as st
from PIL import Image
from streamlit_gov_uk_components import gov_uk_checkbox

from config.logging import setup_logging
from config.settings import ENV
from hackathon.streamlit.utils import check_password
from hackathon.transcripts.transcript_handling import Transcript

get_logger = setup_logging()
logger = get_logger(__name__)

st.set_page_config(page_title="QuickQuill", page_icon="memo", layout="wide")

# Password protection of pages
if ENV.upper() == "PROD" and not check_password():
st.stop() # Do not continue if check_password is not True.

header_css = """
<style>
.header {
Expand Down
16 changes: 5 additions & 11 deletions app/pages/3_Summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,14 @@

from config.logging import setup_logging
from config.settings import ENV
from hackathon.api import conversation_api, fact_check_api, glossery_api, summary_api
from hackathon.streamlit.utils import check_password
from hackathon.llm.llm_api import conversation_api, fact_check_api, glossery_api, summary_api
from hackathon.transcripts.transcript_handling import Transcript

get_logger = setup_logging()
logger = get_logger(__name__)

st.set_page_config(page_title="QuickQuill", page_icon="memo", layout="wide")

# Password protection of pages
if ENV.upper() == "PROD" and not check_password():
st.stop() # Do not continue if check_password is not True.


# Image loading
def image_to_base64(image):
buffered = io.BytesIO()
Expand Down Expand Up @@ -106,7 +100,7 @@ def llm_summarise(transcript: str) -> str:
post_response = summary_api.invoke_post(transcript)
fact_check_response = fact_check_api.invoke_post(transcript)
conversation_response = conversation_api.invoke_post(transcript)
time.sleep(15)
time.sleep(20)

get_summary_response = summary_api.invoke_get(post_response["conversationId"])
get_fact_response = fact_check_api.invoke_get(fact_check_response["conversationId"])
Expand Down Expand Up @@ -138,16 +132,16 @@ def query_llm(prompt: str, transcript: str, conversationId) -> str:
chat_response = conversation_api.invoke_get(query_response["conversationId"])
return chat_response


with st.expander("#### Upload transcript", expanded=False):
data= ""
with st.expander("#### Upload transcript", expanded=True):
data_path = st.file_uploader(label="Upload transcript:")
if data_path is not None:
transcript = Transcript(data_path)
data = str(transcript)
st.session_state.transcript_uploaded = True

returned_data = {}
with st.expander("#### Generate summary", expanded=False):
with st.expander("#### Generate summary", expanded=True):
if not st.session_state.transcript_uploaded:
st.error("Upload meeting transcript", icon="⚠️")
else:
Expand Down
1 change: 0 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ volumes:
opensearch-data1:
opensearch-data2:
data-volume: {}
s3-volume: {}

networks:
opensearch-net:
4 changes: 0 additions & 4 deletions hackathon/api.py → hackathon/llm/llm_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import time

import requests

from config.settings import (
Expand All @@ -14,9 +12,7 @@
SUMMARISE_URL,
)


class API:

def __init__(self, api_key, url):
self.api_key = api_key
self.url = url
Expand Down
103 changes: 13 additions & 90 deletions hackathon/streamlit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@
)
from hackathon.vectorstore.opensearch import OpensearchClient
from hackathon.vectorstore.vectorstore import (
ChromaClient,
ChromaStore,
OpensearchClientStore,
OpenSearchStore,
)
Expand All @@ -60,61 +58,6 @@
logger = get_logger(__name__)
cwd = os.getcwd()


def _get_session():
runtime = get_instance()
session_id = get_script_run_ctx().session_id
session_info = runtime._session_mgr.get_session_info(session_id)
if session_info is None:
raise RuntimeError("Couldn't get your Streamlit Session object.")
return session_info.session


def get_password():
secret_name = "streamlit-access-password"

# Create a Secrets Manager client
session = boto3.session.Session()
client = session.client(service_name="secretsmanager", region_name=AWS_REGION)

try:
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
except Exception as e:
# For a list of exceptions thrown, see
# https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
raise e

secret = json.loads(get_secret_value_response["SecretString"])
return secret["streamlit-access-password"]


def check_password():
"""Returns `True` if the user had the correct password."""
logger.debug("Checking password ... ")

def password_entered():
"""Checks whether a password entered by the user is correct."""
if hmac.compare_digest(st.session_state["password"], get_password()):
st.session_state["password_correct"] = True
del st.session_state["password"] # Don't store the password.
else:
st.session_state["password_correct"] = False

# Return True if the password is validated.
if st.session_state.get("password_correct", False):
logger.debug("password correct ...")
return True

# Show input for password.
st.text_input(
"Password", type="password", on_change=password_entered, key="password"
)
if "password_correct" in st.session_state:
logger.debug("password incorrect ...")
st.error("😕 Password incorrect")
return False


def initialise_llm_runner():
# Initialise the new embedder
logger.info("Initialising LLM Runner...")
Expand All @@ -127,28 +70,11 @@ def initialise_llm_runner():
EMBEDDING_ENDPOINT_NAME, AWS_REGION
)

if VECTOR_STORE_CONFIG == "chroma":
vector_store = ChromaStore(
embedding_function=st_embedder,
collection_name=OPENSEARCH_INDEX_NAME,
)
else:
if "skills_os_client" not in st.session_state:
st.session_state["skills_os_client"] = OpensearchClient(
OPENSEARCH_SKILLS_INDEX_NAME,
OPENSEARCH_ENDPOINT_NAME,
AWS_REGION,
)
if "vacancy_os_client" not in st.session_state:
st.session_state["vacancy_os_client"] = OpensearchClient(
OPENSEARCH_INDEX_NAME, OPENSEARCH_ENDPOINT_NAME, AWS_REGION
)

vector_store = OpenSearchStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
)
vector_store = OpenSearchStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
)

if LLM_MODEL == "local_llm":
llm_model_path = f"{PROJECT_PATH}/models/llama-2-7b-chat.Q4_K_M.gguf"
Expand Down Expand Up @@ -177,17 +103,14 @@ def initialise_vector_store_loader():
EMBEDDING_ENDPOINT_NAME, AWS_REGION
)

if VECTOR_STORE_CONFIG == "chroma":
vector_store = ChromaClient(st_embedder, OPENSEARCH_INDEX_NAME)
else:
if "vacancy_os_client" not in st.session_state:
st.session_state["vacancy_os_client"] = OpensearchClient(
OPENSEARCH_INDEX_NAME, OPENSEARCH_ENDPOINT_NAME, AWS_REGION
)
vector_store = OpensearchClientStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
if "vacancy_os_client" not in st.session_state:
st.session_state["vacancy_os_client"] = OpensearchClient(
OPENSEARCH_INDEX_NAME, OPENSEARCH_ENDPOINT_NAME, AWS_REGION
)
vector_store = OpensearchClientStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
)

if LOADER_CONFIG == "file_loader":
Expand Down
Loading

0 comments on commit 1821559

Please sign in to comment.