Skip to content

Commit

Permalink
clean up project
Browse files Browse the repository at this point in the history
  • Loading branch information
a-gleeson committed Apr 21, 2024
1 parent 997112a commit 1821559
Show file tree
Hide file tree
Showing 11 changed files with 131 additions and 268 deletions.
12 changes: 9 additions & 3 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
export ENV="dev"
export PROJECT_PATH="/Users/<xx>/"
export OPENSEARCH_URL="https://localhost:9200"
export S3_URL="http://localhost:9000"

export LOADER_CONFIG="file_loader" # defaults to s3_loader
export VECTOR_STORE_CONFIG="opensearch" # defaults to opensearch, chroma is another option
export VECTOR_STORE_CONFIG="opensearch" # defaults to opensearch
export LLM_MODEL="local_llm" # defaults to hosted_llm

# export S3_LOADER_FILE_NAME="all_data_null_test_3.parquet"
export SUMMARISE_API="xxxxxxx"
export SUMMARISE_URL="https://xxxx.amazonaws.com/api"
export FACTCHECK_API="xxxxxxx"
export FACTCHECK_URL="https://xxxx.amazonaws.com/api"
export GLOSSERY_API="xxxxxxx"
export GLOSSERY_URL="https://xxxx.amazonaws.com/api"
export CONVERSATION_API="xxxxxxx"
export CONVERSATION_URL="https://xxxx.amazonaws.com/api"
15 changes: 2 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# hackathon



## Workflow

## How it works (for the hackathon)
Expand All @@ -12,11 +11,6 @@

# Set up

https://python.langchain.com/docs/integrations/chat/llama2_chat
https://python.langchain.com/docs/templates/llama2-functions
https://huggingface.co/blog/llama2#how-to-prompt-llama-2
https://python.langchain.com/docs/integrations/llms/llamacpp#grammars

## 1. pyenv

Install here: [https://github.com/pyenv/pyenv#homebrew-on-macos]
Expand Down Expand Up @@ -207,9 +201,7 @@ docker-compose down

check opensearch by visiting http://localhost:5601/app/login? or running `curl https://localhost:9200 -ku 'admin:admin'`

## Sagemaker setup
- Launch a SageMaker Notebook from SageMaker > Notebook > Notebook instances > Create notebook instance
- Select `ml.g4dn.xlarge` instance type (see [https://aws.amazon.com/sagemaker/pricing/] for pricing)
## Conda setup

### Install Python dependencies

Expand All @@ -219,11 +211,8 @@ Create a new terminal and run the following:
# Switch to a bash shell
bash

# Change to the repo root
cd ~/SageMaker/hackathon

# Activate a Python 3.10 environment pre-configured with PyTorch
conda create -n hackathon python=3.10.13
# Activate a Python 3.10 environment
conda create -n hackathon python=$(cat .python-version)
conda activate hackathon

Expand Down
7 changes: 0 additions & 7 deletions app/home.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,12 @@
from PIL import Image

from config.logging import setup_logging
from config.settings import ENV
from hackathon.streamlit.utils import check_password

get_logger = setup_logging()
logger = get_logger(__name__)

st.set_page_config(page_title="QuickQuill", page_icon="memo", layout="wide")

# Password protection of pages
if ENV.upper() == "PROD" and not check_password():
st.stop() # Do not continue if check_password is not True.


# Image loading
def image_to_base64(image):
buffered = io.BytesIO()
Expand Down
8 changes: 0 additions & 8 deletions app/pages/2_Transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,15 @@
import os

import streamlit as st
from PIL import Image
from streamlit_gov_uk_components import gov_uk_checkbox

from config.logging import setup_logging
from config.settings import ENV
from hackathon.streamlit.utils import check_password
from hackathon.transcripts.transcript_handling import Transcript

get_logger = setup_logging()
logger = get_logger(__name__)

st.set_page_config(page_title="QuickQuill", page_icon="memo", layout="wide")

# Password protection of pages
if ENV.upper() == "PROD" and not check_password():
st.stop() # Do not continue if check_password is not True.

header_css = """
<style>
.header {
Expand Down
16 changes: 5 additions & 11 deletions app/pages/3_Summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,14 @@

from config.logging import setup_logging
from config.settings import ENV
from hackathon.api import conversation_api, fact_check_api, glossery_api, summary_api
from hackathon.streamlit.utils import check_password
from hackathon.llm.llm_api import conversation_api, fact_check_api, glossery_api, summary_api
from hackathon.transcripts.transcript_handling import Transcript

get_logger = setup_logging()
logger = get_logger(__name__)

st.set_page_config(page_title="QuickQuill", page_icon="memo", layout="wide")

# Password protection of pages
if ENV.upper() == "PROD" and not check_password():
st.stop() # Do not continue if check_password is not True.


# Image loading
def image_to_base64(image):
buffered = io.BytesIO()
Expand Down Expand Up @@ -106,7 +100,7 @@ def llm_summarise(transcript: str) -> str:
post_response = summary_api.invoke_post(transcript)
fact_check_response = fact_check_api.invoke_post(transcript)
conversation_response = conversation_api.invoke_post(transcript)
time.sleep(15)
time.sleep(20)

get_summary_response = summary_api.invoke_get(post_response["conversationId"])
get_fact_response = fact_check_api.invoke_get(fact_check_response["conversationId"])
Expand Down Expand Up @@ -138,16 +132,16 @@ def query_llm(prompt: str, transcript: str, conversationId) -> str:
chat_response = conversation_api.invoke_get(query_response["conversationId"])
return chat_response


with st.expander("#### Upload transcript", expanded=False):
data= ""
with st.expander("#### Upload transcript", expanded=True):
data_path = st.file_uploader(label="Upload transcript:")
if data_path is not None:
transcript = Transcript(data_path)
data = str(transcript)
st.session_state.transcript_uploaded = True

returned_data = {}
with st.expander("#### Generate summary", expanded=False):
with st.expander("#### Generate summary", expanded=True):
if not st.session_state.transcript_uploaded:
st.error("Upload meeting transcript", icon="⚠️")
else:
Expand Down
1 change: 0 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ volumes:
opensearch-data1:
opensearch-data2:
data-volume: {}
s3-volume: {}

networks:
opensearch-net:
4 changes: 0 additions & 4 deletions hackathon/api.py → hackathon/llm/llm_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import time

import requests

from config.settings import (
Expand All @@ -14,9 +12,7 @@
SUMMARISE_URL,
)


class API:

def __init__(self, api_key, url):
self.api_key = api_key
self.url = url
Expand Down
103 changes: 13 additions & 90 deletions hackathon/streamlit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@
)
from hackathon.vectorstore.opensearch import OpensearchClient
from hackathon.vectorstore.vectorstore import (
ChromaClient,
ChromaStore,
OpensearchClientStore,
OpenSearchStore,
)
Expand All @@ -60,61 +58,6 @@
logger = get_logger(__name__)
cwd = os.getcwd()


def _get_session():
runtime = get_instance()
session_id = get_script_run_ctx().session_id
session_info = runtime._session_mgr.get_session_info(session_id)
if session_info is None:
raise RuntimeError("Couldn't get your Streamlit Session object.")
return session_info.session


def get_password():
secret_name = "streamlit-access-password"

# Create a Secrets Manager client
session = boto3.session.Session()
client = session.client(service_name="secretsmanager", region_name=AWS_REGION)

try:
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
except Exception as e:
# For a list of exceptions thrown, see
# https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
raise e

secret = json.loads(get_secret_value_response["SecretString"])
return secret["streamlit-access-password"]


def check_password():
"""Returns `True` if the user had the correct password."""
logger.debug("Checking password ... ")

def password_entered():
"""Checks whether a password entered by the user is correct."""
if hmac.compare_digest(st.session_state["password"], get_password()):
st.session_state["password_correct"] = True
del st.session_state["password"] # Don't store the password.
else:
st.session_state["password_correct"] = False

# Return True if the password is validated.
if st.session_state.get("password_correct", False):
logger.debug("password correct ...")
return True

# Show input for password.
st.text_input(
"Password", type="password", on_change=password_entered, key="password"
)
if "password_correct" in st.session_state:
logger.debug("password incorrect ...")
st.error("😕 Password incorrect")
return False


def initialise_llm_runner():
# Initialise the new embedder
logger.info("Initialising LLM Runner...")
Expand All @@ -127,28 +70,11 @@ def initialise_llm_runner():
EMBEDDING_ENDPOINT_NAME, AWS_REGION
)

if VECTOR_STORE_CONFIG == "chroma":
vector_store = ChromaStore(
embedding_function=st_embedder,
collection_name=OPENSEARCH_INDEX_NAME,
)
else:
if "skills_os_client" not in st.session_state:
st.session_state["skills_os_client"] = OpensearchClient(
OPENSEARCH_SKILLS_INDEX_NAME,
OPENSEARCH_ENDPOINT_NAME,
AWS_REGION,
)
if "vacancy_os_client" not in st.session_state:
st.session_state["vacancy_os_client"] = OpensearchClient(
OPENSEARCH_INDEX_NAME, OPENSEARCH_ENDPOINT_NAME, AWS_REGION
)

vector_store = OpenSearchStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
)
vector_store = OpenSearchStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
)

if LLM_MODEL == "local_llm":
llm_model_path = f"{PROJECT_PATH}/models/llama-2-7b-chat.Q4_K_M.gguf"
Expand Down Expand Up @@ -177,17 +103,14 @@ def initialise_vector_store_loader():
EMBEDDING_ENDPOINT_NAME, AWS_REGION
)

if VECTOR_STORE_CONFIG == "chroma":
vector_store = ChromaClient(st_embedder, OPENSEARCH_INDEX_NAME)
else:
if "vacancy_os_client" not in st.session_state:
st.session_state["vacancy_os_client"] = OpensearchClient(
OPENSEARCH_INDEX_NAME, OPENSEARCH_ENDPOINT_NAME, AWS_REGION
)
vector_store = OpensearchClientStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
if "vacancy_os_client" not in st.session_state:
st.session_state["vacancy_os_client"] = OpensearchClient(
OPENSEARCH_INDEX_NAME, OPENSEARCH_ENDPOINT_NAME, AWS_REGION
)
vector_store = OpensearchClientStore(
st_embedder,
OPENSEARCH_INDEX_NAME,
st.session_state["vacancy_os_client"],
)

if LOADER_CONFIG == "file_loader":
Expand Down
Loading

0 comments on commit 1821559

Please sign in to comment.