From a2752a4b7b50db9fd8ad820752b89d791b9e06d0 Mon Sep 17 00:00:00 2001 From: Dale Mcdiarmid Date: Fri, 16 Feb 2024 19:20:55 +0000 Subject: [PATCH] simple readme --- .../llama-index/hacknernews_app/README.md | 48 +++++++++++++++++++ .../hacknernews_app/hacker_insights.py | 4 +- 2 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 blog-examples/llama-index/hacknernews_app/README.md diff --git a/blog-examples/llama-index/hacknernews_app/README.md b/blog-examples/llama-index/hacknernews_app/README.md new file mode 100644 index 0000000..a534a52 --- /dev/null +++ b/blog-examples/llama-index/hacknernews_app/README.md @@ -0,0 +1,48 @@ +# Hackbot - A Streamlit chatbot 💬 for Hacker News powered by LlamaIndex 🦙 and ClickHouse 🚀 + +Simple chatbot app that uses LlamaIndex, ClickHouse, Hacker News posts, and Stack Overflow survey results to allow an LLM (chatbot v4.0) to provide answers on people's opinions on technology. + +Answers are based on two sources held in ClickHouse: + - Hacker News posts with vector embeddings generated by the `sentence-transformers/all-MiniLM-L6-v2` model. + - Stack Overflow survey results allowing statistics to be looked up, e.g., What is the most popular web development framework? These facts are used to provide additional context for querying posts and requesting a response from the LLM. + +For example, users can query for: + +"What are people's opinions on the most popular database?" + +This requires: + +1. Establish the most popular database through a SQL query to ClickHouse. Answer: PostgreSQL. +2. Query the posts with "What are people's opinions on PostgreSQL." +3. Provide context to the LLM (ChatGPT 4.0) by asking, "What are people's opinions on PostgreSQL?" + +This relies on LlamaIndex to control the flow via the [`SQLAutoVectorQueryEngine`]() engine. + + +## Requirements + +- ClickHouse 24.1 +- Python 3.11+ + +## Data + +### Table schemas + + + +### Loading + +Hacker News posts are for the period up to 2021. 
Stack Overflow survey results are also for 2021. + +## Run application + +We recommend using a [virtual environment](). + +``` +pip install -r requirements.txt + +streamlit run hacker_insights.py +``` + +## Other capabilities + diff --git a/blog-examples/llama-index/hacknernews_app/hacker_insights.py b/blog-examples/llama-index/hacknernews_app/hacker_insights.py index a8e5949..9d7c03e 100644 --- a/blog-examples/llama-index/hacknernews_app/hacker_insights.py +++ b/blog-examples/llama-index/hacknernews_app/hacker_insights.py @@ -44,13 +44,13 @@ database = st.secrets.clickhouse.database st.set_page_config( - page_title="Get summaries of Hacker News posts enriched with Stackoverflow survey results, powered by LlamaIndex and CLickHouse", + page_title="Get summaries of Hacker News posts enriched with Stackoverflow survey results, powered by LlamaIndex and ClickHouse", page_icon="🦙🚀", layout="centered", initial_sidebar_state="auto", menu_items=None) st.title("💬HackBot powered by LlamaIndex 🦙 and ClickHouse 🚀") st.info( "Check out the full [blog post](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/) for this app", icon="📃") -st.caption("A streamlit chatbot for Hacker News powered by 💬🦙 and ClickHouse 🚀") +st.caption("A Streamlit chatbot 💬 for Hacker News powered by LlamaIndex 🦙 and ClickHouse 🚀") @st.cache_resource