import os, base64
from pathlib import Path
import hmac
import tempfile
import pandas as pd
import uuid
import streamlit as st
from langchain_community.vectorstores import AstraDB
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.memory import AstraDBChatMessageHistory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, CSVLoader, WebBaseLoader
from langchain.schema import HumanMessage, AIMessage
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableMap
from langchain.schema import StrOutputParser
from langchain.callbacks.base import BaseCallbackHandler
import openai
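
# The app expects its credentials in .streamlit/secrets.toml. A minimal sketch
# of the expected layout, inferred from the keys used below (all values are
# placeholders; exact usernames and locale strings are deployment-specific):
#
#   ASTRA_TOKEN = "AstraCS:..."
#
#   [passwords]
#   user1 = "..."
#
#   [languages]
#   user1 = "..."
#
#   [delete_option]
#   user1 = "True"
#
# Note that ASTRA_ENDPOINT is read from the environment, not from secrets.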
print("Started")
st.set_page_config(page_title='Your Enterprise Sidekick', page_icon='🚀')
# Get a unique session id for memory
if "session_id" not in st.session_state:
    st.session_state.session_id = uuid.uuid4()
# Streaming callback handler for responses
class StreamHandler(BaseCallbackHandler):
    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs):
        self.text += token
        self.container.markdown(self.text + "▌")
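
# Note: the trailing "▌" acts as a typing cursor while tokens stream in; the
# final answer is re-rendered without it via response_placeholder.markdown()
# at the end of the main flow.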
###############
### Globals ###
###############
global lang_dict
global language
global rails_dict
global session
global embedding
global vectorstore
global chat_history
global memory
# RAG options
global disable_vector_store
global strategy
global prompt_type
global custom_prompt
#################
### Functions ###
#################
# Close off the app using a password
def check_password():
    """Returns `True` if the user entered a correct password."""

    def login_form():
        """Form with widgets to collect user information"""
        with st.form("credentials"):
            st.text_input('Username', key='username')
            st.text_input('Password', type='password', key='password')
            st.form_submit_button('Login', on_click=password_entered)

    def password_entered():
        """Checks whether a password entered by the user is correct."""
        if st.session_state['username'] in st.secrets['passwords'] and hmac.compare_digest(st.session_state['password'], st.secrets.passwords[st.session_state['username']]):
            st.session_state['password_correct'] = True
            st.session_state.user = st.session_state['username']
            del st.session_state['password']  # Don't store the password.
        else:
            st.session_state['password_correct'] = False

    # Return True if the username + password is validated.
    if st.session_state.get('password_correct', False):
        return True

    # Show inputs for username + password.
    login_form()
    if "password_correct" in st.session_state:
        st.error('😕 User not known or password incorrect')
    return False
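
# All per-user customization below (language, guardrails, prompts, logos and
# the per-user vector collection) keys off the username established here.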
def logout():
    # Copy the keys first: deleting entries while iterating over the live
    # view would raise a RuntimeError.
    for key in list(st.session_state.keys()):
        del st.session_state[key]
    st.cache_resource.clear()
    st.cache_data.clear()
    st.rerun()
# Function for vectorizing uploaded data into Astra DB
def vectorize_text(uploaded_files):
    for uploaded_file in uploaded_files:
        if uploaded_file is not None:

            # Write to temporary file
            temp_dir = tempfile.TemporaryDirectory()
            file = uploaded_file
            print(f"""Processing: {file}""")
            temp_filepath = os.path.join(temp_dir.name, file.name)
            with open(temp_filepath, 'wb') as f:
                f.write(file.getvalue())

            # Process TXT
            if uploaded_file.name.endswith('txt'):
                file = [uploaded_file.read().decode()]

                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1500,
                    chunk_overlap=100
                )

                texts = text_splitter.create_documents(file, [{'source': uploaded_file.name}])
                vectorstore.add_documents(texts)
                st.info(f"{len(texts)} {lang_dict['load_text']}")

            # Process PDF
            if uploaded_file.name.endswith('pdf'):
                docs = []
                loader = PyPDFLoader(temp_filepath)
                docs.extend(loader.load())

                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1500,
                    chunk_overlap=100
                )

                pages = text_splitter.split_documents(docs)
                vectorstore.add_documents(pages)
                st.info(f"{len(pages)} {lang_dict['load_pdf']}")

            # Process CSV
            if uploaded_file.name.endswith('csv'):
                docs = []
                loader = CSVLoader(temp_filepath)
                docs.extend(loader.load())
                vectorstore.add_documents(docs)
                st.info(f"{len(docs)} {lang_dict['load_csv']}")
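
# Note: TXT and PDF uploads are chunked (1500 characters, 100 overlap) before
# embedding; CSVLoader already yields one document per row, so CSV rows are
# stored unchunked.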
# Load data from URLs
def vectorize_url(urls):
    # Create the text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=100
    )
    for url in urls:
        try:
            loader = WebBaseLoader(url)
            docs = loader.load()
            pages = text_splitter.split_documents(docs)
            print(f"Loading from URL: {pages}")
            vectorstore.add_documents(pages)
            st.info(f"{len(pages)} loaded")
        except Exception as e:
            st.info(f"An error occurred: {e}")
# Define the prompt
def get_prompt(type):
    template = ''

    if type == 'Extended results':
        print("Prompt type: Extended results")
        template = f"""You're a helpful AI assistant tasked to answer the user's questions.
You're friendly and you answer extensively with multiple sentences. You prefer to use bulletpoints to summarize.
If the question states the name of the user, just say 'Thanks, I'll use this information going forward'.
If you don't know the answer, just say 'I do not know the answer'.
Use the following context to answer the question:
{{context}}
Use the following chat history to answer the question:
{{chat_history}}
Question:
{{question}}
Answer in {language}:"""

    if type == 'Short results':
        print("Prompt type: Short results")
        template = f"""You're a helpful AI assistant tasked to answer the user's questions.
You answer in an exceptionally brief way.
If the question states the name of the user, just say 'Thanks, I'll use this information going forward'.
If you don't know the answer, just say 'I do not know the answer'.
Use the following context to answer the question:
{{context}}
Use the following chat history to answer the question:
{{chat_history}}
Question:
{{question}}
Answer in {language}:"""

    if type == 'Custom':
        print("Prompt type: Custom")
        template = custom_prompt

    return ChatPromptTemplate.from_messages([("system", template)])
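
# Note: {{context}}, {{chat_history}} and {{question}} are escaped braces, so
# they survive the f-string and become ChatPromptTemplate input variables,
# while {language} is interpolated immediately with the user's locale.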
# Get the OpenAI Chat Model
def load_model():
    print(f"""load_model""")
    return ChatOpenAI(
        temperature=0.3,
        model='gpt-4-1106-preview',
        streaming=True,
        verbose=True
    )
# Get the Retriever
def load_retriever(top_k_vectorstore):
    print(f"""load_retriever with top_k_vectorstore='{top_k_vectorstore}'""")
    # Get the Retriever from the Vectorstore
    return vectorstore.as_retriever(
        search_kwargs={"k": top_k_vectorstore}
    )
# Cache the conversation window memory, keyed on the history depth
@st.cache_resource()
def load_memory(top_k_history):
    print(f"""load_memory with top-k={top_k_history}""")
    return ConversationBufferWindowMemory(
        chat_memory=chat_history,
        return_messages=True,
        k=top_k_history,
        memory_key="chat_history",
        input_key="question",
        output_key='answer',
    )
# Chain that asks the LLM to expand one question into multiple search queries
def generate_queries():
    prompt = f"""You are a helpful assistant that generates multiple search queries based on a single input query in language {language}.
Generate multiple search queries related to: {{original_query}}
OUTPUT (4 queries):"""
    return ChatPromptTemplate.from_messages([("system", prompt)]) | model | StrOutputParser() | (lambda x: x.split("\n"))
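
# Reciprocal Rank Fusion merges the ranked result lists of the fusion queries:
# each document scores sum(1 / (rank + k)) over the lists it appears in, with
# k=60 as in the original RRF paper. Example: a document at position 1 in one
# list (rank 0) and position 3 in another (rank 2) scores 1/60 + 1/62 ≈ 0.033.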
def reciprocal_rank_fusion(results: list[list], k=60):
    from langchain.load import dumps, loads

    fused_scores = {}
    for docs in results:
        # Assumes the docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            fused_scores[doc_str] += 1 / (rank + k)

    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results
# Describe the image using the OpenAI vision model
def describeImage(image_bin, language):
    print("describeImage")
    image_base64 = base64.b64encode(image_bin).decode()

    response = openai.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    #{"type": "text", "text": "Describe the image in detail"},
                    {"type": "text", "text": f"Provide a search text for the main topic of the image written in {language}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_base64}",
                        },
                    },
                ],
            }
        ],
        max_tokens=4096,  # default max tokens is low so set higher
    )
    print(f"describeImage result: {response}")
    return response
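
# The description returned here is fed back into the chat flow as the user's
# question (see the camera_input handling further down).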
##################
### Data Cache ###
##################
# Cache localized strings
@st.cache_data()
def load_localization(locale):
    print("load_localization")
    # Load in the text bundle and filter by language locale
    df = pd.read_csv("./customizations/localization.csv")
    df = df.query(f"locale == '{locale}'")
    # Create and return a dictionary of key/values.
    lang_dict = dict(zip(df.key, df.value))
    return lang_dict

# Cache the guardrail suggestions per user
@st.cache_data()
def load_rails(username):
    print("load_rails")
    # Load in the rails bundle and filter by username
    df = pd.read_csv("./customizations/rails.csv")
    df = df.query(f"username == '{username}'")
    # Create and return a dictionary of key/values.
    rails_dict = dict(zip(df.key, df.value))
    return rails_dict
#############
### Login ###
#############
# Check for username/password and set the username accordingly
if not check_password():
    st.stop()  # Do not continue if check_password is not True.

username = st.session_state.user
language = st.secrets.languages[username]
lang_dict = load_localization(language)
#######################
### Resources Cache ###
#######################
# Cache OpenAI Embedding for future runs
@st.cache_resource(show_spinner=lang_dict['load_embedding'])
def load_embedding():
    print("load_embedding")
    # Get the OpenAI Embedding
    return OpenAIEmbeddings()

# Cache Vector Store for future runs
@st.cache_resource(show_spinner=lang_dict['load_vectorstore'])
def load_vectorstore(username):
    print(f"load_vectorstore for {username}")
    # Get the vector store from Astra DB
    return AstraDB(
        embedding=embedding,
        collection_name=f"vector_context_{username}",
        token=st.secrets["ASTRA_TOKEN"],
        api_endpoint=os.environ["ASTRA_ENDPOINT"],
    )

# Cache Chat History for future runs
@st.cache_resource(show_spinner=lang_dict['load_message_history'])
def load_chat_history(username):
    print(f"load_chat_history for {username}_{st.session_state.session_id}")
    return AstraDBChatMessageHistory(
        session_id=f"{username}_{st.session_state.session_id}",
        api_endpoint=os.environ["ASTRA_ENDPOINT"],
        token=st.secrets["ASTRA_TOKEN"],
    )
#####################
### Session state ###
#####################
# Start with empty messages, stored in session state
if 'messages' not in st.session_state:
    st.session_state.messages = [AIMessage(content=lang_dict['assistant_welcome'])]
############
### Main ###
############
# Show a custom welcome text or the default text
try:
    st.markdown(Path(f"""./customizations/welcome/{username}.md""").read_text())
except Exception:
    st.markdown(Path('./customizations/welcome/default.md').read_text())
# Show a custom logo (svg or png) or the DataStax logo
with st.sidebar:
    try:
        st.image(f"""./customizations/logo/{username}.svg""", use_column_width="always")
        st.text('')
    except Exception:
        try:
            st.image(f"""./customizations/logo/{username}.png""", use_column_width="always")
            st.text('')
        except Exception:
            st.image('./customizations/logo/default.svg', use_column_width="always")
            st.text('')
# Logout button
with st.sidebar:
    st.markdown(f"""{lang_dict['logout_caption']} :orange[{username}]""")
    logout_button = st.button(lang_dict['logout_button'])
    if logout_button:
        logout()
with st.sidebar:
    st.divider()
# Initialize
with st.sidebar:
    rails_dict = load_rails(username)
    embedding = load_embedding()
    vectorstore = load_vectorstore(username)
    chat_history = load_chat_history(username)
# Options panel
with st.sidebar:
    # Chat history settings
    disable_chat_history = st.toggle(lang_dict['disable_chat_history'])
    top_k_history = st.slider(lang_dict['k_chat_history'], 1, 50, 5, disabled=disable_chat_history)
    memory = load_memory(top_k_history if not disable_chat_history else 0)
    delete_history = st.button(lang_dict['delete_chat_history_button'], disabled=disable_chat_history)
    if delete_history:
        with st.spinner(lang_dict['deleting_chat_history']):
            memory.clear()

    # Vector store settings
    disable_vector_store = st.toggle(lang_dict['disable_vector_store'])
    top_k_vectorstore = st.slider(lang_dict['top_k_vector_store'], 1, 50, 5, disabled=disable_vector_store)
    strategy = st.selectbox(lang_dict['rag_strategy'], ('Basic Retrieval', 'Maximal Marginal Relevance', 'Fusion'), help=lang_dict['rag_strategy_help'], disabled=disable_vector_store)

    # System prompt settings
    custom_prompt_text = ''
    custom_prompt_index = 0
    try:
        custom_prompt_text = open(f"""./customizations/prompt/{username}.txt""").read()
        custom_prompt_index = 2
    except Exception:
        custom_prompt_text = open(f"""./customizations/prompt/default.txt""").read()
        custom_prompt_index = 0
    prompt_type = st.selectbox(lang_dict['system_prompt'], ('Short results', 'Extended results', 'Custom'), index=custom_prompt_index)
    custom_prompt = st.text_area(lang_dict['custom_prompt'], custom_prompt_text, help=lang_dict['custom_prompt_help'], disabled=(prompt_type != 'Custom'))

print(f"""{disable_vector_store}, {top_k_history}, {top_k_vectorstore}, {strategy}, {prompt_type}""")
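
# If a per-user prompt file exists, the 'Custom' prompt type is pre-selected
# (index 2); otherwise the default file is loaded and 'Short results' is used.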
with st.sidebar:
    st.divider()
# Include the upload form for new data to be vectorized
with st.sidebar:
    uploaded_files = st.file_uploader(lang_dict['load_context'], type=['txt', 'pdf', 'csv'], accept_multiple_files=True)
    upload = st.button(lang_dict['load_context_button'])
    if upload and uploaded_files:
        vectorize_text(uploaded_files)
# Include the upload form for URLs to be vectorized
with st.sidebar:
    urls = st.text_area(lang_dict['load_from_urls'], help=lang_dict['load_from_urls_help'])
    urls = urls.split(',')
    upload = st.button(lang_dict['load_from_urls_button'])
    if upload and urls:
        vectorize_url(urls)
# Drop the vector data and start from scratch
if (username in st.secrets['delete_option'] and st.secrets.delete_option[username] == 'True'):
    with st.sidebar:
        st.caption(lang_dict['delete_context'])
        submitted = st.button(lang_dict['delete_context_button'])
        if submitted:
            with st.spinner(lang_dict['deleting_context']):
                vectorstore.clear()
                memory.clear()
                st.session_state.messages = [AIMessage(content=lang_dict['assistant_welcome'])]
with st.sidebar:
    st.divider()
# Draw rails
with st.sidebar:
    st.subheader(lang_dict['rails_1'])
    st.caption(lang_dict['rails_2'])
    for i in rails_dict:
        st.markdown(f"{i}. {rails_dict[i]}")
# Draw all messages, both user and agent so far (every time the app reruns)
for message in st.session_state.messages:
    st.chat_message(message.type).markdown(message.content)
# Now get a prompt from a user
question = st.chat_input(lang_dict['assistant_question'])
with st.sidebar:
    st.divider()
    picture = st.camera_input(lang_dict['take_picture'])
    if picture:
        response = describeImage(picture.getvalue(), language)
        picture_desc = response.choices[0].message.content
        question = picture_desc
if question:
    print(f"Got question: {question}")

    # Add the prompt to messages, stored in session state
    st.session_state.messages.append(HumanMessage(content=question))

    # Draw the prompt on the page
    print(f"Draw prompt")
    with st.chat_message('human'):
        st.markdown(question)

    # Get model, retriever
    model = load_model()
    retriever = load_retriever(top_k_vectorstore)

    # RAG Strategy
    content = ''
    fusion_queries = []
    relevant_documents = []
    if not disable_vector_store:
        if strategy == 'Basic Retrieval':
            # Basic naive RAG
            relevant_documents = retriever.get_relevant_documents(query=question, k=top_k_vectorstore)
        if strategy == 'Maximal Marginal Relevance':
            relevant_documents = vectorstore.max_marginal_relevance_search(query=question, k=top_k_vectorstore)
        if strategy == 'Fusion':
            # Fusion: generate new queries and retrieve the most relevant documents based on those
            # (bound to a new name so the generate_queries function is not shadowed)
            fusion_chain = generate_queries()
            fusion_queries = fusion_chain.invoke({"original_query": question})
            print(f"""Fusion queries: {fusion_queries}""")

            content += f"""
*{lang_dict['using_fusion_queries']}*
"""
            for fq in fusion_queries:
                content += f"""📙 :orange[{fq}]
"""

            # Write the generated fusion queries
            with st.chat_message('assistant'):
                st.markdown(content)
            # Add the answer to the messages session state
            st.session_state.messages.append(AIMessage(content=content))

            chain = fusion_chain | retriever.map() | reciprocal_rank_fusion
            relevant_documents = chain.invoke({"original_query": question})
            print(f"""Fusion results: {relevant_documents}""")
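
    # Note: reciprocal_rank_fusion returns (document, score) tuples rather than
    # bare documents, which is why the sources handling below unpacks doc[0]
    # when the Fusion strategy is active.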
    # Get the results from Langchain
    print(f"Chat message")
    with st.chat_message('assistant'):
        content = ''

        # UI placeholder to start filling with agent response
        response_placeholder = st.empty()

        # Get chat history
        history = memory.load_memory_variables({})
        print(f"Using memory: {history}")

        # Create the chain
        inputs = RunnableMap({
            'context': lambda x: x['context'],
            'chat_history': lambda x: x['chat_history'],
            'question': lambda x: x['question']
        })
        print(f"Using inputs: {inputs}")
        chain = inputs | get_prompt(prompt_type) | model
        print(f"Using chain: {chain}")

        # Call the chain and stream the results into the UI
        response = chain.invoke({'question': question, 'chat_history': history, 'context': relevant_documents}, config={'callbacks': [StreamHandler(response_placeholder)]})
        print(f"Response: {response}")
        content += response.content

        # Add the result to memory (without the sources)
        memory.save_context({'question': question}, {'answer': content})

        # Write the sources used
        if disable_vector_store:
            content += f"""
*{lang_dict['no_context']}*
"""
        else:
            content += f"""
*{lang_dict['sources_used']}*
"""
            sources = []
            for doc in relevant_documents:
                if strategy == 'Fusion':
                    doc = doc[0]
                print(f"""DOC: {doc}""")
                source = doc.metadata['source']
                if source not in sources:
                    content += f"""📙 :orange[{os.path.basename(os.path.normpath(source))}]
"""
                    sources.append(source)

        # Write the history used
        if disable_chat_history:
            content += f"""
*{lang_dict['no_chat_history']}*
"""
        else:
            # Each exchange is stored as two messages (human + AI), hence the division by 2
            content += f"""
*{lang_dict['chat_history_used']}: ({int(len(history['chat_history'])/2)} / {top_k_history})*
"""

        # Write the final answer without the cursor
        response_placeholder.markdown(content)
        # Add the answer to the messages session state
        st.session_state.messages.append(AIMessage(content=content))
with st.sidebar:
    st.caption("v231227.01")