# app.py
import json
import os
import tempfile  # For writing uploaded files to disk before loading.

import streamlit as st
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
from htmlTemplates import css, bot_template, user_template

def export_chat_history():
    """Offer the current chat history for download as a JSON file."""
    chat_history = st.session_state.chat_history
    if chat_history:
        history_json = json.dumps(chat_history, default=lambda x: x.__dict__)
        # A file_uploader cannot choose a save destination; download_button
        # is the Streamlit way to hand a file back to the user.
        st.download_button(
            label="Download chat history",
            data=history_json,
            file_name="chat_history.json",
            mime="application/json",
        )
    else:
        st.warning("No chat history available to export")

# Function to extract text from PDF documents.
def get_pdf_text(pdf_docs):
    temp_dir = tempfile.TemporaryDirectory()  # Create a temporary directory.
    temp_filepath = os.path.join(temp_dir.name, pdf_docs.name)  # Build a file path inside it.
    with open(temp_filepath, "wb") as f:  # Open the temporary file in binary write mode.
        f.write(pdf_docs.getvalue())  # Write the uploaded PDF bytes to the temporary file.
    pdf_loader = PyPDFLoader(temp_filepath)  # Load the PDF with PyPDFLoader.
    pdf_doc = pdf_loader.load()  # Extract one Document per page.
    return pdf_doc  # Return the extracted documents.

# Function to extract text from plain-text files.
def get_text_file(docs):
    temp_dir = tempfile.TemporaryDirectory()  # Create a temporary directory.
    temp_filepath = os.path.join(temp_dir.name, docs.name)  # Build a file path inside it.
    with open(temp_filepath, "wb") as f:  # Open the temporary file in binary write mode.
        f.write(docs.getvalue())
    loader = TextLoader(temp_filepath)
    data = loader.load()
    return data

# Function to extract text from CSV files.
def get_csv_file(docs):
    temp_dir = tempfile.TemporaryDirectory()  # Create a temporary directory.
    temp_filepath = os.path.join(temp_dir.name, docs.name)  # Build a file path inside it.
    with open(temp_filepath, "wb") as f:  # Open the temporary file in binary write mode.
        f.write(docs.getvalue())
    loader = CSVLoader(file_path=temp_filepath)
    data = loader.load()
    return data

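# Note: CSVLoader yields one Document per CSV row, so large spreadsheets
# expand into many small documents before chunking.
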
# Function to extract text from JSON files.
def get_json_file(docs):
    temp_dir = tempfile.TemporaryDirectory()  # Create a temporary directory.
    temp_filepath = os.path.join(temp_dir.name, docs.name)  # Build a file path inside it.
    with open(temp_filepath, "wb") as f:  # Open the temporary file in binary write mode.
        f.write(docs.getvalue())
    loader = JSONLoader(
        file_path=temp_filepath,
        jq_schema='.messages[].content',
        text_content=False,
    )
    data = loader.load()
    return data

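# Note: the jq_schema above assumes input JSON shaped roughly like
#   {"messages": [{"content": "..."}, {"content": "..."}]}
# Files with a different layout need a different jq expression.
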
# Function to split documents into text chunks.
def get_text_chunks(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,  # Maximum characters per chunk.
        chunk_overlap=200,  # Overlap between consecutive chunks.
        length_function=len,  # Function used to measure text length.
    )
    documents = text_splitter.split_documents(documents)  # Split documents into chunks.
    return documents  # Return the chunks.

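# With these settings, a ~2,300-character document yields roughly three chunks;
# the 200-character overlap keeps text that straddles a chunk boundary visible
# to both neighboring chunks.
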
# Function to create a vector store from text chunks.
def get_vectorstore(text_chunks):
    # Load the OpenAI embedding model (text-embedding-ada-002 by default).
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(text_chunks, embeddings)  # Create a FAISS vector store.
    return vectorstore  # Return the created vector store.

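# Quick sanity check (hypothetical query) before wiring the store into a chain:
#   docs = vectorstore.similarity_search("user login feature", k=4)
#   print(docs[0].page_content)
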
def get_conversation_chain(vectorstore):
    """
    Create a conversational retrieval chain for analyzing product requirements documents.

    Args:
        vectorstore: A vector store object used for retrieval.

    Returns:
        A ConversationalRetrievalChain object, or None if creation fails.
    """
    gpt_model_name = 'gpt-3.5-turbo'  # Descriptive name for the GPT model.
    gpt_model = ChatOpenAI(model_name=gpt_model_name)  # Load the gpt-3.5-turbo chat model.
    # System message template (consider making this an argument).
    system_template = """
You are an AI assistant specialized in generating test cases for software features. Your task is to create test cases based on the list of features provided. Each test case should include:
- A title
- A description of what is being tested
- Preconditions
- Test steps
- Expected results

Instructions:
1. Read the list of features carefully.
2. For each identified feature, create one or more test cases including:
   - Title
   - Description
   - Preconditions
   - Test Steps
   - Expected Results

Output Format:
Provide the test cases in the following format:
1. Title: Brief title of the test case
   - Description: Detailed description of the test case.
   - Preconditions: List of preconditions.
   - Test Steps: Step-by-step instructions.
   - Expected Results: Expected outcome.
{context}"""
    # Build the chat prompt from the system and human message templates.
    question_prompt = "{question}"  # Consider making this an argument as well.
    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template(question_prompt),
    ]
    question_answer_prompt = ChatPromptTemplate.from_messages(messages)
    # Memory that accumulates the conversation history across turns.
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    try:
        # Create a conversational retrieval chain.
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=gpt_model,
            retriever=vectorstore.as_retriever(),
            memory=memory,
            combine_docs_chain_kwargs={"prompt": question_answer_prompt},
        )
        return conversation_chain
    except Exception as e:
        st.error(f"Error creating conversation chain: {e}")
        return None

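# Sketch of how the chain is used (see handle_userinput below): it is called
# with a dict and returns the answer plus the accumulated history, e.g.
#   result = conversation_chain({'question': 'List the login-related features.'})
#   result['answer']        # The generated test cases.
#   result['chat_history']  # Alternating Human/AI messages.
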
# Function to handle user input.
def handle_userinput(user_question):
    # Generate a response to the user question using the conversation chain.
    response = st.session_state.conversation({'question': user_question})
    # Store the updated conversation history.
    st.session_state.chat_history = response['chat_history']
    # Render the history: even indices are user turns, odd indices are bot turns.
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)

def main():
    load_dotenv()
    st.header("PRD Test Cases")
    st.write(css, unsafe_allow_html=True)
    # Initialize session state before handling any input.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        if st.session_state.conversation:
            handle_userinput(user_question)
        else:
            st.warning("Please upload and process your documents first.")
    with st.sidebar:
        openai_key = st.text_input("Paste your OpenAI API key (sk-...)", type="password")
        if openai_key:
            os.environ["OPENAI_API_KEY"] = openai_key
        st.subheader("Your documents")
        docs = st.file_uploader(
            "Upload your files here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            with st.spinner("Processing"):
                # Route each uploaded file to the matching loader by MIME type.
                doc_list = []
                for file in docs:
                    if file.type == 'text/plain':  # .txt
                        doc_list.extend(get_text_file(file))
                    elif file.type in ['application/octet-stream', 'application/pdf']:  # .pdf
                        doc_list.extend(get_pdf_text(file))
                    elif file.type == 'text/csv':  # .csv
                        doc_list.extend(get_csv_file(file))
                    elif file.type == 'application/json':  # .json
                        doc_list.extend(get_json_file(file))
                # Split the loaded documents into chunks.
                text_chunks = get_text_chunks(doc_list)
                # Create the vector store.
                vectorstore = get_vectorstore(text_chunks)
                # Create the conversation chain.
                st.session_state.conversation = get_conversation_chain(vectorstore)
        # Export chat history button.
        if st.button("Export Chat History"):
            export_path = "test.json"
            chat_history = st.session_state.chat_history
            if chat_history:
                with open(export_path, "w") as f:
                    json.dump(chat_history, f, default=lambda x: x.__dict__)
                st.success(f"Chat history exported to {export_path}.")
            else:
                st.warning("No chat history available to export.")


if __name__ == '__main__':
    main()
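
# Launch the app from a terminal with:
#   streamlit run app.py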