-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapp.py
66 lines (52 loc) · 2.2 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import json
import os
from dotenv import load_dotenv
from flask import Flask, Response, request, stream_with_context
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
# Load environment variables from the .env file so the settings below can be
# overridden without editing the code.
load_dotenv()

app = Flask(__name__)

# Where the Ollama server is reached and which model it is asked to run.
# The default URL targets the loopback interface: 0.0.0.0 is a wildcard *bind*
# address for servers, not a portable destination for clients (connecting to
# it fails on some platforms, e.g. Windows).
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2")

llm = Ollama(model=OLLAMA_MODEL, base_url=OLLAMA_BASE_URL)

# Prompt layout: a fixed system message, then the chat history, then the
# user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You're a world class assistant"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{question}"),
    ]
)
def generate_tokens(question):
    """Stream the model's answer to ``question`` chunk by chunk.

    Pipes the module-level ``prompt`` into ``llm`` and a ``StrOutputParser``,
    then yields every chunk the resulting chain streams back. The chat history
    handed to the prompt is always empty.
    """
    pipeline = prompt | llm | StrOutputParser()
    inputs = {"chat_history": [], "question": question}
    yield from pipeline.stream(inputs)
@app.route("/users/chat", methods=["POST"])
def ask_ai():
    """Answer the posted question as a stream of newline-separated JSON objects.

    Expects a JSON object body with a non-empty string under ``"question"``.
    For every token produced by ``generate_tokens`` the stream carries
    ``{"model": ..., "content": <token>, "done": false}`` followed by a
    newline. After the last token one final object,
    ``{"model": ..., "full_content": <whole answer>, "done": true}``, is sent
    without a trailing newline.

    Returns:
        A streaming ``Response`` on success, or a 400 ``Response`` carrying
        ``{"error": ...}`` when the body is not a JSON object or holds no
        usable ``"question"``.
    """

    def encode(payload):
        # Serialize one message to a UTF-8 encoded JSON document.
        return json.dumps(payload).encode("utf-8")

    def bad_request(message):
        # Reject the request up front, before any streaming has started.
        return Response(
            encode({"error": message}), status=400, mimetype="application/json"
        )

    def generate_json(question):
        with app.app_context():  # Ensure we're within the application context
            # Collect the tokens and join once at the end instead of growing a
            # string with += on every iteration.
            tokens = []
            for token in generate_tokens(question):
                tokens.append(token)
                # The object and its terminating newline are emitted together.
                yield encode(
                    {"model": OLLAMA_MODEL, "content": token, "done": False}
                ) + b"\n"
            # Once streaming is finished, yield one last JSON object with
            # "done" set to True and the complete answer attached.
            yield encode(
                {
                    "model": OLLAMA_MODEL,
                    "full_content": "".join(tokens),
                    "done": True,
                }
            )

    request_data = request.json
    # Validate before building the streaming response: an error raised inside
    # the generator would surface only after the success status was sent.
    if not isinstance(request_data, dict):
        return bad_request("Request body must be a JSON object.")

    question = request_data.get("question")
    if not isinstance(question, str) or not question.strip():
        return bad_request('Field "question" must be a non-empty string.')

    return Response(
        stream_with_context(generate_json(question)), mimetype="application/json"
    )
if __name__ == "__main__":
    # Debug mode is on by default for local development, but can be switched
    # off through the environment (FLASK_DEBUG=0/false/no/off) instead of
    # editing the code. Flask's debugger allows code execution from the
    # browser, so it must never be enabled on a reachable deployment.
    debug_setting = os.getenv("FLASK_DEBUG", "1").strip().lower()
    debug_enabled = debug_setting not in {"", "0", "false", "no", "off"}
    app.run(debug=debug_enabled)