Base #1 (Open): wants to merge 6 commits into feat/todo-list
3 changes: 1 addition & 2 deletions .env-example
@@ -1,3 +1,2 @@
-CHAIN_TYPE=map_reduce
 OPENAI_API_KEY=""
-OPENAI_API_BASE=""
+OPENAI_BASE_URL=""
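Note: the rename tracks the openai-python 1.x SDK, which reads OPENAI_BASE_URL from the environment; the 0.x name OPENAI_API_BASE is ignored by the 1.x client. A minimal sketch of how the bare OpenAI() constructor in app.py picks these up; the explicit form is shown only for comparison and is not part of this PR:

import os
from openai import OpenAI

# OpenAI() resolves api_key from OPENAI_API_KEY and base_url from
# OPENAI_BASE_URL when neither is passed explicitly.
client = OpenAI()

# Equivalent explicit wiring:
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ.get("OPENAI_BASE_URL"),  # falls back to https://api.openai.com/v1 when unset
)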
2 changes: 1 addition & 1 deletion Dockerfile
@@ -4,6 +4,6 @@ COPY . /src
 
 WORKDIR /src
 
-RUN pip install -r requirements-frozen.txt
+RUN pip install -r requirements.txt
 
 CMD ["python3", "app.py"]
181 changes: 160 additions & 21 deletions app.py
@@ -3,38 +3,159 @@
 import time
 import uvicorn
 
-from langchain_openai import OpenAI, ChatOpenAI
-from langchain.chains.summarize import load_summarize_chain
-from langchain.chains import AnalyzeDocumentChain
-from langchain.text_splitter import CharacterTextSplitter
+from openai import OpenAI
 from sse_starlette import EventSourceResponse
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 from typing import List, Literal, Optional, Union
+from langdetect import detect
 
 dotenv.load_dotenv()
 
 DEFAULTS = {
-    'CHAIN_TYPE': "map_reduce"
+    'HTTPX_TIMEOUT': 60,
+    'TEMPERATURE': 0,
+    'MAX_TOKENS': 4096
 }
 
+summary_refine_prompt_template = """\
+Your job is to produce a final summary.
+We have provided an existing summary up to a certain point: {answer}
+We have the opportunity to refine the existing summary (only if needed) with some more context below.
+------------
+{text}
+------------
+Given the new context, refine the original summary.
+If the context isn't useful, return the original summary.
+Keep the summary in {language}.
+"""
+
+summary_prompt_template = """Write a concise summary of the following,
+keeping the summary in {language}.
+
+
+"{text}"
+
+
+CONCISE SUMMARY:"""
+
+todo_refine_prompt_template = """\
+Your job is to produce a final todo list.
+We have provided an existing todo list up to a certain point: {answer}
+We have the opportunity to refine the existing todo list (only if needed) with some more context below.
+------------
+{text}
+------------
+Given the new context, refine the original todo list.
+If the context isn't useful, return the original todo list.
+Keep the todo list in {language}.
+"""
+
+todo_prompt_template = """Write a concise todo list of the following,
+keeping the todo list in {language}:
+
+
+"{text}"
+
+
+CONCISE TODO LIST:"""
+
 
 def get_env(key):
     return os.environ.get(key, DEFAULTS.get(key))
 
 
-def summarize(content: str, chain_type: str, model_name: str):
-    llm = ChatOpenAI(temperature=0, model_name=model_name)
-    text_splitter = CharacterTextSplitter(
-        chunk_size=1500,
-        chunk_overlap=0,
-        length_function=len,
-    )
-    summary_chain = load_summarize_chain(llm, chain_type=chain_type)
-    summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain, text_splitter=text_splitter)
-    summary_text = summarize_document_chain.invoke(content)
-    return summary_text
+def make_todo_list(content: str, model_name: str):
+    client = OpenAI()
+
+    language = detect(content)
+    length = len(content)
+    chunk_size = 1500
+    start_idx = 0
+    end_idx = 0
+    times = 1
+    answer = None
+    while end_idx < length:
+        end_idx = start_idx + chunk_size
+        if end_idx >= length:
+            end_idx = length
+
+        text = content[start_idx:end_idx]
+        text_nolines = text.replace("\n", "\\n")
+        print(f"idx=[{start_idx}, {end_idx}], text: {text_nolines}")
+        start_idx = end_idx
+
+        # Build the prompt in a separate variable; assigning back to `content`
+        # would clobber the text still being sliced by the loop above.
+        if times == 1:
+            prompt = todo_prompt_template.format(text=text, language=language)
+        else:
+            prompt = todo_refine_prompt_template.format(answer=answer, text=text, language=language)
+
+        messages = [{
+            "role": "user",
+            "content": prompt
+        }]
+        params = dict(
+            messages=messages,
+            stream=False,
+            model=model_name,
+            temperature=get_env("TEMPERATURE"),
+            max_tokens=get_env("MAX_TOKENS"),
+            timeout=get_env("HTTPX_TIMEOUT")
+        )
+
+        chat_completion = client.chat.completions.create(**params)
+        answer = chat_completion.choices[0].message.content
+        print(f"Todo times: {times}, answer: {answer}")
+        times = times + 1
+
+    return answer
+
+
+def summarize(content: str, model_name: str):
+    client = OpenAI()
+
+    language = detect(content)
+    length = len(content)
+    chunk_size = 1500
+    start_idx = 0
+    end_idx = 0
+    times = 1
+    answer = None
+    while end_idx < length:
+        end_idx = start_idx + chunk_size
+        if end_idx >= length:
+            end_idx = length
+
+        text = content[start_idx:end_idx]
+        text_nolines = text.replace("\n", "\\n")
+        print(f"idx=[{start_idx}, {end_idx}], text: {text_nolines}")
+        start_idx = end_idx
+
+        # Same pattern as make_todo_list: keep the prompt out of `content`.
+        if times == 1:
+            prompt = summary_prompt_template.format(text=text, language=language)
+        else:
+            prompt = summary_refine_prompt_template.format(answer=answer, text=text, language=language)
+
+        messages = [{
+            "role": "user",
+            "content": prompt
+        }]
+        params = dict(
+            messages=messages,
+            stream=False,
+            model=model_name,
+            temperature=get_env("TEMPERATURE"),
+            max_tokens=get_env("MAX_TOKENS"),
+            timeout=get_env("HTTPX_TIMEOUT")
+        )
+
+        chat_completion = client.chat.completions.create(**params)
+        answer = chat_completion.choices[0].message.content
+        print(f"Summarize times: {times}, answer: {answer}")
+        times = times + 1
+
+    return answer
 
 
 app = FastAPI()
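A note on the configuration helper above: os.environ.get returns strings, so TEMPERATURE, MAX_TOKENS and HTTPX_TIMEOUT come back as str whenever they are overridden in the environment, while the coded defaults stay numeric. A possible hardening, sketched here rather than taken from the PR, coerces overrides to the type of the default:

def get_env(key):
    # Coerce environment overrides to the type of the coded default,
    # e.g. MAX_TOKENS="2048" from the environment becomes int 2048.
    default = DEFAULTS.get(key)
    value = os.environ.get(key)
    if value is None:
        return default
    return type(default)(value) if default is not None else value

Separately, summarize and make_todo_list differ only in their prompt templates; a shared helper taking the first-pass and refine templates as parameters would remove the duplicated loop.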
@@ -87,7 +208,7 @@ class ChatCompletionResponse(BaseModel):
     created: Optional[int] = Field(default_factory=lambda: int(time.time()))
 
 
-def predict(query: str, model_id: str, chain_type: str):
+def predict(query: str, model_id: str):
     choice_data = ChatCompletionResponseStreamChoice(
         index=0,
         delta=DeltaMessage(role="assistant"),
@@ -97,10 +218,29 @@ def predict(query: str, model_id: str, chain_type: str):
         choice_data], object="chat.completion.chunk")
     yield "{}".format(chunk.json(exclude_unset=True))
 
-    summary = summarize(query, chain_type, model_id)
+    summary = summarize(query, model_id)
+    choice_data = ChatCompletionResponseStreamChoice(
+        index=0,
+        delta=DeltaMessage(content=f"Summary:\n {summary}", role="assistant"),
+        finish_reason=None
+    )
+    chunk = ChatCompletionResponse(model=model_id, choices=[
+        choice_data], object="chat.completion.chunk")
+    yield "{}".format(chunk.json(exclude_unset=True))
+
+    todo_list = make_todo_list(query, model_id)
+    choice_data = ChatCompletionResponseStreamChoice(
+        index=0,
+        delta=DeltaMessage(content=f"\n\nTodo List:\n {todo_list}", role="assistant"),
+        finish_reason=None
+    )
+    chunk = ChatCompletionResponse(model=model_id, choices=[
+        choice_data], object="chat.completion.chunk")
+    yield "{}".format(chunk.json(exclude_unset=True))
+
     choice_data = ChatCompletionResponseStreamChoice(
         index=0,
-        delta=DeltaMessage(content=summary['output_text'], role="assistant"),
+        delta=DeltaMessage(content=f"\n\nTranscription:\n {query}", role="assistant"),
         finish_reason=None
     )
     chunk = ChatCompletionResponse(model=model_id, choices=[
Expand All @@ -123,8 +263,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
if request.messages[-1].role != "user":
raise HTTPException(status_code=400, detail="Invalid request")
user_content = request.messages[-1].content
chain_type = get_env("CHAIN_TYPE")
generate = predict(user_content, request.model, chain_type)
generate = predict(user_content, request.model)
return EventSourceResponse(generate, media_type="text/event-stream")


Expand Down
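Because predict streams OpenAI-style chat.completion.chunk payloads over SSE, the endpoint can be smoke-tested with a plain httpx client. A rough sketch, assuming the route is mounted at /v1/chat/completions and the server listens on localhost:8000 (neither detail is visible in this diff):

import json
import httpx

payload = {
    "model": "gpt-3.5-turbo",
    "stream": True,
    "messages": [{"role": "user", "content": "<transcript to summarize>"}],
}
with httpx.stream("POST", "http://localhost:8000/v1/chat/completions",
                  json=payload, timeout=None) as response:
    for line in response.iter_lines():
        # sse_starlette frames each yielded string as "data: <payload>"
        if line.startswith("data: "):
            chunk = json.loads(line[len("data: "):])
            delta = chunk["choices"][0]["delta"]
            print(delta.get("content", ""), end="", flush=True)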
4 changes: 1 addition & 3 deletions requirements.txt
@@ -1,6 +1,3 @@
-langchain==0.1.14
-langchain-community==0.0.31
-langchain-openai==0.1.1
 openai==1.14.0
 python-dotenv==1.0.1
 fastapi==0.110.0
@@ -12,3 +9,4 @@ httpx==0.27.0
 httpx-ws==0.5.2
 pydantic==2.6.4
 pydantic_core==2.16.3
+langdetect==1.0.9
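One caveat on the new dependency: langdetect.detect returns ISO 639-1 style codes ("en", "zh-cn", ...), so the {language} slot in the prompt templates receives a code rather than a language name. A small mapping, purely illustrative and not part of this PR, would make the instruction read more naturally to the model:

# Illustrative only; extend as needed.
LANGUAGE_NAMES = {"en": "English", "zh-cn": "Simplified Chinese", "ja": "Japanese"}

def language_name(code: str) -> str:
    return LANGUAGE_NAMES.get(code, code)  # fall back to the raw code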
24 changes: 24 additions & 0 deletions tests.py
@@ -0,0 +1,24 @@
+from app import summarize, make_todo_list
+
+if __name__ == '__main__':
+
+    content = """
+Key concepts
+Text generation models
+OpenAI's text generation models (often referred to as generative pre-trained transformers or "GPT" models for short), like GPT-4 and GPT-3.5, have been trained to understand natural and formal language. Models like GPT-4 allows text outputs in response to their inputs. The inputs to these models are also referred to as "prompts". Designing a prompt is essentially how you "program" a model like GPT-4, usually by providing instructions or some examples of how to successfully complete a task. Models like GPT-4 can be used across a great variety of tasks including content or code generation, summarization, conversation, creative writing, and more. Read more in our introductory text generation guide and in our prompt engineering guide.
+
+Assistants
+Assistants refer to entities, which in the case of the OpenAI API are powered by large language models like GPT-4, that are capable of performing tasks for users. These assistants operate based on the instructions embedded within the context window of the model. They also usually have access to tools which allows the assistants to perform more complex tasks like running code or retrieving information from a file. Read more about assistants in our Assistants API Overview.
+
+Embeddings
+An embedding is a vector representation of a piece of data (e.g. some text) that is meant to preserve aspects of its content and/or its meaning. Chunks of data that are similar in some way will tend to have embeddings that are closer together than unrelated data. OpenAI offers text embedding models that take as input a text string and produce as output an embedding vector. Embeddings are useful for search, clustering, recommendations, anomaly detection, classification, and more. Read more about embeddings in our embeddings guide.
+
+Tokens
+Text generation and embeddings models process text in chunks called tokens. Tokens represent commonly occurring sequences of characters. For example, the string " tokenization" is decomposed as " token" and "ization", while a short and common word like " the" is represented as a single token. Note that in a sentence, the first token of each word typically starts with a space character. Check out our tokenizer tool to test specific strings and see how they are translated into tokens. As a rough rule of thumb, 1 token is approximately 4 characters or 0.75 words for English text.
+
+One limitation to keep in mind is that for a text generation model the prompt and the generated output combined must be no more than the model's maximum context length. For embeddings models (which do not output tokens), the input must be shorter than the model's maximum context length. The maximum context lengths for each text generation and embeddings model can be found in the model index.
+"""
+    answer = summarize(content, "gpt-3.5-turbo")
+    print(answer)
+    answer = make_todo_list(content, "gpt-3.5-turbo")
+    print(answer)
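Note that tests.py is a live smoke test rather than a unit test: both calls go out to the configured OpenAI-compatible endpoint, so it needs OPENAI_API_KEY (and, if applicable, OPENAI_BASE_URL) set, and it runs directly via python3 tests.py rather than through a test runner.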