-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsummarizer.py
79 lines (67 loc) · 3.01 KB
/
summarizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import logging
import os
from typing import Optional

import openai
from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms.openai import OpenAI, OpenAIChat
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
# Module-level configuration. Everything below runs at import time.

# Called before the environment lookup below so that OPENAI_API_KEY can be
# supplied through a .env file.
load_dotenv()
# NOTE(review): `api_key` is not referenced by any other visible code in this
# module; presumably the OpenAI client reads OPENAI_API_KEY from the
# environment itself -- confirm before removing.
api_key = os.environ.get('OPENAI_API_KEY')
# Word limit interpolated into the refine prompt built in summarize().
target_len = 500
# Passed to CharacterTextSplitter together with length_function=len, so both
# values are measured in characters.
chunk_size = 3000
chunk_overlap = 200
# M2M100 tokenizer/model pair used by summarize() to translate the summary.
# NOTE(review): the prompts in summarize() ask for an English summary, so
# "fr" as the default source language looks mismatched -- confirm.
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="fr")
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
_LOGGER = logging.getLogger(__name__)


def _build_prompts(word_limit):
    """Return the ``(question_prompt, refine_prompt)`` pair for the refine chain.

    ``word_limit`` is interpolated into the refine prompt as the maximum
    summary length in words.
    """
    question_template = (
        "Act as a professional technical meeting minutes writer.\n"
        "Tone: formal\n"
        "Format: Technical meeting summary\n"
        "Length: 200 ~ 300\n"
        "Tasks:\n"
        "- highlight action items and owners\n"
        "- highlight the agreements\n"
        "- Use bullet points if needed\n"
        "{text}\n"
        "CONCISE SUMMARY IN ENGLISH:"
    )
    # Adjacent string literals are concatenated verbatim, so every fragment
    # must carry its own trailing space/newline. Without them the requested
    # output format collapses into a single run-on line.
    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        f"Given the new context, refine the original summary in English within {word_limit} words: following the format\n"
        "Participants: <participants>\n"
        "Discussed: <Discussed-items>\n"
        "Follow-up actions: <a-list-of-follow-up-actions-with-owner-names>\n"
        "If the context isn't useful, return the original summary. Highlight agreements and follow-up actions and owners."
    )
    question_prompt = PromptTemplate(
        template=question_template,
        input_variables=["text"],
    )
    refine_prompt = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_template,
    )
    return question_prompt, refine_prompt


def _translate_from_english(english_text, target_lang_id):
    """Translate ``english_text`` with the module-level M2M100 model.

    ``target_lang_id`` is the token id returned by ``tokenizer.get_lang_id``.
    """
    # The summary is produced in English, so the tokenizer must tag the input
    # as English regardless of the source language it was loaded with.
    tokenizer.src_lang = "en"
    encoded = tokenizer(english_text, return_tensors="pt")
    # NOTE(review): generation relies on the model's default length limit, so
    # long summaries may come back truncated -- confirm and cap/raise as needed.
    generated_tokens = model.generate(**encoded, forced_bos_token_id=target_lang_id)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]


def summarize(text: str, lang: str) -> Optional[str]:
    """Summarize meeting text in English, then translate the summary to ``lang``.

    The text is split into character chunks (``chunk_size`` /
    ``chunk_overlap``), summarized with a LangChain "refine" chain backed by
    ``gpt-3.5-turbo``, printed to stdout, and finally translated by the
    module-level M2M100 model.

    Args:
        text: Raw meeting text to summarize.
        lang: Target language code understood by ``tokenizer.get_lang_id``.

    Returns:
        The translated summary, or ``None`` when ``text`` is empty/blank or
        any step fails. Failures are logged instead of raised, keeping the
        best-effort contract callers already rely on.
    """
    try:
        # Nothing to summarize: skip the LLM call and the model run entirely.
        if not text or not text.strip():
            return None

        # Resolve the target language first so that an unsupported code fails
        # before a paid LLM request is made.
        target_lang_id = tokenizer.get_lang_id(lang)

        splitter = CharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]

        # target_len is read at call time so later changes to it are honored.
        question_prompt, refine_prompt = _build_prompts(target_len)
        chain = load_summarize_chain(
            OpenAIChat(temperature=0, model="gpt-3.5-turbo"),
            chain_type="refine",
            return_intermediate_steps=True,
            question_prompt=question_prompt,
            refine_prompt=refine_prompt,
        )
        resp = chain({"input_documents": docs}, return_only_outputs=True)
        summary_en = resp["output_text"]
        # Kept from the original implementation: echo the English summary.
        print(summary_en)

        return _translate_from_english(summary_en, target_lang_id)
    except Exception:
        # Best-effort API: report the failure with its traceback and return
        # None. KeyboardInterrupt/SystemExit are deliberately not caught.
        _LOGGER.exception("summarize() failed for target language %r", lang)
        return None