-
Notifications
You must be signed in to change notification settings - Fork 8
/
dialogue_manager.py
108 lines (85 loc) · 3.92 KB
/
dialogue_manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
from sklearn.metrics.pairwise import pairwise_distances_argmin
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer
from utils import *
from chatterbot.trainers import ChatterBotCorpusTrainer
class ThreadRanker(object):
    """Picks the stored thread whose embedding is closest to a question.

    Word embeddings are loaded once at construction time; the per-tag
    thread embeddings are read from disk on every lookup.
    """

    def __init__(self, paths):
        """Load the word embeddings and remember the thread-embeddings folder.

        Args:
            paths: mapping that provides the 'WORD_EMBEDDINGS' and
                'THREAD_EMBEDDINGS_FOLDER' entries.
        """
        embeddings, dim = load_embeddings(paths['WORD_EMBEDDINGS'])
        self.word_embeddings = embeddings
        self.embeddings_dim = dim
        self.thread_embeddings_folder = paths['THREAD_EMBEDDINGS_FOLDER']

    def __load_embeddings_by_tag(self, tag_name):
        """Unpickle the (thread ids, thread embeddings) pair stored for a tag.

        The file is expected at ``<thread_embeddings_folder>/<tag_name>.pkl``.
        """
        pickle_path = os.path.join(
            self.thread_embeddings_folder,
            tag_name + ".pkl",
        )
        ids, vectors = unpickle_file(pickle_path)
        return ids, vectors

    def get_best_thread(self, question, tag_name):
        """Return the id of the thread most similar to the question.

        Only threads stored under ``tag_name`` are searched, and similarity
        is measured with the cosine metric.

        Args:
            question: question text handed to ``question_to_vec``.
            tag_name: tag whose thread embeddings should be searched.

        Returns:
            The entry of the loaded thread ids at the position of the
            closest thread embedding.
        """
        candidate_ids, candidate_vectors = self.__load_embeddings_by_tag(tag_name)

        query = question_to_vec(
            question, self.word_embeddings, self.embeddings_dim
        )
        # The distance helper wants a 2-D input: one row holding the query.
        query_row = query.reshape(1, self.embeddings_dim)

        nearest = pairwise_distances_argmin(
            X=query_row,
            Y=candidate_vectors,
            metric='cosine',
        )
        # `nearest` holds one index per query row; there is exactly one row.
        return candidate_ids[nearest][0]
class DialogueManager(object):
    """Answers a question with either chit-chat or a StackOverflow link.

    An intent recognizer decides which of the two paths handles the
    question; see ``generate_answer``.
    """

    def __init__(self, paths):
        """Load every model used by the manager and train the chit-chat bot.

        Args:
            paths: mapping that provides the 'INTENT_RECOGNIZER',
                'TFIDF_VECTORIZER' and 'TAG_CLASSIFIER' entries; it is also
                handed unchanged to ``ThreadRanker``.
        """
        print("Loading resources...")

        # Intent recognition: the classifier and the vectorizer feeding it.
        self.intent_recognizer = unpickle_file(paths['INTENT_RECOGNIZER'])
        self.tfidf_vectorizer = unpickle_file(paths['TFIDF_VECTORIZER'])

        # Filled with (tag, thread id) for goal-oriented answers.
        self.ANSWER_TEMPLATE = 'I think its about %s\n This thread might help you: https://stackoverflow.com/questions/%s'

        # Goal-oriented part: tag prediction plus thread lookup.
        self.tag_classifier = unpickle_file(paths['TAG_CLASSIFIER'])
        self.thread_ranker = ThreadRanker(paths)

        # Chit-chat part (this trains the bot on every construction).
        self.create_chitchat_bot()

    def create_chitchat_bot(self):
        """Create ``self.chatbot`` and train it.

        The bot is first trained on the bundled English corpus and then,
        after switching to ``ListTrainer``, on a few scripted exchanges.
        """
        bot = ChatBot(
            'HelloBot',
            trainer='chatterbot.trainers.ChatterBotCorpusTrainer'
        )
        self.chatbot = bot
        bot.train("chatterbot.corpus.english")

        # Each inner list is one conversation, trained in the order listed.
        scripted_conversations = (
            [
                "How are you doing?",
                "I am good!",
            ],
            [
                "What's your hobby?",
                "I love playing cricket,listening music,reading books"
            ],
            [
                "Hey",
                "Hello. How do you do?",
                "I am good!",
                'That is good to hear.How can I help you ?',
                'Thank you',
                'You are welcome.'
            ],
            [
                "What is AI?",
                "Me."
            ],
        )
        bot.set_trainer(ListTrainer)
        for conversation in scripted_conversations:
            bot.train(conversation)

    def generate_answer(self, question):
        """Answer a question via chit-chat or thread search, chosen by intent.

        Args:
            question: raw question text.

        Returns:
            For the 'dialogue' intent, whatever ``self.chatbot.get_response``
            returns; otherwise ``ANSWER_TEMPLATE`` filled with the predicted
            tag and the best matching thread id.
        """
        # Both classifiers consume the same TF-IDF features of the prepared text.
        cleaned = text_prepare(question)
        tfidf_features = self.tfidf_vectorizer.transform([cleaned])
        predicted_intent = self.intent_recognizer.predict(tfidf_features)[0]

        # Chit-chat part:
        if predicted_intent == 'dialogue':
            # NOTE(review): the bot receives the prepared text, not the raw
            # question, and its reply may not be a plain str - confirm both
            # are intended.
            return self.chatbot.get_response(cleaned)

        # Goal-oriented part: predict the tag, then rank threads within it.
        predicted_tag = self.tag_classifier.predict(tfidf_features)[0]
        best_thread_id = self.thread_ranker.get_best_thread(cleaned, predicted_tag)
        return self.ANSWER_TEMPLATE % (predicted_tag, best_thread_id)