Decoupled LLM dependencies (#6)
NeonBohdan authored Aug 14, 2023
2 parents a5f0bcd + fbca43d commit d21d16d
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 223 deletions.
2 changes: 1 addition & 1 deletion docker_overlay/etc/neon/diana.yaml
@@ -14,7 +14,7 @@ MQ:
     mq_handler:
       user: neon_api_utils
       password: Klatchat2021
-FastChat:
+LLM_FASTCHAT:
   model: "fastchat"
   context_depth: 3
   max_tokens: 256
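The renamed section matters because the connector looks up its model settings by service name. A minimal Python sketch of that lookup, mirroring the model_config property removed from rmq.py further down (the helper name and plain-dict config are illustrative assumptions, not part of this commit):

    # Sketch only: resolve the per-LLM section from a loaded diana.yaml config.
    # "fastchat" maps to the "LLM_FASTCHAT" key introduced by this commit.
    def get_llm_config(ovos_config: dict, name: str = "fastchat") -> dict:
        return ovos_config.get(f"LLM_{name.upper()}", {})

    # Example:
    # get_llm_config({"LLM_FASTCHAT": {"model": "fastchat"}})["model"]  # -> "fastchat"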
50 changes: 0 additions & 50 deletions neon_llm_fastchat/config.py

This file was deleted.

36 changes: 11 additions & 25 deletions neon_llm_fastchat/fastchat.py
@@ -23,25 +23,28 @@
 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import ctranslate2
-import numpy as np
-
 from typing import List
 
+import ctranslate2
 from transformers import T5Tokenizer
 from huggingface_hub import snapshot_download
+import numpy as np
+from neon_llm_core.llm import NeonLLM
 
 
-# TODO: make LLM interface generic
-class FastChat:
+class FastChat(NeonLLM):
+
+    mq_to_llm_role = {
+        "user": "Human",
+        "llm": "Assistant"
+    }
 
     def __init__(self, config):
+        super().__init__(config)
         self.context_depth = config["context_depth"]
         self.max_tokens = config["max_tokens"]
         self.num_parallel_processes = config["num_parallel_processes"]
         self.num_threads_per_process = config["num_threads_per_process"]
-        self._tokenizer = None
-        self._model = None
 
     @property
     def tokenizer(self) -> T5Tokenizer:
@@ -76,12 +79,6 @@ def _system_prompt(self) -> str:
                "geothermal, and biomass. Non-renewable energy sources, on the other hand, " \
                "are finite and will eventually be depleted, such as coal, oil, and natural gas.\n"
 
-    def ask(self, message: str, chat_history: List[List[str]]) -> str:
-        """ Generates llm response based on user message and (user, llm) chat history """
-        prompt = self._assemble_prompt(message, chat_history)
-        llm_text_output = self._call_model(prompt)
-        return llm_text_output
-
     def get_sorted_answer_indexes(self, question: str, answers: List[str]) -> List[int]:
         """
         Creates sorted list of answer indexes with respect to order provided in :param answers based on PPL score
@@ -130,22 +127,11 @@ def _assemble_prompt(self, message: str, chat_history: List[List[str]]) -> str:
         prompt = self._system_prompt
         # Context N messages
         for role, content in chat_history[-self.context_depth:]:
-            role_fastchat = self._convert_role(role)
+            role_fastchat = self.convert_role(role)
             prompt += f"### {role_fastchat}: {content}\n"
         prompt += f"### Human: {message}\n### Assistant:"
         return prompt
 
-    @staticmethod
-    def _convert_role(role: str) -> str:
-        """ Maps MQ role to FastChat internal domain """
-        if role == "user":
-            role_fastchat = "Human"
-        elif role == "llm":
-            role_fastchat = "Assistant"
-        else:
-            raise ValueError(f"role={role} is undefined, supported are: ('user', 'llm')")
-        return role_fastchat
-
     def _call_score(self, prompt: str, targets: List[str]) -> List[List[float]]:
         """
         Calculates logarithmic probabilities for the list of provided text sequences
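Role mapping now leaves this file entirely: the subclass only declares mq_to_llm_role and calls the inherited convert_role. A hypothetical sketch of such a base-class helper, standing in for the deleted _convert_role (this is an assumption about neon_llm_core.llm.NeonLLM, not code from the commit):

    from typing import Dict

    class SketchNeonLLM:
        """Illustrative stand-in for neon_llm_core.llm.NeonLLM (assumed API)."""

        mq_to_llm_role: Dict[str, str] = {}  # subclasses override, e.g. FastChat

        @classmethod
        def convert_role(cls, role: str) -> str:
            # Map an MQ role ("user"/"llm") to the model-specific label
            if role not in cls.mq_to_llm_role:
                raise ValueError(f"role={role} is undefined, "
                                 f"supported are: {tuple(cls.mq_to_llm_role)}")
            return cls.mq_to_llm_role[role]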
149 changes: 5 additions & 144 deletions neon_llm_fastchat/rmq.py
@@ -23,165 +23,26 @@
 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from neon_mq_connector.connector import MQConnector
-from neon_mq_connector.utils.rabbit_utils import create_mq_callback
-from ovos_utils.log import LOG
+from neon_llm_core.rmq import NeonLLMMQConnector
 
 from neon_llm_fastchat.fastchat import FastChat
-from neon_llm_fastchat.config import load_config
 
 
-# TODO: make LLM MQ interface generic
-class FastchatMQ(MQConnector):
+class FastchatMQ(NeonLLMMQConnector):
     """
     Module for processing MQ requests to Fast Chat LLM
     """
 
-    def __init__(self):
-        self.service_name = f'neon_llm_{self.name}'
-
-        self.ovos_config = load_config()
-        mq_config = self.ovos_config.get("MQ", None)
-        super().__init__(config=mq_config, service_name=self.service_name)
-        self.vhost = "/llm"
-
-        self.register_consumers()
-        self._model = None
-
-    def register_consumers(self):
-        for idx in range(self.model_config["num_parallel_processes"]):
-            self.register_consumer(name=f"neon_llm_{self.service_name}_ask_{idx}",
-                                   vhost=self.vhost,
-                                   queue=self.queue_ask,
-                                   callback=self.handle_request,
-                                   on_error=self.default_error_handler,)
-        self.register_consumer(name=f'neon_llm_{self.name}_score',
-                               vhost=self.vhost,
-                               queue=self.queue_score,
-                               callback=self.handle_score_request,
-                               on_error=self.default_error_handler,)
-        self.register_consumer(name=f'neon_llm_{self.name}_discussion',
-                               vhost=self.vhost,
-                               queue=self.queue_opinion,
-                               callback=self.handle_opinion_request,
-                               on_error=self.default_error_handler,)
-
     @property
     def name(self):
         return "fastchat"
 
-    @property
-    def model_config(self):
-        return self.ovos_config.get(f"LLM_{self.name.upper()}", None)
-
-    @property
-    def queue_ask(self):
-        return f"{self.name}_input"
-
-    @property
-    def queue_score(self):
-        return f"{self.name}_score_input"
-
-    @property
-    def queue_opinion(self):
-        return f"{self.name}_discussion_input"
-
     @property
     def model(self):
         if self._model is None:
             self._model = FastChat(self.model_config)
         return self._model
 
-    @create_mq_callback()
-    def handle_request(self, body: dict):
-        """
-        Handles ask requests from MQ to LLM
-        :param body: request body (dict)
-        """
-        message_id = body["message_id"]
-        routing_key = body["routing_key"]
-
-        query = body["query"]
-        history = body["history"]
-
-        try:
-            response = self.model.ask(message=query, chat_history=history)
-        except ValueError as err:
-            LOG.error(f'ValueError={err}')
-            response = 'Sorry, but I cannot respond to your message at the moment, please try again later'
-        api_response = {
-            "message_id": message_id,
-            "response": response
-        }
-        self.send_message(request_data=api_response,
-                          queue=routing_key)
-        LOG.info(f"Handled ask request for message_id={message_id}")
-
-    @create_mq_callback()
-    def handle_score_request(self, body: dict):
-        """
-        Handles score requests from MQ to LLM
-        :param body: request body (dict)
-        """
-        message_id = body["message_id"]
-        routing_key = body["routing_key"]
-
-        query = body["query"]
-        responses = body["responses"]
-
-        if not responses:
-            sorted_answer_indexes = []
-        else:
-            try:
-                sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses)
-            except ValueError as err:
-                LOG.error(f'ValueError={err}')
-                sorted_answer_indexes = []
-        api_response = {
-            "message_id": message_id,
-            "sorted_answer_indexes": sorted_answer_indexes
-        }
-        self.send_message(request_data=api_response,
-                          queue=routing_key)
-        LOG.info(f"Handled score request for message_id={message_id}")
-
-    @create_mq_callback()
-    def handle_opinion_request(self, body: dict):
-        """
-        Handles opinion requests from MQ to LLM
-        :param body: request body (dict)
-        """
-        message_id = body["message_id"]
-        routing_key = body["routing_key"]
-
-        query = body["query"]
-        options = body["options"]
-        responses = list(options.values())
-
-        if not responses:
-            opinion = "Sorry, but I got no options to choose from."
-        else:
-            try:
-                sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses)
-                best_respondent_nick, best_responce = list(options.items())[sorted_answer_indexes[0]]
-                opinion = self._ask_model_for_opinion(respondent_nick=best_respondent_nick,
-                                                      question=query,
-                                                      answer=best_responce)
-            except ValueError as err:
-                LOG.error(f'ValueError={err}')
-                opinion = "Sorry, but I experienced an issue trying to make up an opinion on this topic"
-
-        api_response = {
-            "message_id": message_id,
-            "opinion": opinion
-        }
-
-        self.send_message(request_data=api_response,
-                          queue=routing_key)
-        LOG.info(f"Handled ask request for message_id={message_id}")
-
-    def _ask_model_for_opinion(self, respondent_nick: str, question: str, answer: str) -> str:
-        prompt = f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?'
-        opinion = self.model.ask(message=prompt, chat_history=[])
-        LOG.info(f'Received LLM opinion={opinion}, prompt={prompt}')
-        return opinion
+    @staticmethod
+    def compose_opinion_prompt(respondent_nick: str, question: str, answer: str) -> str:
+        return f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?'
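After the rewrite the subclass keeps only model-specific pieces: its name, a lazily built model, and the opinion prompt text; queue registration and the ask/score/discussion handlers move into neon_llm_core. The sketch below spells out the base-class contract this implies; it is an assumption about NeonLLMMQConnector's interface, not code taken from the library:

    from abc import ABC, abstractmethod

    class SketchNeonLLMMQConnector(ABC):
        """Illustrative stand-in for neon_llm_core.rmq.NeonLLMMQConnector."""

        @property
        @abstractmethod
        def name(self) -> str:
            """Service name, e.g. "fastchat"; used to derive queue names."""

        @property
        @abstractmethod
        def model(self):
            """Lazily constructed LLM wrapper, e.g. FastChat."""

        @staticmethod
        @abstractmethod
        def compose_opinion_prompt(respondent_nick: str, question: str,
                                   answer: str) -> str:
            """Prompt text used by the inherited opinion handler."""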
4 changes: 1 addition & 3 deletions requirements/requirements.txt
@@ -3,6 +3,4 @@ ctranslate2
 transformers
 SentencePiece
 # networking
-neon-mq-connector~=0.7
-ovos-utils~=0.0.32
-ovos-config~=0.0.10
+neon_llm_core
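With the individual networking packages replaced by neon_llm_core, launching the service reduces to instantiating the connector. A hedged usage sketch, assuming NeonLLMMQConnector keeps the blocking run() entry point of neon_mq_connector's MQConnector (check neon_llm_core for the actual API):

    from neon_llm_fastchat.rmq import FastchatMQ

    def main():
        # Reads the MQ and LLM_FASTCHAT sections from the Diana configuration
        connector = FastchatMQ()
        connector.run()  # assumed blocking call that starts the MQ consumers

    if __name__ == "__main__":
        main()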
