From e9ae09b702e9508a5b82e77b2fcfbb9236e0aa79 Mon Sep 17 00:00:00 2001 From: Kyrylo Hrymailo Date: Mon, 7 Aug 2023 16:12:59 +0200 Subject: [PATCH 1/5] added neon-llm-core dependency --- neon_llm_fastchat/fastchat.py | 36 +++----- neon_llm_fastchat/rmq.py | 149 ++-------------------------------- requirements/requirements.txt | 4 +- 3 files changed, 17 insertions(+), 172 deletions(-) diff --git a/neon_llm_fastchat/fastchat.py b/neon_llm_fastchat/fastchat.py index f233ab0..46b8830 100644 --- a/neon_llm_fastchat/fastchat.py +++ b/neon_llm_fastchat/fastchat.py @@ -23,25 +23,28 @@ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import ctranslate2 +import numpy as np from typing import List - -import ctranslate2 from transformers import T5Tokenizer from huggingface_hub import snapshot_download -import numpy as np +from neon_llm_core.llm import NeonLLM -# TODO: make LLM interface generic -class FastChat: +class FastChat(NeonLLM): + + mq_to_llm_role = { + "user": "Human", + "llm": "Assistant" + } def __init__(self, config): + super().__init__(config) self.context_depth = config["context_depth"] self.max_tokens = config["max_tokens"] self.num_parallel_processes = config["num_parallel_processes"] self.num_threads_per_process = config["num_threads_per_process"] - self._tokenizer = None - self._model = None @property def tokenizer(self) -> T5Tokenizer: @@ -76,12 +79,6 @@ def _system_prompt(self) -> str: "geothermal, and biomass. Non-renewable energy sources, on the other hand, " \ "are finite and will eventually be depleted, such as coal, oil, and natural gas.\n" - def ask(self, message: str, chat_history: List[List[str]]) -> str: - """ Generates llm response based on user message and (user, llm) chat history """ - prompt = self._assemble_prompt(message, chat_history) - llm_text_output = self._call_model(prompt) - return llm_text_output - def get_sorted_answer_indexes(self, question: str, answers: List[str]) -> List[int]: """ Creates sorted list of answer indexes with respect to order provided in :param answers based on PPL score @@ -130,22 +127,11 @@ def _assemble_prompt(self, message: str, chat_history: List[List[str]]) -> str: prompt = self._system_prompt # Context N messages for role, content in chat_history[-self.context_depth:]: - role_fastchat = self._convert_role(role) + role_fastchat = self.convert_role(role) prompt += f"### {role_fastchat}: {content}\n" prompt += f"### Human: {message}\n### Assistant:" return prompt - @staticmethod - def _convert_role(role: str) -> str: - """ Maps MQ role to FastChat internal domain """ - if role == "user": - role_fastchat = "Human" - elif role == "llm": - role_fastchat = "Assistant" - else: - raise ValueError(f"role={role} is undefined, supported are: ('user', 'llm')") - return role_fastchat - def _call_score(self, prompt: str, targets: List[str]) -> List[List[float]]: """ Calculates logarithmic probabilities for the list of provided text sequences diff --git a/neon_llm_fastchat/rmq.py b/neon_llm_fastchat/rmq.py index 111fe6b..952abbe 100644 --- a/neon_llm_fastchat/rmq.py +++ b/neon_llm_fastchat/rmq.py @@ -23,165 +23,26 @@ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from neon_mq_connector.connector import MQConnector -from neon_mq_connector.utils.rabbit_utils import create_mq_callback -from ovos_utils.log import LOG +from neon_llm_core.rmq import NeonLLMMQConnector from neon_llm_fastchat.fastchat import FastChat -from neon_llm_fastchat.config import load_config -# TODO: make LLM MQ interface generic -class FastchatMQ(MQConnector): +class FastchatMQ(NeonLLMMQConnector): """ Module for processing MQ requests to Fast Chat LLM """ - - def __init__(self): - self.service_name = f'neon_llm_{self.name}' - - self.ovos_config = load_config() - mq_config = self.ovos_config.get("MQ", None) - super().__init__(config=mq_config, service_name=self.service_name) - self.vhost = "/llm" - - self.register_consumers() - self._model = None - - def register_consumers(self): - for idx in range(self.model_config["num_parallel_processes"]): - self.register_consumer(name=f"neon_llm_{self.service_name}_ask_{idx}", - vhost=self.vhost, - queue=self.queue_ask, - callback=self.handle_request, - on_error=self.default_error_handler,) - self.register_consumer(name=f'neon_llm_{self.name}_score', - vhost=self.vhost, - queue=self.queue_score, - callback=self.handle_score_request, - on_error=self.default_error_handler,) - self.register_consumer(name=f'neon_llm_{self.name}_discussion', - vhost=self.vhost, - queue=self.queue_opinion, - callback=self.handle_opinion_request, - on_error=self.default_error_handler,) @property def name(self): return "fastchat" - @property - def model_config(self): - return self.ovos_config.get(f"LLM_{self.name.upper()}", None) - - @property - def queue_ask(self): - return f"{self.name}_input" - - @property - def queue_score(self): - return f"{self.name}_score_input" - - @property - def queue_opinion(self): - return f"{self.name}_discussion_input" - @property def model(self): if self._model is None: self._model = FastChat(self.model_config) return self._model - @create_mq_callback() - def handle_request(self, body: dict): - """ - Handles ask requests from MQ to LLM - :param body: request body (dict) - """ - message_id = body["message_id"] - routing_key = body["routing_key"] - - query = body["query"] - history = body["history"] - - try: - response = self.model.ask(message=query, chat_history=history) - except ValueError as err: - LOG.error(f'ValueError={err}') - response = 'Sorry, but I cannot respond to your message at the moment, please try again later' - api_response = { - "message_id": message_id, - "response": response - } - self.send_message(request_data=api_response, - queue=routing_key) - LOG.info(f"Handled ask request for message_id={message_id}") - - @create_mq_callback() - def handle_score_request(self, body: dict): - """ - Handles score requests from MQ to LLM - :param body: request body (dict) - """ - message_id = body["message_id"] - routing_key = body["routing_key"] - - query = body["query"] - responses = body["responses"] - - if not responses: - sorted_answer_indexes = [] - else: - try: - sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses) - except ValueError as err: - LOG.error(f'ValueError={err}') - sorted_answer_indexes = [] - api_response = { - "message_id": message_id, - "sorted_answer_indexes": sorted_answer_indexes - } - self.send_message(request_data=api_response, - queue=routing_key) - LOG.info(f"Handled score request for message_id={message_id}") - - @create_mq_callback() - def handle_opinion_request(self, body: dict): - """ - Handles opinion requests from MQ to LLM - :param body: request body 
(dict) - """ - message_id = body["message_id"] - routing_key = body["routing_key"] - - query = body["query"] - options = body["options"] - responses = list(options.values()) - - if not responses: - opinion = "Sorry, but I got no options to choose from." - else: - try: - sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses) - best_respondent_nick, best_responce = list(options.items())[sorted_answer_indexes[0]] - opinion = self._ask_model_for_opinion(respondent_nick=best_respondent_nick, - question=query, - answer=best_responce) - except ValueError as err: - LOG.error(f'ValueError={err}') - opinion = "Sorry, but I experienced an issue trying to make up an opinion on this topic" - - api_response = { - "message_id": message_id, - "opinion": opinion - } - - self.send_message(request_data=api_response, - queue=routing_key) - LOG.info(f"Handled ask request for message_id={message_id}") - - def _ask_model_for_opinion(self, respondent_nick: str, question: str, answer: str) -> str: - prompt = f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?' - opinion = self.model.ask(message=prompt, chat_history=[]) - LOG.info(f'Received LLM opinion={opinion}, prompt={prompt}') - return opinion + @staticmethod + def compose_opinion_prompt(respondent_nick: str, question: str, answer: str) -> str: + return f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?' diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 8191b21..1d175d1 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -3,6 +3,4 @@ ctranslate2 transformers SentencePiece # networking -neon-mq-connector~=0.7 -ovos-utils~=0.0.32 -ovos-config~=0.0.10 \ No newline at end of file +neon_llm_core @ git+https://github.com/NeonGeckoCom/neon-llm-core \ No newline at end of file From fd0423124d4afe3718a9498ca0193748fded4560 Mon Sep 17 00:00:00 2001 From: Kyrylo Hrymailo Date: Mon, 7 Aug 2023 17:44:37 +0200 Subject: [PATCH 2/5] added installing of git in Dockerfile --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 5eb3671..ffcfecb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,8 @@ COPY docker_overlay/ / WORKDIR /app COPY . /app +RUN apt-get update; \ + apt-get install -y git; RUN pip install /app CMD [ "neon-llm-fastchat" ] \ No newline at end of file From 820485765d6b3ebdbd39dce3e4b9892747caf806 Mon Sep 17 00:00:00 2001 From: NeonBohdan Date: Tue, 8 Aug 2023 18:06:21 +0300 Subject: [PATCH 3/5] Reverted a package installation from github --- Dockerfile | 2 -- requirements/requirements.txt | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index ffcfecb..5eb3671 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,6 @@ COPY docker_overlay/ / WORKDIR /app COPY . 
/app -RUN apt-get update; \ - apt-get install -y git; RUN pip install /app CMD [ "neon-llm-fastchat" ] \ No newline at end of file diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 1d175d1..d2ff363 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -3,4 +3,4 @@ ctranslate2 transformers SentencePiece # networking -neon_llm_core @ git+https://github.com/NeonGeckoCom/neon-llm-core \ No newline at end of file +neon_llm_core \ No newline at end of file From 8374800564e38def8e7d65bc8c09117e32da60e4 Mon Sep 17 00:00:00 2001 From: NeonBohdan Date: Mon, 14 Aug 2023 17:55:12 +0300 Subject: [PATCH 4/5] Removed config.py as code duplicate --- neon_llm_fastchat/config.py | 50 ------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 neon_llm_fastchat/config.py diff --git a/neon_llm_fastchat/config.py b/neon_llm_fastchat/config.py deleted file mode 100644 index 2787463..0000000 --- a/neon_llm_fastchat/config.py +++ /dev/null @@ -1,50 +0,0 @@ -# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System -# All trademark and other rights reserved by their respective owners -# Copyright 2008-2021 Neongecko.com Inc. -# BSD-3 -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from this -# software without specific prior written permission. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, -# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json - -from os.path import join, dirname, isfile -from ovos_utils.log import LOG -from ovos_config.config import Configuration - - -def load_config() -> dict: - """ - Load and return a configuration object, - """ - legacy_config_path = "/app/app/config.json" - if isfile(legacy_config_path): - LOG.warning(f"Deprecated configuration found at {legacy_config_path}") - with open(legacy_config_path) as f: - config = json.load(f) - return config - config = Configuration() - if not config: - LOG.warning(f"No configuration found! 
falling back to defaults") - default_config_path = join(dirname(__file__), "default_config.json") - with open(default_config_path) as f: - config = json.load(f) - return config From fbca43df6e3736f09f4de1ecdaed71f7a5e66d8d Mon Sep 17 00:00:00 2001 From: NeonBohdan Date: Mon, 14 Aug 2023 17:55:12 +0300 Subject: [PATCH 5/5] Fixed default LLM config name --- docker_overlay/etc/neon/diana.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_overlay/etc/neon/diana.yaml b/docker_overlay/etc/neon/diana.yaml index 468c16c..3c30785 100644 --- a/docker_overlay/etc/neon/diana.yaml +++ b/docker_overlay/etc/neon/diana.yaml @@ -14,7 +14,7 @@ MQ: mq_handler: user: neon_api_utils password: Klatchat2021 -FastChat: +LLM_FASTCHAT: model: "fastchat" context_depth: 3 max_tokens: 256
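
Note on PATCH 5/5: the rename of the `FastChat:` section to `LLM_FASTCHAT:` in diana.yaml matches the key the connector builds from its service name. The short sketch below is not part of the patches; the derivation expression is taken from the `model_config` property removed in PATCH 1/5 (and presumably provided by `neon_llm_core` after the refactor), while the config dict is a trimmed, illustrative stand-in for the real diana.yaml.

# Sketch: why PATCH 5/5 renames the YAML section to LLM_FASTCHAT.
# The key is built from the connector's name, as in the model_config
# property shown (removed) in PATCH 1/5: f"LLM_{self.name.upper()}".
name = "fastchat"                      # FastchatMQ.name
config_key = f"LLM_{name.upper()}"     # -> "LLM_FASTCHAT"

# Illustrative stand-in for the loaded diana.yaml contents.
ovos_config = {
    "LLM_FASTCHAT": {
        "model": "fastchat",
        "context_depth": 3,
        "max_tokens": 256,
    }
}

model_config = ovos_config.get(config_key)   # found only after the rename
assert model_config is not None
print(model_config["max_tokens"])            # 256

Before the rename, this lookup would have returned None for the default overlay config, which is presumably what the subject line "Fixed default LLM config name" refers to.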
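
For reference, the prompt layout that `FastChat._assemble_prompt` keeps after the refactor in PATCH 1/5 can be exercised on its own. The sketch below is not part of the patches: the role mapping and the "### Human:/### Assistant:" framing are restated from the diff, while the sample history, the `context_depth` value, and the shortened system prompt are illustrative assumptions.

from typing import List

# Role mapping from the mq_to_llm_role class attribute added in PATCH 1/5.
MQ_TO_LLM_ROLE = {"user": "Human", "llm": "Assistant"}

# Placeholder: the real _system_prompt is longer (it ends with the
# renewable/non-renewable energy example visible in the diff above).
SYSTEM_PROMPT = "A chat between a curious human and an artificial intelligence assistant.\n"

def assemble_prompt(message: str, chat_history: List[List[str]],
                    context_depth: int = 3) -> str:
    prompt = SYSTEM_PROMPT
    # Keep only the last `context_depth` (role, content) pairs as context.
    for role, content in chat_history[-context_depth:]:
        prompt += f"### {MQ_TO_LLM_ROLE[role]}: {content}\n"
    # Append the new user turn and leave the Assistant turn open for the model.
    prompt += f"### Human: {message}\n### Assistant:"
    return prompt

if __name__ == "__main__":
    history = [["user", "Hi!"], ["llm", "Hello, how can I help?"]]
    print(assemble_prompt("What is renewable energy?", history))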
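
The score-queue handling moved into `neon_llm_core` still relies on `FastChat.get_sorted_answer_indexes`, whose docstring in PATCH 1/5 says answers are ordered "based on PPL score". Below is a minimal, hypothetical sketch of that ranking step, assuming lower perplexity means a better answer and using made-up log-probability values; the real implementation scores candidates through `_call_score` on the CTranslate2 model.

from typing import List
import numpy as np

def sort_by_ppl(log_probs_per_answer: List[List[float]]) -> List[int]:
    """Return answer indexes ordered best-first by perplexity.

    log_probs_per_answer[i] holds per-token log-probabilities for answer i,
    e.g. as produced by a _call_score-style scoring pass.  Perplexity is
    exp(-mean(log p)); lower is better, so indexes are argsorted ascending.
    """
    ppl = [float(np.exp(-np.mean(lp))) for lp in log_probs_per_answer]
    return [int(i) for i in np.argsort(ppl)]

# Illustrative, made-up scores for three candidate answers.
scores = [[-0.2, -0.4, -0.1], [-2.5, -3.0], [-0.9, -1.1, -0.7]]
print(sort_by_ppl(scores))   # -> [0, 2, 1]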