From e9ae09b702e9508a5b82e77b2fcfbb9236e0aa79 Mon Sep 17 00:00:00 2001 From: Kyrylo Hrymailo Date: Mon, 7 Aug 2023 16:12:59 +0200 Subject: [PATCH 1/5] added neon-llm-core dependency --- neon_llm_fastchat/fastchat.py | 36 +++----- neon_llm_fastchat/rmq.py | 149 ++-------------------------------- requirements/requirements.txt | 4 +- 3 files changed, 17 insertions(+), 172 deletions(-) diff --git a/neon_llm_fastchat/fastchat.py b/neon_llm_fastchat/fastchat.py index f233ab0..46b8830 100644 --- a/neon_llm_fastchat/fastchat.py +++ b/neon_llm_fastchat/fastchat.py @@ -23,25 +23,28 @@ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import ctranslate2 +import numpy as np from typing import List - -import ctranslate2 from transformers import T5Tokenizer from huggingface_hub import snapshot_download -import numpy as np +from neon_llm_core.llm import NeonLLM -# TODO: make LLM interface generic -class FastChat: +class FastChat(NeonLLM): + + mq_to_llm_role = { + "user": "Human", + "llm": "Assistant" + } def __init__(self, config): + super().__init__(config) self.context_depth = config["context_depth"] self.max_tokens = config["max_tokens"] self.num_parallel_processes = config["num_parallel_processes"] self.num_threads_per_process = config["num_threads_per_process"] - self._tokenizer = None - self._model = None @property def tokenizer(self) -> T5Tokenizer: @@ -76,12 +79,6 @@ def _system_prompt(self) -> str: "geothermal, and biomass. Non-renewable energy sources, on the other hand, " \ "are finite and will eventually be depleted, such as coal, oil, and natural gas.\n" - def ask(self, message: str, chat_history: List[List[str]]) -> str: - """ Generates llm response based on user message and (user, llm) chat history """ - prompt = self._assemble_prompt(message, chat_history) - llm_text_output = self._call_model(prompt) - return llm_text_output - def get_sorted_answer_indexes(self, question: str, answers: List[str]) -> List[int]: """ Creates sorted list of answer indexes with respect to order provided in :param answers based on PPL score @@ -130,22 +127,11 @@ def _assemble_prompt(self, message: str, chat_history: List[List[str]]) -> str: prompt = self._system_prompt # Context N messages for role, content in chat_history[-self.context_depth:]: - role_fastchat = self._convert_role(role) + role_fastchat = self.convert_role(role) prompt += f"### {role_fastchat}: {content}\n" prompt += f"### Human: {message}\n### Assistant:" return prompt - @staticmethod - def _convert_role(role: str) -> str: - """ Maps MQ role to FastChat internal domain """ - if role == "user": - role_fastchat = "Human" - elif role == "llm": - role_fastchat = "Assistant" - else: - raise ValueError(f"role={role} is undefined, supported are: ('user', 'llm')") - return role_fastchat - def _call_score(self, prompt: str, targets: List[str]) -> List[List[float]]: """ Calculates logarithmic probabilities for the list of provided text sequences diff --git a/neon_llm_fastchat/rmq.py b/neon_llm_fastchat/rmq.py index 111fe6b..952abbe 100644 --- a/neon_llm_fastchat/rmq.py +++ b/neon_llm_fastchat/rmq.py @@ -23,165 +23,26 @@ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from neon_mq_connector.connector import MQConnector -from neon_mq_connector.utils.rabbit_utils import create_mq_callback -from ovos_utils.log import LOG +from neon_llm_core.rmq import NeonLLMMQConnector from neon_llm_fastchat.fastchat import FastChat -from neon_llm_fastchat.config import load_config -# TODO: make LLM MQ interface generic -class FastchatMQ(MQConnector): +class FastchatMQ(NeonLLMMQConnector): """ Module for processing MQ requests to Fast Chat LLM """ - - def __init__(self): - self.service_name = f'neon_llm_{self.name}' - - self.ovos_config = load_config() - mq_config = self.ovos_config.get("MQ", None) - super().__init__(config=mq_config, service_name=self.service_name) - self.vhost = "/llm" - - self.register_consumers() - self._model = None - - def register_consumers(self): - for idx in range(self.model_config["num_parallel_processes"]): - self.register_consumer(name=f"neon_llm_{self.service_name}_ask_{idx}", - vhost=self.vhost, - queue=self.queue_ask, - callback=self.handle_request, - on_error=self.default_error_handler,) - self.register_consumer(name=f'neon_llm_{self.name}_score', - vhost=self.vhost, - queue=self.queue_score, - callback=self.handle_score_request, - on_error=self.default_error_handler,) - self.register_consumer(name=f'neon_llm_{self.name}_discussion', - vhost=self.vhost, - queue=self.queue_opinion, - callback=self.handle_opinion_request, - on_error=self.default_error_handler,) @property def name(self): return "fastchat" - @property - def model_config(self): - return self.ovos_config.get(f"LLM_{self.name.upper()}", None) - - @property - def queue_ask(self): - return f"{self.name}_input" - - @property - def queue_score(self): - return f"{self.name}_score_input" - - @property - def queue_opinion(self): - return f"{self.name}_discussion_input" - @property def model(self): if self._model is None: self._model = FastChat(self.model_config) return self._model - @create_mq_callback() - def handle_request(self, body: dict): - """ - Handles ask requests from MQ to LLM - :param body: request body (dict) - """ - message_id = body["message_id"] - routing_key = body["routing_key"] - - query = body["query"] - history = body["history"] - - try: - response = self.model.ask(message=query, chat_history=history) - except ValueError as err: - LOG.error(f'ValueError={err}') - response = 'Sorry, but I cannot respond to your message at the moment, please try again later' - api_response = { - "message_id": message_id, - "response": response - } - self.send_message(request_data=api_response, - queue=routing_key) - LOG.info(f"Handled ask request for message_id={message_id}") - - @create_mq_callback() - def handle_score_request(self, body: dict): - """ - Handles score requests from MQ to LLM - :param body: request body (dict) - """ - message_id = body["message_id"] - routing_key = body["routing_key"] - - query = body["query"] - responses = body["responses"] - - if not responses: - sorted_answer_indexes = [] - else: - try: - sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses) - except ValueError as err: - LOG.error(f'ValueError={err}') - sorted_answer_indexes = [] - api_response = { - "message_id": message_id, - "sorted_answer_indexes": sorted_answer_indexes - } - self.send_message(request_data=api_response, - queue=routing_key) - LOG.info(f"Handled score request for message_id={message_id}") - - @create_mq_callback() - def handle_opinion_request(self, body: dict): - """ - Handles opinion requests from MQ to LLM - :param body: request body 
(dict) - """ - message_id = body["message_id"] - routing_key = body["routing_key"] - - query = body["query"] - options = body["options"] - responses = list(options.values()) - - if not responses: - opinion = "Sorry, but I got no options to choose from." - else: - try: - sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses) - best_respondent_nick, best_responce = list(options.items())[sorted_answer_indexes[0]] - opinion = self._ask_model_for_opinion(respondent_nick=best_respondent_nick, - question=query, - answer=best_responce) - except ValueError as err: - LOG.error(f'ValueError={err}') - opinion = "Sorry, but I experienced an issue trying to make up an opinion on this topic" - - api_response = { - "message_id": message_id, - "opinion": opinion - } - - self.send_message(request_data=api_response, - queue=routing_key) - LOG.info(f"Handled ask request for message_id={message_id}") - - def _ask_model_for_opinion(self, respondent_nick: str, question: str, answer: str) -> str: - prompt = f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?' - opinion = self.model.ask(message=prompt, chat_history=[]) - LOG.info(f'Received LLM opinion={opinion}, prompt={prompt}') - return opinion + @staticmethod + def compose_opinion_prompt(respondent_nick: str, question: str, answer: str) -> str: + return f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?' diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 8191b21..1d175d1 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -3,6 +3,4 @@ ctranslate2 transformers SentencePiece # networking -neon-mq-connector~=0.7 -ovos-utils~=0.0.32 -ovos-config~=0.0.10 \ No newline at end of file +neon_llm_core @ git+https://github.com/NeonGeckoCom/neon-llm-core \ No newline at end of file From fd0423124d4afe3718a9498ca0193748fded4560 Mon Sep 17 00:00:00 2001 From: Kyrylo Hrymailo Date: Mon, 7 Aug 2023 17:44:37 +0200 Subject: [PATCH 2/5] added installing of git in Dockerfile --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 5eb3671..ffcfecb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,8 @@ COPY docker_overlay/ / WORKDIR /app COPY . /app +RUN apt-get update; \ + apt-get install -y git; RUN pip install /app CMD [ "neon-llm-fastchat" ] \ No newline at end of file From 820485765d6b3ebdbd39dce3e4b9892747caf806 Mon Sep 17 00:00:00 2001 From: NeonBohdan Date: Tue, 8 Aug 2023 18:06:21 +0300 Subject: [PATCH 3/5] Reverted a package installation from github --- Dockerfile | 2 -- requirements/requirements.txt | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index ffcfecb..5eb3671 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,6 @@ COPY docker_overlay/ / WORKDIR /app COPY . 
/app -RUN apt-get update; \ - apt-get install -y git; RUN pip install /app CMD [ "neon-llm-fastchat" ] \ No newline at end of file diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 1d175d1..d2ff363 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -3,4 +3,4 @@ ctranslate2 transformers SentencePiece # networking -neon_llm_core @ git+https://github.com/NeonGeckoCom/neon-llm-core \ No newline at end of file +neon_llm_core \ No newline at end of file From 8374800564e38def8e7d65bc8c09117e32da60e4 Mon Sep 17 00:00:00 2001 From: NeonBohdan Date: Mon, 14 Aug 2023 17:55:12 +0300 Subject: [PATCH 4/5] Removed config.py as code duplicate --- neon_llm_fastchat/config.py | 50 ------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 neon_llm_fastchat/config.py diff --git a/neon_llm_fastchat/config.py b/neon_llm_fastchat/config.py deleted file mode 100644 index 2787463..0000000 --- a/neon_llm_fastchat/config.py +++ /dev/null @@ -1,50 +0,0 @@ -# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System -# All trademark and other rights reserved by their respective owners -# Copyright 2008-2021 Neongecko.com Inc. -# BSD-3 -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from this -# software without specific prior written permission. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, -# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json - -from os.path import join, dirname, isfile -from ovos_utils.log import LOG -from ovos_config.config import Configuration - - -def load_config() -> dict: - """ - Load and return a configuration object, - """ - legacy_config_path = "/app/app/config.json" - if isfile(legacy_config_path): - LOG.warning(f"Deprecated configuration found at {legacy_config_path}") - with open(legacy_config_path) as f: - config = json.load(f) - return config - config = Configuration() - if not config: - LOG.warning(f"No configuration found! 
falling back to defaults") - default_config_path = join(dirname(__file__), "default_config.json") - with open(default_config_path) as f: - config = json.load(f) - return config From fbca43df6e3736f09f4de1ecdaed71f7a5e66d8d Mon Sep 17 00:00:00 2001 From: NeonBohdan Date: Mon, 14 Aug 2023 17:55:12 +0300 Subject: [PATCH 5/5] Fixed default LLM config name --- docker_overlay/etc/neon/diana.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_overlay/etc/neon/diana.yaml b/docker_overlay/etc/neon/diana.yaml index 468c16c..3c30785 100644 --- a/docker_overlay/etc/neon/diana.yaml +++ b/docker_overlay/etc/neon/diana.yaml @@ -14,7 +14,7 @@ MQ: mq_handler: user: neon_api_utils password: Klatchat2021 -FastChat: +LLM_FASTCHAT: model: "fastchat" context_depth: 3 max_tokens: 256
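
Note on PATCH 5/5: the rename of the `FastChat:` section to `LLM_FASTCHAT:` in diana.yaml matches the key the connector builds from its service name. The short sketch below is not part of the patches; the derivation expression is taken from the `model_config` property removed in PATCH 1/5 (and presumably provided by `neon_llm_core` after the refactor), while the config dict is a trimmed, illustrative stand-in for the real diana.yaml.

# Sketch: why PATCH 5/5 renames the YAML section to LLM_FASTCHAT.
# The key is built from the connector's name, as in the model_config
# property shown (removed) in PATCH 1/5: f"LLM_{self.name.upper()}".
name = "fastchat"                      # FastchatMQ.name
config_key = f"LLM_{name.upper()}"     # -> "LLM_FASTCHAT"

# Illustrative stand-in for the loaded diana.yaml contents.
ovos_config = {
    "LLM_FASTCHAT": {
        "model": "fastchat",
        "context_depth": 3,
        "max_tokens": 256,
    }
}

model_config = ovos_config.get(config_key)   # found only after the rename
assert model_config is not None
print(model_config["max_tokens"])            # 256

Before the rename, this lookup would have returned None for the default overlay config, which is presumably what the subject line "Fixed default LLM config name" refers to.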
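
For reference, the prompt layout that `FastChat._assemble_prompt` keeps after the refactor in PATCH 1/5 can be exercised on its own. The sketch below is not part of the patches: the role mapping and the "### Human:/### Assistant:" framing are restated from the diff, while the sample history, the `context_depth` value, and the shortened system prompt are illustrative assumptions.

from typing import List

# Role mapping from the mq_to_llm_role class attribute added in PATCH 1/5.
MQ_TO_LLM_ROLE = {"user": "Human", "llm": "Assistant"}

# Placeholder: the real _system_prompt is longer (it ends with the
# renewable/non-renewable energy example visible in the diff above).
SYSTEM_PROMPT = "A chat between a curious human and an artificial intelligence assistant.\n"

def assemble_prompt(message: str, chat_history: List[List[str]],
                    context_depth: int = 3) -> str:
    prompt = SYSTEM_PROMPT
    # Keep only the last `context_depth` (role, content) pairs as context.
    for role, content in chat_history[-context_depth:]:
        prompt += f"### {MQ_TO_LLM_ROLE[role]}: {content}\n"
    # Append the new user turn and leave the Assistant turn open for the model.
    prompt += f"### Human: {message}\n### Assistant:"
    return prompt

if __name__ == "__main__":
    history = [["user", "Hi!"], ["llm", "Hello, how can I help?"]]
    print(assemble_prompt("What is renewable energy?", history))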
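
The score-queue handling moved into `neon_llm_core` still relies on `FastChat.get_sorted_answer_indexes`, whose docstring in PATCH 1/5 says answers are ordered "based on PPL score". Below is a minimal, hypothetical sketch of that ranking step, assuming lower perplexity means a better answer and using made-up log-probability values; the real implementation scores candidates through `_call_score` on the CTranslate2 model.

from typing import List
import numpy as np

def sort_by_ppl(log_probs_per_answer: List[List[float]]) -> List[int]:
    """Return answer indexes ordered best-first by perplexity.

    log_probs_per_answer[i] holds per-token log-probabilities for answer i,
    e.g. as produced by a _call_score-style scoring pass.  Perplexity is
    exp(-mean(log p)); lower is better, so indexes are argsorted ascending.
    """
    ppl = [float(np.exp(-np.mean(lp))) for lp in log_probs_per_answer]
    return [int(i) for i in np.argsort(ppl)]

# Illustrative, made-up scores for three candidate answers.
scores = [[-0.2, -0.4, -0.1], [-2.5, -3.0], [-0.9, -1.1, -0.7]]
print(sort_by_ppl(scores))   # -> [0, 2, 1]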