From 21ee1fe6635b087ae6fae0e95dabf0f9bd923be8 Mon Sep 17 00:00:00 2001 From: Daniel O'Connell Date: Tue, 31 Oct 2023 23:40:36 +0100 Subject: [PATCH 1/2] Fixes and refactors of OpenAI stuff --- api/openai.py | 140 ++++++++++++++--------------- config.py | 54 +++++++---- modules/chatgpt.py | 57 ++++++------ modules/gpt3module.py | 14 +-- servicemodules/serviceConstants.py | 18 ---- utilities/utilities.py | 11 +-- 6 files changed, 142 insertions(+), 152 deletions(-) diff --git a/api/openai.py b/api/openai.py index 79db41e9..0cb6974b 100644 --- a/api/openai.py +++ b/api/openai.py @@ -2,20 +2,18 @@ from api.utilities.openai import OpenAIEngines from config import ( openai_api_key, - paid_service_channel_ids, gpt4, gpt4_for_all, gpt4_whitelist_role_ids, bot_vip_ids, paid_service_all_channels, use_helicone, - disable_prompt_moderation + disable_prompt_moderation, + openai_allowed_sources, ) from structlog import get_logger -from servicemodules.serviceConstants import Services, openai_channel_ids from utilities.serviceutils import ServiceMessage -from utilities import utilities, Utilities -from utilities import discordutils +from utilities import Utilities, discordutils if use_helicone: try: from helicone import openai @@ -28,11 +26,12 @@ import requests import json # moderation response dump -CURL_REQUEST: bool # helicone breaks some moderation attribute of openai module -if use_helicone: - CURL_REQUEST = True -else: - CURL_REQUEST = False + +OPENAI_NASTY_CATEGORIES = { + "sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", + "violence/graphic", "self-harm/intent", "self-harm/instructions", + "harassment/threatening", "violence" +} openai.api_key = openai_api_key start_sequence = "\nA:" @@ -46,95 +45,89 @@ def __init__(self): self.log = get_logger() def is_channel_allowed(self, message: ServiceMessage) -> bool: - if message.service in openai_channel_ids and message.channel.id in openai_channel_ids[message.service]: - # For Rob's discord - 
return True - elif paid_service_all_channels: - return True - elif message.channel.id in paid_service_channel_ids: - # if list is empty, default - return True + channel_id = (message.channel and message.channel.id) + return ( + paid_service_all_channels or + channel_id in openai_allowed_sources.get(message.service.value, []) + ) + + def log_error(self, error, exception=None, warning=False): + if warning: + self.log.warning(self.class_name, error=error) else: - return False - def is_text_risky(self, text: str) -> bool: - """Ask the openai moderation endpoint if the text is risky + self.log.error(self.class_name, error=error) - See https://platform.openai.com/docs/guides/moderation/quickstart for details""" + loop = asyncio.get_running_loop() + loop.create_task(utils.log_error(error)) + if exception: + loop.create_task(utils.log_exception(exception)) + + def is_text_risky(self, text: str) -> bool: + """Ask the openai moderation endpoint if the text is risky. - allowed_categories = frozenset("violence") # Can be triggered by some AI safety terms + See https://platform.openai.com/docs/guides/moderation/quickstart for details. 
+ """ + allowed_categories = {"violence"} # Can be triggered by some AI safety terms if disable_prompt_moderation: return False - if CURL_REQUEST: + response = None + if use_helicone: try: http_response = requests.post( - 'https://api.openai.com/v1/moderations', - headers={ - "Content-Type": "application/json", - "Authorization": f"Bearer {openai_api_key}" - }, - json={ - "input": text - } - ) + 'https://api.openai.com/v1/moderations', + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {openai_api_key}" + }, + json={"input": text} + ) except Exception as e: - self.log.error(self.class_name, error="Error in Requests module trying to moderate content") - loop = asyncio.get_running_loop() - loop.create_task(utils.log_error(f"Error in Requests module trying to moderate content")) - loop.create_task(utils.log_exception(e)) + self.log_error("Error in Requests module trying to moderate content", e) return True + if http_response.status_code == 401: - self.log.error(self.class_name, error="OpenAI Authentication Failed") - loop = asyncio.get_running_loop() - loop.create_task(utils.log_error(f"OpenAI Authenication Failed")) - loop.create_task(utils.log_exception(e)) + self.log_error("OpenAI Authentication Failed") return True elif http_response.status_code == 429: - self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded") - loop = asyncio.get_running_loop() - loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded")) - loop.create_task(utils.log_exception(e)) + self.log_error("OpenAI Rate Limit Exceeded", warning=True) return True elif http_response.status_code != 200: - self.log.warning(self.class_name, error=f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}") - loop = asyncio.get_running_loop() - loop.create_task(utils.log_error(f"Possible issue with the OpenAI API. 
Status: {http_response.status_code}, Content: {http_response.text}"))
+            self.log_error(
+                f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}"
+            )
                 return True
 
             response = http_response.json()
+
         else:
             try:
                 response = Moderation.create(input=text)
             except openai.error.AuthenticationError as e:
-                self.log.error(self.class_name, error="OpenAI Authentication Failed")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"OpenAI Authenication Failed"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("OpenAI Authentication Failed", e)
                 return True
             except openai.error.RateLimitError as e:
-                self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded")
-                loop = asyncio.get_running_loop()
-                loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded"))
-                loop.create_task(utils.log_exception(e))
+                self.log_error("OpenAI Rate Limit Exceeded", e, warning=True)
                 return True
 
-        flagged: bool = response["results"][0]["flagged"]
-
-        all_morals: frozenset[str] = ["sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", "violence/graphic", "self-harm/intent", "self-harm/instructions", "harassment/threatening", "violence"]
-        violated_categories = set()
+        results = (response.get("results") or [{}])[0]
+        if not results:
+            return False
 
-        if flagged:
-            for moral in all_morals - allowed_categories:
-                if response["results"][0][moral]:
-                    violated_categories.add(moral)
+        if not results["flagged"]:
+            self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean")
+            return False
 
-            if len(violated_categories) > 0:
+        violated_categories = [
+            moral for moral in OPENAI_NASTY_CATEGORIES - allowed_categories if results.get(moral)
+        ]
+        if violated_categories:
             self.log.warning(self.class_name, msg=f"Text violated these unwanted categories: {violated_categories}")
             self.log.debug(self.class_name, msg=f"OpenAI moderation response: 
{json.dumps(response)}") return True - else: - self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean") - return False + + self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean") + return False def get_engine(self, message: ServiceMessage) -> OpenAIEngines: """Pick the appropriate engine to respond to a message with""" @@ -153,9 +146,9 @@ def get_response(self, engine: OpenAIEngines, prompt: str, logit_bias: dict[int, return "" try: - response = openai.Completion.create( - engine=str(engine), - prompt=prompt, + response = openai.ChatCompletion.create( + model=str(engine), + messages=[{'role': 'user', 'content': prompt}], temperature=0, max_tokens=100, top_p=1, @@ -178,8 +171,9 @@ def get_response(self, engine: OpenAIEngines, prompt: str, logit_bias: dict[int, if response["choices"]: choice = response["choices"][0] - if choice["finish_reason"] == "stop" and choice["text"].strip() != "Unknown": - text = choice["text"].strip(". \n").split("\n")[0] + text = choice.get('message', {}).get('content', '').strip() + if choice["finish_reason"] == "stop" and text != "Unknown": + text = text.strip(". 
\n").split("\n")[0] self.log.info(self.class_name, gpt_response=text) return text diff --git a/config.py b/config.py index 73b4fec4..48d379ec 100644 --- a/config.py +++ b/config.py @@ -139,7 +139,6 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense valid_bot_reboot_options = Literal["exec", False] bot_reboot: valid_bot_reboot_options paid_service_all_channels: bool -paid_service_channel_ids: frozenset paid_service_for_all: bool paid_service_whitelist_role_ids: frozenset gpt4: bool @@ -197,15 +196,27 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense bot_reboot = cast(valid_bot_reboot_options, False) paid_service_for_all = True paid_service_all_channels = True - paid_service_channel_ids = frozenset() + # NOTE: rob's approved stuff are in servicemodules/serviceConstants.py + from servicemodules import discordConstants paid_service_whitelist_role_ids = frozenset() - gpt4 = getenv_bool("GPT4") - gpt4_for_all = getenv_bool("GPT4_FOR_ALL") - gpt4_whitelist_role_ids = getenv_unique_set("GPT4_WHITELIST_ROLE_IDS", frozenset()) - use_helicone = getenv_bool("USE_HELICONE") - llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt) - be_shy = getenv_bool("BE_SHY") + openai_allowed_sources: dict[str, tuple[str, ...]] = { + "Discord": ( + discordConstants.stampy_dev_priv_channel_id, + discordConstants.aligned_intelligences_only_channel_id, + discordConstants.ai_channel_id, + discordConstants.not_ai_channel_id, + discordConstants.events_channel_id, + discordConstants.projects_channel_id, + discordConstants.book_club_channel_id, + discordConstants.dialogues_with_stampy_channel_id, + discordConstants.meta_channel_id, + discordConstants.general_channel_id, + discordConstants.talk_to_stampy_channel_id, + ), + "Flask": ("flask_api",), + } + channel_whitelist = None bot_error_channel_id = { "production": "1017527224540344380", @@ -222,28 +233,35 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> 
Union[frozense bot_dev_roles = getenv_unique_set("BOT_DEV_ROLES", frozenset()) bot_dev_ids = getenv_unique_set("BOT_DEV_IDS", frozenset()) bot_control_channel_ids = getenv_unique_set("BOT_CONTROL_CHANNEL_IDS", frozenset()) - bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID") + bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID", '') bot_error_channel_id = getenv("BOT_ERROR_CHANNEL_ID", bot_private_channel_id) # NOTE: Rob's invite/member management functions, not ported yet member_role_id = getenv("MEMBER_ROLE_ID", default=None) bot_reboot = cast(valid_bot_reboot_options, getenv("BOT_REBOOT", default=False)) paid_service_all_channels = getenv_bool("PAID_SERVICE_ALL_CHANNELS") - paid_service_channel_ids = getenv_unique_set( - "PAID_SERVICE_CHANNEL_IDS", frozenset() - ) + openai_allowed_sources: dict[str, tuple[str, ...]] = { + "Discord": tuple(getenv_unique_set("PAID_SERVICE_CHANNEL_IDS", frozenset())), + "Flask": { + 'production': tuple(), + 'development': ("flask_api",) + }[ENVIRONMENT_TYPE], + } + paid_service_for_all = getenv_bool("PAID_SERVICE_FOR_ALL") paid_service_whitelist_role_ids = getenv_unique_set( "PAID_SERVICE_ROLE_IDS", frozenset() ) - gpt4 = getenv_bool("GPT4") - gpt4_for_all = getenv_bool("GPT4_FOR_ALL") - gpt4_whitelist_role_ids = getenv_unique_set("GPT4_WHITELIST_ROLE_IDS", frozenset()) - use_helicone = getenv_bool("USE_HELICONE") - llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt) - be_shy = getenv_bool("BE_SHY") + channel_whitelist = getenv_unique_set("CHANNEL_WHITELIST", None) disable_prompt_moderation = getenv_bool("DISABLE_PROMPT_MODERATION") +gpt4 = getenv_bool("GPT4") +gpt4_for_all = getenv_bool("GPT4_FOR_ALL") +gpt4_whitelist_role_ids = getenv_unique_set("GPT4_WHITELIST_ROLE_IDS", frozenset()) +use_helicone = getenv_bool("USE_HELICONE") +llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt) +be_shy = getenv_bool("BE_SHY") + discord_token: str = getenv("DISCORD_TOKEN") database_path: str = 
getenv("DATABASE_PATH") youtube_api_key: Optional[str] = getenv("YOUTUBE_API_KEY", default=None) diff --git a/modules/chatgpt.py b/modules/chatgpt.py index 73df6b44..814a01a4 100644 --- a/modules/chatgpt.py +++ b/modules/chatgpt.py @@ -140,38 +140,39 @@ async def chatgpt_chat(self, message: ServiceMessage) -> Response: else: im = default_italics_mark - if self.openai.is_channel_allowed(message): - if self.openai.is_text_risky(message.clean_content): - return Response( - confidence=0, - text="", - why="GPT-3's content filter thought the prompt was risky", - ) - self.log.info( - self.class_name, - msg=f"sending chat prompt to chatgpt, engine {engine} ({engine.description})", - ) - chatcompletion = cast( - OpenAIObject, - openai.ChatCompletion.create(model=str(engine), messages=messages), + if not self.openai.is_channel_allowed(message): + self.log.info(self.class_name, msg="channel not allowed") + return Response() + + if self.openai.is_text_risky(message.clean_content): + return Response( + confidence=0, + text="", + why="GPT-3's content filter thought the prompt was risky", ) - print(chatcompletion) - if chatcompletion.choices: - response = chatcompletion.choices[0].message.content - # sometimes the response starts with "Stampy says:" or responds or replies etc, which we don't want - response = re.sub(r"^[sS]tampy\ ?[a-zA-Z]{,15}:\s?", "", response) + self.log.info( + self.class_name, + msg=f"sending chat prompt to chatgpt, engine {engine} ({engine.description})", + ) + chatcompletion = cast( + OpenAIObject, + openai.ChatCompletion.create(model=str(engine), messages=messages), + ) + if chatcompletion.choices: + response = chatcompletion.choices[0].message.content - self.log.info(self.class_name, response=response) + # sometimes the response starts with "Stampy says:" or responds or replies etc, which we don't want + response = re.sub(r"^[sS]tampy\ ?[a-zA-Z]{,15}:\s?", "", response) - if response: - return Response( - confidence=10, - text=f"{im}{response}{im}", - 
why="ChatGPT made me say it!",
-                )
-        else:
-            self.log.info(self.class_name, msg="channel not allowed")
+        self.log.info(self.class_name, response=response)
+
+        if response:
+            return Response(
+                confidence=10,
+                text=f"{im}{response}{im}",
+                why="ChatGPT made me say it!",
+            )
         return Response()
 
     def __str__(self):
diff --git a/modules/gpt3module.py b/modules/gpt3module.py
index 357009cb..39392d26 100644
--- a/modules/gpt3module.py
+++ b/modules/gpt3module.py
@@ -9,8 +9,8 @@
 import openai.error as oa_error
 
 from api.openai import OpenAI, OpenAIEngines
-from config import CONFUSED_RESPONSE, openai_api_key, bot_vip_ids
-from modules.module import IntegrationTest, Module, Response
+from config import openai_api_key, bot_vip_ids
+from modules.module import Module, Response
 from utilities import Utilities
 from utilities.serviceutils import ServiceChannel, ServiceMessage
 from servicemodules.serviceConstants import service_italics_marks, default_italics_mark
@@ -95,14 +95,6 @@ def message_log_append(self, message: ServiceMessage) -> None:
         self.message_logs[message.channel] = self.message_logs[message.channel][-self.log_max_messages :]  # fmt:skip
 
     def generate_chatlog_prompt(self, channel: ServiceChannel) -> str:
-        users = set()
-        for message in self.message_logs[channel]:
-            if message.author.name != "stampy":
-                users.add(message.author.name)
-        users_string = ", ".join(users)
-        if len(users) > 1:
-            users_string += ","
-
         chatlog_string = self.generate_chatlog(channel)
 
         prompt = (
diff --git a/servicemodules/serviceConstants.py b/servicemodules/serviceConstants.py
index 2c6542db..7bbc1ddb 100644
--- a/servicemodules/serviceConstants.py
+++ b/servicemodules/serviceConstants.py
@@ -1,5 +1,4 @@
 from enum import Enum
-from servicemodules import discordConstants
 
 
 class Services(Enum):
@@ -20,23 +19,6 @@ def __hash__(self):
         return hash(str(self)) >> 22
 
 
-openai_channel_ids: dict[Services, tuple[str, ...]] = {
-    Services.DISCORD: (
-        discordConstants.stampy_dev_priv_channel_id,
-        discordConstants.aligned_intelligences_only_channel_id,
-        discordConstants.ai_channel_id,
-        discordConstants.not_ai_channel_id,
-        discordConstants.events_channel_id,
-        discordConstants.projects_channel_id,
-        discordConstants.book_club_channel_id,
-        discordConstants.dialogues_with_stampy_channel_id,
-        discordConstants.meta_channel_id,
-        discordConstants.general_channel_id,
-        discordConstants.talk_to_stampy_channel_id,
-    )
-}
-
-
 service_italics_marks = {
     Services.SLACK: "_",
     Services.FLASK: "",
diff --git a/utilities/utilities.py b/utilities/utilities.py
index 86c1e695..ad1880ea 100644
--- a/utilities/utilities.py
+++ b/utilities/utilities.py
@@ -569,11 +569,13 @@ def mask_quoted_text(text: str) -> str:
 
 
 def can_use_paid_service(author: ServiceUser) -> bool:
-    if paid_service_for_all:
-        return True
-    if author.id in bot_vip_ids or is_bot_dev(author):
-        return True
-    return any(user_has_role(author, x) for x in paid_service_whitelist_role_ids)
+    return (
+        paid_service_for_all or
+        author.id in bot_vip_ids or
+        is_bot_dev(author) or
+        any(user_has_role(author, x) for x in paid_service_whitelist_role_ids)
+    )
+
 
 def is_shy() -> bool:
     return be_shy

From 0fef240f0a70318adf44745ca79561d5565db59f Mon Sep 17 00:00:00 2001
From: Daniel O'Connell
Date: Wed, 1 Nov 2023 12:04:29 +0100
Subject: [PATCH 2/2] change log message

---
 api/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/api/openai.py b/api/openai.py index 0cb6974b..96ff8137 100644 --- a/api/openai.py +++ b/api/openai.py @@ -126,7 +126,7 @@ def is_text_risky(self, text: str) -> bool: self.log.debug(self.class_name, msg=f"OpenAI moderation response: {json.dumps(response)}") return True - self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean") + self.log.info(self.class_name, msg="Checked with content filter, it doesn't violate any of our categories") return False def get_engine(self, message: ServiceMessage) -> OpenAIEngines: