Merge pull request #322 from StampyAI/mod-the-world
ProducerMatt authored Sep 24, 2023
2 parents 2733916 + c48a127 commit 646f43b
Showing 6 changed files with 117 additions and 102 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -69,12 +69,13 @@ Not required:
- `BOT_DEV_IDS`: list of user ids of bot devs. You may want to include `BOT_VIP_IDS` here.
- `BOT_CONTROL_CHANNEL_IDS`: list of channels where control commands are accepted.
- `BOT_PRIVATE_CHANNEL_ID`: single channel where private Stampy status updates are sent
- `BOT_ERROR_CHANNEL_ID`: (defaults to the private channel) low-level error tracebacks from Python. With this variable they can be shunted to a separate channel.
- `CODA_API_TOKEN`: token to access Coda. Without it, modules `Questions` and `QuestionSetter` will not be available and `StampyControls` will have limited functionality.
- `BOT_REBOOT`: how Stampy reboots himself. If unset, he only quits, expecting an external `while true` loop (as in `runstampy`/Dockerfile). If set to `exec`, he will try to relaunch himself from his own CLI arguments.
- `STOP_ON_ERROR`: Dockerfile/`runstampy` only, with `BOT_REBOOT` unset. If defined, the wrapper restarts Stampy only when he is told to reboot (exit code 42); any other exit code makes the script stop.
- `BE_SHY`: Stamp won't respond when the message isn't specifically to him.
- `BE_SHY`: Stampy never responds when the message isn't addressed specifically to him.
- `CHANNEL_WHITELIST`: channels Stampy is allowed to respond to messages in
- `IS_ROB_SERVER`: If defined, Rob Miles server-specific stuff is enabled. Servers other than Rob Miles Discord Server and Stampy Test Server should not enable it, Otherwise some errors are likely to occur.
- `IS_ROB_SERVER`: If defined, Rob Miles server-specific stuff is enabled. This is a convenience option for the Rob Miles sysadmins. Servers other than the Rob Miles Discord Server and the Stampy Test Server should not enable it; otherwise your custom config won't be read.

Specific modules (excluding LLM stuff):

@@ -93,6 +94,7 @@ LLM stuff:
- `GPT4_WHITELIST_ROLE_IDS`: if the above is unset, Stampy responds with GPT4 only for users with these roles.
- `USE_HELICONE`: if set, GPT prompts call the helicone API rather than OpenAI.
- `LLM_PROMPT`: the prompt fed to the language model; it describes the personality and behavior of the bot.
- `DISABLE_PROMPT_MODERATION`: if set, skip the moderation/safety check on LLM prompts.
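
For illustration, a hypothetical `.env` fragment using some of the variables above. All IDs and tokens are placeholders, and the exact syntax for list-valued variables depends on how `getenv_unique_set` parses them:

```sh
# hypothetical values -- substitute your own tokens and channel IDs
CODA_API_TOKEN=replace-me
BOT_VIP_IDS=123456789012345678
BOT_CONTROL_CHANNEL_IDS=123456789012345678
BOT_PRIVATE_CHANNEL_ID=123456789012345678
BOT_ERROR_CHANNEL_ID=123456789012345678
BOT_REBOOT=exec
# leave DISABLE_PROMPT_MODERATION unset so prompts are still moderated
```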

## Docker

163 changes: 90 additions & 73 deletions api/openai.py
@@ -8,7 +8,8 @@
gpt4_whitelist_role_ids,
bot_vip_ids,
paid_service_all_channels,
use_helicone
use_helicone,
disable_prompt_moderation
)
from structlog import get_logger
from servicemodules.serviceConstants import Services, openai_channel_ids
@@ -22,7 +23,16 @@
from helicone import openai_proxy as openai
else:
import openai
from openai import Moderation
import discord
import requests
import json # moderation response dump

# helicone's proxy breaks the openai module's Moderation attribute,
# so fall back to a raw HTTP request when helicone is in use
CURL_REQUEST: bool = use_helicone

openai.api_key = openai_api_key
start_sequence = "\nA:"
@@ -46,78 +56,85 @@ def is_channel_allowed(self, message: ServiceMessage) -> bool:
return True
else:
return False
def cf_risk_level(self, prompt):
"""Ask the openai content filter if the prompt is risky
Returns:
0 - The text is safe.
1 - This text is sensitive.
2 - This text is unsafe.
def is_text_risky(self, text: str) -> bool:
"""Ask the openai moderation endpoint if the text is risky
See https://beta.openai.com/docs/engines/content-filter for details"""
See https://platform.openai.com/docs/guides/moderation/quickstart for details"""

try:
response = openai.Completion.create(
engine="content-filter-alpha",
prompt="<|endoftext|>" + prompt + "\n--\nLabel:",
temperature=0,
max_tokens=1,
top_p=0,
logprobs=10,
)
except openai.error.AuthenticationError as e:
self.log.error(self.class_name, error="OpenAI Authentication Failed")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error(f"OpenAI Authenication Failed"))
loop.create_task(utils.log_exception(e))
return 2
except openai.error.RateLimitError as e:
self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded"))
loop.create_task(utils.log_exception(e))
return 2

output_label = response["choices"][0]["text"]

# This is the probability at which we evaluate that a "2" is likely real
# vs. should be discarded as a false positive
toxic_threshold = -0.355

if output_label == "2":
# If the model returns "2", return its confidence in 2 or other output-labels
logprobs = response["choices"][0]["logprobs"]["top_logprobs"][0]

# If the model is not sufficiently confident in "2",
# choose the most probable of "0" or "1"
# Guaranteed to have a confidence for 2 since this was the selected token.
if logprobs["2"] < toxic_threshold:
logprob_0 = logprobs.get("0", None)
logprob_1 = logprobs.get("1", None)

# If both "0" and "1" have probabilities, set the output label
# to whichever is most probable
if logprob_0 is not None and logprob_1 is not None:
if logprob_0 >= logprob_1:
output_label = "0"
else:
output_label = "1"
# If only one of them is found, set output label to that one
elif logprob_0 is not None:
output_label = "0"
elif logprob_1 is not None:
output_label = "1"

# If neither "0" or "1" are available, stick with "2"
# by leaving output_label unchanged.

# if the most probable token is none of "0", "1", or "2"
# this should be set as unsafe
if output_label not in ["0", "1", "2"]:
output_label = "2"

self.log.info(self.class_name, msg=f"Prompt is risk level {output_label}")

return int(output_label)
allowed_categories = frozenset("violence") # Can be triggered by some AI safety terms

if disable_prompt_moderation:
return False

if CURL_REQUEST:
try:
http_response = requests.post(
'https://api.openai.com/v1/moderations',
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {openai_api_key}"
},
json={
"input": text
}
)
except Exception as e:
self.log.error(self.class_name, error="Error in Requests module trying to moderate content")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error(f"Error in Requests module trying to moderate content"))
loop.create_task(utils.log_exception(e))
return True
if http_response.status_code == 401:
self.log.error(self.class_name, error="OpenAI Authentication Failed")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error("OpenAI Authentication Failed"))
return True
elif http_response.status_code == 429:
self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error("OpenAI Rate Limit Exceeded"))
return True
elif http_response.status_code != 200:
self.log.warning(self.class_name, error=f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error(f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}"))
return True
response = http_response.json()
else:
try:
response = Moderation.create(input=text)
except openai.error.AuthenticationError as e:
self.log.error(self.class_name, error="OpenAI Authentication Failed")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error(f"OpenAI Authenication Failed"))
loop.create_task(utils.log_exception(e))
return True
except openai.error.RateLimitError as e:
self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded")
loop = asyncio.get_running_loop()
loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded"))
loop.create_task(utils.log_exception(e))
return True

flagged: bool = response["results"][0]["flagged"]

all_morals: frozenset[str] = frozenset({"sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", "violence/graphic", "self-harm/intent", "self-harm/instructions", "harassment/threatening", "violence"})
violated_categories = set()

if flagged:
for moral in all_morals - allowed_categories:
if response["results"][0]["categories"][moral]:
violated_categories.add(moral)

if len(violated_categories) > 0:
self.log.warning(self.class_name, msg=f"Text violated these unwanted categories: {violated_categories}")
self.log.debug(self.class_name, msg=f"OpenAI moderation response: {json.dumps(response)}")
return True
else:
self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean")
return False

def get_engine(self, message: ServiceMessage) -> OpenAIEngines:
"""Pick the appropriate engine to respond to a message with"""
@@ -131,8 +148,8 @@ def get_engine(self, message: ServiceMessage) -> OpenAIEngines:
return OpenAIEngines.GPT_3_5_TURBO

def get_response(self, engine: OpenAIEngines, prompt: str, logit_bias: dict[int, int]) -> str:
if self.cf_risk_level(prompt) > 1:
self.log.info(self.class_name, msg="OpenAI's GPT-3 content filter thought the prompt was risky")
if self.is_text_risky(prompt):
self.log.info(self.class_name, msg="The content filter thought the prompt was risky")
return ""

try:
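For context, here is a minimal standalone sketch of the moderation call this file now performs, assuming only `requests` and an `OPENAI_API_KEY` environment variable. It illustrates the endpoint's request/response shape, not the bot's actual wiring:

```python
import os

import requests

# mirrors the commit's choice: "violence" can be tripped by AI safety terms
ALLOWED_CATEGORIES = frozenset({"violence"})


def text_is_flagged(text: str) -> bool:
    """Return True if OpenAI's moderation endpoint flags `text`
    in any category not explicitly allowed."""
    http_response = requests.post(
        "https://api.openai.com/v1/moderations",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        },
        json={"input": text},
        timeout=10,
    )
    http_response.raise_for_status()
    result = http_response.json()["results"][0]
    if not result["flagged"]:
        return False
    # result["categories"] maps each category name to a boolean
    violated = {c for c, hit in result["categories"].items() if hit}
    return bool(violated - ALLOWED_CATEGORIES)


if __name__ == "__main__":
    print(text_is_flagged("hello world"))  # expected: False
```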
36 changes: 13 additions & 23 deletions config.py
@@ -126,30 +126,21 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
else:
enabled_modules = enabled_modules_var

# user-configured from dotenv
# SEE README: ENVIRONMENT VARIABLES
discord_guild: str
# Factoid.py
factoid_database_path: str
# VIPs have full access + special permissions
bot_vip_ids: frozenset
# devs have less but can do maintainence like reboot
bot_dev_roles: frozenset
bot_dev_ids: frozenset
# control channel is where maintainence commands are issued
bot_control_channel_ids: frozenset
# private channel is where stampy logging gets printed
bot_private_channel_id: Optional[str]
# NOTE: Rob's invite/member management functions, not ported yet
bot_private_channel_id: str
bot_error_channel_id: str
member_role_id: Optional[str]
# bot_reboot is how stampy reboots himself
valid_bot_reboot_options = Literal["exec", False]
bot_reboot: valid_bot_reboot_options
# GPT STUFF
paid_service_all_channels: bool
# if above is false, where can paid services be used?
paid_service_channel_ids: frozenset
paid_service_for_all: bool
# if above is false, who gets to use paid services?
paid_service_whitelist_role_ids: frozenset
gpt4: bool
gpt4_for_all: bool
@@ -158,6 +149,7 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
llm_prompt: str
be_shy: bool
channel_whitelist: Optional[frozenset[str]]
disable_prompt_moderation: bool

is_rob_server = getenv_bool("IS_ROB_SERVER")
if is_rob_server:
@@ -215,34 +207,31 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt)
be_shy = getenv_bool("BE_SHY")
channel_whitelist = None
bot_error_channel_id = {
"production": "1017527224540344380",
"development": "1017531179664150608"
}[ENVIRONMENT_TYPE]
disable_prompt_moderation = False
else:
# user-configured from dotenv
# SEE README: ENVIRONMENT VARIABLES
discord_guild = getenv("DISCORD_GUILD")
# Factoid.py
factoid_database_path = getenv(
"FACTOID_DATABASE_PATH", default="./database/Factoids.db"
)
# VIPs have full access + special permissions
bot_vip_ids = getenv_unique_set("BOT_VIP_IDS", frozenset())
# devs have less but can do maintenance like reboot
bot_dev_roles = getenv_unique_set("BOT_DEV_ROLES", frozenset())
bot_dev_ids = getenv_unique_set("BOT_DEV_IDS", frozenset())
# control channel is where maintenance commands are issued
bot_control_channel_ids = getenv_unique_set("BOT_CONTROL_CHANNEL_IDS", frozenset())
# private channel is where stampy logging gets printed
bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID", default=None)
bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID")
bot_error_channel_id = getenv("BOT_ERROR_CHANNEL_ID", bot_private_channel_id)
# NOTE: Rob's invite/member management functions, not ported yet
member_role_id = getenv("MEMBER_ROLE_ID", default=None)
# bot_reboot is how stampy reboots himself
bot_reboot = cast(valid_bot_reboot_options, getenv("BOT_REBOOT", default=False))
# GPT STUFF
paid_service_all_channels = getenv_bool("PAID_SERVICE_ALL_CHANNELS")
# if above is false, where can paid services be used?
paid_service_channel_ids = getenv_unique_set(
"PAID_SERVICE_CHANNEL_IDS", frozenset()
)
paid_service_for_all = getenv_bool("PAID_SERVICE_FOR_ALL")
# if above is false, who gets to use paid services?
paid_service_whitelist_role_ids = getenv_unique_set(
"PAID_SERVICE_ROLE_IDS", frozenset()
)
Expand All @@ -253,6 +242,7 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense
llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt)
be_shy = getenv_bool("BE_SHY")
channel_whitelist = getenv_unique_set("CHANNEL_WHITELIST", None)
disable_prompt_moderation = getenv_bool("DISABLE_PROMPT_MODERATION")

discord_token: str = getenv("DISCORD_TOKEN")
database_path: str = getenv("DATABASE_PATH")
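`getenv_bool` and `getenv_unique_set` are not shown in this diff; below is a plausible minimal sketch of such helpers. The name and parameters match the signature visible in the hunk headers, but the parsing rules and the completion of the truncated return annotation are assumptions:

```python
import os
from typing import TypeVar, Union

T = TypeVar("T")


def getenv_bool(var_name: str) -> bool:
    # assumption: the variable merely being set counts as "true"
    return os.environ.get(var_name) is not None


def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozenset, T]:
    # assumption: values are whitespace-separated; duplicates collapse
    raw = os.environ.get(var_name)
    if raw is None:
        return default
    return frozenset(raw.split())
```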
6 changes: 6 additions & 0 deletions modules/chatgpt.py
@@ -141,6 +141,12 @@ async def chatgpt_chat(self, message: ServiceMessage) -> Response:
im = default_italics_mark

if self.openai.is_channel_allowed(message):
if self.openai.is_text_risky(message.clean_content):
return Response(
confidence=0,
text="",
why="GPT-3's content filter thought the prompt was risky",
)
self.log.info(
self.class_name,
msg=f"sending chat prompt to chatgpt, engine {engine} ({engine.description})",
2 changes: 1 addition & 1 deletion modules/gpt3module.py
@@ -223,7 +223,7 @@ async def gpt3_question(self, message: ServiceMessage) -> Response:
self.log.info(self.class_name, status="Asking GPT-3")
prompt = self.start_prompt + text + start_sequence

if self.openai.cf_risk_level(prompt) > 1:
if self.openai.is_text_risky(text):
return Response(
confidence=0,
text="",
6 changes: 3 additions & 3 deletions utilities/utilities.py
@@ -39,12 +39,12 @@
bot_vip_ids,
paid_service_for_all,
paid_service_whitelist_role_ids,
be_shy
be_shy,
bot_error_channel_id
)
from database.database import Database
from servicemodules.discordConstants import (
wiki_feed_channel_id,
stampy_error_log_channel_id,
)
from servicemodules.serviceConstants import Services
from utilities.discordutils import DiscordUser, user_has_role
@@ -291,7 +291,7 @@ async def log_exception(
def error_channel(self) -> discord.channel.TextChannel:
return cast(
discord.channel.TextChannel,
self.client.get_channel(int(stampy_error_log_channel_id)),
self.client.get_channel(int(bot_error_channel_id)),
)

async def log_error(self, error_message: str) -> None:
