Env fixes #91

Open · wants to merge 2 commits into base: main
4 changes: 4 additions & 0 deletions api/.env.example
@@ -1,3 +1,7 @@
OPENAI_API_KEY="sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
OPENAI_ORGANIZATION="" # leave blank if you're not using an organization
PINECONE_API_KEY="" # leave blank to use our online API instead
+PINECONE_INDEX_NAME=""
+PINECONE_NAMESPACE=""
+PINECONE_ENVIRONMENT=""
+LOGGING_URL="" # leave blank if you're not testing logging specifically
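These variables are read by the new stampy_chat.env module (imported in api/main.py below), which isn't itself visible in this diff. A minimal sketch of what it might contain, assuming python-dotenv (listed in setup.py) and a hypothetical FLASK_PORT variable defaulting to the previously hard-coded port; the real module evidently also constructs PINECONE_INDEX and a log function, which need more than plain environment reads:

import os
from dotenv import load_dotenv

load_dotenv()  # pull api/.env into the process environment

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_ORGANIZATION = os.environ.get("OPENAI_ORGANIZATION", "")  # blank when no org is used
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "")
PINECONE_NAMESPACE = os.environ.get("PINECONE_NAMESPACE", "")
PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", "")
LOGGING_URL = os.environ.get("LOGGING_URL", "")
FLASK_PORT = int(os.environ.get("FLASK_PORT", "3000"))  # hypothetical; default matches the old hard-coded port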
1 change: 1 addition & 0 deletions api/Pipfile
@@ -4,6 +4,7 @@ verify_ssl = true
name = "pypi"

[packages]
+stampy-chat = {editable = true, path = "."}
# ---- <flask stuff> ----
flask = "==1.1.2"
gunicorn = "==20.0.4"
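Because the package is installed editable, "pipenv install" now puts the local src/stampy_chat package on the import path in development mode, which is what lets api/main.py switch to absolute imports below:

from stampy_chat.env import PINECONE_INDEX, FLASK_PORT, log
from stampy_chat.chat import talk_to_robot, talk_to_robot_simple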
837 changes: 426 additions & 411 deletions api/Pipfile.lock

Large diffs are not rendered by default.

246 changes: 0 additions & 246 deletions api/chat.py
@@ -1,246 +0,0 @@
# ------------------------------- env, constants -------------------------------
from followups import multisearch_authored
from get_blocks import get_top_k_blocks, Block

from dataclasses import asdict
from typing import List, Dict, Callable
import openai
import re
import tiktoken
import time

# OpenAI models
EMBEDDING_MODEL = "text-embedding-ada-002"
COMPLETIONS_MODEL = "gpt-3.5-turbo"
# COMPLETIONS_MODEL = "gpt-4"

STANDARD_K = 20 if COMPLETIONS_MODEL == 'gpt-4' else 10

# parameters

# NOTE: All this is approximate, there's bits I'm intentionally not counting. Leave a buffer beyond what you might expect.
NUM_TOKENS = 8191 if COMPLETIONS_MODEL == 'gpt-4' else 4095
HISTORY_FRACTION = 0.25 # the (approximate) fraction of num_tokens to use for history text before truncating
CONTEXT_FRACTION = 0.5 # the (approximate) fraction of num_tokens to use for context text before truncating
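# For example, with the 4095-token gpt-3.5 budget these fractions work out to
# int(4095 * 0.25) = 1023 tokens for history and int(4095 * 0.5) = 2047 for context.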

ENCODER = tiktoken.get_encoding("cl100k_base")

DEBUG_PRINT = True

def set_debug_print(val: bool):
    global DEBUG_PRINT
    DEBUG_PRINT = val

# --------------------------------- prompt code --------------------------------



# limit a string to a certain number of tokens
def cap(text: str, max_tokens: int) -> str:

    if max_tokens <= 0: return "..."

    encoded_text = ENCODER.encode(text)

    if len(encoded_text) <= max_tokens: return text
    else: return ENCODER.decode(encoded_text[:max_tokens]) + " ..."
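# Example of the intended behavior (hypothetical values): with max_tokens=3,
# cap("one two three four five", 3) returns the first three tokens decoded
# plus " ..."; an input already within the limit is returned unchanged, and
# max_tokens <= 0 short-circuits to "...".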




def construct_prompt(query: str, mode: str, history: List[Dict[str, str]], context: List[Block]) -> List[Dict[str, str]]:

    prompt = []

    # History takes the format: history=[
    #     {"role": "user", "content": "Die monster. You don’t belong in this world!"},
    #     {"role": "assistant", "content": "It was not by my hand I am once again given flesh. I was called here by humans who wished to pay me tribute."},
    #     {"role": "user", "content": "Tribute!?! You steal men's souls and make them your slaves!"},
    #     {"role": "assistant", "content": "Perhaps the same could be said of all religions..."},
    #     {"role": "user", "content": "Your words are as empty as your soul! Mankind ill needs a savior such as you!"},
    #     {"role": "assistant", "content": "What is a man? A miserable little pile of secrets. But enough talk... Have at you!"},
    # ]

    source_prompt = "You are a helpful assistant knowledgeable about AI Alignment and Safety. " \
        "Please give a clear and coherent answer to the user's questions (written after \"Q:\") " \
        "using the following sources. Each source is labeled with a letter. Feel free to " \
        "use the sources in any order, and try to use multiple sources in your answers.\n\n"

    token_count = len(ENCODER.encode(source_prompt))

    # Context from top-k blocks
    for i, block in enumerate(context):
        block_str = f"[{chr(ord('a') + i)}] {block.title} - {block.author} - {block.date}\n{block.text}\n\n"
        block_tc = len(ENCODER.encode(block_str))

        if token_count + block_tc > int(NUM_TOKENS * CONTEXT_FRACTION):
            source_prompt += cap(block_str, int(NUM_TOKENS * CONTEXT_FRACTION) - token_count)
            break
        else:
            source_prompt += block_str
            token_count += block_tc

    source_prompt = source_prompt.strip()
    if len(history) > 0:
        source_prompt += "\n\n" \
            "Before the question (\"Q: \"), there will be a history of previous questions and answers. " \
            "These sources only apply to the last question. Any sources used in previous answers " \
            "are invalid."

    prompt.append({"role": "system", "content": source_prompt.strip()})

    # Write a version of the last 10 messages into history, cutting things off when we hit the token limit.
    token_count = 0
    history_trnc = []
    for message in history[:-10:-1]:
        if message["role"] == "user":
            history_trnc.append({"role": "user", "content": "Q: " + message["content"]})
            token_count += len(ENCODER.encode("Q: " + message["content"]))
        else:
            content = cap(message["content"], int(NUM_TOKENS * HISTORY_FRACTION) - token_count)

            # censor all source letters into [x]
            content = re.sub(r"\[[0-9]+\]", "[x]", content)

            history_trnc.append({"role": "assistant", "content": content})
            token_count += len(ENCODER.encode(content))

        if token_count > int(NUM_TOKENS * HISTORY_FRACTION):
            break

    prompt.extend(history_trnc[::-1])

    question_prompt = "In your answer, please cite any claims you make back to each source " \
        "using the format: [a], [b], etc. If you use multiple sources to make a claim " \
        "cite all of them. For example: \"AGI is concerning [c, d, e].\"\n\n"

    if mode == "concise":
        question_prompt += "Answer very concisely, getting to the crux of the matter in as " \
            "few words as possible. Limit your answer to 1-2 sentences.\n\n"

    elif mode == "rookie":
        question_prompt += "This user is new to the field of AI Alignment and Safety - don't " \
            "assume they know any technical terms or jargon. Still give a complete answer " \
            "without patronizing the user, but take any extra time needed to " \
            "explain new concepts or to illustrate your answer with examples. " \
            "Put extra effort into explaining the intuition behind concepts " \
            "rather than just giving a formal definition.\n\n"

    elif mode != "default":
        raise ValueError("Invalid mode: " + mode)

    question_prompt += "Q: " + query

    prompt.append({"role": "user", "content": question_prompt})

    return prompt
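# Shape of the result (hypothetical, with empty history and two context blocks):
#   [{"role": "system", "content": "You are a helpful assistant ... [a] <block a> [b] <block b>"},
#    {"role": "user", "content": "In your answer, please cite ... Q: <the query>"}]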

# ------------------------------- completion code -------------------------------
import json

def talk_to_robot_internal(index, query: str, mode: str, history: List[Dict[str, str]], k: int = STANDARD_K, log: Callable = print):
    try:
        # 1. Find the most relevant blocks from the Alignment Research Dataset
        yield {"state": "loading", "phase": "semantic"}
        top_k_blocks = get_top_k_blocks(index, query, k)

        yield {"state": "loading", "phase": "semantic", 'citations': [{'title': block.title, 'author': block.author, 'date': block.date, 'url': block.url} for block in top_k_blocks]}

        # 2. Generate a prompt
        yield {"state": "loading", "phase": "prompt"}
        prompt = construct_prompt(query, mode, history, top_k_blocks)

        # 3. Run both the standalone query and the full prompt through
        #    moderation to see if it will be accepted by OpenAI's api

        prompt_string = '\n\n'.join([message["content"] for message in prompt])
        mod_res = openai.Moderation.create(input=[query, prompt_string])

        if any(map(lambda x: x["flagged"], mod_res["results"])):

            # this is a biiig ask of a discord webhook - put most important
            # info at start such that it's more likely to not be cut off
            log('-' * 80)
            log("MODERATION REJECTED")
            log("MODERATION RESPONSE:\n\n" + json.dumps(mod_res["results"], indent=2))
            log("REJECTED QUERY: " + query)
            log("REJECTED PROMPT:\n\n" + prompt_string)
            log('-' * 80)

            raise ValueError("This conversation was rejected by OpenAI's moderation filter. Sorry.")

        # 4. Count number of tokens left for completion (-50 for a buffer)
        max_tokens_completion = NUM_TOKENS - sum([len(ENCODER.encode(message["content"]) + ENCODER.encode(message["role"])) for message in prompt]) - 50

        # 5. Answer the user query
        yield {"state": "loading", "phase": "llm"}
        t1 = time.time()
        response = ''

        for chunk in openai.ChatCompletion.create(
            model=COMPLETIONS_MODEL,
            messages=prompt,
            max_tokens=max_tokens_completion,
            stream=True,
            temperature=0,  # may or may not be a good idea
        ):
            res = chunk["choices"][0]["delta"]
            if res is not None and res.get("content") is not None:
                response += res["content"]
                yield {"state": "streaming", "content": res["content"]}

        t2 = time.time()
        print(f'Time to get response: {t2-t1:.2f}s')

        if DEBUG_PRINT:
            print('\n' * 10)
            print(" ------------------------------ prompt: -----------------------------")
            for message in prompt:
                print(f"----------- {message['role']}: ------------------")
                print(message['content'])

            print('\n' * 10)

            print(' ------------------------------ response: -----------------------------')
            print(response)

        log(query)
        log(response)

        # yield done state, possibly with followup questions
        fin_json = {'state': 'done'}
        followups = multisearch_authored([query, response], DEBUG_PRINT)
        for i, followup in enumerate(followups):
            fin_json[f'followup_{i}'] = asdict(followup)
        yield fin_json

    except Exception as e:
        print(e)
        yield {'state': 'error', 'error': str(e)}

# convert talk_to_robot_internal from dict generator into json generator
def talk_to_robot(index, query: str, mode: str, history: List[Dict[str, str]], k: int = STANDARD_K, log: Callable = print):
    yield from (json.dumps(block) for block in talk_to_robot_internal(index, query, mode, history, k, log))
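# Example of consuming the JSON stream (hypothetical caller):
#   for chunk in talk_to_robot(index, "What is instrumental convergence?", "default", []):
#       data = json.loads(chunk)
#       if data["state"] == "streaming":
#           print(data["content"], end="")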

# wayyy simplified api
def talk_to_robot_simple(index, query: str, log: Callable = print):
    res = {'response': ''}

    for block in talk_to_robot_internal(index, query, "default", [], log=log):
        if block['state'] == 'loading' and block['phase'] == 'semantic' and 'citations' in block:
            citations = {}
            for i, c in enumerate(block['citations']):
                citations[chr(ord('a') + i)] = c
            res['citations'] = citations

        elif block['state'] == 'streaming':
            res['response'] += block['content']

        elif block['state'] == 'error':
            res['response'] = block['error']

    return json.dumps(res)
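The consumer of this module is api/main.py, whose streaming route isn't visible in this diff. A minimal sketch of how talk_to_robot's generator of JSON strings could be wired into Flask (the route name, request shape, and newline framing here are assumptions, not taken from the PR):

from flask import Flask, Response, request
from stampy_chat.env import PINECONE_INDEX
from stampy_chat.chat import talk_to_robot

app = Flask(__name__)

@app.route("/chat", methods=["POST"])
def chat():
    data = request.get_json()
    # stream one newline-delimited JSON object per state update or token chunk
    stream = (chunk + "\n" for chunk in talk_to_robot(
        PINECONE_INDEX, data["query"], data.get("mode", "default"), data.get("history", [])))
    return Response(stream, mimetype="application/x-ndjson")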
10 changes: 5 additions & 5 deletions api/main.py
@@ -5,9 +5,9 @@
import json
import re

-from env import PINECONE_INDEX, log
-from get_blocks import get_top_k_blocks
-from chat import talk_to_robot, talk_to_robot_simple
+from stampy_chat.env import PINECONE_INDEX, FLASK_PORT, log
+from stampy_chat.get_blocks import get_top_k_blocks
+from stampy_chat.chat import talk_to_robot, talk_to_robot_simple


# ---------------------------------- web setup ---------------------------------
@@ -73,10 +73,10 @@ def human(id):
    #     ⬇️
    # <a href=\"https://stampy.ai/?state=6207&question=What%20is%20%22superintelligence%22%3F\">
    text = re.sub(r'<a href=\\"/\?state=(\d+.*)\\">', r'<a href=\"https://aisafety.info/?state=\1\\">', r.text)

    return Response(text, mimetype='application/json')

# ------------------------------------------------------------------------------

if __name__ == '__main__':
-    app.run(debug=True, port=3000)
+    app.run(debug=True, port=FLASK_PORT)
25 changes: 25 additions & 0 deletions api/setup.py
@@ -0,0 +1,25 @@
from setuptools import setup, find_packages

setup(
    name='Stampy-chat',
    version='0.1',
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    install_requires=[
        'flask',
        'gunicorn',
        'jinja2',
        'markupsafe',
        'itsdangerous',
        'werkzeug',
        'flask-cors',
        'openai',
        'numpy',
        'tenacity',
        'tiktoken',
        'pinecone-client',
        'python-dotenv',
        'discord-webhook',
        'requests',
    ],
)
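The same development install is available without pipenv: running "pip install -e ." from the api/ directory uses this setup.py directly, matching the editable entry added to the Pipfile above.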
Empty file added api/src/stampy_chat/__init__.py