thenewboston-developers · dmugtasimov · Aug 9, 2024 · Aug 8, 2024 · dmugtasimov · Aug 8, 2024
diff --git a/.github/workflows/quality-assurance.yml b/.github/workflows/quality-assurance.yml
@@ -8,7 +8,7 @@ jobs:
 
     # TODO(dmu) LOW: Consider using Debian Buster (the same as docker image is based on) if it is easy to do
     runs-on: ubuntu-latest
-    container: python:3.10.4
+    container: python:3.10.13
 
     services:
       # TODO(dmu) LOW: This section duplicates services already defined in `docker-compose.yml`.

diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.10.4-buster
+FROM python:3.10.13-buster
 
 WORKDIR /opt/project
 

diff --git a/README.md b/README.md
@@ -7,7 +7,10 @@
 
 # Initial Project Setup
 
-1. Install Poetry
+1. Install Python version 3.10.13 and make sure it is used in the following steps and later during development
+   (using [pyenv](https://github.com/pyenv/pyenv) is recommended for managing Python versions)
+
+2. Install Poetry
 
 ```bash
 export PIP_REQUIRED_VERSION=24.2
@@ -18,21 +21,21 @@ poetry config virtualenvs.path ${HOME}/.virtualenvs && \
 poetry run pip install pip==${PIP_REQUIRED_VERSION}
 ```
 
-2. Clone the Repository
+3. Clone the Repository
 
 ```bash
 git clone https://github.com/thenewboston-developers/thenewboston-Backend.git
 ```
 
-3. Copy the settings templates into a new local directory:
+4. Copy the settings templates into a new local directory:
 
 ```bash
 mkdir -p local
 cp thenewboston/project/settings/templates/settings.dev.py ./local/settings.dev.py
 cp thenewboston/project/settings/templates/settings.unittests.py ./local/settings.unittests.py
 ```
 
-4. Install / upgrade docker as described at https://docs.docker.com/engine/install/
+5. Install / upgrade docker as described at https://docs.docker.com/engine/install/
 ```bash
 # Known working versions described in the comments below 
 
@@ -42,14 +45,14 @@ docker --version # Docker version 26.0.1, build d260a54
 docker compose version # Docker Compose version v2.26.1
 ```
 
-5. Commands for setting up local environment. Run the following commands:
+6. Commands for setting up local environment. Run the following commands:
 
 ```bash
 make run-dependencies  # Sets up the necessary Docker containers for Redis and PostgreSQL
 make update            # Installs project dependencies, pre-commit and applies database migrations
 ```
 
-6. Fire Up the Server 🚀
+7. Fire Up the Server 🚀
 
 ```bash
 make run-server       # Starts the Django development server

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,6 +36,7 @@ sentry-sdk = {extras = ["django"], version = "^1.45.0"}
 django-restql = "^0.15.4"
 discord-py = "^2.4.0"
 promptlayer = "^1.0.9"
+anthropic = "^0.32.0"
 
 [tool.poetry.group.dev.dependencies]
 colorlog = "^6.7.0"

diff --git a/thenewboston/art/views/openai_image.py b/thenewboston/art/views/openai_image.py
@@ -2,7 +2,7 @@
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 
-from thenewboston.general.clients.openai import OpenAIClient
+from thenewboston.general.clients.llm import LLMClient
 from thenewboston.general.constants import OPENAI_IMAGE_CREATION_FEE
 from thenewboston.general.enums import MessageType
 from thenewboston.wallets.consumers.wallet import WalletConsumer
@@ -23,7 +23,7 @@ def create(self, request):
             description = serializer.validated_data['description']
             quantity = serializer.validated_data['quantity']
 
-            response = OpenAIClient.get_instance().generate_image(prompt=description, quantity=quantity)
+            response = LLMClient.get_instance().generate_image(prompt=description, quantity=quantity)
 
             self.charge_image_creation_fee(request.user, quantity)
 

diff --git a/thenewboston/contributions/models/contribution.py b/thenewboston/contributions/models/contribution.py
@@ -4,7 +4,7 @@
 from django.db import models
 from django.utils import timezone
 
-from thenewboston.general.clients.openai import OpenAIClient
+from thenewboston.general.clients.llm import LLMClient, make_prompt_kwargs
 from thenewboston.general.models import CreatedModified
 from thenewboston.general.utils.transfers import change_wallet_balance
 
@@ -69,11 +69,11 @@ def assess(self, save=True):
                 assessment_points = pull.assessment_points
                 assessment_explanation = pull.assessment_explanation
             case ContributionType.MANUAL.value:
-                result = OpenAIClient.get_instance().get_chat_completion(
-                    settings.GITHUB_MANUAL_CONTRIBUTION_ASSESSMENT_PROMPT_NAME,
+                result = LLMClient.get_instance().get_chat_completion(
                     input_variables={'description': self.description},
                     tracked_user=self.user,
                     tags=['manual_contribution_assessment'],
+                    **make_prompt_kwargs(settings.GITHUB_MANUAL_CONTRIBUTION_ASSESSMENT_PROMPT_NAME),
                 )
                 assessment_points = result['assessment']
                 assessment_explanation = result['explanation']

diff --git a/thenewboston/discord/bot.py b/thenewboston/discord/bot.py
@@ -11,7 +11,7 @@
 from django.conf import settings  # noqa: E402
 from django.contrib.auth import get_user_model  # noqa: E402
 
-from thenewboston.general.clients.openai import OpenAIClient  # noqa: E402
+from thenewboston.general.clients.llm import LLMClient, make_prompt_kwargs  # noqa: E402
 
 logger = logging.getLogger(__name__)
 
@@ -20,6 +20,49 @@
 
 bot = commands.Bot('/', intents=intents)
 
+# TODO(dmu) HIGH: Cover bot logic with unittests: it is already complex enough
+
+
+def is_ia(author):
+    return author.id == settings.IA_DISCORD_USER_ID
+
+
+def map_author_plaintext(author):
+    return 'ia' if is_ia(author) else author.name
+
+
+def map_author_structured(author):
+    return 'assistant' if is_ia(author) else 'user'
+
+
+def messages_to_plaintext(messages):
+    return '\n'.join(f'{map_author_plaintext(message.author)}: {message.content}' for message in messages)
+
+
+def messages_to_structured(messages):
+    structured_messages = []
+
+    prev_role = None
+    for message in messages:
+        content = message.content
+
+        if (role := map_author_structured(message.author)) == prev_role:
+            # Merge consecutive same-role messages; otherwise Anthropic rejects the request with:
+            # messages: roles must alternate between "user" and "assistant", but found multiple "user" roles in a row
+            assert structured_messages
+            structured_messages[-1]['content'][0]['text'] += f'\n{content}'
+        else:
+            structured_messages.append({'role': role, 'content': [{'type': 'text', 'text': content}]})
+
+        prev_role = role
+
+    return structured_messages
+
+
+async def get_historical_messages(channel):
+    # TODO(dmu) MEDIUM: Filter out only author's and IA's messages from the channel?
+    return [message async for message in channel.history(limit=settings.DISCORD_MESSAGE_HISTORY_LIMIT)]
+
 
 @bot.event
 async def on_ready():
@@ -32,28 +75,25 @@ async def on_message_implementation(message):
         await message.reply('Please, register at https://thenewboston.com')
         return
 
-    # TODO(dmu) MEDIUM: Request message history just once and convert it to necessary format before LLM call
-    plain_text_message_history = await get_plain_text_message_history(message.channel)
+    messages = (await get_historical_messages(message.channel))[::-1]
 
     # TODO(dmu) HIGH: Consider making just one LLM call that will return required response if necessary
-    answer = OpenAIClient.get_instance().get_chat_completion(
-        settings.DISCORD_IS_RESPONSE_WARRANTED_PROMPT_NAME,
-        input_variables={'plain_text_message_history': plain_text_message_history},
-        tracked_user=user
+    answer = LLMClient.get_instance().get_chat_completion(
+        input_variables={'plain_text_message_history': messages_to_plaintext(messages)},
+        tracked_user=user,
+        **make_prompt_kwargs(settings.DISCORD_IS_RESPONSE_WARRANTED_PROMPT_NAME),
     )
 
     # TODO(dmu) LOW: Rename requiresResponse -> requires_response
     if answer.get('requiresResponse'):
-        historical_messages = await get_historical_messages(message.channel)
-
-        ias_response = OpenAIClient.get_instance().get_chat_completion(
-            settings.DISCORD_CREATE_RESPONSE_PROMPT_NAME,
+        ias_response = LLMClient.get_instance().get_chat_completion(
             input_variables={
-                'messages': historical_messages,
+                'messages': messages_to_structured(messages),
                 'text': message.content
             },
             tracked_user=user,
-            tags=['discord_bot_response']
+            tags=['discord_bot_response'],
+            **make_prompt_kwargs(settings.DISCORD_CREATE_RESPONSE_PROMPT_NAME)
         )
         await message.reply(ias_response)
 
@@ -72,33 +112,5 @@ async def on_message(message):
         await message.reply('Oops.. Looks like something went wrong. Our team has been notified.')
 
 
-async def get_historical_messages(channel):
-    # TODO(dmu) LOW: Make `get_historical_messages()` DRY with `get_plain_text_message_history()`
-    results = []
-
-    async for message in channel.history(limit=settings.DISCORD_MESSAGE_HISTORY_LIMIT):
-        # TODO(dmu) LOW: If `_ia` supposed to be a suffix then use .endswith(). Also put `_ia` in a named
-        #                constant or (better) custom setting
-        if '_ia' in str(message.author):
-            results.append({'role': 'assistant', 'content': [{'type': 'text', 'text': message.content}]})
-        else:
-            results.append({'role': 'user', 'content': [{'type': 'text', 'text': message.content}]})
-
-    return results[::-1]
-
-
-async def get_plain_text_message_history(channel):
-    # TODO(dmu) LOW: Make `get_plain_text_message_history()` DRY with `get_historical_messages()`
-    messages = []
-
-    async for message in channel.history(limit=settings.DISCORD_MESSAGE_HISTORY_LIMIT):
-        # TODO(dmu) LOW: If `_ia` supposed to be a suffix then use .endswith(). Also put `_ia` in a named
-        #                constant or (better) custom setting
-        author_name = 'ia' if '_ia' in str(message.author) else message.author.name
-        messages.append(f'{author_name}: {message.content}')
-
-    return '\n'.join(messages[::-1])
-
-
 if __name__ == '__main__':
     bot.run(settings.DISCORD_BOT_TOKEN, log_handler=None)
diff --git a/thenewboston/discord/tests/test_bot.py b/thenewboston/discord/tests/test_bot.py
@@ -1,11 +1,111 @@
+from collections import namedtuple
 from unittest.mock import patch
 
 import pytest
+from django.test import override_settings
 
-from thenewboston.discord.bot import on_ready
+from thenewboston.discord.bot import messages_to_structured, on_ready
+
+Author = namedtuple('Author', ['id'])
+Message = namedtuple('Message', ['author', 'content'])
 
 
 @pytest.mark.asyncio
 async def test_on_ready():
     with patch('thenewboston.discord.bot.bot'):
         await on_ready()
+
+
+@override_settings(IA_DISCORD_USER_ID=1234)
+def test_messages_to_structured():
+    assert messages_to_structured([Message(author=Author(id=1234), content='hello')]) == [{
+        'role': 'assistant',
+        'content': [{
+            'type': 'text',
+            'text': 'hello'
+        }]
+    }]
+    assert messages_to_structured([
+        Message(author=Author(id=1234), content='hello'),
+        Message(author=Author(id=1234), content='world')
+    ]) == [{
+        'role': 'assistant',
+        'content': [{
+            'type': 'text',
+            'text': 'hello\nworld'
+        }]
+    }]
+    assert messages_to_structured([
+        Message(author=Author(id=1234), content='hello'),
+        Message(author=Author(id=10), content='world')
+    ]) == [
+        {
+            'role': 'assistant',
+            'content': [{
+                'type': 'text',
+                'text': 'hello'
+            }]
+        },
+        {
+            'role': 'user',
+            'content': [{
+                'type': 'text',
+                'text': 'world'
+            }]
+        },
+    ]
+    assert messages_to_structured([
+        Message(author=Author(id=1234), content='hello'),
+        Message(author=Author(id=10), content='world'),
+        Message(author=Author(id=1234), content='bye')
+    ]) == [
+        {
+            'role': 'assistant',
+            'content': [{
+                'type': 'text',
+                'text': 'hello'
+            }]
+        },
+        {
+            'role': 'user',
+            'content': [{
+                'type': 'text',
+                'text': 'world'
+            }]
+        },
+        {
+            'role': 'assistant',
+            'content': [{
+                'type': 'text',
+                'text': 'bye'
+            }]
+        },
+    ]
+    assert messages_to_structured([
+        Message(author=Author(id=1234), content='hello'),
+        Message(author=Author(id=10), content='world'),
+        Message(author=Author(id=10), content='mine'),
+        Message(author=Author(id=1234), content='bye')
+    ]) == [
+        {
+            'role': 'assistant',
+            'content': [{
+                'type': 'text',
+                'text': 'hello'
+            }]
+        },
+        {
+            'role': 'user',
+            'content': [{
+                'type': 'text',
+                'text': 'world\nmine'
+            }]
+        },
+        {
+            'role': 'assistant',
+            'content': [{
+                'type': 'text',
+                'text': 'bye'
+            }]
+        },
+    ]