From fc44738388706b24a10696518b00b2f723538aee Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Tue, 1 Oct 2024 16:56:40 +0200 Subject: [PATCH 01/17] Add token usage monitoring in exercise chat and send to Artemis --- app/domain/data/token_usage_dto.py | 7 +++++++ app/domain/pyris_message.py | 4 ++++ app/domain/status/status_update_dto.py | 2 ++ app/llm/external/LLMTokenCount.py | 13 +++++++++++++ app/llm/external/ollama.py | 7 +++++-- app/llm/external/openai_chat.py | 17 ++++++++++++----- app/llm/langchain/iris_langchain_chat_model.py | 8 ++++++-- app/pipeline/chat/code_feedback_pipeline.py | 4 ++++ app/pipeline/chat/exercise_chat_pipeline.py | 16 ++++++++++++++-- .../chat/interaction_suggestion_pipeline.py | 3 +++ app/retrieval/lecture_retrieval.py | 4 ++++ app/web/status/status_update.py | 4 ++++ 12 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 app/domain/data/token_usage_dto.py create mode 100644 app/llm/external/LLMTokenCount.py diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py new file mode 100644 index 00000000..9159eaa9 --- /dev/null +++ b/app/domain/data/token_usage_dto.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class TokenUsageDTO(BaseModel): + model_info: str + num_input_tokens: int + num_output_tokens: int \ No newline at end of file diff --git a/app/domain/pyris_message.py b/app/domain/pyris_message.py index 056f77ef..2d1ead95 100644 --- a/app/domain/pyris_message.py +++ b/app/domain/pyris_message.py @@ -16,6 +16,10 @@ class IrisMessageRole(str, Enum): class PyrisMessage(BaseModel): model_config = ConfigDict(populate_by_name=True) + num_input_tokens: int = Field(alias="numInputTokens", default=0) + num_output_tokens: int = Field(alias="numOutputTokens", default=0) + model_info: str = Field(alias="modelInfo", default="") + sent_at: datetime | None = Field(alias="sentAt", default=None) sender: IrisMessageRole contents: List[MessageContentDTO] = [] diff --git a/app/domain/status/status_update_dto.py b/app/domain/status/status_update_dto.py index bb6dc3a6..3dfa140b 100644 --- a/app/domain/status/status_update_dto.py +++ b/app/domain/status/status_update_dto.py @@ -2,8 +2,10 @@ from pydantic import BaseModel +from ..data.token_usage_dto import TokenUsageDTO from ...domain.status.stage_dto import StageDTO class StatusUpdateDTO(BaseModel): stages: List[StageDTO] + tokens: List[TokenUsageDTO] = [] diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py new file mode 100644 index 00000000..8ce2fa0c --- /dev/null +++ b/app/llm/external/LLMTokenCount.py @@ -0,0 +1,13 @@ +class LLMTokenCount: + + model_info: str + num_input_tokens: int + num_output_tokens: int + + def __init__(self, model_info: str, num_input_tokens: int, num_output_tokens: int): + self.model_info = model_info + self.num_input_tokens = num_input_tokens + self.num_output_tokens = num_output_tokens + + def __str__(self): + return f"{self.model_info}: {self.num_input_tokens} in, {self.num_output_tokens} out" diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 832df17c..89d126a6 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -57,7 +57,7 @@ def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: return messages_to_return -def convert_to_iris_message(message: Message) -> PyrisMessage: +def convert_to_iris_message(message: Message, num_input_tokens: int, num_output_tokens: int, model: str) -> PyrisMessage: """ Convert a Message to a PyrisMessage """ @@ 
-66,6 +66,9 @@ def convert_to_iris_message(message: Message) -> PyrisMessage: sender=map_str_to_role(message["role"]), contents=contents, send_at=datetime.now(), + num_input_tokens=num_input_tokens, + num_output_tokens=num_output_tokens, + model_info=model, ) @@ -108,7 +111,7 @@ def chat( format="json" if arguments.response_format == "JSON" else "", options=self.options, ) - return convert_to_iris_message(response["message"]) + return convert_to_iris_message(response["message"], response["prompt_eval_count"], response["eval_count"], response["model"]) def embed(self, text: str) -> list[float]: response = self._client.embeddings( diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 27e2d080..da5a8c2e 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -2,10 +2,11 @@ import time import traceback from datetime import datetime -from typing import Literal, Any +from typing import Literal, Any, Optional from openai import OpenAI from openai.lib.azure import AzureOpenAI +from openai.types import CompletionUsage from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam from openai.types.shared_params import ResponseFormatJSONObject @@ -61,16 +62,22 @@ def convert_to_open_ai_messages( return openai_messages -def convert_to_iris_message(message: ChatCompletionMessage) -> PyrisMessage: +def convert_to_iris_message(message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str) -> PyrisMessage: """ Convert a ChatCompletionMessage to a PyrisMessage """ - return PyrisMessage( + num_input_tokens = getattr(usage, 'prompt_tokens', -1) + num_output_tokens = getattr(usage, 'completion_tokens', -1) + + message = PyrisMessage( sender=map_str_to_role(message.role), contents=[TextMessageContentDTO(textContent=message.content)], send_at=datetime.now(), + num_input_tokens=num_input_tokens, + num_output_tokens=num_output_tokens, + model_info=model ) - + return message class OpenAIChatModel(ChatModel): model: str @@ -103,7 +110,7 @@ def chat( temperature=arguments.temperature, max_tokens=arguments.max_tokens, ) - return convert_to_iris_message(response.choices[0].message) + return convert_to_iris_message(response.choices[0].message, response.usage, response.model) except Exception as e: wait_time = initial_delay * (backoff_factor**attempt) logging.warning(f"Exception on attempt {attempt + 1}: {e}") diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py index 9dc85d38..f4dc5443 100644 --- a/app/llm/langchain/iris_langchain_chat_model.py +++ b/app/llm/langchain/iris_langchain_chat_model.py @@ -5,9 +5,9 @@ BaseChatModel, ) from langchain_core.messages import BaseMessage -from langchain_core.outputs import ChatResult -from langchain_core.outputs.chat_generation import ChatGeneration +from langchain_core.outputs import ChatResult, ChatGeneration +from ..external.LLMTokenCount import LLMTokenCount from ...common import ( convert_iris_message_to_langchain_message, convert_langchain_message_to_iris_message, @@ -20,6 +20,7 @@ class IrisLangchainChatModel(BaseChatModel): request_handler: RequestHandler completion_args: CompletionArguments + tokens: LLMTokenCount = None def __init__( self, @@ -43,6 +44,9 @@ def _generate( iris_message = self.request_handler.chat(iris_messages, self.completion_args) base_message = convert_iris_message_to_langchain_message(iris_message) chat_generation = ChatGeneration(message=base_message) + self.tokens = 
LLMTokenCount(model_info=iris_message.model_info, + num_input_tokens=iris_message.num_input_tokens, + num_output_tokens=iris_message.num_output_tokens) return ChatResult(generations=[chat_generation]) @property diff --git a/app/pipeline/chat/code_feedback_pipeline.py b/app/pipeline/chat/code_feedback_pipeline.py index 8ed5d9ba..80954640 100644 --- a/app/pipeline/chat/code_feedback_pipeline.py +++ b/app/pipeline/chat/code_feedback_pipeline.py @@ -7,12 +7,14 @@ from langchain_core.runnables import Runnable from langsmith import traceable from pydantic import BaseModel +from sipbuild.generator.parser.tokens import tokens from ...domain import PyrisMessage from ...domain.data.build_log_entry import BuildLogEntryDTO from ...domain.data.feedback_dto import FeedbackDTO from ...llm import CapabilityRequestHandler, RequirementList from ...llm import CompletionArguments +from ...llm.external.LLMTokenCount import LLMTokenCount from ...llm.langchain import IrisLangchainChatModel from ...pipeline import Pipeline from ...web.status.status_update import StatusCallback @@ -40,6 +42,7 @@ class CodeFeedbackPipeline(Pipeline): callback: StatusCallback default_prompt: PromptTemplate output_parser: StrOutputParser + tokens: LLMTokenCount def __init__(self, callback: Optional[StatusCallback] = None): super().__init__(implementation_id="code_feedback_pipeline_reference_impl") @@ -141,4 +144,5 @@ def __call__( } ) ) + self.tokens = self.llm.tokens return response.replace("{", "{{").replace("}", "}}") diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py index 8043e9ad..0dc994bf 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -10,6 +10,7 @@ ) from langchain_core.runnables import Runnable from langsmith import traceable, get_current_run_tree +from sipbuild.generator.parser.tokens import tokens from weaviate.collections.classes.filters import Filter from .code_feedback_pipeline import CodeFeedbackPipeline @@ -34,6 +35,7 @@ from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO from ...llm import CapabilityRequestHandler, RequirementList from ...llm import CompletionArguments +from ...llm.external.LLMTokenCount import LLMTokenCount from ...llm.langchain import IrisLangchainChatModel from ...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase @@ -53,6 +55,7 @@ class ExerciseChatPipeline(Pipeline): suggestion_pipeline: InteractionSuggestionPipeline code_feedback_pipeline: CodeFeedbackPipeline prompt: ChatPromptTemplate + tokens: List[LLMTokenCount] def __init__(self, callback: ExerciseChatStatusCallback): super().__init__(implementation_id="exercise_chat_pipeline") @@ -78,6 +81,7 @@ def __init__(self, callback: ExerciseChatStatusCallback): self.code_feedback_pipeline = CodeFeedbackPipeline() self.pipeline = self.llm | StrOutputParser() self.citation_pipeline = CitationPipeline() + self.tokens = [] def __repr__(self): return f"{self.__class__.__name__}(llm={self.llm})" @@ -98,7 +102,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): ) self._run_exercise_chat_pipeline(dto, should_execute_lecture_pipeline), self.callback.done( - "Generated response", final_result=self.exercise_chat_response + "Generated response", final_result=self.exercise_chat_response, tokens=self.tokens ) try: @@ -112,7 +116,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): suggestion_dto.last_message = self.exercise_chat_response 
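Aside from the wiring above, the OpenAI conversion is worth reading closely: usage is typed Optional[CompletionUsage], so getattr(usage, "prompt_tokens", -1) records -1 whenever the API returns no usage object instead of raising. A minimal, self-contained sketch of that extraction — the Usage stand-in and the reduced message type are illustrative, not part of this diff:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Usage:  # stand-in for openai.types.CompletionUsage
        prompt_tokens: int
        completion_tokens: int

    @dataclass
    class MessageUsageSketch:  # PyrisMessage reduced to the new fields
        num_input_tokens: int = 0
        num_output_tokens: int = 0
        model_info: str = ""

    def extract_usage(usage: Optional[Usage], model: str) -> MessageUsageSketch:
        # getattr(None, ..., -1) also returns -1, so a missing usage object
        # is recorded as -1 rather than crashing the conversion
        return MessageUsageSketch(
            num_input_tokens=getattr(usage, "prompt_tokens", -1),
            num_output_tokens=getattr(usage, "completion_tokens", -1),
            model_info=model,
        )

    print(extract_usage(Usage(42, 7), "gpt-4o"))  # counts present
    print(extract_usage(None, "gpt-4o"))          # falls back to -1

The -1 default doubles as a sentinel: downstream consumers can tell "no usage reported" apart from a genuine zero-token call.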
suggestion_dto.problem_statement = dto.exercise.problem_statement suggestions = self.suggestion_pipeline(suggestion_dto) - self.callback.done(final_result=None, suggestions=suggestions) + self.callback.done(final_result=None, suggestions=suggestions, tokens=[self.suggestion_pipeline.tokens]) else: # This should never happen but whatever self.callback.skip( @@ -200,6 +204,8 @@ def _run_exercise_chat_pipeline( if submission: try: feedback = future_feedback.result() + if self.code_feedback_pipeline.tokens is not None: + self.tokens.append(self.code_feedback_pipeline.tokens) self.prompt += SystemMessagePromptTemplate.from_template( "Another AI has checked the code of the student and has found the following issues. " "Use this information to help the student. " @@ -220,6 +226,8 @@ def _run_exercise_chat_pipeline( if should_execute_lecture_pipeline: try: self.retrieved_lecture_chunks = future_lecture.result() + if self.retriever.tokens is not None: + self.tokens.append(self.retriever.tokens) if len(self.retrieved_lecture_chunks) > 0: self._add_relevant_chunks_to_prompt( self.retrieved_lecture_chunks @@ -252,6 +260,8 @@ def _run_exercise_chat_pipeline( .with_config({"run_name": "Response Drafting"}) .invoke({}) ) + if self.llm.tokens is not None: + self.tokens.append(self.llm.tokens) self.callback.done() self.prompt = ChatPromptTemplate.from_messages( [ @@ -266,6 +276,8 @@ def _run_exercise_chat_pipeline( .with_config({"run_name": "Response Refining"}) .invoke({}) ) + if self.llm.tokens is not None: + self.tokens.append(self.llm.tokens) if "!ok!" in guide_response: print("Response is ok and not rewritten!!!") diff --git a/app/pipeline/chat/interaction_suggestion_pipeline.py b/app/pipeline/chat/interaction_suggestion_pipeline.py index 86635166..8fe60252 100644 --- a/app/pipeline/chat/interaction_suggestion_pipeline.py +++ b/app/pipeline/chat/interaction_suggestion_pipeline.py @@ -34,6 +34,7 @@ ) from ...llm import CompletionArguments +from ...llm.external.LLMTokenCount import LLMTokenCount from ...llm.langchain import IrisLangchainChatModel from ..pipeline import Pipeline @@ -52,6 +53,7 @@ class InteractionSuggestionPipeline(Pipeline): pipeline: Runnable prompt: ChatPromptTemplate variant: str + tokens: LLMTokenCount def __init__(self, variant: str = "default"): super().__init__(implementation_id="interaction_suggestion_pipeline") @@ -164,6 +166,7 @@ def __call__( self.prompt = ChatPromptTemplate.from_messages(prompt_val) response: dict = (self.prompt | self.pipeline).invoke({}) + self.tokens = self.llm.tokens return response["questions"] except Exception as e: logger.error( diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py index df832ebc..c80f94bb 100644 --- a/app/retrieval/lecture_retrieval.py +++ b/app/retrieval/lecture_retrieval.py @@ -2,10 +2,12 @@ from typing import List from langsmith import traceable +from sipbuild.generator.parser.tokens import tokens from weaviate import WeaviateClient from weaviate.classes.query import Filter from ..common import convert_iris_message_to_langchain_message +from ..llm.external.LLMTokenCount import LLMTokenCount from ..llm.langchain import IrisLangchainChatModel from ..pipeline import Pipeline @@ -80,6 +82,7 @@ class LectureRetrieval(Pipeline): """ Class for retrieving lecture data from the database. 
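Taken together, the exercise-chat hunks establish the aggregation pattern used throughout this PR: each sub-pipeline (code feedback, lecture retrieval, interaction suggestion) keeps the usage of its last LLM call in a tokens attribute, and the orchestrating pipeline appends every non-None entry to its own list before handing that list to the status callback. A reduced sketch of the pattern, assuming only the attribute shapes visible in the diff:

    from typing import Any, List, Optional

    class SubPipelineSketch:
        # Stands in for CodeFeedbackPipeline, LectureRetrieval, etc.
        def __init__(self) -> None:
            self.tokens: Optional[Any] = None  # an LLMTokenCount after a run

    class OrchestratorSketch:
        # Stands in for ExerciseChatPipeline
        def __init__(self) -> None:
            self.tokens: List[Any] = []  # one entry per LLM call in this run

        def collect(self, sub: SubPipelineSketch) -> None:
            if sub.tokens is not None:  # skipped steps never ran the LLM
                self.tokens.append(sub.tokens)

The None guard is the check the diff repeats before every append: a skipped code-feedback or lecture step would otherwise leak None into the list sent back with callback.done().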
""" + tokens: LLMTokenCount def __init__(self, client: WeaviateClient, **kwargs): super().__init__(implementation_id="lecture_retrieval_pipeline") @@ -236,6 +239,7 @@ def rewrite_student_query( prompt = ChatPromptTemplate.from_messages(prompt_val) try: response = (prompt | self.pipeline).invoke({}) + self.tokens = self.llm.tokens logger.info(f"Response from exercise chat pipeline: {response}") return response except Exception as e: diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 1f497f75..a8687862 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -19,6 +19,8 @@ from ...domain.status.status_update_dto import StatusUpdateDTO import logging +from ...llm.external.LLMTokenCount import LLMTokenCount + logger = logging.getLogger(__name__) @@ -96,6 +98,7 @@ def done( message: Optional[str] = None, final_result: Optional[str] = None, suggestions: Optional[List[str]] = None, + tokens: Optional[List[LLMTokenCount]] = None, next_stage_message: Optional[str] = None, start_next_stage: bool = True, ): @@ -107,6 +110,7 @@ def done( self.stage.state = StageStateEnum.DONE self.stage.message = message self.status.result = final_result + self.status.tokens = tokens or self.status.tokens if hasattr(self.status, "suggestions"): self.status.suggestions = suggestions next_stage = self.get_next_stage() From 26e3873f4d45af31327e208d719637c0ca6b1a32 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Fri, 11 Oct 2024 15:29:58 +0200 Subject: [PATCH 02/17] Add Pipeline enum for better tracking --- app/domain/data/token_usage_dto.py | 5 ++++- app/llm/external/LLMTokenCount.py | 9 +++++++-- app/llm/external/PipelineEnum.py | 11 +++++++++++ app/llm/langchain/iris_langchain_chat_model.py | 4 +++- app/pipeline/chat/exercise_chat_pipeline.py | 3 +++ app/pipeline/chat/interaction_suggestion_pipeline.py | 2 ++ 6 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 app/llm/external/PipelineEnum.py diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py index 9159eaa9..cc98c8af 100644 --- a/app/domain/data/token_usage_dto.py +++ b/app/domain/data/token_usage_dto.py @@ -1,7 +1,10 @@ from pydantic import BaseModel +from app.llm.external.PipelineEnum import PipelineEnum + class TokenUsageDTO(BaseModel): model_info: str num_input_tokens: int - num_output_tokens: int \ No newline at end of file + num_output_tokens: int + pipeline: PipelineEnum \ No newline at end of file diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py index 8ce2fa0c..e82b02af 100644 --- a/app/llm/external/LLMTokenCount.py +++ b/app/llm/external/LLMTokenCount.py @@ -1,13 +1,18 @@ +from app.llm.external.PipelineEnum import PipelineEnum + + class LLMTokenCount: model_info: str num_input_tokens: int num_output_tokens: int + pipeline: PipelineEnum - def __init__(self, model_info: str, num_input_tokens: int, num_output_tokens: int): + def __init__(self, model_info: str, num_input_tokens: int, num_output_tokens: int, pipeline: PipelineEnum): self.model_info = model_info self.num_input_tokens = num_input_tokens self.num_output_tokens = num_output_tokens + self.pipeline = pipeline def __str__(self): - return f"{self.model_info}: {self.num_input_tokens} in, {self.num_output_tokens} out" + return f"{self.model_info}: {self.num_input_tokens} in, {self.num_output_tokens} out, {self.pipeline} pipeline" diff --git a/app/llm/external/PipelineEnum.py b/app/llm/external/PipelineEnum.py new file mode 100644 index 00000000..9efef2f2 --- 
/dev/null +++ b/app/llm/external/PipelineEnum.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class PipelineEnum(str, Enum): + IRIS_CODE_FEEDBACK = "IRIS_CODE_FEEDBACK" + IRIS_CHAT_COURSE_MESSAGE = "IRIS_CHAT_COURSE_MESSAGE" + IRIS_CHAT_EXERCISE_MESSAGE = "IRIS_CHAT_EXERCISE_MESSAGE" + IRIS_INTERACTION_SUGGESTION = "IRIS_INTERACTION_SUGGESTION" + IRIS_CHAT_LECTURE_MESSAGE = "IRIS_CHAT_LECTURE_MESSAGE" + IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION" + NOT_SET = "NOT_SET" diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py index f4dc5443..c5cd9273 100644 --- a/app/llm/langchain/iris_langchain_chat_model.py +++ b/app/llm/langchain/iris_langchain_chat_model.py @@ -8,6 +8,7 @@ from langchain_core.outputs import ChatResult, ChatGeneration from ..external.LLMTokenCount import LLMTokenCount +from ..external.PipelineEnum import PipelineEnum from ...common import ( convert_iris_message_to_langchain_message, convert_langchain_message_to_iris_message, @@ -46,7 +47,8 @@ def _generate( chat_generation = ChatGeneration(message=base_message) self.tokens = LLMTokenCount(model_info=iris_message.model_info, num_input_tokens=iris_message.num_input_tokens, - num_output_tokens=iris_message.num_output_tokens) + num_output_tokens=iris_message.num_output_tokens, + pipeline=PipelineEnum.NOT_SET) return ChatResult(generations=[chat_generation]) @property diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py index 0dc994bf..be373f5b 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -36,6 +36,7 @@ from ...llm import CapabilityRequestHandler, RequirementList from ...llm import CompletionArguments from ...llm.external.LLMTokenCount import LLMTokenCount +from ...llm.external.PipelineEnum import PipelineEnum from ...llm.langchain import IrisLangchainChatModel from ...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase @@ -261,6 +262,7 @@ def _run_exercise_chat_pipeline( .invoke({}) ) if self.llm.tokens is not None: + self.llm.tokens.pipeline = PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE self.tokens.append(self.llm.tokens) self.callback.done() self.prompt = ChatPromptTemplate.from_messages( @@ -277,6 +279,7 @@ def _run_exercise_chat_pipeline( .invoke({}) ) if self.llm.tokens is not None: + self.llm.tokens.pipeline = PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE self.tokens.append(self.llm.tokens) if "!ok!" 
in guide_response: diff --git a/app/pipeline/chat/interaction_suggestion_pipeline.py b/app/pipeline/chat/interaction_suggestion_pipeline.py index 8fe60252..4a50795e 100644 --- a/app/pipeline/chat/interaction_suggestion_pipeline.py +++ b/app/pipeline/chat/interaction_suggestion_pipeline.py @@ -35,6 +35,7 @@ from ...llm import CompletionArguments from ...llm.external.LLMTokenCount import LLMTokenCount +from ...llm.external.PipelineEnum import PipelineEnum from ...llm.langchain import IrisLangchainChatModel from ..pipeline import Pipeline @@ -167,6 +168,7 @@ def __call__( response: dict = (self.prompt | self.pipeline).invoke({}) self.tokens = self.llm.tokens + self.tokens.pipeline = PipelineEnum.IRIS_INTERACTION_SUGGESTION return response["questions"] except Exception as e: logger.error( From aa50faf9a2cf443e8a1947620be69efc89fc4324 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Fri, 11 Oct 2024 17:48:07 +0200 Subject: [PATCH 03/17] Update tokens location, add token tracking to competency and chat pipe --- app/llm/external/PipelineEnum.py | 1 + app/pipeline/chat/code_feedback_pipeline.py | 5 ++++- app/pipeline/chat/course_chat_pipeline.py | 10 +++++++++- app/pipeline/chat/exercise_chat_pipeline.py | 1 - app/pipeline/chat/lecture_chat_pipeline.py | 7 +++++++ app/pipeline/competency_extraction_pipeline.py | 11 ++++++++++- app/pipeline/pipeline.py | 4 ++++ app/pipeline/shared/citation_pipeline.py | 6 ++++++ 8 files changed, 41 insertions(+), 4 deletions(-) diff --git a/app/llm/external/PipelineEnum.py b/app/llm/external/PipelineEnum.py index 9efef2f2..d1d28cfd 100644 --- a/app/llm/external/PipelineEnum.py +++ b/app/llm/external/PipelineEnum.py @@ -8,4 +8,5 @@ class PipelineEnum(str, Enum): IRIS_INTERACTION_SUGGESTION = "IRIS_INTERACTION_SUGGESTION" IRIS_CHAT_LECTURE_MESSAGE = "IRIS_CHAT_LECTURE_MESSAGE" IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION" + IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE" NOT_SET = "NOT_SET" diff --git a/app/pipeline/chat/code_feedback_pipeline.py b/app/pipeline/chat/code_feedback_pipeline.py index 80954640..de6a0d90 100644 --- a/app/pipeline/chat/code_feedback_pipeline.py +++ b/app/pipeline/chat/code_feedback_pipeline.py @@ -15,6 +15,7 @@ from ...llm import CapabilityRequestHandler, RequirementList from ...llm import CompletionArguments from ...llm.external.LLMTokenCount import LLMTokenCount +from ...llm.external.PipelineEnum import PipelineEnum from ...llm.langchain import IrisLangchainChatModel from ...pipeline import Pipeline from ...web.status.status_update import StatusCallback @@ -144,5 +145,7 @@ def __call__( } ) ) - self.tokens = self.llm.tokens + num_tokens = self.llm.tokens + num_tokens.pipeline = PipelineEnum.IRIS_CODE_FEEDBACK + self.tokens = num_tokens return response.replace("{", "{{").replace("}", "}}") diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index d2928df7..6da287f0 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -14,6 +14,7 @@ from langchain_core.runnables import Runnable from langchain_core.tools import tool from langsmith import traceable +from sipbuild.generator.parser.tokens import tokens from weaviate.collections.classes.filters import Filter from .interaction_suggestion_pipeline import ( @@ -41,6 +42,8 @@ elicit_begin_agent_jol_prompt, ) from ...domain import CourseChatPipelineExecutionDTO +from ...llm.external.LLMTokenCount import LLMTokenCount +from ...llm.external.PipelineEnum import PipelineEnum from 
...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase from ...vector_database.lecture_schema import LectureSchema @@ -107,6 +110,7 @@ def __init__(self, callback: CourseChatStatusCallback, variant: str = "default") # Create the pipeline self.pipeline = self.llm | StrOutputParser() + self.tokens = [] def __repr__(self): return f"{self.__class__.__name__}(llm={self.llm})" @@ -406,14 +410,18 @@ def lecture_content_retrieval() -> str: self.callback.in_progress() for step in agent_executor.iter(params): print("STEP:", step) + token_count = self.llm.tokens + token_count.pipeline = PipelineEnum.IRIS_CHAT_COURSE_MESSAGE + self.tokens.append(token_count) if step.get("output", None): out = step["output"] if self.retrieved_paragraphs: self.callback.in_progress("Augmenting response ...") out = self.citation_pipeline(self.retrieved_paragraphs, out) + self.tokens.extend(self.citation_pipeline.tokens) - self.callback.done("Response created", final_result=out) + self.callback.done("Response created", final_result=out, tokens=self.tokens) # try: # # if out: diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py index be373f5b..2aee5beb 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -56,7 +56,6 @@ class ExerciseChatPipeline(Pipeline): suggestion_pipeline: InteractionSuggestionPipeline code_feedback_pipeline: CodeFeedbackPipeline prompt: ChatPromptTemplate - tokens: List[LLMTokenCount] def __init__(self, callback: ExerciseChatStatusCallback): super().__init__(implementation_id="exercise_chat_pipeline") diff --git a/app/pipeline/chat/lecture_chat_pipeline.py b/app/pipeline/chat/lecture_chat_pipeline.py index 51693009..cf9f1cc1 100644 --- a/app/pipeline/chat/lecture_chat_pipeline.py +++ b/app/pipeline/chat/lecture_chat_pipeline.py @@ -16,6 +16,8 @@ LectureChatPipelineExecutionDTO, ) from ...llm import CapabilityRequestHandler, RequirementList +from ...llm.external import LLMTokenCount +from ...llm.external.PipelineEnum import PipelineEnum from ...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase from ...vector_database.lecture_schema import LectureSchema @@ -74,6 +76,7 @@ def __init__(self): self.retriever = LectureRetrieval(self.db.client) self.pipeline = self.llm | StrOutputParser() self.citation_pipeline = CitationPipeline() + self.tokens = [] def __repr__(self): return f"{self.__class__.__name__}(llm={self.llm})" @@ -114,9 +117,13 @@ def __call__(self, dto: LectureChatPipelineExecutionDTO): self.prompt = ChatPromptTemplate.from_messages(prompt_val) try: response = (self.prompt | self.pipeline).invoke({}) + num_tokens = self.llm.tokens + num_tokens.pipeline = PipelineEnum.IRIS_CHAT_LECTURE_MESSAGE + self.tokens.append(num_tokens) response_with_citation = self.citation_pipeline( retrieved_lecture_chunks, response ) + self.tokens.extend(self.citation_pipeline.tokens) logger.info(f"Response from lecture chat pipeline: {response}") return response_with_citation except Exception as e: diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index a2288ab5..df4d2e30 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -5,6 +5,7 @@ from langchain_core.prompts import ( ChatPromptTemplate, ) +from sipbuild.generator.parser.tokens import tokens from app.domain import ( 
CompetencyExtractionPipelineExecutionDTO, @@ -14,6 +15,8 @@ from app.domain.data.text_message_content_dto import TextMessageContentDTO from app.domain.data.competency_dto import Competency from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.llm.external.LLMTokenCount import LLMTokenCount +from app.llm.external.PipelineEnum import PipelineEnum from app.pipeline import Pipeline from app.web.status.status_update import CompetencyExtractionCallback from app.pipeline.prompts.competency_extraction import system_prompt @@ -38,6 +41,7 @@ def __init__(self, callback: Optional[CompetencyExtractionCallback] = None): ) ) self.output_parser = PydanticOutputParser(pydantic_object=Competency) + self.tokens = [] def __call__( self, @@ -76,6 +80,11 @@ def __call__( response = self.request_handler.chat( [prompt], CompletionArguments(temperature=0.4) ) + num_tokens = LLMTokenCount(model_info=response.model_info, + num_input_tokens=response.num_input_tokens, + num_output_tokens=response.num_output_tokens, + pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION) + self.tokens.append(num_tokens) response = response.contents[0].text_content generated_competencies: list[Competency] = [] @@ -98,4 +107,4 @@ def __call__( continue logger.debug(f"Generated competency: {competency}") generated_competencies.append(competency) - self.callback.done(final_result=generated_competencies) + self.callback.done(final_result=generated_competencies, tokens=self.tokens) diff --git a/app/pipeline/pipeline.py b/app/pipeline/pipeline.py index 8f2249b7..2359e4a7 100644 --- a/app/pipeline/pipeline.py +++ b/app/pipeline/pipeline.py @@ -1,10 +1,14 @@ from abc import ABCMeta +from typing import List + +from app.llm.external import LLMTokenCount class Pipeline(metaclass=ABCMeta): """Abstract class for all pipelines""" implementation_id: str + tokens: List[LLMTokenCount] def __init__(self, implementation_id=None, **kwargs): self.implementation_id = implementation_id diff --git a/app/pipeline/shared/citation_pipeline.py b/app/pipeline/shared/citation_pipeline.py index 6a4aab38..0bcdc1e2 100644 --- a/app/pipeline/shared/citation_pipeline.py +++ b/app/pipeline/shared/citation_pipeline.py @@ -7,6 +7,8 @@ from langchain_core.runnables import Runnable from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.llm.external import LLMTokenCount +from app.llm.external.PipelineEnum import PipelineEnum from app.llm.langchain import IrisLangchainChatModel from app.pipeline import Pipeline @@ -38,6 +40,7 @@ def __init__(self): with open(prompt_file_path, "r") as file: self.prompt_str = file.read() self.pipeline = self.llm | StrOutputParser() + self.tokens = [] def __repr__(self): return f"{self.__class__.__name__}(llm={self.llm})" @@ -83,6 +86,9 @@ def __call__( response = (self.default_prompt | self.pipeline).invoke( {"Answer": answer, "Paragraphs": paras} ) + token_count = self.llm.tokens + token_count.pipeline = PipelineEnum.IRIS_CITATION_PIPELINE + self.tokens.append(token_count) if response == "!NONE!": return answer print(response) From 9905460775843863874ec8766dbb7d3023c0cfa5 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Fri, 11 Oct 2024 18:26:01 +0200 Subject: [PATCH 04/17] added first versions for tracking for smaller pipelines --- app/llm/external/PipelineEnum.py | 2 ++ app/pipeline/lecture_ingestion_pipeline.py | 10 +++++++++- app/pipeline/shared/reranker_pipeline.py | 6 ++++++ app/pipeline/shared/summary_pipeline.py | 1 + 4 files changed, 18 insertions(+), 1 
deletion(-) diff --git a/app/llm/external/PipelineEnum.py b/app/llm/external/PipelineEnum.py index d1d28cfd..475dbc4a 100644 --- a/app/llm/external/PipelineEnum.py +++ b/app/llm/external/PipelineEnum.py @@ -9,4 +9,6 @@ class PipelineEnum(str, Enum): IRIS_CHAT_LECTURE_MESSAGE = "IRIS_CHAT_LECTURE_MESSAGE" IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION" IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE" + IRIS_RERANKER_PIPELINE = "IRIS_RERANKER_PIPELINE" + IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE" NOT_SET = "NOT_SET" diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py index 0b468a41..c51848fe 100644 --- a/app/pipeline/lecture_ingestion_pipeline.py +++ b/app/pipeline/lecture_ingestion_pipeline.py @@ -6,6 +6,7 @@ import fitz from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate +from sipbuild.generator.parser.tokens import tokens from unstructured.cleaners.core import clean from weaviate import WeaviateClient from weaviate.classes.query import Filter @@ -18,6 +19,7 @@ IngestionPipelineExecutionDto, ) from ..domain.data.text_message_content_dto import TextMessageContentDTO +from ..llm.external.PipelineEnum import PipelineEnum from ..llm.langchain import IrisLangchainChatModel from ..vector_database.lecture_schema import init_lecture_schema, LectureSchema from ..ingestion.abstract_ingestion import AbstractIngestion @@ -112,6 +114,7 @@ def __init__( request_handler=request_handler, completion_args=completion_args ) self.pipeline = self.llm | StrOutputParser() + self.tokens = [] def __call__(self) -> bool: try: @@ -273,9 +276,14 @@ def merge_page_content_and_image_interpretation( image_interpretation=image_interpretation, ) prompt = ChatPromptTemplate.from_messages(prompt_val) - return clean( + clean_output = clean( (prompt | self.pipeline).invoke({}), bullets=True, extra_whitespace=True ) + # TODO: send to artemis + num_tokens = self.llm.tokens + num_tokens.pipeline = PipelineEnum.IRIS_LECTURE_INGESTION + tokens.append(num_tokens) + return clean_output def get_course_language(self, page_content: str) -> str: """ diff --git a/app/pipeline/shared/reranker_pipeline.py b/app/pipeline/shared/reranker_pipeline.py index 178bb4e6..566288c9 100644 --- a/app/pipeline/shared/reranker_pipeline.py +++ b/app/pipeline/shared/reranker_pipeline.py @@ -6,9 +6,11 @@ from langchain_core.prompts import ChatPromptTemplate, PromptTemplate from langchain_core.runnables import Runnable from langsmith import traceable +from sipbuild.generator.parser.tokens import tokens from app.domain import PyrisMessage from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.llm.external.PipelineEnum import PipelineEnum from app.llm.langchain import IrisLangchainChatModel from app.pipeline import Pipeline from app.pipeline.chat.output_models.output_models.selected_paragraphs import ( @@ -56,6 +58,7 @@ def __init__(self): ) logger.debug(self.output_parser.get_format_instructions()) self.pipeline = self.llm | self.output_parser + self.tokens = [] def __repr__(self): return f"{self.__class__.__name__}(llm={self.llm})" @@ -108,4 +111,7 @@ def __call__( prompt = self.default_prompt response = (prompt | self.pipeline).invoke(data) + num_tokens = self.llm.tokens + num_tokens.pipeline = PipelineEnum.IRIS_RERANKER_PIPELINE + self.tokens.append(num_tokens) return response.selected_paragraphs diff --git a/app/pipeline/shared/summary_pipeline.py b/app/pipeline/shared/summary_pipeline.py index 
382881a2..6a7f49a0 100644 --- a/app/pipeline/shared/summary_pipeline.py +++ b/app/pipeline/shared/summary_pipeline.py @@ -45,6 +45,7 @@ def __init__(self): ) # Create the pipeline self.pipeline = self.prompt | self.llm | StrOutputParser() + self.tokens = [] def __repr__(self): return f"{self.__class__.__name__}(llm={self.llm})" From e241d457b86e97ad4df94fa91e11db80ca28d552 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Fri, 11 Oct 2024 19:40:11 +0200 Subject: [PATCH 05/17] Fix lint errors --- app/domain/data/token_usage_dto.py | 2 +- app/llm/external/LLMTokenCount.py | 8 +++++++- app/llm/external/ollama.py | 11 +++++++++-- app/llm/external/openai_chat.py | 15 ++++++++++----- app/llm/langchain/iris_langchain_chat_model.py | 10 ++++++---- app/pipeline/chat/exercise_chat_pipeline.py | 12 ++++++++---- app/pipeline/competency_extraction_pipeline.py | 11 ++++++----- 7 files changed, 47 insertions(+), 22 deletions(-) diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py index cc98c8af..c7e1868c 100644 --- a/app/domain/data/token_usage_dto.py +++ b/app/domain/data/token_usage_dto.py @@ -7,4 +7,4 @@ class TokenUsageDTO(BaseModel): model_info: str num_input_tokens: int num_output_tokens: int - pipeline: PipelineEnum \ No newline at end of file + pipeline: PipelineEnum diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py index e82b02af..7570eddb 100644 --- a/app/llm/external/LLMTokenCount.py +++ b/app/llm/external/LLMTokenCount.py @@ -8,7 +8,13 @@ class LLMTokenCount: num_output_tokens: int pipeline: PipelineEnum - def __init__(self, model_info: str, num_input_tokens: int, num_output_tokens: int, pipeline: PipelineEnum): + def __init__( + self, + model_info: str, + num_input_tokens: int, + num_output_tokens: int, + pipeline: PipelineEnum, + ): self.model_info = model_info self.num_input_tokens = num_input_tokens self.num_output_tokens = num_output_tokens diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 89d126a6..8474b8a1 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -57,7 +57,9 @@ def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: return messages_to_return -def convert_to_iris_message(message: Message, num_input_tokens: int, num_output_tokens: int, model: str) -> PyrisMessage: +def convert_to_iris_message( + message: Message, num_input_tokens: int, num_output_tokens: int, model: str +) -> PyrisMessage: """ Convert a Message to a PyrisMessage """ @@ -111,7 +113,12 @@ def chat( format="json" if arguments.response_format == "JSON" else "", options=self.options, ) - return convert_to_iris_message(response["message"], response["prompt_eval_count"], response["eval_count"], response["model"]) + return convert_to_iris_message( + response["message"], + response["prompt_eval_count"], + response["eval_count"], + response["model"], + ) def embed(self, text: str) -> list[float]: response = self._client.embeddings( diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index da5a8c2e..99d6c4b6 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -62,12 +62,14 @@ def convert_to_open_ai_messages( return openai_messages -def convert_to_iris_message(message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str) -> PyrisMessage: +def convert_to_iris_message( + message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str +) -> PyrisMessage: """ Convert a ChatCompletionMessage to a 
PyrisMessage """ - num_input_tokens = getattr(usage, 'prompt_tokens', -1) - num_output_tokens = getattr(usage, 'completion_tokens', -1) + num_input_tokens = getattr(usage, "prompt_tokens", -1) + num_output_tokens = getattr(usage, "completion_tokens", -1) message = PyrisMessage( sender=map_str_to_role(message.role), @@ -75,10 +77,11 @@ def convert_to_iris_message(message: ChatCompletionMessage, usage: Optional[Comp send_at=datetime.now(), num_input_tokens=num_input_tokens, num_output_tokens=num_output_tokens, - model_info=model + model_info=model, ) return message + class OpenAIChatModel(ChatModel): model: str api_key: str @@ -110,7 +113,9 @@ def chat( temperature=arguments.temperature, max_tokens=arguments.max_tokens, ) - return convert_to_iris_message(response.choices[0].message, response.usage, response.model) + return convert_to_iris_message( + response.choices[0].message, response.usage, response.model + ) except Exception as e: wait_time = initial_delay * (backoff_factor**attempt) logging.warning(f"Exception on attempt {attempt + 1}: {e}") diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py index c5cd9273..50c1cb0a 100644 --- a/app/llm/langchain/iris_langchain_chat_model.py +++ b/app/llm/langchain/iris_langchain_chat_model.py @@ -45,10 +45,12 @@ def _generate( iris_message = self.request_handler.chat(iris_messages, self.completion_args) base_message = convert_iris_message_to_langchain_message(iris_message) chat_generation = ChatGeneration(message=base_message) - self.tokens = LLMTokenCount(model_info=iris_message.model_info, - num_input_tokens=iris_message.num_input_tokens, - num_output_tokens=iris_message.num_output_tokens, - pipeline=PipelineEnum.NOT_SET) + self.tokens = LLMTokenCount( + model_info=iris_message.model_info, + num_input_tokens=iris_message.num_input_tokens, + num_output_tokens=iris_message.num_output_tokens, + pipeline=PipelineEnum.NOT_SET, + ) return ChatResult(generations=[chat_generation]) @property diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py index 2aee5beb..d2c0ca04 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -10,7 +10,6 @@ ) from langchain_core.runnables import Runnable from langsmith import traceable, get_current_run_tree -from sipbuild.generator.parser.tokens import tokens from weaviate.collections.classes.filters import Filter from .code_feedback_pipeline import CodeFeedbackPipeline @@ -35,7 +34,6 @@ from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO from ...llm import CapabilityRequestHandler, RequirementList from ...llm import CompletionArguments -from ...llm.external.LLMTokenCount import LLMTokenCount from ...llm.external.PipelineEnum import PipelineEnum from ...llm.langchain import IrisLangchainChatModel from ...retrieval.lecture_retrieval import LectureRetrieval @@ -102,7 +100,9 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): ) self._run_exercise_chat_pipeline(dto, should_execute_lecture_pipeline), self.callback.done( - "Generated response", final_result=self.exercise_chat_response, tokens=self.tokens + "Generated response", + final_result=self.exercise_chat_response, + tokens=self.tokens, ) try: @@ -116,7 +116,11 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): suggestion_dto.last_message = self.exercise_chat_response suggestion_dto.problem_statement = dto.exercise.problem_statement suggestions = 
self.suggestion_pipeline(suggestion_dto) - self.callback.done(final_result=None, suggestions=suggestions, tokens=[self.suggestion_pipeline.tokens]) + self.callback.done( + final_result=None, + suggestions=suggestions, + tokens=[self.suggestion_pipeline.tokens], + ) else: # This should never happen but whatever self.callback.skip( diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index df4d2e30..68265a48 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -5,7 +5,6 @@ from langchain_core.prompts import ( ChatPromptTemplate, ) -from sipbuild.generator.parser.tokens import tokens from app.domain import ( CompetencyExtractionPipelineExecutionDTO, @@ -80,10 +79,12 @@ def __call__( response = self.request_handler.chat( [prompt], CompletionArguments(temperature=0.4) ) - num_tokens = LLMTokenCount(model_info=response.model_info, - num_input_tokens=response.num_input_tokens, - num_output_tokens=response.num_output_tokens, - pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION) + num_tokens = LLMTokenCount( + model_info=response.model_info, + num_input_tokens=response.num_input_tokens, + num_output_tokens=response.num_output_tokens, + pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION, + ) self.tokens.append(num_tokens) response = response.contents[0].text_content From 4502e30e52f6117e20b14abc9dd6af9d96a526d5 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Fri, 11 Oct 2024 19:42:52 +0200 Subject: [PATCH 06/17] Fix last lint error --- app/retrieval/lecture_retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py index c80f94bb..f75a139e 100644 --- a/app/retrieval/lecture_retrieval.py +++ b/app/retrieval/lecture_retrieval.py @@ -2,7 +2,6 @@ from typing import List from langsmith import traceable -from sipbuild.generator.parser.tokens import tokens from weaviate import WeaviateClient from weaviate.classes.query import Filter @@ -82,6 +81,7 @@ class LectureRetrieval(Pipeline): """ Class for retrieving lecture data from the database. 
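The enum tagging added in patches 02-04 (and only reshuffled by the lint fixes above) is deliberately two-phase: IrisLangchainChatModel creates every LLMTokenCount as PipelineEnum.NOT_SET, because the wrapper cannot know which pipeline invoked it, and the calling pipeline overwrites the tag immediately after the call. A compact sketch of that handshake, with both classes cut down to the fields involved:

    from dataclasses import dataclass
    from enum import Enum

    class PipelineEnum(str, Enum):
        IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE"
        NOT_SET = "NOT_SET"

    @dataclass
    class TokenCountSketch:
        model_info: str
        pipeline: PipelineEnum = PipelineEnum.NOT_SET

    def wrapped_llm_call() -> TokenCountSketch:
        return TokenCountSketch("gpt-4o")  # wrapper side: always NOT_SET

    usage = wrapped_llm_call()
    usage.pipeline = PipelineEnum.IRIS_CITATION_PIPELINE  # caller side: re-tag
    assert usage.pipeline is not PipelineEnum.NOT_SET

Because the pipelines re-tag the very object held in self.llm.tokens rather than a copy, any NOT_SET value that reaches Artemis points at a call site that forgot this second step.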
""" + tokens: LLMTokenCount def __init__(self, client: WeaviateClient, **kwargs): From 3b81a3091aeb9899f7e1b77c3d93d9c1dc7a3685 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Fri, 11 Oct 2024 19:46:11 +0200 Subject: [PATCH 07/17] Fix lint errors Fix last lint error Fix last lint errors --- app/domain/data/token_usage_dto.py | 2 +- app/llm/external/LLMTokenCount.py | 8 +++++++- app/llm/external/ollama.py | 11 +++++++++-- app/llm/external/openai_chat.py | 15 ++++++++++----- app/llm/langchain/iris_langchain_chat_model.py | 10 ++++++---- app/pipeline/chat/code_feedback_pipeline.py | 1 - app/pipeline/chat/course_chat_pipeline.py | 2 -- app/pipeline/chat/exercise_chat_pipeline.py | 12 ++++++++---- app/pipeline/chat/lecture_chat_pipeline.py | 1 - app/pipeline/competency_extraction_pipeline.py | 11 ++++++----- app/pipeline/shared/citation_pipeline.py | 1 - app/pipeline/shared/reranker_pipeline.py | 1 - app/retrieval/lecture_retrieval.py | 2 +- 13 files changed, 48 insertions(+), 29 deletions(-) diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py index cc98c8af..c7e1868c 100644 --- a/app/domain/data/token_usage_dto.py +++ b/app/domain/data/token_usage_dto.py @@ -7,4 +7,4 @@ class TokenUsageDTO(BaseModel): model_info: str num_input_tokens: int num_output_tokens: int - pipeline: PipelineEnum \ No newline at end of file + pipeline: PipelineEnum diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py index e82b02af..7570eddb 100644 --- a/app/llm/external/LLMTokenCount.py +++ b/app/llm/external/LLMTokenCount.py @@ -8,7 +8,13 @@ class LLMTokenCount: num_output_tokens: int pipeline: PipelineEnum - def __init__(self, model_info: str, num_input_tokens: int, num_output_tokens: int, pipeline: PipelineEnum): + def __init__( + self, + model_info: str, + num_input_tokens: int, + num_output_tokens: int, + pipeline: PipelineEnum, + ): self.model_info = model_info self.num_input_tokens = num_input_tokens self.num_output_tokens = num_output_tokens diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 89d126a6..8474b8a1 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -57,7 +57,9 @@ def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: return messages_to_return -def convert_to_iris_message(message: Message, num_input_tokens: int, num_output_tokens: int, model: str) -> PyrisMessage: +def convert_to_iris_message( + message: Message, num_input_tokens: int, num_output_tokens: int, model: str +) -> PyrisMessage: """ Convert a Message to a PyrisMessage """ @@ -111,7 +113,12 @@ def chat( format="json" if arguments.response_format == "JSON" else "", options=self.options, ) - return convert_to_iris_message(response["message"], response["prompt_eval_count"], response["eval_count"], response["model"]) + return convert_to_iris_message( + response["message"], + response["prompt_eval_count"], + response["eval_count"], + response["model"], + ) def embed(self, text: str) -> list[float]: response = self._client.embeddings( diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index da5a8c2e..99d6c4b6 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -62,12 +62,14 @@ def convert_to_open_ai_messages( return openai_messages -def convert_to_iris_message(message: ChatCompletionMessage, usage: Optional[CompletionUsage], model: str) -> PyrisMessage: +def convert_to_iris_message( + message: ChatCompletionMessage, usage: 
Optional[CompletionUsage], model: str +) -> PyrisMessage: """ Convert a ChatCompletionMessage to a PyrisMessage """ - num_input_tokens = getattr(usage, 'prompt_tokens', -1) - num_output_tokens = getattr(usage, 'completion_tokens', -1) + num_input_tokens = getattr(usage, "prompt_tokens", -1) + num_output_tokens = getattr(usage, "completion_tokens", -1) message = PyrisMessage( sender=map_str_to_role(message.role), @@ -75,10 +77,11 @@ def convert_to_iris_message(message: ChatCompletionMessage, usage: Optional[Comp send_at=datetime.now(), num_input_tokens=num_input_tokens, num_output_tokens=num_output_tokens, - model_info=model + model_info=model, ) return message + class OpenAIChatModel(ChatModel): model: str api_key: str @@ -110,7 +113,9 @@ def chat( temperature=arguments.temperature, max_tokens=arguments.max_tokens, ) - return convert_to_iris_message(response.choices[0].message, response.usage, response.model) + return convert_to_iris_message( + response.choices[0].message, response.usage, response.model + ) except Exception as e: wait_time = initial_delay * (backoff_factor**attempt) logging.warning(f"Exception on attempt {attempt + 1}: {e}") diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py index c5cd9273..50c1cb0a 100644 --- a/app/llm/langchain/iris_langchain_chat_model.py +++ b/app/llm/langchain/iris_langchain_chat_model.py @@ -45,10 +45,12 @@ def _generate( iris_message = self.request_handler.chat(iris_messages, self.completion_args) base_message = convert_iris_message_to_langchain_message(iris_message) chat_generation = ChatGeneration(message=base_message) - self.tokens = LLMTokenCount(model_info=iris_message.model_info, - num_input_tokens=iris_message.num_input_tokens, - num_output_tokens=iris_message.num_output_tokens, - pipeline=PipelineEnum.NOT_SET) + self.tokens = LLMTokenCount( + model_info=iris_message.model_info, + num_input_tokens=iris_message.num_input_tokens, + num_output_tokens=iris_message.num_output_tokens, + pipeline=PipelineEnum.NOT_SET, + ) return ChatResult(generations=[chat_generation]) @property diff --git a/app/pipeline/chat/code_feedback_pipeline.py b/app/pipeline/chat/code_feedback_pipeline.py index de6a0d90..fa35cca8 100644 --- a/app/pipeline/chat/code_feedback_pipeline.py +++ b/app/pipeline/chat/code_feedback_pipeline.py @@ -7,7 +7,6 @@ from langchain_core.runnables import Runnable from langsmith import traceable from pydantic import BaseModel -from sipbuild.generator.parser.tokens import tokens from ...domain import PyrisMessage from ...domain.data.build_log_entry import BuildLogEntryDTO diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 6da287f0..84e9eed2 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -14,7 +14,6 @@ from langchain_core.runnables import Runnable from langchain_core.tools import tool from langsmith import traceable -from sipbuild.generator.parser.tokens import tokens from weaviate.collections.classes.filters import Filter from .interaction_suggestion_pipeline import ( @@ -42,7 +41,6 @@ elicit_begin_agent_jol_prompt, ) from ...domain import CourseChatPipelineExecutionDTO -from ...llm.external.LLMTokenCount import LLMTokenCount from ...llm.external.PipelineEnum import PipelineEnum from ...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase diff --git a/app/pipeline/chat/exercise_chat_pipeline.py 
b/app/pipeline/chat/exercise_chat_pipeline.py index 2aee5beb..d2c0ca04 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -10,7 +10,6 @@ ) from langchain_core.runnables import Runnable from langsmith import traceable, get_current_run_tree -from sipbuild.generator.parser.tokens import tokens from weaviate.collections.classes.filters import Filter from .code_feedback_pipeline import CodeFeedbackPipeline @@ -35,7 +34,6 @@ from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO from ...llm import CapabilityRequestHandler, RequirementList from ...llm import CompletionArguments -from ...llm.external.LLMTokenCount import LLMTokenCount from ...llm.external.PipelineEnum import PipelineEnum from ...llm.langchain import IrisLangchainChatModel from ...retrieval.lecture_retrieval import LectureRetrieval @@ -102,7 +100,9 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): ) self._run_exercise_chat_pipeline(dto, should_execute_lecture_pipeline), self.callback.done( - "Generated response", final_result=self.exercise_chat_response, tokens=self.tokens + "Generated response", + final_result=self.exercise_chat_response, + tokens=self.tokens, ) try: @@ -116,7 +116,11 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): suggestion_dto.last_message = self.exercise_chat_response suggestion_dto.problem_statement = dto.exercise.problem_statement suggestions = self.suggestion_pipeline(suggestion_dto) - self.callback.done(final_result=None, suggestions=suggestions, tokens=[self.suggestion_pipeline.tokens]) + self.callback.done( + final_result=None, + suggestions=suggestions, + tokens=[self.suggestion_pipeline.tokens], + ) else: # This should never happen but whatever self.callback.skip( diff --git a/app/pipeline/chat/lecture_chat_pipeline.py b/app/pipeline/chat/lecture_chat_pipeline.py index cf9f1cc1..3d7d1320 100644 --- a/app/pipeline/chat/lecture_chat_pipeline.py +++ b/app/pipeline/chat/lecture_chat_pipeline.py @@ -16,7 +16,6 @@ LectureChatPipelineExecutionDTO, ) from ...llm import CapabilityRequestHandler, RequirementList -from ...llm.external import LLMTokenCount from ...llm.external.PipelineEnum import PipelineEnum from ...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index df4d2e30..68265a48 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -5,7 +5,6 @@ from langchain_core.prompts import ( ChatPromptTemplate, ) -from sipbuild.generator.parser.tokens import tokens from app.domain import ( CompetencyExtractionPipelineExecutionDTO, @@ -80,10 +79,12 @@ def __call__( response = self.request_handler.chat( [prompt], CompletionArguments(temperature=0.4) ) - num_tokens = LLMTokenCount(model_info=response.model_info, - num_input_tokens=response.num_input_tokens, - num_output_tokens=response.num_output_tokens, - pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION) + num_tokens = LLMTokenCount( + model_info=response.model_info, + num_input_tokens=response.num_input_tokens, + num_output_tokens=response.num_output_tokens, + pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION, + ) self.tokens.append(num_tokens) response = response.contents[0].text_content diff --git a/app/pipeline/shared/citation_pipeline.py b/app/pipeline/shared/citation_pipeline.py index 0bcdc1e2..2c7d84a7 100644 --- 
a/app/pipeline/shared/citation_pipeline.py +++ b/app/pipeline/shared/citation_pipeline.py @@ -7,7 +7,6 @@ from langchain_core.runnables import Runnable from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments -from app.llm.external import LLMTokenCount from app.llm.external.PipelineEnum import PipelineEnum from app.llm.langchain import IrisLangchainChatModel from app.pipeline import Pipeline diff --git a/app/pipeline/shared/reranker_pipeline.py b/app/pipeline/shared/reranker_pipeline.py index 566288c9..ed0a3f9c 100644 --- a/app/pipeline/shared/reranker_pipeline.py +++ b/app/pipeline/shared/reranker_pipeline.py @@ -6,7 +6,6 @@ from langchain_core.prompts import ChatPromptTemplate, PromptTemplate from langchain_core.runnables import Runnable from langsmith import traceable -from sipbuild.generator.parser.tokens import tokens from app.domain import PyrisMessage from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py index c80f94bb..f75a139e 100644 --- a/app/retrieval/lecture_retrieval.py +++ b/app/retrieval/lecture_retrieval.py @@ -2,7 +2,6 @@ from typing import List from langsmith import traceable -from sipbuild.generator.parser.tokens import tokens from weaviate import WeaviateClient from weaviate.classes.query import Filter @@ -82,6 +81,7 @@ class LectureRetrieval(Pipeline): """ Class for retrieving lecture data from the database. """ + tokens: LLMTokenCount def __init__(self, client: WeaviateClient, **kwargs): From 43241806718898bc0fc4bb3e76cefed7ceaee60d Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Sat, 12 Oct 2024 13:02:23 +0200 Subject: [PATCH 08/17] Add token cost tracking for input and output tokens --- app/domain/data/token_usage_dto.py | 2 ++ app/domain/pyris_message.py | 2 ++ app/llm/external/LLMTokenCount.py | 11 ++++++++++- app/llm/langchain/iris_langchain_chat_model.py | 2 ++ app/llm/request_handler/capability_request_handler.py | 5 ++++- 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py index c7e1868c..95221ef9 100644 --- a/app/domain/data/token_usage_dto.py +++ b/app/domain/data/token_usage_dto.py @@ -6,5 +6,7 @@ class TokenUsageDTO(BaseModel): model_info: str num_input_tokens: int + cost_per_input_token: float num_output_tokens: int + cost_per_output_token: float pipeline: PipelineEnum diff --git a/app/domain/pyris_message.py b/app/domain/pyris_message.py index 2d1ead95..c0587327 100644 --- a/app/domain/pyris_message.py +++ b/app/domain/pyris_message.py @@ -17,7 +17,9 @@ class PyrisMessage(BaseModel): model_config = ConfigDict(populate_by_name=True) num_input_tokens: int = Field(alias="numInputTokens", default=0) + cost_per_input_token: float = Field(alias="costPerInputToken", default=0) num_output_tokens: int = Field(alias="numOutputTokens", default=0) + cost_per_output_token: float = Field(alias="costPerOutputToken", default=0) model_info: str = Field(alias="modelInfo", default="") sent_at: datetime | None = Field(alias="sentAt", default=None) diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py index 7570eddb..8b300d3f 100644 --- a/app/llm/external/LLMTokenCount.py +++ b/app/llm/external/LLMTokenCount.py @@ -5,20 +5,29 @@ class LLMTokenCount: model_info: str num_input_tokens: int + cost_per_input_token: float num_output_tokens: int + cost_per_output_token: float pipeline: PipelineEnum def __init__( self, model_info: 
diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py
index c7e1868c..95221ef9 100644
--- a/app/domain/data/token_usage_dto.py
+++ b/app/domain/data/token_usage_dto.py
@@ -6,5 +6,7 @@ class TokenUsageDTO(BaseModel):
     model_info: str
     num_input_tokens: int
+    cost_per_input_token: float
     num_output_tokens: int
+    cost_per_output_token: float
     pipeline: PipelineEnum
diff --git a/app/domain/pyris_message.py b/app/domain/pyris_message.py
index 2d1ead95..c0587327 100644
--- a/app/domain/pyris_message.py
+++ b/app/domain/pyris_message.py
@@ -17,7 +17,9 @@ class PyrisMessage(BaseModel):
     model_config = ConfigDict(populate_by_name=True)

     num_input_tokens: int = Field(alias="numInputTokens", default=0)
+    cost_per_input_token: float = Field(alias="costPerInputToken", default=0)
     num_output_tokens: int = Field(alias="numOutputTokens", default=0)
+    cost_per_output_token: float = Field(alias="costPerOutputToken", default=0)
     model_info: str = Field(alias="modelInfo", default="")

     sent_at: datetime | None = Field(alias="sentAt", default=None)
diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py
index 7570eddb..8b300d3f 100644
--- a/app/llm/external/LLMTokenCount.py
+++ b/app/llm/external/LLMTokenCount.py
@@ -5,20 +5,29 @@ class LLMTokenCount:

     model_info: str
     num_input_tokens: int
+    cost_per_input_token: float
     num_output_tokens: int
+    cost_per_output_token: float
     pipeline: PipelineEnum

     def __init__(
         self,
         model_info: str,
         num_input_tokens: int,
+        cost_per_input_token: float,
         num_output_tokens: int,
+        cost_per_output_token: float,
         pipeline: PipelineEnum,
     ):
         self.model_info = model_info
         self.num_input_tokens = num_input_tokens
+        self.cost_per_input_token = cost_per_input_token
         self.num_output_tokens = num_output_tokens
+        self.cost_per_output_token = cost_per_output_token
         self.pipeline = pipeline

     def __str__(self):
-        return f"{self.model_info}: {self.num_input_tokens} in, {self.num_output_tokens} out, {self.pipeline} pipeline"
+        return (
+            f"{self.model_info}: {self.num_input_tokens} in, {self.cost_per_input_token} cost in,"
+            f" {self.num_output_tokens} out, {self.cost_per_output_token} cost out, {self.pipeline} pipeline"
+        )
diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py
index 50c1cb0a..29065353 100644
--- a/app/llm/langchain/iris_langchain_chat_model.py
+++ b/app/llm/langchain/iris_langchain_chat_model.py
@@ -48,7 +48,9 @@ def _generate(
         self.tokens = LLMTokenCount(
             model_info=iris_message.model_info,
             num_input_tokens=iris_message.num_input_tokens,
+            cost_per_input_token=iris_message.cost_per_input_token,
             num_output_tokens=iris_message.num_output_tokens,
+            cost_per_output_token=iris_message.cost_per_output_token,
             pipeline=PipelineEnum.NOT_SET,
         )
         return ChatResult(generations=[chat_generation])
diff --git a/app/llm/request_handler/capability_request_handler.py b/app/llm/request_handler/capability_request_handler.py
index 1ed05b3d..ebb3c12a 100644
--- a/app/llm/request_handler/capability_request_handler.py
+++ b/app/llm/request_handler/capability_request_handler.py
@@ -44,7 +44,10 @@ def chat(
         self, messages: list[PyrisMessage], arguments: CompletionArguments
     ) -> PyrisMessage:
         llm = self._select_model(ChatModel)
-        return llm.chat(messages, arguments)
+        message = llm.chat(messages, arguments)
+        message.cost_per_input_token = llm.capabilities.input_cost.value
+        message.cost_per_output_token = llm.capabilities.output_cost.value
+        return message

     def embed(self, text: str) -> list[float]:
         llm = self._select_model(EmbeddingModel)

From c9e89be023d06d4b955a85a60caf1c573b22ea1e Mon Sep 17 00:00:00 2001
From: Alexander Joham
Date: Sat, 12 Oct 2024 15:08:48 +0200
Subject: [PATCH 09/17] Update token handling as proposed by CodeRabbit

---
 app/llm/external/PipelineEnum.py               |  1 +
 app/llm/external/ollama.py                     |  8 ++++----
 app/pipeline/chat/code_feedback_pipeline.py    |  6 +++---
 app/pipeline/chat/exercise_chat_pipeline.py    | 19 ++++++++++++-------
 app/pipeline/chat/lecture_chat_pipeline.py     |  6 +++---
 .../competency_extraction_pipeline.py          |  4 ++--
 app/pipeline/lecture_ingestion_pipeline.py     |  7 +++----
 app/pipeline/pipeline.py                       |  2 +-
 app/pipeline/shared/reranker_pipeline.py       |  6 +++---
 app/retrieval/lecture_retrieval.py             | 17 +++++++++++++++--
 10 files changed, 47 insertions(+), 29 deletions(-)
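The ollama.py change below swaps bracket indexing for dict.get() with defaults, so a response missing a count no longer raises a KeyError. A minimal sketch of the difference, using a hypothetical response dict in place of the real ollama client result:

# Hypothetical Ollama-style response; "prompt_eval_count" is absent here.
response = {"message": {"role": "assistant", "content": "Hi"}, "eval_count": 42}

# response["prompt_eval_count"] would raise KeyError; .get() substitutes a default.
prompt_tokens = response.get("prompt_eval_count", 0)  # missing -> 0
output_tokens = response.get("eval_count", 0)         # present -> 42
print(prompt_tokens, output_tokens)  # 0 42

The trade-off is that a missing field is silently replaced by the fallback, which is why later commits in this series deliberate between 0 and -1 as the default.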
diff --git a/app/llm/external/PipelineEnum.py b/app/llm/external/PipelineEnum.py
index 475dbc4a..f568ecf6 100644
--- a/app/llm/external/PipelineEnum.py
+++ b/app/llm/external/PipelineEnum.py
@@ -11,4 +11,5 @@ class PipelineEnum(str, Enum):
     IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE"
     IRIS_RERANKER_PIPELINE = "IRIS_RERANKER_PIPELINE"
     IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
+    IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
     NOT_SET = "NOT_SET"
diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py
index 8474b8a1..b441ef2e 100644
--- a/app/llm/external/ollama.py
+++ b/app/llm/external/ollama.py
@@ -114,10 +114,10 @@ def chat(
             options=self.options,
         )
         return convert_to_iris_message(
-            response["message"],
-            response["prompt_eval_count"],
-            response["eval_count"],
-            response["model"],
+            response.get("message"),
+            response.get("prompt_eval_count", 0),
+            response.get("eval_count", 0),
+            response.get("model", self.model),
        )

     def embed(self, text: str) -> list[float]:
diff --git a/app/pipeline/chat/code_feedback_pipeline.py b/app/pipeline/chat/code_feedback_pipeline.py
index fa35cca8..039a0064 100644
--- a/app/pipeline/chat/code_feedback_pipeline.py
+++ b/app/pipeline/chat/code_feedback_pipeline.py
@@ -144,7 +144,7 @@ def __call__(
                 }
             )
         )
-        num_tokens = self.llm.tokens
-        num_tokens.pipeline = PipelineEnum.IRIS_CODE_FEEDBACK
-        self.tokens = num_tokens
+        token_usage = self.llm.tokens
+        token_usage.pipeline = PipelineEnum.IRIS_CODE_FEEDBACK
+        self.tokens = token_usage
         return response.replace("{", "{{").replace("}", "}}")
diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py
index d2c0ca04..c82991c1 100644
--- a/app/pipeline/chat/exercise_chat_pipeline.py
+++ b/app/pipeline/chat/exercise_chat_pipeline.py
@@ -116,10 +116,14 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
                     suggestion_dto.last_message = self.exercise_chat_response
                     suggestion_dto.problem_statement = dto.exercise.problem_statement
                     suggestions = self.suggestion_pipeline(suggestion_dto)
+                    if self.suggestion_pipeline.tokens is not None:
+                        tokens = [self.suggestion_pipeline.tokens]
+                    else:
+                        tokens = []
                     self.callback.done(
                         final_result=None,
                         suggestions=suggestions,
-                        tokens=[self.suggestion_pipeline.tokens],
+                        tokens=tokens,
                     )
                 else:
                     # This should never happen but whatever
@@ -264,9 +268,7 @@ def _run_exercise_chat_pipeline(
                 .with_config({"run_name": "Response Drafting"})
                 .invoke({})
             )
-            if self.llm.tokens is not None:
-                self.llm.tokens.pipeline = PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE
-                self.tokens.append(self.llm.tokens)
+            self._collect_llm_tokens()
             self.callback.done()
             self.prompt = ChatPromptTemplate.from_messages(
                 [
@@ -281,9 +283,7 @@
                 .with_config({"run_name": "Response Refining"})
                 .invoke({})
             )
-            if self.llm.tokens is not None:
-                self.llm.tokens.pipeline = PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE
-                self.tokens.append(self.llm.tokens)
+            self._collect_llm_tokens()

             if "!ok!" in guide_response:
                 print("Response is ok and not rewritten!!!")
@@ -385,3 +385,8 @@ def should_execute_lecture_pipeline(self, course_id: int) -> bool:
             )
             return len(result.objects) > 0
         return False
+
+    def _collect_llm_tokens(self):
+        if self.llm.tokens is not None:
+            self.llm.tokens.pipeline = PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE
+            self.tokens.append(self.llm.tokens)
diff --git a/app/pipeline/chat/lecture_chat_pipeline.py b/app/pipeline/chat/lecture_chat_pipeline.py
index 3d7d1320..e3f4b07d 100644
--- a/app/pipeline/chat/lecture_chat_pipeline.py
+++ b/app/pipeline/chat/lecture_chat_pipeline.py
@@ -116,9 +116,9 @@ def __call__(self, dto: LectureChatPipelineExecutionDTO):
         self.prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
             response = (self.prompt | self.pipeline).invoke({})
-            num_tokens = self.llm.tokens
-            num_tokens.pipeline = PipelineEnum.IRIS_CHAT_LECTURE_MESSAGE
-            self.tokens.append(num_tokens)
+            token_usage = self.llm.tokens
+            token_usage.pipeline = PipelineEnum.IRIS_CHAT_LECTURE_MESSAGE
+            self.tokens.append(token_usage)
             response_with_citation = self.citation_pipeline(
                 retrieved_lecture_chunks, response
             )
diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index 68265a48..738fdcb7 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -79,13 +79,13 @@ def __call__(
         response = self.request_handler.chat(
             [prompt], CompletionArguments(temperature=0.4)
         )
-        num_tokens = LLMTokenCount(
+        token_usage = LLMTokenCount(
             model_info=response.model_info,
             num_input_tokens=response.num_input_tokens,
             num_output_tokens=response.num_output_tokens,
             pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION,
         )
-        self.tokens.append(num_tokens)
+        self.tokens.append(token_usage)
         response = response.contents[0].text_content

         generated_competencies: list[Competency] = []
diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py
index c51848fe..a896be37 100644
--- a/app/pipeline/lecture_ingestion_pipeline.py
+++ b/app/pipeline/lecture_ingestion_pipeline.py
@@ -6,7 +6,6 @@
 import fitz
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
-from sipbuild.generator.parser.tokens import tokens
 from unstructured.cleaners.core import clean
 from weaviate import WeaviateClient
 from weaviate.classes.query import Filter
@@ -280,9 +279,9 @@ def merge_page_content_and_image_interpretation(
             (prompt | self.pipeline).invoke({}), bullets=True, extra_whitespace=True
         )
         # TODO: send to artemis
-        num_tokens = self.llm.tokens
-        num_tokens.pipeline = PipelineEnum.IRIS_LECTURE_INGESTION
-        tokens.append(num_tokens)
+        token_usage = self.llm.tokens
+        token_usage.pipeline = PipelineEnum.IRIS_LECTURE_INGESTION
+        self.tokens.append(token_usage)
         return clean_output

     def get_course_language(self, page_content: str) -> str:
diff --git a/app/pipeline/pipeline.py b/app/pipeline/pipeline.py
index 2359e4a7..338bb767 100644
--- a/app/pipeline/pipeline.py
+++ b/app/pipeline/pipeline.py
@@ -1,7 +1,7 @@
 from abc import ABCMeta
 from typing import List

-from app.llm.external import LLMTokenCount
+from app.llm.external.LLMTokenCount import LLMTokenCount


 class Pipeline(metaclass=ABCMeta):
diff --git a/app/pipeline/shared/reranker_pipeline.py b/app/pipeline/shared/reranker_pipeline.py
index ed0a3f9c..e33c2606 100644
--- a/app/pipeline/shared/reranker_pipeline.py
+++ b/app/pipeline/shared/reranker_pipeline.py
@@ -110,7 +110,7 @@ def __call__(
             prompt = self.default_prompt

         response = (prompt | self.pipeline).invoke(data)
-        num_tokens = self.llm.tokens
-        num_tokens.pipeline = PipelineEnum.IRIS_RERANKER_PIPELINE
-        self.tokens.append(num_tokens)
+        token_usage = self.llm.tokens
+        token_usage.pipeline = PipelineEnum.IRIS_RERANKER_PIPELINE
+        self.tokens.append(token_usage)
         return response.selected_paragraphs
diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py
index f75a139e..ef47b5ad 100644
--- a/app/retrieval/lecture_retrieval.py
+++ b/app/retrieval/lecture_retrieval.py
@@ -7,6 +7,7 @@
 from ..common import convert_iris_message_to_langchain_message
 from ..llm.external.LLMTokenCount import LLMTokenCount
+from ..llm.external.PipelineEnum import PipelineEnum
 from ..llm.langchain import IrisLangchainChatModel
 from ..pipeline import Pipeline

@@ -82,7 +83,7 @@ class LectureRetrieval(Pipeline):
     Class for retrieving lecture data from the database.
     """

-    tokens: LLMTokenCount
+    tokens: [LLMTokenCount]

     def __init__(self, client: WeaviateClient, **kwargs):
         super().__init__(implementation_id="lecture_retrieval_pipeline")
@@ -101,6 +102,7 @@ def __init__(self, client: WeaviateClient, **kwargs):
         self.pipeline = self.llm | StrOutputParser()
         self.collection = init_lecture_schema(client)
         self.reranker_pipeline = RerankerPipeline()
+        self.tokens = []

     @traceable(name="Full Lecture Retrieval")
     def __call__(
@@ -239,7 +241,9 @@ def rewrite_student_query(
         prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
             response = (prompt | self.pipeline).invoke({})
-            self.tokens = self.llm.tokens
+            token_usage = self.llm.tokens
+            token_usage.pipeline = PipelineEnum.IRIS_LECTURE_RETRIEVAL_PIPELINE
+            self.tokens.append(token_usage)
             logger.info(f"Response from exercise chat pipeline: {response}")
             return response
         except Exception as e:
@@ -277,6 +281,9 @@ def rewrite_student_query_with_exercise_context(
         prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
             response = (prompt | self.pipeline).invoke({})
+            token_usage = self.llm.tokens
+            token_usage.pipeline = PipelineEnum.IRIS_LECTURE_RETRIEVAL_PIPELINE
+            self.tokens.append(token_usage)
             logger.info(f"Response from exercise chat pipeline: {response}")
             return response
         except Exception as e:
@@ -312,6 +319,9 @@ def rewrite_elaborated_query(
         prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
             response = (prompt | self.pipeline).invoke({})
+            token_usage = self.llm.tokens
+            token_usage.pipeline = PipelineEnum.IRIS_LECTURE_RETRIEVAL_PIPELINE
+            self.tokens.append(token_usage)
             logger.info(f"Response from retrieval pipeline: {response}")
             return response
         except Exception as e:
@@ -351,6 +361,9 @@ def rewrite_elaborated_query_with_exercise_context(
         )
         try:
             response = (prompt | self.pipeline).invoke({})
+            token_usage = self.llm.tokens
+            token_usage.pipeline = PipelineEnum.IRIS_LECTURE_RETRIEVAL_PIPELINE
+            self.tokens.append(token_usage)
             logger.info(f"Response from exercise chat pipeline: {response}")
             return response
         except Exception as e:

From 4c9290091ede58c677dfaa6bcfa7d2618f43b4a8 Mon Sep 17 00:00:00 2001
From: Alexander Joham
Date: Sat, 12 Oct 2024 21:06:06 +0200
Subject: [PATCH 10/17] Update PyrisMessage to use only TokenUsageDTO, add
 token count for error

---
 app/{llm/external => common}/PipelineEnum.py   |  1 +
 app/common/message_converters.py               |  2 +-
 app/{domain => common}/pyris_message.py        |  7 +---
 app/common/token_usage_dto.py                  | 18 +++++++++
 app/domain/__init__.py                         |  1 -
 .../chat_pipeline_execution_base_data_dto.py   |  2 +-
 .../chat/chat_pipeline_execution_dto.py        |  2 +-
 app/domain/chat/interaction_suggestion_dto.py  |  2 +-
 app/domain/data/token_usage_dto.py             | 12 ------
 app/domain/status/status_update_dto.py         |  2 +-
 app/llm/external/LLMTokenCount.py              | 33 -----------------
 app/llm/external/model.py                      |  2 +-
 app/llm/external/ollama.py                     | 14 ++++---
 app/llm/external/openai_chat.py                | 15 +++++---
 .../langchain/iris_langchain_chat_model.py     | 20 +++++-----
 .../request_handler/basic_request_handler.py   |  2 +-
 .../capability_request_handler.py              |  6 +--
 .../request_handler_interface.py               |  2 +-
 app/pipeline/chat/code_feedback_pipeline.py    |  8 ++--
 app/pipeline/chat/course_chat_pipeline.py      | 13 ++++---
 app/pipeline/chat/exercise_chat_pipeline.py    | 37 ++++++++++++-------
 .../chat/interaction_suggestion_pipeline.py    |  8 ++--
 app/pipeline/chat/lecture_chat_pipeline.py     |  8 ++--
 .../competency_extraction_pipeline.py          | 13 +------
 app/pipeline/lecture_ingestion_pipeline.py     | 23 +++++++-----
 app/pipeline/pipeline.py                       |  9 ++++-
 app/pipeline/shared/citation_pipeline.py       |  6 +--
 app/pipeline/shared/reranker_pipeline.py       |  8 ++--
 app/retrieval/lecture_retrieval.py             |  8 ++--
 app/web/status/status_update.py                | 10 +++--
 30 files changed, 141 insertions(+), 153 deletions(-)
 rename app/{llm/external => common}/PipelineEnum.py (92%)
 rename app/{domain => common}/pyris_message.py (62%)
 create mode 100644 app/common/token_usage_dto.py
 delete mode 100644 app/domain/data/token_usage_dto.py
 delete mode 100644 app/llm/external/LLMTokenCount.py
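This commit folds LLMTokenCount into a single Pydantic TokenUsageDTO under app/common and moves per-step accounting into a shared Pipeline._append_tokens helper (see pipeline.py below). A self-contained sketch of the pattern, using stand-in classes rather than the real Iris types:

# Sketch with stand-ins: each pipeline step tags its usage record with the
# pipeline that produced it, then appends it to the pipeline-level list.
from enum import Enum
from typing import List


class PipelineEnum(str, Enum):
    IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE"
    NOT_SET = "NOT_SET"


class TokenUsage:  # stand-in for TokenUsageDTO
    def __init__(self, num_input_tokens: int, num_output_tokens: int):
        self.num_input_tokens = num_input_tokens
        self.num_output_tokens = num_output_tokens
        self.pipeline = PipelineEnum.NOT_SET


class Pipeline:
    def __init__(self):
        self.tokens: List[TokenUsage] = []

    def _append_tokens(self, tokens: TokenUsage, pipeline: PipelineEnum) -> None:
        # Tag the usage record with its origin, then collect it.
        tokens.pipeline = pipeline
        self.tokens.append(tokens)


p = Pipeline()
p._append_tokens(TokenUsage(120, 40), PipelineEnum.IRIS_CITATION_PIPELINE)
print([t.pipeline for t in p.tokens])  # one entry, tagged IRIS_CITATION_PIPELINE

Centralizing the tagging in one helper is what lets the repeated three-line blocks from the previous commit collapse into single calls throughout the diffs below.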
diff --git a/app/llm/external/PipelineEnum.py b/app/common/PipelineEnum.py
similarity index 92%
rename from app/llm/external/PipelineEnum.py
rename to app/common/PipelineEnum.py
index f568ecf6..3d8e101e 100644
--- a/app/llm/external/PipelineEnum.py
+++ b/app/common/PipelineEnum.py
@@ -12,4 +12,5 @@ class PipelineEnum(str, Enum):
     IRIS_RERANKER_PIPELINE = "IRIS_RERANKER_PIPELINE"
     IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
     IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
+    IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION"
     NOT_SET = "NOT_SET"
diff --git a/app/common/message_converters.py b/app/common/message_converters.py
index 671dd565..d96886e5 100644
--- a/app/common/message_converters.py
+++ b/app/common/message_converters.py
@@ -4,7 +4,7 @@
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage

 from app.domain.data.text_message_content_dto import TextMessageContentDTO
-from app.domain.pyris_message import PyrisMessage, IrisMessageRole
+from app.common.pyris_message import PyrisMessage, IrisMessageRole


 def convert_iris_message_to_langchain_message(
diff --git a/app/domain/pyris_message.py b/app/common/pyris_message.py
similarity index 62%
rename from app/domain/pyris_message.py
rename to app/common/pyris_message.py
index c0587327..f18e636a 100644
--- a/app/domain/pyris_message.py
+++ b/app/common/pyris_message.py
@@ -5,6 +5,7 @@
 from pydantic import BaseModel, ConfigDict, Field

 from app.domain.data.message_content_dto import MessageContentDTO
+from app.common.token_usage_dto import TokenUsageDTO


 class IrisMessageRole(str, Enum):
@@ -16,11 +17,7 @@ class IrisMessageRole(str, Enum):
 class PyrisMessage(BaseModel):
     model_config = ConfigDict(populate_by_name=True)

-    num_input_tokens: int = Field(alias="numInputTokens", default=0)
-    cost_per_input_token: float = Field(alias="costPerInputToken", default=0)
-    num_output_tokens: int = Field(alias="numOutputTokens", default=0)
-    cost_per_output_token: float = Field(alias="costPerOutputToken", default=0)
-    model_info: str = Field(alias="modelInfo", default="")
+    token_usage: TokenUsageDTO = Field(default_factory=TokenUsageDTO)

     sent_at: datetime | None = Field(alias="sentAt", default=None)
     sender: IrisMessageRole
diff --git a/app/common/token_usage_dto.py b/app/common/token_usage_dto.py
new file mode 100644
index 00000000..9579c831
--- /dev/null
+++ b/app/common/token_usage_dto.py
@@ -0,0 +1,18 @@
+from pydantic import BaseModel, Field
+
+from app.common.PipelineEnum import PipelineEnum
+
+
+class TokenUsageDTO(BaseModel):
+    model_info: str = Field(alias="modelInfo", default="")
+    num_input_tokens: int = Field(alias="numInputTokens", default=0)
+    cost_per_input_token: float = Field(alias="costPerInputToken", default=0)
+    num_output_tokens: int = Field(alias="numOutputTokens", default=0)
+    cost_per_output_token: float = Field(alias="costPerOutputToken", default=0)
+    pipeline: PipelineEnum = Field(default=PipelineEnum.NOT_SET)
+
+    def __str__(self):
+        return (
+            f"{self.model_info}: {self.num_input_tokens} input cost: {self.cost_per_input_token},"
+            f" {self.num_output_tokens} output cost: {self.cost_per_output_token}, pipeline: {self.pipeline} "
+        )
diff --git a/app/domain/__init__.py b/app/domain/__init__.py
index 27fd881d..e7b03301 100644
--- a/app/domain/__init__.py
+++ b/app/domain/__init__.py
@@ -12,6 +12,5 @@
 from app.domain.chat.course_chat.course_chat_pipeline_execution_dto import (
     CourseChatPipelineExecutionDTO,
 )
-from .pyris_message import PyrisMessage, IrisMessageRole
 from app.domain.data import image_message_content_dto
 from app.domain.feature_dto import FeatureDTO
diff --git a/app/domain/chat/chat_pipeline_execution_base_data_dto.py b/app/domain/chat/chat_pipeline_execution_base_data_dto.py
index e0677c76..a9bfd8d2 100644
--- a/app/domain/chat/chat_pipeline_execution_base_data_dto.py
+++ b/app/domain/chat/chat_pipeline_execution_base_data_dto.py
@@ -3,7 +3,7 @@
 from pydantic import Field, BaseModel

 from app.domain import PipelineExecutionSettingsDTO
-from app.domain.pyris_message import PyrisMessage
+from app.common.pyris_message import PyrisMessage
 from app.domain.data.user_dto import UserDTO
 from app.domain.status.stage_dto import StageDTO
diff --git a/app/domain/chat/chat_pipeline_execution_dto.py b/app/domain/chat/chat_pipeline_execution_dto.py
index e3e63284..a92c8332 100644
--- a/app/domain/chat/chat_pipeline_execution_dto.py
+++ b/app/domain/chat/chat_pipeline_execution_dto.py
@@ -3,7 +3,7 @@
 from pydantic import Field

 from app.domain import PipelineExecutionDTO
-from app.domain.pyris_message import PyrisMessage
+from app.common.pyris_message import PyrisMessage
 from app.domain.data.user_dto import UserDTO
diff --git a/app/domain/chat/interaction_suggestion_dto.py b/app/domain/chat/interaction_suggestion_dto.py
index 3835ce81..e905f83f 100644
--- a/app/domain/chat/interaction_suggestion_dto.py
+++ b/app/domain/chat/interaction_suggestion_dto.py
@@ -2,7 +2,7 @@

 from pydantic import Field, BaseModel

-from app.domain import PyrisMessage
+from app.common.pyris_message import PyrisMessage


 class InteractionSuggestionPipelineExecutionDTO(BaseModel):
diff --git a/app/domain/data/token_usage_dto.py b/app/domain/data/token_usage_dto.py
deleted file mode 100644
index 95221ef9..00000000
--- a/app/domain/data/token_usage_dto.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from pydantic import BaseModel
-
-from app.llm.external.PipelineEnum import PipelineEnum
-
-
-class TokenUsageDTO(BaseModel):
-    model_info: str
-    num_input_tokens: int
-    cost_per_input_token: float
-    num_output_tokens: int
-    cost_per_output_token: float
-    pipeline: PipelineEnum
diff --git a/app/domain/status/status_update_dto.py b/app/domain/status/status_update_dto.py
index 3dfa140b..80848a21 100644
--- a/app/domain/status/status_update_dto.py
+++ b/app/domain/status/status_update_dto.py
@@ -2,7 +2,7 @@

 from pydantic import BaseModel

-from ..data.token_usage_dto import TokenUsageDTO
+from app.common.token_usage_dto import TokenUsageDTO
 from ...domain.status.stage_dto import StageDTO
diff --git a/app/llm/external/LLMTokenCount.py b/app/llm/external/LLMTokenCount.py
deleted file mode 100644
index 8b300d3f..00000000
--- a/app/llm/external/LLMTokenCount.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from app.llm.external.PipelineEnum import PipelineEnum
-
-
-class LLMTokenCount:
-
-    model_info: str
-    num_input_tokens: int
-    cost_per_input_token: float
-    num_output_tokens: int
-    cost_per_output_token: float
-    pipeline: PipelineEnum
-
-    def __init__(
-        self,
-        model_info: str,
-        num_input_tokens: int,
-        cost_per_input_token: float,
-        num_output_tokens: int,
-        cost_per_output_token: float,
-        pipeline: PipelineEnum,
-    ):
-        self.model_info = model_info
-        self.num_input_tokens = num_input_tokens
-        self.cost_per_input_token = cost_per_input_token
-        self.num_output_tokens = num_output_tokens
-        self.cost_per_output_token = cost_per_output_token
-        self.pipeline = pipeline
-
-    def __str__(self):
-        return (
-            f"{self.model_info}: {self.num_input_tokens} in, {self.cost_per_input_token} cost in,"
-            f" {self.num_output_tokens} out, {self.cost_per_output_token} cost out, {self.pipeline} pipeline"
-        )
diff --git a/app/llm/external/model.py b/app/llm/external/model.py
index 47b90962..3fba9e6f 100644
--- a/app/llm/external/model.py
+++ b/app/llm/external/model.py
@@ -1,7 +1,7 @@
 from abc import ABCMeta, abstractmethod
 from pydantic import BaseModel

-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
 from ...llm import CompletionArguments
 from ...llm.capability import CapabilityList
diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py
index b441ef2e..4a29744e 100644
--- a/app/llm/external/ollama.py
+++ b/app/llm/external/ollama.py
@@ -6,10 +6,11 @@
 from ollama import Client, Message

 from ...common.message_converters import map_role_to_str, map_str_to_role
+from ...common.pyris_message import PyrisMessage
+from ...common.token_usage_dto import TokenUsageDTO
 from ...domain.data.json_message_content_dto import JsonMessageContentDTO
 from ...domain.data.text_message_content_dto import TextMessageContentDTO
 from ...domain.data.image_message_content_dto import ImageMessageContentDTO
-from ...domain import PyrisMessage
 from ...llm import CompletionArguments
 from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel

@@ -64,13 +65,16 @@ def convert_to_iris_message(
     Convert a Message to a PyrisMessage
     """
     contents = [TextMessageContentDTO(text_content=message["content"])]
+    tokens = TokenUsageDTO(
+        numInputTokens=num_input_tokens,
+        numOutputTokens=num_output_tokens,
+        modelInfo=model,
+    )
     return PyrisMessage(
         sender=map_str_to_role(message["role"]),
         contents=contents,
-        send_at=datetime.now(),
-        num_input_tokens=num_input_tokens,
-        num_output_tokens=num_output_tokens,
-        model_info=model,
+        sentAt=datetime.now(),
+        token_usage=tokens,
     )
diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py
index 99d6c4b6..9ec9d0d1 100644
--- a/app/llm/external/openai_chat.py
+++ b/app/llm/external/openai_chat.py
@@ -12,7 +12,8 @@
 from ...common.message_converters import map_str_to_role, map_role_to_str
 from app.domain.data.text_message_content_dto import TextMessageContentDTO
-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
+from ...common.token_usage_dto import TokenUsageDTO
 from ...domain.data.image_message_content_dto import ImageMessageContentDTO
 from ...domain.data.json_message_content_dto import JsonMessageContentDTO
 from ...llm import CompletionArguments
@@ -71,13 +72,17 @@ def convert_to_iris_message(
     num_input_tokens = getattr(usage, "prompt_tokens", -1)
     num_output_tokens = getattr(usage, "completion_tokens", -1)

+    tokens = TokenUsageDTO(
+        modelInfo=model,
+        numInputTokens=num_input_tokens,
+        numOutputTokens=num_output_tokens,
+    )
+
     message = PyrisMessage(
         sender=map_str_to_role(message.role),
         contents=[TextMessageContentDTO(textContent=message.content)],
-        send_at=datetime.now(),
-        num_input_tokens=num_input_tokens,
-        num_output_tokens=num_output_tokens,
-        model_info=model,
+        sentAt=datetime.now(),
+        token_usage=tokens,
     )
     return message
diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py
index 29065353..94f41d5d 100644
--- a/app/llm/langchain/iris_langchain_chat_model.py
+++ b/app/llm/langchain/iris_langchain_chat_model.py
@@ -1,3 +1,4 @@
+import logging
 from typing import List, Optional, Any

 from langchain_core.callbacks import CallbackManagerForLLMRun
@@ -7,12 +8,12 @@
 from langchain_core.messages import BaseMessage
 from langchain_core.outputs import ChatResult, ChatGeneration

-from ..external.LLMTokenCount import LLMTokenCount
-from ..external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ...common import (
     convert_iris_message_to_langchain_message,
     convert_langchain_message_to_iris_message,
 )
+from app.common.token_usage_dto import TokenUsageDTO
 from ...llm import RequestHandler, CompletionArguments


@@ -21,7 +22,8 @@ class IrisLangchainChatModel(BaseChatModel):

     request_handler: RequestHandler
     completion_args: CompletionArguments
-    tokens: LLMTokenCount = None
+    tokens: TokenUsageDTO = None
+    logger = logging.getLogger(__name__)

     def __init__(
         self,
@@ -45,12 +47,12 @@ def _generate(
         iris_message = self.request_handler.chat(iris_messages, self.completion_args)
         base_message = convert_iris_message_to_langchain_message(iris_message)
         chat_generation = ChatGeneration(message=base_message)
-        self.tokens = LLMTokenCount(
-            model_info=iris_message.model_info,
-            num_input_tokens=iris_message.num_input_tokens,
-            cost_per_input_token=iris_message.cost_per_input_token,
-            num_output_tokens=iris_message.num_output_tokens,
-            cost_per_output_token=iris_message.cost_per_output_token,
+        self.tokens = TokenUsageDTO(
+            modelInfo=iris_message.token_usage.model_info,
+            numInputTokens=iris_message.token_usage.num_input_tokens,
+            costPerInputToken=iris_message.token_usage.cost_per_input_token,
+            numOutputTokens=iris_message.token_usage.num_output_tokens,
+            costPerOutputToken=iris_message.token_usage.cost_per_output_token,
             pipeline=PipelineEnum.NOT_SET,
         )
         return ChatResult(generations=[chat_generation])
diff --git a/app/llm/request_handler/basic_request_handler.py b/app/llm/request_handler/basic_request_handler.py
index 5756346f..1342a71c 100644
--- a/app/llm/request_handler/basic_request_handler.py
+++ b/app/llm/request_handler/basic_request_handler.py
@@ -1,6 +1,6 @@
 from typing import Optional

-from app.domain import PyrisMessage
+from app.common.pyris_message import PyrisMessage
 from app.domain.data.image_message_content_dto import ImageMessageContentDTO
 from app.llm.request_handler import RequestHandler
 from app.llm.completion_arguments import CompletionArguments
diff --git a/app/llm/request_handler/capability_request_handler.py b/app/llm/request_handler/capability_request_handler.py
index ebb3c12a..97d6a36f 100644
--- a/app/llm/request_handler/capability_request_handler.py
+++ b/app/llm/request_handler/capability_request_handler.py
@@ -1,6 +1,6 @@
 from enum import Enum

-from app.domain import PyrisMessage
+from app.common.pyris_message import PyrisMessage
 from app.llm.capability import RequirementList
 from app.llm.external.model import (
     ChatModel,
@@ -45,8 +45,8 @@ def chat(
     ) -> PyrisMessage:
         llm = self._select_model(ChatModel)
         message = llm.chat(messages, arguments)
-        message.cost_per_input_token = llm.capabilities.input_cost.value
-        message.cost_per_output_token = llm.capabilities.output_cost.value
+        message.token_usage.cost_per_input_token = llm.capabilities.input_cost.value
+        message.token_usage.cost_per_output_token = llm.capabilities.output_cost.value
         return message

     def embed(self, text: str) -> list[float]:
diff --git a/app/llm/request_handler/request_handler_interface.py b/app/llm/request_handler/request_handler_interface.py
index 390a4cbc..89dccedb 100644
--- a/app/llm/request_handler/request_handler_interface.py
+++ b/app/llm/request_handler/request_handler_interface.py
@@ -1,7 +1,7 @@
 from abc import ABCMeta, abstractmethod
 from typing import Optional

-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
 from ...domain.data.image_message_content_dto import ImageMessageContentDTO
 from ...llm import CompletionArguments
diff --git a/app/pipeline/chat/code_feedback_pipeline.py b/app/pipeline/chat/code_feedback_pipeline.py
index 039a0064..90c27ecc 100644
--- a/app/pipeline/chat/code_feedback_pipeline.py
+++ b/app/pipeline/chat/code_feedback_pipeline.py
@@ -8,13 +8,13 @@
 from langsmith import traceable
 from pydantic import BaseModel

-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
 from ...domain.data.build_log_entry import BuildLogEntryDTO
 from ...domain.data.feedback_dto import FeedbackDTO
+from app.common.token_usage_dto import TokenUsageDTO
 from ...llm import CapabilityRequestHandler, RequirementList
 from ...llm import CompletionArguments
-from ...llm.external.LLMTokenCount import LLMTokenCount
-from ...llm.external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ...llm.langchain import IrisLangchainChatModel
 from ...pipeline import Pipeline
 from ...web.status.status_update import StatusCallback
@@ -42,7 +42,7 @@ class CodeFeedbackPipeline(Pipeline):
     callback: StatusCallback
     default_prompt: PromptTemplate
     output_parser: StrOutputParser
-    tokens: LLMTokenCount
+    tokens: TokenUsageDTO

     def __init__(self, callback: Optional[StatusCallback] = None):
         super().__init__(implementation_id="code_feedback_pipeline_reference_impl")
diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py
index 84e9eed2..b21616d0 100644
--- a/app/pipeline/chat/course_chat_pipeline.py
+++ b/app/pipeline/chat/course_chat_pipeline.py
@@ -22,7 +22,7 @@
 from .lecture_chat_pipeline import LectureChatPipeline
 from ..shared.citation_pipeline import CitationPipeline
 from ...common import convert_iris_message_to_langchain_message
-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
 from ...llm import CapabilityRequestHandler, RequirementList
 from ..prompts.iris_course_chat_prompts import (
     tell_iris_initial_system_prompt,
@@ -41,7 +41,7 @@
     elicit_begin_agent_jol_prompt,
 )
 from ...domain import CourseChatPipelineExecutionDTO
-from ...llm.external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ...retrieval.lecture_retrieval import LectureRetrieval
 from ...vector_database.database import VectorDatabase
 from ...vector_database.lecture_schema import LectureSchema
@@ -408,9 +408,9 @@ def lecture_content_retrieval() -> str:
             self.callback.in_progress()
             for step in agent_executor.iter(params):
                 print("STEP:", step)
-                token_count = self.llm.tokens
-                token_count.pipeline = PipelineEnum.IRIS_CHAT_COURSE_MESSAGE
-                self.tokens.append(token_count)
+                self._append_tokens(
+                    self.llm.tokens, PipelineEnum.IRIS_CHAT_COURSE_MESSAGE
+                )
                 if step.get("output", None):
                     out = step["output"]
@@ -446,7 +446,8 @@
             )
             traceback.print_exc()
             self.callback.error(
-                "An error occurred while running the course chat pipeline."
+                "An error occurred while running the course chat pipeline.",
+                tokens=self.tokens,
             )

     def should_allow_lecture_tool(self, course_id: int) -> bool:
diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py
index c82991c1..ea6f0536 100644
--- a/app/pipeline/chat/exercise_chat_pipeline.py
+++ b/app/pipeline/chat/exercise_chat_pipeline.py
@@ -24,8 +24,8 @@
 from ..shared.citation_pipeline import CitationPipeline
 from ..shared.reranker_pipeline import RerankerPipeline
 from ...common import convert_iris_message_to_langchain_message
+from ...common.pyris_message import PyrisMessage
 from ...domain import ExerciseChatPipelineExecutionDTO
-from ...domain import PyrisMessage
 from ...domain.chat.interaction_suggestion_dto import (
     InteractionSuggestionPipelineExecutionDTO,
 )
@@ -34,7 +34,7 @@
 from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO
 from ...llm import CapabilityRequestHandler, RequirementList
 from ...llm import CompletionArguments
-from ...llm.external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ...llm.langchain import IrisLangchainChatModel
 from ...retrieval.lecture_retrieval import LectureRetrieval
 from ...vector_database.database import VectorDatabase
@@ -137,11 +137,15 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
             )
             traceback.print_exc()
             self.callback.error(
-                "Generating interaction suggestions failed.", exception=e
+                "Generating interaction suggestions failed.",
+                exception=e,
+                tokens=self.tokens,
             )
         except Exception as e:
             traceback.print_exc()
-            self.callback.error(f"Failed to generate response: {e}", exception=e)
+            self.callback.error(
+                f"Failed to generate response: {e}", exception=e, tokens=self.tokens
+            )

     def _run_exercise_chat_pipeline(
         self,
@@ -226,7 +230,9 @@ def _run_exercise_chat_pipeline(
             )
         except Exception as e:
             self.callback.error(
-                f"Failed to look up files in the repository: {e}", exception=e
+                f"Failed to look up files in the repository: {e}",
+                exception=e,
+                tokens=self.tokens,
             )
             return

@@ -242,7 +248,9 @@
             )
         except Exception as e:
             self.callback.error(
-                f"Failed to retrieve lecture chunks: {e}", exception=e
+                f"Failed to retrieve lecture chunks: {e}",
+                exception=e,
+                tokens=self.tokens,
             )
             return

@@ -268,7 +276,9 @@
                 .with_config({"run_name": "Response Drafting"})
                 .invoke({})
             )
-            self._collect_llm_tokens()
+            self._append_tokens(
+                self.llm.tokens, PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE
+            )
             self.callback.done()
             self.prompt = ChatPromptTemplate.from_messages(
                 [
@@ -283,7 +293,9 @@
                 .with_config({"run_name": "Response Refining"})
                 .invoke({})
             )
-            self._collect_llm_tokens()
+            self._append_tokens(
+                self.llm.tokens, PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE
+            )

             if "!ok!" in guide_response:
                 print("Response is ok and not rewritten!!!")
@@ -292,7 +304,9 @@
                 print("Response is rewritten.")
                 self.exercise_chat_response = guide_response
         except Exception as e:
-            self.callback.error(f"Failed to create response: {e}", exception=e)
+            self.callback.error(
+                f"Failed to create response: {e}", exception=e, tokens=self.tokens
+            )
             # print stack trace
             traceback.print_exc()
             return "Failed to generate response"
@@ -385,8 +399,3 @@ def should_execute_lecture_pipeline(self, course_id: int) -> bool:
             )
             return len(result.objects) > 0
         return False
-
-    def _collect_llm_tokens(self):
-        if self.llm.tokens is not None:
-            self.llm.tokens.pipeline = PipelineEnum.IRIS_CHAT_EXERCISE_MESSAGE
-            self.tokens.append(self.llm.tokens)
diff --git a/app/pipeline/chat/interaction_suggestion_pipeline.py b/app/pipeline/chat/interaction_suggestion_pipeline.py
index 4a50795e..620728de 100644
--- a/app/pipeline/chat/interaction_suggestion_pipeline.py
+++ b/app/pipeline/chat/interaction_suggestion_pipeline.py
@@ -13,10 +13,11 @@
 from pydantic.v1 import Field, BaseModel

 from ...common import convert_iris_message_to_langchain_message
-from ...domain import PyrisMessage
 from app.domain.chat.interaction_suggestion_dto import (
     InteractionSuggestionPipelineExecutionDTO,
 )
+from app.common.token_usage_dto import TokenUsageDTO
+from ...common.pyris_message import PyrisMessage
 from ...llm import CapabilityRequestHandler, RequirementList
 from ..prompts.iris_interaction_suggestion_prompts import (
     course_chat_begin_prompt,
@@ -34,8 +35,7 @@
 )

 from ...llm import CompletionArguments
-from ...llm.external.LLMTokenCount import LLMTokenCount
-from ...llm.external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ...llm.langchain import IrisLangchainChatModel
 from ..pipeline import Pipeline

@@ -54,7 +54,7 @@ class InteractionSuggestionPipeline(Pipeline):
     pipeline: Runnable
     prompt: ChatPromptTemplate
     variant: str
-    tokens: LLMTokenCount
+    tokens: TokenUsageDTO

     def __init__(self, variant: str = "default"):
         super().__init__(implementation_id="interaction_suggestion_pipeline")
diff --git a/app/pipeline/chat/lecture_chat_pipeline.py b/app/pipeline/chat/lecture_chat_pipeline.py
index e3f4b07d..22eb8c7a 100644
--- a/app/pipeline/chat/lecture_chat_pipeline.py
+++ b/app/pipeline/chat/lecture_chat_pipeline.py
@@ -11,12 +11,12 @@
 from ..shared.citation_pipeline import CitationPipeline
 from ...common import convert_iris_message_to_langchain_message
-from ...domain import PyrisMessage
+from ...common.pyris_message import PyrisMessage
 from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import (
     LectureChatPipelineExecutionDTO,
 )
 from ...llm import CapabilityRequestHandler, RequirementList
-from ...llm.external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ...retrieval.lecture_retrieval import LectureRetrieval
 from ...vector_database.database import VectorDatabase
 from ...vector_database.lecture_schema import LectureSchema
@@ -116,9 +116,7 @@ def __call__(self, dto: LectureChatPipelineExecutionDTO):
         self.prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
             response = (self.prompt | self.pipeline).invoke({})
-            token_usage = self.llm.tokens
-            token_usage.pipeline = PipelineEnum.IRIS_CHAT_LECTURE_MESSAGE
-            self.tokens.append(token_usage)
+            self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_CHAT_LECTURE_MESSAGE)
             response_with_citation = self.citation_pipeline(
                 retrieved_lecture_chunks, response
             )
diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index 738fdcb7..bcc2846f 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -6,16 +6,13 @@
     ChatPromptTemplate,
 )

+from app.common.pyris_message import PyrisMessage, IrisMessageRole
 from app.domain import (
     CompetencyExtractionPipelineExecutionDTO,
-    PyrisMessage,
-    IrisMessageRole,
 )
 from app.domain.data.text_message_content_dto import TextMessageContentDTO
 from app.domain.data.competency_dto import Competency
 from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
-from app.llm.external.LLMTokenCount import LLMTokenCount
-from app.llm.external.PipelineEnum import PipelineEnum
 from app.pipeline import Pipeline
 from app.web.status.status_update import CompetencyExtractionCallback
 from app.pipeline.prompts.competency_extraction import system_prompt
@@ -79,13 +76,7 @@ def __call__(
         response = self.request_handler.chat(
             [prompt], CompletionArguments(temperature=0.4)
         )
-        token_usage = LLMTokenCount(
-            model_info=response.model_info,
-            num_input_tokens=response.num_input_tokens,
-            num_output_tokens=response.num_output_tokens,
-            pipeline=PipelineEnum.IRIS_COMPETENCY_GENERATION,
-        )
-        self.tokens.append(token_usage)
+        self.tokens.append(response.token_usage)
         response = response.contents[0].text_content

         generated_competencies: list[Competency] = []
diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py
index a896be37..73d6371b 100644
--- a/app/pipeline/lecture_ingestion_pipeline.py
+++ b/app/pipeline/lecture_ingestion_pipeline.py
@@ -10,7 +10,7 @@
 from weaviate import WeaviateClient
 from weaviate.classes.query import Filter
 from . import Pipeline
-from ..domain import IrisMessageRole, PyrisMessage
+from ..common.pyris_message import PyrisMessage, IrisMessageRole
 from ..domain.data.image_message_content_dto import ImageMessageContentDTO
 from ..domain.data.lecture_unit_dto import LectureUnitDTO
@@ -18,7 +18,7 @@
     IngestionPipelineExecutionDto,
 )
 from ..domain.data.text_message_content_dto import TextMessageContentDTO
-from ..llm.external.PipelineEnum import PipelineEnum
+from app.common.PipelineEnum import PipelineEnum
 from ..llm.langchain import IrisLangchainChatModel
 from ..vector_database.lecture_schema import init_lecture_schema, LectureSchema
 from ..ingestion.abstract_ingestion import AbstractIngestion
@@ -141,7 +141,7 @@ def __call__(self) -> bool:
             self.callback.done("Lecture Chunking and interpretation Finished")
             self.callback.in_progress("Ingesting lecture chunks into database...")
             self.batch_update(chunks)
-            self.callback.done("Lecture Ingestion Finished")
+            self.callback.done("Lecture Ingestion Finished", tokens=self.tokens)
             logger.info(
                 f"Lecture ingestion pipeline finished Successfully for course "
                 f"{self.dto.lecture_units[0].course_name}"
@@ -150,7 +150,9 @@
         except Exception as e:
             logger.error(f"Error updating lecture unit: {e}")
             self.callback.error(
-                f"Failed to ingest lectures into the database: {e}", exception=e
+                f"Failed to ingest lectures into the database: {e}",
+                exception=e,
+                tokens=self.tokens,
             )
             return False

@@ -172,7 +174,9 @@ def batch_update(self, chunks):
         except Exception as e:
             logger.error(f"Error updating lecture unit: {e}")
             self.callback.error(
-                f"Failed to ingest lectures into the database: {e}", exception=e
+                f"Failed to ingest lectures into the database: {e}",
+                exception=e,
+                tokens=self.tokens,
             )

     def chunk_data(
@@ -247,6 +251,9 @@ def interpret_image(
             response = self.llm_vision.chat(
                 [iris_message], CompletionArguments(temperature=0, max_tokens=512)
             )
+            self._append_tokens(
+                response.token_usage, PipelineEnum.IRIS_LECTURE_INGESTION
+            )
         except Exception as e:
             logger.error(f"Error interpreting image: {e}")
             return None
@@ -278,10 +285,7 @@ def merge_page_content_and_image_interpretation(
         clean_output = clean(
             (prompt | self.pipeline).invoke({}), bullets=True, extra_whitespace=True
         )
-        # TODO: send to artemis
-        token_usage = self.llm.tokens
-        token_usage.pipeline = PipelineEnum.IRIS_LECTURE_INGESTION
-        self.tokens.append(token_usage)
+        self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_LECTURE_INGESTION)
         return clean_output

     def get_course_language(self, page_content: str) -> str:
@@ -299,6 +303,7 @@ def get_course_language(self, page_content: str) -> str:
         response = self.llm_chat.chat(
             [iris_message], CompletionArguments(temperature=0, max_tokens=20)
         )
+        self._append_tokens(response.token_usage, PipelineEnum.IRIS_LECTURE_INGESTION)
         return response.contents[0].text_content

     def delete_old_lectures(self):
diff --git a/app/pipeline/pipeline.py b/app/pipeline/pipeline.py
index 338bb767..428dcf62 100644
--- a/app/pipeline/pipeline.py
+++ b/app/pipeline/pipeline.py
@@ -1,14 +1,15 @@
 from abc import ABCMeta
 from typing import List

-from app.llm.external.LLMTokenCount import LLMTokenCount
+from app.common.token_usage_dto import TokenUsageDTO
+from app.common.PipelineEnum import PipelineEnum


 class Pipeline(metaclass=ABCMeta):
     """Abstract class for all pipelines"""

     implementation_id: str
-    tokens: List[LLMTokenCount]
+    tokens: List[TokenUsageDTO]

     def __init__(self, implementation_id=None, **kwargs):
         self.implementation_id = implementation_id
@@ -31,3 +32,7 @@ def __init_subclass__(cls, **kwargs):
         raise NotImplementedError(
             "Subclasses of Pipeline interface must implement the __call__ method."
         )
+
+    def _append_tokens(self, tokens: TokenUsageDTO, pipeline: PipelineEnum) -> None:
+        tokens.pipeline = pipeline
+        self.tokens.append(tokens)
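The remaining diffs of this commit route every pipeline's token bookkeeping through the `_append_tokens` helper just added to `Pipeline`, and ultimately ship the collected list to Artemis inside a `StatusUpdateDTO`. A rough sketch (stand-in models, not the real Iris classes) of what that status payload looks like on the wire:

# Hypothetical sketch: a status update carrying token usage records, serialized
# with camelCase aliases the way the Pydantic DTOs in this series are configured.
from typing import List
from pydantic import BaseModel, Field


class TokenUsage(BaseModel):  # stand-in for TokenUsageDTO
    num_input_tokens: int = Field(alias="numInputTokens", default=0)
    num_output_tokens: int = Field(alias="numOutputTokens", default=0)


class StatusUpdate(BaseModel):  # stand-in for StatusUpdateDTO
    tokens: List[TokenUsage] = []


update = StatusUpdate(tokens=[TokenUsage(numInputTokens=120, numOutputTokens=40)])
print(update.model_dump(by_alias=True))
# {'tokens': [{'numInputTokens': 120, 'numOutputTokens': 40}]}

Passing `tokens=self.tokens` into `callback.error(...)`, as the diffs below do, ensures usage is reported even when a stage fails partway through.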
""" - tokens: [LLMTokenCount] + tokens: [TokenUsageDTO] def __init__(self, client: WeaviateClient, **kwargs): super().__init__(implementation_id="lecture_retrieval_pipeline") diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index a8687862..199b39d1 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -5,6 +5,7 @@ import requests from abc import ABC +from app.common.token_usage_dto import TokenUsageDTO from ...domain.status.competency_extraction_status_update_dto import ( CompetencyExtractionStatusUpdateDTO, ) @@ -19,8 +20,6 @@ from ...domain.status.status_update_dto import StatusUpdateDTO import logging -from ...llm.external.LLMTokenCount import LLMTokenCount - logger = logging.getLogger(__name__) @@ -98,7 +97,7 @@ def done( message: Optional[str] = None, final_result: Optional[str] = None, suggestions: Optional[List[str]] = None, - tokens: Optional[List[LLMTokenCount]] = None, + tokens: Optional[List[TokenUsageDTO]] = None, next_stage_message: Optional[str] = None, start_next_stage: bool = True, ): @@ -122,7 +121,9 @@ def done( self.stage.state = StageStateEnum.IN_PROGRESS self.on_status_update() - def error(self, message: str, exception=None): + def error( + self, message: str, exception=None, tokens: Optional[List[TokenUsageDTO]] = None + ): """ Transition the current stage to ERROR and update the status. Set all later stages to SKIPPED if an error occurs. @@ -130,6 +131,7 @@ def error(self, message: str, exception=None): self.stage.state = StageStateEnum.ERROR self.stage.message = message self.status.result = None + self.status.tokens = tokens or self.status.tokens # Set all subsequent stages to SKIPPED if an error occurs rest_of_index = ( self.current_stage_index + 1 From 6bd4b33ac5f6e6ef4dea26755b03c382f823030f Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Sat, 12 Oct 2024 21:45:14 +0200 Subject: [PATCH 11/17] Fix competency extraction did not save Enum --- app/pipeline/competency_extraction_pipeline.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index bcc2846f..12efb65f 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -6,6 +6,7 @@ ChatPromptTemplate, ) +from app.common.PipelineEnum import PipelineEnum from app.common.pyris_message import PyrisMessage, IrisMessageRole from app.domain import ( CompetencyExtractionPipelineExecutionDTO, @@ -76,7 +77,9 @@ def __call__( response = self.request_handler.chat( [prompt], CompletionArguments(temperature=0.4) ) - self.tokens.append(response.token_usage) + self._append_tokens( + response.token_usage, PipelineEnum.IRIS_COMPETENCY_GENERATION + ) response = response.contents[0].text_content generated_competencies: list[Competency] = [] From 4d61c85c9731b41db5fc1103c3f094aadd941609 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Tue, 15 Oct 2024 15:32:32 +0200 Subject: [PATCH 12/17] Update code after merge --- app/domain/text_exercise_chat_pipeline_execution_dto.py | 3 ++- app/llm/external/openai_chat.py | 4 +++- app/pipeline/text_exercise_chat_pipeline.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/app/domain/text_exercise_chat_pipeline_execution_dto.py b/app/domain/text_exercise_chat_pipeline_execution_dto.py index 65e8871c..ed77892c 100644 --- a/app/domain/text_exercise_chat_pipeline_execution_dto.py +++ b/app/domain/text_exercise_chat_pipeline_execution_dto.py 
@@ -1,6 +1,7 @@ from pydantic import BaseModel, Field -from app.domain import PipelineExecutionDTO, PyrisMessage +from app.common.pyris_message import PyrisMessage +from app.domain import PipelineExecutionDTO from app.domain.data.text_exercise_dto import TextExerciseDTO diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 8688149d..005c2dd7 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -128,13 +128,15 @@ def chat( max_tokens=arguments.max_tokens, ) choice = response.choices[0] + usage = response.usage + model = response.model if choice.finish_reason == "content_filter": # I figured that an openai error would be automatically raised if the content filter activated, # but it seems that that is not the case. # We don't want to retry because the same message will likely be rejected again. # Raise an exception to trigger the global error handler and report a fatal error to the client. raise ContentFilterFinishReasonError() - return convert_to_iris_message(choice.message) + return convert_to_iris_message(choice.message, usage, model) except ( APIError, APITimeoutError, diff --git a/app/pipeline/text_exercise_chat_pipeline.py b/app/pipeline/text_exercise_chat_pipeline.py index 5d27fc71..9bcf2431 100644 --- a/app/pipeline/text_exercise_chat_pipeline.py +++ b/app/pipeline/text_exercise_chat_pipeline.py @@ -2,9 +2,9 @@ from datetime import datetime from typing import Optional, List, Tuple +from app.common.pyris_message import PyrisMessage, IrisMessageRole from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments from app.pipeline import Pipeline -from app.domain import PyrisMessage, IrisMessageRole from app.domain.text_exercise_chat_pipeline_execution_dto import ( TextExerciseChatPipelineExecutionDTO, ) From 3253c46b097fb7a740079fc6b354eb97da7d30a0 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Wed, 16 Oct 2024 11:18:02 +0200 Subject: [PATCH 13/17] Make -1 default value if no tokens have been received --- app/llm/external/ollama.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py index 4a29744e..146ed82a 100644 --- a/app/llm/external/ollama.py +++ b/app/llm/external/ollama.py @@ -119,8 +119,8 @@ def chat( ) return convert_to_iris_message( response.get("message"), - response.get("prompt_eval_count", 0), - response.get("eval_count", 0), + response.get("prompt_eval_count", -1), + response.get("eval_count", -1), response.get("model", self.model), ) From 9fe9e0a66783f7dbe3d91e5c17297e7373c07c88 Mon Sep 17 00:00:00 2001 From: Alexander Joham Date: Sat, 19 Oct 2024 20:33:51 +0200 Subject: [PATCH 14/17] Update DTO for new Artemis table --- app/common/token_usage_dto.py | 8 ++++---- app/llm/external/ollama.py | 2 +- app/llm/external/openai_chat.py | 2 +- app/llm/langchain/iris_langchain_chat_model.py | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/app/common/token_usage_dto.py b/app/common/token_usage_dto.py index 9579c831..a0ee3eda 100644 --- a/app/common/token_usage_dto.py +++ b/app/common/token_usage_dto.py @@ -4,12 +4,12 @@ class TokenUsageDTO(BaseModel): - model_info: str = Field(alias="modelInfo", default="") + model_info: str = Field(alias="model", default="") num_input_tokens: int = Field(alias="numInputTokens", default=0) - cost_per_input_token: float = Field(alias="costPerInputToken", default=0) + cost_per_input_token: float = Field(alias="costPerMillionInputToken", default=0) num_output_tokens: 
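The openai_chat.py change in this commit threads `response.usage` and `response.model` into `convert_to_iris_message`, relying on `getattr()` fallbacks since `usage` is Optional. A small sketch of that pattern, with a hypothetical stand-in for openai's CompletionUsage:

# Hypothetical stand-in for openai.types.CompletionUsage.
class Usage:
    prompt_tokens = 1200
    completion_tokens = 350


def extract_token_counts(usage) -> tuple[int, int]:
    # -1 marks "unknown" at this point in the series; a later commit switches to 0.
    return (
        getattr(usage, "prompt_tokens", -1),
        getattr(usage, "completion_tokens", -1),
    )


print(extract_token_counts(Usage()))  # (1200, 350)
print(extract_token_counts(None))     # (-1, -1) -- usage missing, no crash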
diff --git a/app/domain/text_exercise_chat_pipeline_execution_dto.py b/app/domain/text_exercise_chat_pipeline_execution_dto.py
index 65e8871c..ed77892c 100644
--- a/app/domain/text_exercise_chat_pipeline_execution_dto.py
+++ b/app/domain/text_exercise_chat_pipeline_execution_dto.py
@@ -1,6 +1,7 @@
 from pydantic import BaseModel, Field

-from app.domain import PipelineExecutionDTO, PyrisMessage
+from app.common.pyris_message import PyrisMessage
+from app.domain import PipelineExecutionDTO
 from app.domain.data.text_exercise_dto import TextExerciseDTO
diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py
index 8688149d..005c2dd7 100644
--- a/app/llm/external/openai_chat.py
+++ b/app/llm/external/openai_chat.py
@@ -128,13 +128,15 @@ def chat(
                     max_tokens=arguments.max_tokens,
                 )
                 choice = response.choices[0]
+                usage = response.usage
+                model = response.model
                 if choice.finish_reason == "content_filter":
                     # I figured that an openai error would be automatically raised if the content filter activated,
                     # but it seems that that is not the case.
                     # We don't want to retry because the same message will likely be rejected again.
                     # Raise an exception to trigger the global error handler and report a fatal error to the client.
                     raise ContentFilterFinishReasonError()
-                return convert_to_iris_message(choice.message)
+                return convert_to_iris_message(choice.message, usage, model)
             except (
                 APIError,
                 APITimeoutError,
diff --git a/app/pipeline/text_exercise_chat_pipeline.py b/app/pipeline/text_exercise_chat_pipeline.py
index 5d27fc71..9bcf2431 100644
--- a/app/pipeline/text_exercise_chat_pipeline.py
+++ b/app/pipeline/text_exercise_chat_pipeline.py
@@ -2,9 +2,9 @@
 from datetime import datetime
 from typing import Optional, List, Tuple

+from app.common.pyris_message import PyrisMessage, IrisMessageRole
 from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
 from app.pipeline import Pipeline
-from app.domain import PyrisMessage, IrisMessageRole
 from app.domain.text_exercise_chat_pipeline_execution_dto import (
     TextExerciseChatPipelineExecutionDTO,
 )

From 3253c46b097fb7a740079fc6b354eb97da7d30a0 Mon Sep 17 00:00:00 2001
From: Alexander Joham
Date: Wed, 16 Oct 2024 11:18:02 +0200
Subject: [PATCH 13/17] Make -1 default value if no tokens have been received

---
 app/llm/external/ollama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py
index 4a29744e..146ed82a 100644
--- a/app/llm/external/ollama.py
+++ b/app/llm/external/ollama.py
@@ -119,8 +119,8 @@ def chat(
         )
         return convert_to_iris_message(
             response.get("message"),
-            response.get("prompt_eval_count", 0),
-            response.get("eval_count", 0),
+            response.get("prompt_eval_count", -1),
+            response.get("eval_count", -1),
             response.get("model", self.model),
         )

From 9fe9e0a66783f7dbe3d91e5c17297e7373c07c88 Mon Sep 17 00:00:00 2001
From: Alexander Joham
Date: Sat, 19 Oct 2024 20:33:51 +0200
Subject: [PATCH 14/17] Update DTO for new Artemis table

---
 app/common/token_usage_dto.py                  | 8 ++++----
 app/llm/external/ollama.py                     | 2 +-
 app/llm/external/openai_chat.py                | 2 +-
 app/llm/langchain/iris_langchain_chat_model.py | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)
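The alias renames in this commit change only the wire format sent to the new Artemis table; the Python attribute names stay snake_case. A sketch of the round-trip, assuming Pydantic v2 (construction uses the aliases, `model_dump(by_alias=True)` emits them; the `protected_namespaces` setting is added here only to silence the v2 warning about the `model_info` field name):

from pydantic import BaseModel, ConfigDict, Field


class TokenUsageDTO(BaseModel):
    model_config = ConfigDict(protected_namespaces=())

    model_info: str = Field(alias="model", default="")
    num_input_tokens: int = Field(alias="numInputTokens", default=0)
    cost_per_input_token: float = Field(alias="costPerMillionInputToken", default=0)


usage = TokenUsageDTO(model="gpt-4o", numInputTokens=1200)
print(usage.model_info)                 # gpt-4o
print(usage.model_dump(by_alias=True))
# {'model': 'gpt-4o', 'numInputTokens': 1200, 'costPerMillionInputToken': 0.0}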
diff --git a/app/common/token_usage_dto.py b/app/common/token_usage_dto.py
index 9579c831..a0ee3eda 100644
--- a/app/common/token_usage_dto.py
+++ b/app/common/token_usage_dto.py
@@ -4,12 +4,12 @@

 class TokenUsageDTO(BaseModel):
-    model_info: str = Field(alias="modelInfo", default="")
+    model_info: str = Field(alias="model", default="")
     num_input_tokens: int = Field(alias="numInputTokens", default=0)
-    cost_per_input_token: float = Field(alias="costPerInputToken", default=0)
+    cost_per_input_token: float = Field(alias="costPerMillionInputToken", default=0)
     num_output_tokens: int = Field(alias="numOutputTokens", default=0)
-    cost_per_output_token: float = Field(alias="costPerOutputToken", default=0)
-    pipeline: PipelineEnum = Field(default=PipelineEnum.NOT_SET)
+    cost_per_output_token: float = Field(alias="costPerMillionOutputToken", default=0)
+    pipeline: PipelineEnum = Field(alias="pipelineId", default=PipelineEnum.NOT_SET)

     def __str__(self):
         return (
diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py
index 146ed82a..2cea702f 100644
--- a/app/llm/external/ollama.py
+++ b/app/llm/external/ollama.py
@@ -68,7 +68,7 @@ def convert_to_iris_message(
     tokens = TokenUsageDTO(
         numInputTokens=num_input_tokens,
         numOutputTokens=num_output_tokens,
-        modelInfo=model,
+        model=model,
     )
     return PyrisMessage(
         sender=map_str_to_role(message["role"]),
diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py
index 005c2dd7..7a49f0c6 100644
--- a/app/llm/external/openai_chat.py
+++ b/app/llm/external/openai_chat.py
@@ -79,7 +79,7 @@ def convert_to_iris_message(
     num_output_tokens = getattr(usage, "completion_tokens", -1)

     tokens = TokenUsageDTO(
-        modelInfo=model,
+        model=model,
         numInputTokens=num_input_tokens,
         numOutputTokens=num_output_tokens,
     )
diff --git a/app/llm/langchain/iris_langchain_chat_model.py b/app/llm/langchain/iris_langchain_chat_model.py
index 94f41d5d..c8b1c6da 100644
--- a/app/llm/langchain/iris_langchain_chat_model.py
+++ b/app/llm/langchain/iris_langchain_chat_model.py
@@ -48,11 +48,11 @@ def _generate(
         base_message = convert_iris_message_to_langchain_message(iris_message)
         chat_generation = ChatGeneration(message=base_message)
         self.tokens = TokenUsageDTO(
-            modelInfo=iris_message.token_usage.model_info,
+            model=iris_message.token_usage.model_info,
             numInputTokens=iris_message.token_usage.num_input_tokens,
-            costPerInputToken=iris_message.token_usage.cost_per_input_token,
+            costPerMillionInputToken=iris_message.token_usage.cost_per_input_token,
             numOutputTokens=iris_message.token_usage.num_output_tokens,
-            costPerOutputToken=iris_message.token_usage.cost_per_output_token,
+            costPerMillionOutputToken=iris_message.token_usage.cost_per_output_token,
             pipeline=PipelineEnum.NOT_SET,
         )
         return ChatResult(generations=[chat_generation])

From 13c5db1b94a293c83d3f678f6e2e00e28e301e8d Mon Sep 17 00:00:00 2001
From: Alexander Joham
Date: Wed, 23 Oct 2024 16:31:58 +0200
Subject: [PATCH 15/17] Change number of tokens if error to 0, as is standard
 by OpenAI & Ollama

---
 app/llm/external/ollama.py      | 4 ++--
 app/llm/external/openai_chat.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/llm/external/ollama.py b/app/llm/external/ollama.py
index 2cea702f..1b89f3c4 100644
--- a/app/llm/external/ollama.py
+++ b/app/llm/external/ollama.py
@@ -119,8 +119,8 @@ def chat(
         )
         return convert_to_iris_message(
             response.get("message"),
-            response.get("prompt_eval_count", -1),
-            response.get("eval_count", -1),
+            response.get("prompt_eval_count", 0),
+            response.get("eval_count", 0),
             response.get("model", self.model),
         )

diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py
index 7a49f0c6..75b6f3b2 100644
--- a/app/llm/external/openai_chat.py
+++ b/app/llm/external/openai_chat.py
@@ -75,8 +75,8 @@ def convert_to_iris_message(
     """
     Convert a ChatCompletionMessage to a PyrisMessage
     """
-    num_input_tokens = getattr(usage, "prompt_tokens", -1)
-    num_output_tokens = getattr(usage, "completion_tokens", -1)
+    num_input_tokens = getattr(usage, "prompt_tokens", 0)
+    num_output_tokens = getattr(usage, "completion_tokens", 0)

     tokens = TokenUsageDTO(
         model=model,

From dd504fc6b7d288066c7eb57dfcde8ef08e33f988 Mon Sep 17 00:00:00 2001
From: Patrick Bassner
Date: Wed, 23 Oct 2024 20:47:37 +0200
Subject: [PATCH 16/17] Fix token usage list append bug

---
 app/pipeline/chat/exercise_chat_pipeline.py | 4 ++--
 app/retrieval/lecture_retrieval.py          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
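The bug this commit fixes: LectureRetrieval.tokens is a *list* of usage records, so append() nested that whole list inside the exercise chat pipeline's self.tokens. A two-line sketch of append vs. extend, with string stand-ins for the TokenUsageDTO objects:

retriever_tokens = ["usage_1", "usage_2"]  # stand-ins for TokenUsageDTO objects

tokens = []
tokens.append(retriever_tokens)  # [['usage_1', 'usage_2']] -- nested, wrong
tokens = []
tokens.extend(retriever_tokens)  # ['usage_1', 'usage_2']   -- flat, intended
print(tokens)

The diff below also guards against an empty list and tightens the type annotation to List[TokenUsageDTO].

diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py
index ea6f0536..8b05ee89 100644
--- a/app/pipeline/chat/exercise_chat_pipeline.py
+++ b/app/pipeline/chat/exercise_chat_pipeline.py
@@ -240,8 +240,8 @@ def _run_exercise_chat_pipeline(
             if should_execute_lecture_pipeline:
                 try:
                     self.retrieved_lecture_chunks = future_lecture.result()
-                    if self.retriever.tokens is not None:
-                        self.tokens.append(self.retriever.tokens)
+                    if self.retriever.tokens is not None and len(self.retriever.tokens) > 0:
+                        self.tokens.extend(self.retriever.tokens)
                     if len(self.retrieved_lecture_chunks) > 0:
                         self._add_relevant_chunks_to_prompt(
                             self.retrieved_lecture_chunks
diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py
index 5cfbb3c7..7bcd8ce0 100644
--- a/app/retrieval/lecture_retrieval.py
+++ b/app/retrieval/lecture_retrieval.py
@@ -83,7 +83,7 @@ class LectureRetrieval(Pipeline):
     Class for retrieving lecture data from the database.
     """

-    tokens: [TokenUsageDTO]
+    tokens: List[TokenUsageDTO]

     def __init__(self, client: WeaviateClient, **kwargs):
         super().__init__(implementation_id="lecture_retrieval_pipeline")

From 043264a7e3163d0c5fcb1d545dde2496280496fd Mon Sep 17 00:00:00 2001
From: Patrick Bassner
Date: Wed, 23 Oct 2024 20:48:17 +0200
Subject: [PATCH 17/17] Fix formatting

---
 app/pipeline/chat/exercise_chat_pipeline.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py
index 8b05ee89..386cd27a 100644
--- a/app/pipeline/chat/exercise_chat_pipeline.py
+++ b/app/pipeline/chat/exercise_chat_pipeline.py
@@ -240,7 +240,10 @@ def _run_exercise_chat_pipeline(
             if should_execute_lecture_pipeline:
                 try:
                     self.retrieved_lecture_chunks = future_lecture.result()
-                    if self.retriever.tokens is not None and len(self.retriever.tokens) > 0:
+                    if (
+                        self.retriever.tokens is not None
+                        and len(self.retriever.tokens) > 0
+                    ):
                         self.tokens.extend(self.retriever.tokens)
                     if len(self.retrieved_lecture_chunks) > 0:
                         self._add_relevant_chunks_to_prompt(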