Skip to content

Commit 0d90606

Browse files
authored
Merge pull request #389 from LlmKira/dev
✨ Image input
2 parents aec38eb + 9677832 commit 0d90606

File tree

10 files changed

+263
-216
lines changed

10 files changed

+263
-216
lines changed

README.md

+8-7
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@ This project uses the ToolCall feature.
4242
It integrates a message queuing and snapshot system, offering plugin mechanisms and authentication prior to plugin
4343
execution.
4444

45-
The model adheres to the Openai Schema, other models are not supported. Please adapt using gateways independently.
45+
The model adheres to the OpenAI format schema. To use other models, adapt them yourself with [gateway](https://github.com/Portkey-AI/gateway)
46+
or [one-api](https://github.com/songquanpeng/one-api).
4647

47-
| Demo |
48-
|-----------------------------------|
49-
| ![sticker](./docs/chain_chat.gif) |
48+
| Demo | Vision With Voice |
49+
|-----------------------------------|------------------------------|
50+
| ![sticker](./docs/chain_chat.gif) | ![vision](./docs/vision.gif) |
5051

5152
## 🔨 Roadmap
5253

@@ -56,10 +57,10 @@ The model adheres to the Openai Schema, other models are not supported. Please a
5657
- [x] Implementation of a more robust plugin system
5758
- [x] Project structure simplification
5859
- [x] Elimination of the Provider system
59-
- [x] Hook support.
60-
- [x] Access to TTS.
60+
- [x] Hook support
61+
- [x] Access to TTS
62+
- [x] Add standalone support for gpt-4-turbo and vision
6163
- [ ] Add LLM reference support to the plugin environment. (extract && search in text)
62-
- [ ] Add standalone support for Openai's new Schema. (vision)
6364

6465
## 📦 Features
6566

app/middleware/llm_task.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
SystemMessage,
2323
ToolMessage,
2424
AssistantMessage,
25+
UserMessage,
2526
)
2627
from llmkira.openai.request import OpenAIResult, OpenAI, OpenAICredential
2728
from llmkira.task import TaskHeader
@@ -174,7 +175,7 @@ async def build_task_messages(self, remember=True):
174175
for i, message in enumerate(task_message):
175176
message: EventMessage
176177
# message format
177-
user_message = message.format_user_message()
178+
user_message = await message.format_user_message()
178179
message_run.append(user_message)
179180
if remember:
180181
await self.message_history.append(messages=[user_message])
@@ -192,14 +193,7 @@ async def request_openai(
192193
:param disable_tool: 禁用函数
193194
:param credential: 凭证
194195
:return: OpenaiResult 返回结果
195-
:raise RuntimeError: # Feel time leave
196-
time_feel = await TimeFeelManager(self.session_uid).get_leave()
197-
if time_feel:
198-
await self.remember(
199-
message=SystemMessage(
200-
content=f"statu:[After {time_feel} leave, user is back]"
201-
)
202-
) 无法处理消息
196+
:raise RuntimeError: 消息为空
203197
:raise AssertionError: 无法处理消息
204198
:raise OpenaiError: Openai错误
205199
"""
@@ -231,13 +225,21 @@ async def request_openai(
231225
# TODO:实现消息时序切片
232226
# 日志
233227
logger.info(
234-
f"[x] Openai request" f"\n--message {messages} " f"\n--tools {tools}"
228+
f"[x] Openai request" f"\n--message {len(messages)} " f"\n--tools {tools}"
235229
)
230+
for msg in messages:
231+
if isinstance(msg, UserMessage):
232+
if len(str(msg)) < 100:
233+
logger.debug(f"Message: {msg}")
234+
else:
235+
logger.debug("Message: UserMessage")
236+
else:
237+
logger.debug(f"Message:{msg}")
236238
# 必须校验
237239
if disable_tool or not tools:
238-
logger.debug("llm_task:Tool not enable")
240+
logger.debug("llm_task:no tool loaded")
239241
tools = None
240-
# 根据模型选择不同的驱动a
242+
# 根据模型选择不同的驱动
241243
assert messages, RuntimeError("llm_task:message cant be none...")
242244
messages = await validate_mock(messages)
243245
endpoint: OpenAI = OpenAI(

app/receiver/slack/__init__.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import ssl
77
from typing import List
88

9-
from loguru import logger, Message
9+
from loguru import logger
1010
from slack_sdk.web.async_client import AsyncWebClient
1111

1212
from app.middleware.llm_task import OpenaiMiddleware
@@ -16,6 +16,7 @@
1616
from app.setting.slack import BotSetting
1717
from llmkira.kv_manager.file import File
1818
from llmkira.openai import OpenAIResult
19+
from llmkira.openai.cell import AssistantMessage
1920
from llmkira.task import Task, TaskHeader
2021

2122
__receiver__ = "slack"
@@ -86,7 +87,10 @@ async def forward(self, receiver: Location, message: List[EventMessage]):
8687
await self.bot.chat_postMessage(**_message)
8788

8889
async def reply(
89-
self, receiver: Location, messages: List[Message], reply_to_message: bool = True
90+
self,
91+
receiver: Location,
92+
messages: List[AssistantMessage],
93+
reply_to_message: bool = True,
9094
):
9195
"""
9296
模型直转发,Message是Openai的类型

app/sender/telegram/__init__.py

+30-7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
auth_reloader,
2222
uid_make,
2323
login,
24+
TimerObjectContainer,
2425
)
2526
from app.setting.telegram import BotSetting
2627
from llmkira.kv_manager.env import EnvManager
@@ -38,7 +39,7 @@
3839
from app.components.credential import split_setting_string, Credential, ProviderError
3940

4041
StepCache = StateMemoryStorage()
41-
42+
FileWindow = TimerObjectContainer()
4243
TelegramTask = Task(queue=__sender__)
4344

4445

@@ -67,6 +68,7 @@ async def transcribe(
6768
files = files if files else []
6869
messages = messages if messages else []
6970
event_messages = []
71+
files = [file for file in files if file] # No None
7072
for index, message in enumerate(messages):
7173
message_text = getattr(message, "text", "empty")
7274
event_messages.append(
@@ -144,6 +146,19 @@ async def create_task(message: types.Message, disable_tool_action: bool = True):
144146
message.text = message.text
145147
if not message.text:
146148
return None
149+
__used_file_id = []
150+
photos: List[types.PhotoSize] = FileWindow.get_objects(
151+
user_id=message.from_user.id
152+
)
153+
FileWindow.clear_objects(user_id=message.from_user.id)
154+
for photo in photos:
155+
__used_file_id.append(photo.file_id)
156+
uploaded_file.append(
157+
await self.upload(
158+
file=photo,
159+
uid=uid_make(__sender__, message.from_user.id),
160+
)
161+
)
147162
if message.photo:
148163
uploaded_file.append(
149164
await self.upload(
@@ -161,12 +176,13 @@ async def create_task(message: types.Message, disable_tool_action: bool = True):
161176
)
162177
if message.reply_to_message:
163178
if message.reply_to_message.photo:
164-
uploaded_file.append(
165-
await self.upload(
166-
message.reply_to_message.photo[-1],
167-
uid=uid_make(__sender__, message.from_user.id),
179+
if message.reply_to_message.photo[-1].file_id not in __used_file_id:
180+
uploaded_file.append(
181+
await self.upload(
182+
message.reply_to_message.photo[-1],
183+
uid=uid_make(__sender__, message.from_user.id),
184+
)
168185
)
169-
)
170186
if message.reply_to_message.document:
171187
if message.reply_to_message.document.file_size < 1024 * 1024 * 10:
172188
uploaded_file.append(
@@ -367,9 +383,16 @@ async def handle_private_msg(message: types.Message):
367383
自动响应私聊消息
368384
"""
369385
message.text = message.text if message.text else message.caption
386+
387+
# Support for GPT Vision
370388
if not message.text:
389+
if message.photo:
390+
logger.debug("Add a spc image")
391+
FileWindow.add_object(
392+
user_id=message.from_user.id, obj=message.photo[-1]
393+
)
371394
return None
372-
395+
# 扳机
373396
trigger = await get_trigger_loop(
374397
platform_name=__sender__,
375398
message=message.text,

app/sender/util_func.py

+38
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# @Author : sudoskys
44
# @File : util_func.py
55
# @Software: PyCharm
6+
import time
67
from typing import Tuple, Optional, Union
78
from urllib.parse import urlparse
89

@@ -111,3 +112,40 @@ async def auth_reloader(snapshot_credential: str, platform: str, user_id: str) -
111112
queue_name=snap.channel,
112113
task=snap.snapshot_data,
113114
)
115+
116+
117+
class TimerObjectContainer:
    """Per-user holding area for objects that expire after a time window.

    Every stored object is stamped with the wall-clock time at which it was
    added. Reading a user's objects drops the entries that are older than
    the requested window.
    """

    def __init__(self):
        # user_id -> {obj: time.time() value recorded when obj was added}
        self.users = {}

    def add_object(self, user_id, obj):
        """
        Store ``obj`` for ``user_id`` and stamp it with the current time.

        ``obj`` is used as a dict key, so it must be hashable; adding the
        same object again refreshes its timestamp.

        :param user_id: user ID
        :param obj: object to remember
        """
        self.users.setdefault(user_id, {})[obj] = time.time()

    def get_objects(self, user_id, second=1200) -> list:  # 20 minutes = 1200 seconds
        """
        Return the user's objects that were added less than ``second``
        seconds ago, and forget the ones that are older.

        :param user_id: user ID
        :param second: maximum age in seconds
        :return: list of still-valid objects, in insertion order
        """
        user_objs = self.users.get(user_id)
        if user_objs is None:
            return []

        # Take one timestamp so every entry is judged against the same instant.
        now = time.time()
        valid_objects = {
            obj: add_time
            for obj, add_time in user_objs.items()
            if now - add_time < second
        }

        if valid_objects:
            self.users[user_id] = valid_objects
        else:
            # Do not keep empty buckets around for users with nothing pending.
            self.users.pop(user_id, None)
        return list(valid_objects)

    def clear_objects(self, user_id):
        """
        Forget every object stored for the user.

        :param user_id: user ID
        """
        # Remove the bucket itself so the mapping does not grow with every
        # user that has ever been seen.
        self.users.pop(user_id, None)

docs/vision.gif

258 KB
Loading

llmkira/openai/cell.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import base64
12
from abc import ABC
23
from typing import Optional, Union, List, Literal, Type, Any
34

45
from docstring_parser import parse
56
from json_repair import repair_json
67
from pydantic import ConfigDict, BaseModel, Field, field_validator, model_validator
78

9+
from llmkira.openai.utils import resize_openai_image
10+
811

912
class FunctionChoice(BaseModel):
1013
name: str
@@ -170,11 +173,91 @@ class SystemMessage(Message):
170173
name: Optional[str] = None
171174

172175

176+
class ImageContent(BaseModel):
    """Image reference carried by an ``image_url`` content part."""

    # Remote URL or a ``data:image/jpeg;base64,...`` data URL.
    url: str
    # Detail hint; ``ContentPart.create_image`` only passes "low", "high" or "auto".
    detail: Optional[str] = "auto"


class ContentPart(BaseModel):
    """One element of a multi-part message body: a text part or an image part."""

    type: Union[str, Literal["text", "image_url"]]
    text: Optional[str] = None
    image_url: Optional[ImageContent] = None

    @model_validator(mode="after")
    def check_model(self):
        """Require the payload field that matches ``type`` to be present."""
        if self.type == "image_url" and self.image_url is None:
            raise ValueError("image_url cannot be None")
        if self.type == "text" and self.text is None:
            raise ValueError("text cannot be None")
        return self

    @classmethod
    def create_text(cls, text: str):
        """
        Create a text content part
        :param text: text
        :return: ContentPart
        """
        # NOTE(review): kept as an assert so the raised type (AssertionError)
        # is unchanged for callers; it is skipped when Python runs with -O.
        assert isinstance(text, str), ValueError("text must be a string")
        return cls(type="text", text=text)

    @classmethod
    def create_image(
        cls, url: Union[str, bytes], detail: Literal["low", "high", "auto"] = "auto"
    ):
        """
        Create an image content part
        :param url: image url (http(s) or ``data:image/jpeg;base64,`` data URL) or raw image bytes
        :param detail: image detail
        :return: ContentPart
        :raise ValueError: if ``url`` is a string with an unsupported prefix,
            or is neither ``str`` nor ``bytes``
        """
        assert detail in ("low", "high", "auto"), ValueError(
            "detail must be low, high or auto"
        )
        if isinstance(url, bytes):
            # Raw bytes: resize via the project helper, then inline as a data URL.
            url = resize_openai_image(url, mode=detail)
            base64_image = base64.b64encode(url).decode("utf-8")
            url = f"data:image/jpeg;base64,{base64_image}"
        elif isinstance(url, str):
            # Accept either prefix. The previous `not a or not b` test was
            # true for every string, so all string URLs were rejected.
            if not url.startswith(("http", "data:image/jpeg;base64,")):
                raise ValueError(
                    "url must be a http url or `data:image/jpeg;base64,` as base64 image"
                )
        else:
            raise ValueError("url must be a http url or bytes")
        return cls(type="image_url", image_url=ImageContent(url=url, detail=detail))
233+
234+
173235
class UserMessage(Message):
    """User-role chat message whose body is a list of text/image parts.

    A plain string passed as ``content`` is wrapped into a single text part
    by the field validator.
    """

    role: Literal["user"] = "user"
    content: Union[str, List[ContentPart]]
    name: Optional[str] = None

    @field_validator("content")
    @classmethod
    def check_content(cls, v):
        """Normalise ``content`` so that it is always a list of parts."""
        if isinstance(v, str):
            return [ContentPart.create_text(text=v)]
        if isinstance(v, list):
            return v
        raise ValueError("content must be a string or a list of ContentPart")

    def _ensure_parts(self) -> list:
        """Return ``content`` as a list, wrapping a bare string if present.

        ``content`` is declared as possibly ``str``; if the validator was
        bypassed, appending to it directly would fail.
        """
        if isinstance(self.content, str):
            self.content = [ContentPart.create_text(text=self.content)]
        return self.content

    def add_text(self, text: str):
        """
        Append a text part to the message
        :param text: text
        :return: self, to allow chaining
        """
        self._ensure_parts().append(ContentPart.create_text(text=text))
        return self

    def add_image(
        self,
        image_url: Union[str, bytes],
        detail: Literal["low", "high", "auto"] = "auto",
    ):
        """
        Append an image part to the message
        :param image_url: image url or image bytes
        :param detail: image detail
        :return: self, to allow chaining
        """
        self._ensure_parts().append(
            ContentPart.create_image(url=image_url, detail=detail)
        )
        return self
260+
178261

179262
class ToolMessage(Message):
180263
role: Literal["tool"] = "tool"

llmkira/openai/request.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from curl_cffi.requests import AsyncSession
66
from loguru import logger
7-
from pydantic import ConfigDict, BaseModel, Field, field_validator
7+
from pydantic import ConfigDict, BaseModel, Field, field_validator, model_validator
88
from pydantic import SecretStr
99
from tenacity import retry, stop_after_attempt
1010

@@ -162,6 +162,23 @@ def check_tools(cls, v):
162162
def make_url(base_url: str):
163163
return base_url.strip().rstrip("/") + "/chat/completions"
164164

165+
@model_validator(mode="after")
def check_vision(self):
    """
    Strip image parts from user messages when the model is not vision-capable.

    Models whose name starts with ``gpt-4-vision``, ``gpt-4-turbo`` or
    ``claude-3`` are left untouched. For any other model, every
    ``image_url`` part is removed from the list content of each
    ``UserMessage``.

    :return: self
    """
    if self.model.startswith(("gpt-4-vision", "gpt-4-turbo", "claude-3")):
        return self

    removed = 0
    for message in self.messages:
        if not (
            isinstance(message, UserMessage) and isinstance(message.content, list)
        ):
            continue
        kept = [part for part in message.content if part.type != "image_url"]
        removed += len(message.content) - len(kept)
        message.content = kept

    # Log only when something was actually stripped, so text-only requests
    # do not emit this line on every call.
    if removed:
        logger.info(
            "Remove the image content part from the messages, because the model is not supported."
        )
    return self
181+
165182
@retry(stop=stop_after_attempt(3), reraise=True)
166183
async def request(self, session: OpenAICredential) -> OpenAIResult:
167184
"""

0 commit comments

Comments
 (0)