-
Notifications
You must be signed in to change notification settings - Fork 1
/
readim.py
134 lines (123 loc) · 6.79 KB
/
readim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# encoding:utf-8
import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from channel.chat_message import ChatMessage
from common.log import logger
from plugins import *
from .ocr import ocr_from_image
from .model import Blip
from PIL import Image
import io
@plugins.register(
name="readim",
desire_priority=0,
hidden=False,
desc="A plugin that convert image with blip2(or other model) and ocr",
version="0.2",
author="loping151",
)
class readim(Plugin):
def __init__(self):
super().__init__()
curdir = os.path.dirname(__file__)
config_path = os.path.join(curdir, "config.json")
self.state = {} # 切换激活状态, 分开各个channel状态
try:
with open(config_path, "r", encoding="utf-8") as f:
config = json.load(f)
self.lang = config.get("lang", "eng+chi_sim")
self.prefix_verb = config["prefix_verb"]
self.prefix_noun = config["prefix_noun"]
self.always_read_image = config["always_read_image"]
self.default_allow_ocr = config["default_allow_ocr"]
self.free_cuda_memory = config["free_cuda_memory"] # 如果使用cpu运行blip2,这个参数不会造成影响
self.model = config["model"]
if self.model == "blip2": # allowed: blip2 only for now
self.model = Blip(free_cuda_memory=self.free_cuda_memory)
elif self.model == "something_else": # 这样添加你的接口
pass
self.allow_ocr = {}
self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
logger.info("[readim] inited")
except Exception as e:
if isinstance(e, FileNotFoundError):
logger.warn(f"[readim] init failed, {config_path} not found, ignore or see")
else:
logger.warn("[readim] init failed, ignore or see ")
raise e
def on_handle_context(self, e_context: EventContext):
if e_context["context"].type not in [
ContextType.TEXT,
ContextType.IMAGE
]:
return
content = e_context['context'].content
session_id = e_context['context'].kwargs['session_id']
is_group = e_context['context'].kwargs['msg'].is_group # certain group
receiver = e_context['context'].kwargs['receiver']
if is_group:
user = e_context['context'].kwargs['msg'].actual_user_nickname
else:
user = e_context['context'].kwargs['msg'].from_user_nickname
if e_context["context"].type == ContextType.TEXT and not self.always_read_image:
is_verb, is_noun = False, False
for verb in self.prefix_verb:
if verb in content:
is_verb = True
for noun in self.prefix_noun:
if noun in content:
is_noun = True
if is_verb and is_noun:
self.state[session_id+receiver+user] = True
if not self.model.ready:
logger.info(f"视觉模型未就绪")
e_context["context"].content += f"。你的回答需要包括:你的读图功能还没有准备好, 需要等等你" # 这些都是发给bot的提示词,可以自己改。我觉得现在的不错,先不写在config
else:
e_context["context"].content += f"。现在, 你可以读取图片了, 请你向我索要图片"
if '开启' in content and 'ocr' in content: # 暂时没有接入plugin的标准命令
self.allow_ocr[session_id] = True
logger.info(f"ocr开启了")
e_context["context"].content = f"现在, 你的ocr功能被开启了"
e_context.action = EventAction.BREAK
elif '关闭' in content and 'ocr' in content:
self.allow_ocr[session_id] =False
logger.info(f"ocr关闭了")
e_context["context"].content = f"现在, 你的ocr功能被禁用了"
e_context.action = EventAction.BREAK
return
if (e_context["context"].type == ContextType.IMAGE and self.state.get(session_id+receiver+user, False)) or self.always_read_image:
e_context["context"].type = ContextType.TEXT
msg: ChatMessage = e_context["context"]["msg"]
if not self.model.ready:
logger.info(f"视觉模型未就绪")
e_context["context"].content = f"你的回答需要包括:你的读图功能还没有准备好, 需要等等你"
else:
try:
msg.prepare()
with open(content, 'rb') as file:
image_data = file.read()
logger.info("图片读取成功")
image = Image.open(io.BytesIO(image_data))
image_caption = self.model.caption_image(image)
e_context["context"].content = "我发给了你图片,内容是{}, ".format(image_caption)
if self.allow_ocr.get(receiver, self.default_allow_ocr):
image_ocr = ocr_from_image(image, lang=self.lang)
e_context["context"].content += "上面写了文字:{}(如果你看不明白上面写了什么, 请你忽略上面写的文字, 不需要告诉我)。".format(image_ocr)
e_context["context"].content += "请你假装你自己看到了这张图片的内容, 而不是我告诉你了描述。请假装你真的看到了图片内容,你的回答中千万不要透露你是假装看到的。"
try:
self.state[session_id+receiver+user]=False # 只有成功的读完了图片,才结束请求。否则无需重新请求,就可以再接受一张图
except Exception as e:
pass # 一般是always_read_image,无需处理
except Exception as e:
logger.error(f"读取图片数据时出现错误:{e}")
e_context["context"].content = f"你没有收到图片或图片处理失败了。"
e_context.action = EventAction.BREAK
return
def get_help_text(self, **kwargs):
if self.always_read_image:
help_text = f"现在直接发送图片就可以读\n"
else:
help_text = "尝试在句子中组合动词{}和名词{}吧\n".format(self.prefix_verb, self.prefix_noun)
help_text+=f"发送 \"开启/关闭 ocr\" 控制是否读取图片的文字信息。只在当前会话中起作用。\n"
return help_text