For openxlab readthedocs #376

Closed · wants to merge 3 commits
238 changes: 238 additions & 0 deletions huixiangdou/gradio_ui.py
@@ -0,0 +1,238 @@
import argparse
import asyncio
import json
import os
import time
from datetime import datetime
from multiprocessing import Process, Value
from typing import List

import cv2
import gradio as gr
import pytoml
from loguru import logger

from huixiangdou.primitive import Query
from huixiangdou.service import (ErrorCode, ParallelPipeline, SerialPipeline,
                                 llm_serve, start_llm_server)

def ymd():
    """Return today's date as `YYYY-MM-DD`, creating a directory of that name if needed."""
    now = datetime.now()
    date_string = now.strftime("%Y-%m-%d")
    if not os.path.exists(date_string):
        os.makedirs(date_string)
    return date_string

def parse_args():
    """Parse args."""
    parser = argparse.ArgumentParser(description='SerialPipeline.')
    parser.add_argument('--work_dir',
                        type=str,
                        default='workdir',
                        help='Working directory.')
    parser.add_argument('--pipeline-count',
                        type=int,
                        default=1,
                        help='Support user choosing all pipeline types.')
    parser.add_argument(
        '--config_path',
        default='config-cpu.ini',
        type=str,
        help='SerialPipeline configuration path. Default value is config-cpu.ini')
    parser.add_argument('--standalone',
                        action='store_true',
                        default=True,
                        help='Auto deploy required Hybrid LLM Service.')
    parser.add_argument('--no-standalone',
                        action='store_false',
                        dest='standalone',
                        help='Do not auto deploy required Hybrid LLM Service.')
    parser.add_argument('--placeholder',
                        type=str,
                        default='How to install HuixiangDou ?',
                        help='Placeholder for user query.')
    parser.add_argument('--image',
                        action='store_true',
                        default=True,
                        help='Show the image upload component.')
    parser.add_argument('--no-image',
                        action='store_false',
                        dest='image',
                        help='Close some components for readthedocs.')
    parser.add_argument(
        '--theme',
        type=str,
        default='soft',
        help='Gradio theme, default value is `soft`. Open https://www.gradio.app/guides/theming-guide for all themes.')

    args = parser.parse_args()
    return args

language = 'en'
enable_web_search = False
pipeline = 'chat_with_repo'
main_args = None
paralle_assistant = None
serial_assistant = None


def on_language_changed(value: str):
    global language
    print(value)
    language = value


def on_pipeline_changed(value: str):
    global pipeline
    print(value)
    pipeline = value


def on_web_search_changed(value: str):
    global enable_web_search
    print(value)
    if 'no' in value:
        enable_web_search = False
    else:
        enable_web_search = True


def format_refs(refs: List[str]):
    refs_filter = list(set(refs))
    if len(refs) < 1:
        return ''
    text = ''
    if language == 'zh':
        text += '参考资料:\r\n'
    else:
        text += '**References:**\r\n'

    for file_or_url in refs_filter:
        text += '* {}\r\n'.format(file_or_url)
    text += '\r\n'
    return text


async def predict(text: str, image: str):
    global language
    global enable_web_search
    global pipeline
    global main_args
    global serial_assistant
    global paralle_assistant

    with open('query.txt', 'a') as f:
        f.write(json.dumps({'data': text, 'date': ymd()}, ensure_ascii=False))
        f.write('\n')

    if image is not None:
        filename = 'image.png'
        image_path = os.path.join(main_args.work_dir, filename)
        cv2.imwrite(image_path, image)
    else:
        image_path = None

    query = Query(text, image_path)
    if 'chat_in_group' in pipeline:
        if serial_assistant is None:
            serial_assistant = SerialPipeline(work_dir=main_args.work_dir,
                                              config_path=main_args.config_path)
        args = {'query': query, 'history': [], 'groupname': ''}
        # Use a local dict here; reassigning the global `pipeline` string
        # would break the pipeline-type check on the next call.
        pipeline_status = {'status': {}}
        stream_chat_content = ''
        for sess in serial_assistant.generate(**args):
            if len(sess.delta) > 0:
                # start chat, display
                stream_chat_content += sess.delta
                yield stream_chat_content
            else:
                status = {
                    'state': str(sess.code),
                    'response': sess.response,
                    'refs': sess.references
                }
                pipeline_status['status'] = status
                pipeline_status['debug'] = sess.debug

                json_str = json.dumps(pipeline_status, indent=2, ensure_ascii=False)
                yield json_str

    else:
        if paralle_assistant is None:
            paralle_assistant = ParallelPipeline(work_dir=main_args.work_dir,
                                                 config_path=main_args.config_path)
        args = {'query': query, 'history': [], 'language': language}
        args['enable_web_search'] = enable_web_search

        sentence = ''
        async for sess in paralle_assistant.generate(**args):
            if sentence == '' and len(sess.references) > 0:
                sentence = format_refs(sess.references)

            if len(sess.delta) > 0:
                sentence += sess.delta
                yield sentence

        yield sentence

def download_and_unzip(main_args):
    # Expects `main_args.feature_local` and `main_args.feature_url` to be set.
    zip_filepath = os.path.join(main_args.feature_local, 'workdir.zip')
    main_args.work_dir = os.path.join(main_args.feature_local, 'workdir')
    logger.info(f'assign {main_args.work_dir} to args.work_dir')

    download_cmd = f'wget -O {zip_filepath} {main_args.feature_url}'
    os.system(download_cmd)

    if not os.path.exists(zip_filepath):
        raise Exception(f'zip filepath {zip_filepath} not exist.')

    unzip_cmd = f'unzip -o {zip_filepath} -d {main_args.feature_local}'
    os.system(unzip_cmd)
    if not os.path.exists(main_args.work_dir):
        raise Exception(f'feature dir {main_args.work_dir} not exist.')

def build_feature_store(main_args):
    if os.path.exists('workdir'):
        logger.warning('feature_store `workdir` already exist, skip')
        return
    logger.info('start build feature_store..')
    os.system('python3 -m huixiangdou.service.feature_store --config_path {}'.format(
        main_args.config_path))

if __name__ == '__main__':
    main_args = parse_args()
    build_feature_store(main_args)

    show_image = True
    radio_options = ['chat_with_repo']

    if not main_args.image:
        show_image = False

    if main_args.pipeline_count > 1:
        radio_options.append('chat_in_group')

    # start service
    if main_args.standalone is True:
        # hybrid llm serve
        start_llm_server(config_path=main_args.config_path)

    themes = {
        'soft': gr.themes.Soft(),
        'monochrome': gr.themes.Monochrome(),
        'base': gr.themes.Base(),
        'default': gr.themes.Default(),
        'glass': gr.themes.Glass()
    }
    if main_args.theme in themes:
        theme = themes[main_args.theme]
    else:
        theme = gr.themes.Soft()

    with gr.Blocks(theme=theme, title='HuixiangDou AI assistant',
                   analytics_enabled=True) as demo:
        with gr.Row():
            gr.Markdown("""
#### [HuixiangDou](https://github.com/internlm/huixiangdou) AI assistant
""",
                        label='Reply',
                        header_links=True,
                        line_breaks=True)
        with gr.Row():
            if len(radio_options) > 1:
                with gr.Column():
                    ui_pipeline = gr.Radio(
                        radio_options,
                        label='Pipeline type',
                        info='Group-chat is slow but accurate and safe, default value is `chat_with_repo`')
                    ui_pipeline.change(fn=on_pipeline_changed, inputs=ui_pipeline, outputs=[])
            with gr.Column():
                ui_language = gr.Radio(['en', 'zh'], label='Language', info='Use `en` by default')
                ui_language.change(fn=on_language_changed, inputs=ui_language, outputs=[])
            with gr.Column():
                ui_web_search = gr.Radio(['no', 'yes'], label='Enable web search', info='Disabled by default')
                ui_web_search.change(on_web_search_changed, inputs=ui_web_search, outputs=[])

        with gr.Row():
            input_question = gr.TextArea(label='Input your question',
                                         placeholder=main_args.placeholder,
                                         show_copy_button=True,
                                         lines=9)
            input_image = gr.Image(label='[Optional] Image-text retrieval needs `config-multimodal.ini`',
                                   render=show_image)
        with gr.Row():
            run_button = gr.Button()
        with gr.Row():
            result = gr.Markdown('>Text reply or inner status callback here, depends on `pipeline type`',
                                 label='Reply',
                                 show_label=True,
                                 header_links=True,
                                 line_breaks=True,
                                 show_copy_button=True)
            # result = gr.TextArea(label='Reply', show_copy_button=True,
            #                      placeholder='Text Reply or inner status callback, depends on `pipeline type`')

        run_button.click(predict, [input_question, input_image], [result])
    demo.queue()
    demo.launch(share=False, server_name='0.0.0.0', debug=True)
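
As a usage sketch (not part of the diff; it assumes the package is importable from the repository root and that `config-cpu.ini` exists), the UI can be launched with the flags defined in `parse_args()` above:

    # Default: build the feature store, auto-start the hybrid LLM server,
    # then serve Gradio on 0.0.0.0 (Gradio's default port is 7860).
    python3 -m huixiangdou.gradio_ui --config_path config-cpu.ini

    # Against an already running LLM server, with both pipeline types selectable:
    python3 -m huixiangdou.gradio_ui --no-standalone --pipeline-count 2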
2 changes: 1 addition & 1 deletion huixiangdou/service/llm_server_hybrid.py
@@ -208,7 +208,7 @@ def __init__(self,
             "deepseek": "deepseek-chat",
             "zhipuai": "glm-4",
             "puyu": "internlm2-latest",
-            "siliconcloud": "alibaba/Qwen1.5-110B-Chat"
+            "siliconcloud": "internlm/internlm2_5-20b-chat"
         }

     async def call_kimi(self, prompt: str, history: List[Tuple], remote_api_key: str, model: str):
16 changes: 16 additions & 0 deletions repodir/huixiangdou/.github/ISSUE_TEMPLATE/bug.md
@@ -0,0 +1,16 @@
---
name: 🐛 bug issue
about: submit a bug report +_+
---

## error log | 日志或报错信息 | ログ

## context | 编译/运行环境 | バックグラウンド

## how to reproduce | 复现步骤 | 再現方法

1.
2.
3.

## more | 其他 | その他
6 changes: 6 additions & 0 deletions repodir/huixiangdou/.github/ISSUE_TEMPLATE/others.md
@@ -0,0 +1,6 @@
---
name: 📝 others
about: discussion, suggestion and question
---

## detail | 详细描述 | 詳細な説明
89 changes: 89 additions & 0 deletions repodir/huixiangdou/.github/scripts/doc_link_checker.py
@@ -0,0 +1,89 @@
# Copyright (c) MegFlow. All rights reserved.
# /bin/python3

import argparse
import os
import re


def make_parser():
    parser = argparse.ArgumentParser('Doc link checker')
    parser.add_argument('--http',
                        default=False,
                        type=bool,
                        help='check http or not')
    parser.add_argument('--target',
                        default='./docs',
                        type=str,
                        help='the directory or file to check')
    return parser


# Matches markdown links of the form [text](target).
pattern = re.compile(r'\[.*?\]\(.*?\)')


def analyze_doc(home, path):
    print('analyze {}'.format(path))
    problem_list = []
    code_block = 0
    with open(path, encoding='utf8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            if line.startswith('```'):
                code_block = 1 - code_block

            if code_block > 0:
                continue

            if '[' in line and ']' in line and '(' in line and ')' in line:
                all = pattern.findall(line)
                for item in all:
                    # skip ![]()
                    if item.find('[') == item.find(']') - 1:
                        continue

                    # process the case [text()]()
                    offset = item.find('](')
                    if offset == -1:
                        continue
                    item = item[offset:]
                    start = item.find('(')
                    end = item.find(')')
                    ref = item[start + 1:end]

                    if ref.startswith('http') or ref.startswith('#'):
                        continue
                    if '.md#' in ref:
                        # strip the anchor so only the file path is checked
                        ref = ref[:ref.find('#')]
                    fullpath = os.path.join(home, ref)
                    if not os.path.exists(fullpath):
                        # raise ValueError(fullpath)
                        problem_list.append(ref)
                    else:
                        continue
    if len(problem_list) > 0:
        print(f'{path}:')
        for item in problem_list:
            print(f'\t {item}')
        print('\n')
        raise Exception('found link error')


def traverse(target):
    if os.path.isfile(target):
        analyze_doc(os.path.dirname(target), target)
        return
    for home, dirs, files in os.walk(target):
        for filename in files:
            if filename.endswith('.md'):
                path = os.path.join(home, filename)
                if os.path.islink(path) is False:
                    if 'copy_' in path:
                        continue
                    analyze_doc(home, path)


if __name__ == '__main__':
    args = make_parser().parse_args()
    traverse(args.target)
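
To illustrate the extraction logic in `analyze_doc` above, here is a minimal sketch (not part of the diff; the sample line is hypothetical). The regex first captures whole `[text](target)` pairs, then the code slices out the target after `](`:

    import re

    pattern = re.compile(r'\[.*?\]\(.*?\)')
    line = 'See [install](docs/install.md), [site](https://example.com) and [top](#top).'
    for item in pattern.findall(line):
        offset = item.find('](')                      # start of the "](target)" tail
        ref = item[offset + 2:item.find(')', offset)]
        print(ref)                                    # docs/install.md, https://example.com, #top
    # analyze_doc() keeps only the first kind: http links and #anchors are skipped,
    # and relative paths are resolved against the markdown file's directory.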
24 changes: 24 additions & 0 deletions repodir/huixiangdou/.github/workflows/lint.yml
@@ -0,0 +1,24 @@
name: Check markdown local file link available

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9
      - name: Check doc link
        run: |
          python .github/scripts/doc_link_checker.py --target README_zh.md
          python .github/scripts/doc_link_checker.py --target README.md
          python -m pip install pylint interrogate
          pylint huixiangdou || true
          interrogate huixiangdou -v || true
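
Note that only the two doc-link checks can fail this job: `pylint` and `interrogate` run with `|| true`, so their findings appear in the log but never block the workflow.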