config-advanced.ini

[feature_store]
# `feature_store.py` use this throttle to distinct `good_questions` and `bad_questions`
reject_throttle = -1.0
# text2vec model path, support local relative path and huggingface model format.
# also support local path, model_path = "/path/to/your/text2vec-model"
embedding_model_path = "maidalun1020/bce-embedding-base_v1"
reranker_model_path = "maidalun1020/bce-reranker-base_v1"
work_dir = "workdir"

[web_search]
engine = "ddgs"
# web search engine support ddgs and serper
# For ddgs, see https://pypi.org/project/duckduckgo-search
# For serper, check https://serper.dev/api-key to get a free API key
serper_x_api_key = "YOUR-API-KEY-HERE"
domain_partial_order = ["openai.com", "pytorch.org", "readthedocs.io", "nvidia.com", "stackoverflow.com", "juejin.cn", "zhuanlan.zhihu.com", "www.cnblogs.com"]
save_dir = "logs/web_search_result"

[llm]
enable_local = 1
enable_remote = 1
# hybrid llm service address
client_url = "http://127.0.0.1:8888/inference"

[llm.server]
# local LLM configuration
# support "internlm/internlm2-chat-7b" and "qwen/qwen-7b-chat-int8"
# support local path, for example
# local_llm_path = "/path/to/your/internlm2"

local_llm_path = "internlm/internlm2-chat-7b"
local_llm_max_text_length = 3000
local_llm_bind_port = 8888

# remote LLM service configuration
# support "gpt", "kimi", "deepseek", "zhipuai", "xi-api" and "alles-apin"
# xi-api and alles-apin is chinese gpt proxy

remote_type = "kimi"
remote_api_key = "YOUR-API-KEY-HERE"
# max text length for remote LLM.
# use 128000 for kimi, 192000 for gpt/xi-api, 16000 for deepseek, 128000 for zhipuai
remote_llm_max_text_length = 128000
# openai API model type, support model list:
# "auto" for kimi. To save money, we auto select model name by prompt length.
# "gpt-4-0613" for gpt/xi-api,
# "deepseek-chat" for deepseek,
# "glm-4" for zhipuai,
# "gpt-4-1106-preview" for alles-apin or OpenAOE
remote_llm_model = "auto"
# request per minute
rpm = 500

[worker]
# enable web search or not
enable_web_search = 1
# enable search enhancement or not
enable_sg_search = 1
save_path = "logs/work.txt"

[worker.time]
enable = 0
start = "00:00:00"
end = "23:59:59"
has_weekday = 1

[sg_search]
# download `src` from https://github.com/sourcegraph/src-cli#installation
binary_src_path = "/usr/local/bin/src"
src_access_token = "${YOUR-SRC-ACCESS-TOKEN}"

# add your repo here, we just take opencompass and lmdeploy as example
[sg_search.opencompass]
github_repo_id = "open-compass/opencompass"
introduction = "用于评测大型语言模型（LLM）. 它提供了完整的开源可复现的评测框架，支持大语言模型、多模态模型的一站式评测，基于分布式技术，对大参数量模型亦能实现高效评测。评测方向汇总为知识、语言、理解、推理、考试五大能力维度，整合集纳了超过70个评测数据集，合计提供了超过40万个模型评测问题，并提供长文本、安全、代码3类大模型特色技术能力评测。"

[sg_search.mmpose]
github_repo_id = "open-mmlab/mmpose"
introduction = "MMPose is an open-source toolbox for pose estimation based on PyTorch"

[sg_search.mmdeploy]
github_repo_id = "open-mmlab/mmdeploy"
introduction = "MMDeploy is an open-source deep learning model deployment toolset"

[sg_search.mmdetection]
github_repo_id = "open-mmlab/mmdetection"
introduction = "MMDetection is an open source object detection toolbox based on PyTorch."

[sg_search.lmdeploy]
github_repo_id = "internlm/lmdeploy"
introduction = "lmdeploy 是一个用于压缩、部署和服务 LLM（Large Language Model）的工具包。是一个服务端场景下，transformer 结构 LLM 部署工具，支持 GPU 服务端部署，速度有保障，支持 Tensor Parallel，多并发优化，功能全面，包括模型转换、缓存历史会话的 cache feature 等. 它还提供了 WebUI、命令行和 gRPC 客户端接入。"

[sg_search.huixiangdou]
github_repo_id = "internlm/huixiangdou"
introduction = "茴香豆是一个基于 LLM 的群聊知识助手。设计拒答、响应两阶段 pipeline 应对群聊场景，解答问题同时不会消息泛滥。"

[sg_search.xtuner]
github_repo_id = "internlm/xtuner"
introduction = "XTuner is an efficient, flexible and full-featured toolkit for fine-tuning large models."

[sg_search.mmyolo]
github_repo_id = "open-mmlab/mmyolo"
introduction = "OpenMMLab YOLO series toolbox and benchmark. Implemented RTMDet, RTMDet-Rotated,YOLOv5, YOLOv6, YOLOv7, YOLOv8,YOLOX, PPYOLOE, etc."

[sg_search.Amphion]
github_repo_id = "open-mmlab/Amphion"
introduction = "Amphion is a toolkit for Audio, Music, and Speech Generation. Its purpose is to support reproducible research and help junior researchers and engineers get started in the field of audio, music, and speech generation research and development."

[sg_search.mmcv]
github_repo_id = "open-mmlab/mmcv"
introduction = "MMCV is a foundational library for computer vision research and it provides image/video processing, image and annotation visualization, image transformation, various CNN architectures and high-quality implementation of common CPU and CUDA ops"

[frontend]
# chat group assistant type, support "lark", "lark_group", "wechat_personal" and "none"
# for "lark", open https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot to add bot, **only send, cannot receive**
# for "lark_group", open https://open.feishu.cn/document/home/introduction-to-custom-app-development/self-built-application-development-process to create one
# for "wechat_personal", read ./docs/add_wechat_group_zh.md to setup gateway
type = "none"

# for "lark", it is chat group webhook url, send reply to group, for example "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxxxxxxx"
# for "lark_group", it is the url to fetch chat group message, for example "http://101.133.161.20:6666/fetch", `101.133.161.20` is your own public IPv4 addr
# for "wechat_personal", it is useless
webhook_url = "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxxxxxxx"

# when a new group chat message is received, should it be processed immediately or wait for 18 seconds in case the user hasn't finished speaking?
# support "immediate"
message_process_policy = "immediate"

[frontend.lark_group]
# "lark_group" configuration examples, use your own app_id and secret !!!
app_id = "cli_a53a34dcb778500e"
app_secret = "2ajhg1ixSvlNm1bJkH4tJhPfTCsGGHT1"
encrypt_key = "abc"
verification_token = "def"

[frontend.wechat_personal]
# "wechat_personal" listen port
bind_port = 9527