feat(core): Multi-module dependency splitting (#606)

Close #601

**Note**: This is a breaking upgrade. We have updated chromadb from version 0.3.22 to the latest version. In this new version, the database storage engine has been switched from "duckdb" to "SQLite". For more details, you can check [here](https://docs.trychroma.com/migration). If you were using chromadb previously, you'll need to delete the content imported into the knowledge base and re-import it.

Other:
- Support InternLM 20B
eosphoros-ai · Sep 20, 2023 · 132814e · 132814e
2 parents d31a220 + 50f8bbf
commit 132814e
Show file tree

Hide file tree

Showing 30 changed files with 326 additions and 262 deletions.
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
@@ -76,6 +76,16 @@ autolabeler:
       # feat(connection): Support xxxx
       # fix(connection): Fix xxx
       - '/^(build|chore|ci|depr|docs|feat|fix|perf|refactor|release|test)\(.*connection.*\)/'
+  - label: core
+    title:
+      # feat(core): Support xxxx
+      # fix(core): Fix xxx
+      - '/^(build|chore|ci|depr|docs|feat|fix|perf|refactor|release|test)\(.*core.*\)/'
+  - label: web
+    title:
+      # feat(web): Support xxxx
+      # fix(web): Fix xxx
+      - '/^(build|chore|ci|depr|docs|feat|fix|perf|refactor|release|test)\(.*web.*\)/'
   - label: build
     title:
       - '/^build/'

diff --git a/.gitignore b/.gitignore
@@ -28,6 +28,8 @@ sdist/
 var/
 wheels/
 models/
+# Soft link
+models
 plugins/
 
 pip-wheel-metadata/

diff --git a/README.md b/README.md
@@ -86,7 +86,7 @@ Currently, we have released multiple key features, which are listed below to dem
 - Unified vector storage/indexing of knowledge base
   - Support for unstructured data such as PDF, TXT, Markdown, CSV, DOC, PPT, and WebURL
 - Multi LLMs Support, Supports multiple large language models, currently supporting
-  - 🔥 InternLM(7b)
+  - 🔥 InternLM(7b,20b)
   - 🔥 Baichuan2(7b,13b)
   - 🔥 Vicuna-v1.5(7b,13b)
   - 🔥 llama-2(7b,13b,70b)

diff --git a/README.zh.md b/README.zh.md
@@ -119,7 +119,7 @@ DB-GPT 是一个开源的以数据库为基础的GPT实验项目，使用本地
   - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL
 - 多模型支持
   - 支持多种大语言模型, 当前已支持如下模型: 
-  - 🔥 InternLM(7b)
+  - 🔥 InternLM(7b,20b)
   - 🔥 Baichuan2(7b,13b)
   - 🔥 Vicuna-v1.5(7b,13b)
   - 🔥 llama-2(7b,13b,70b)

diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
@@ -11,10 +11,12 @@ ARG LANGUAGE="en"
 ARG PIP_INDEX_URL="https://pypi.org/simple"
 ENV PIP_INDEX_URL=$PIP_INDEX_URL
 
+ARG DB_GPT_INSTALL_MODEL="default"
+ENV DB_GPT_INSTALL_MODEL=$DB_GPT_INSTALL_MODEL
+
 RUN mkdir -p /app
 
 # COPY only requirements.txt first to leverage Docker cache
-COPY ./requirements.txt /app/requirements.txt
 COPY ./setup.py /app/setup.py
 COPY ./README.md /app/README.md
 
@@ -26,9 +28,9 @@ WORKDIR /app
 # RUN pip3 install -i $PIP_INDEX_URL ".[all]"
 
 RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
-    && pip3 install -i $PIP_INDEX_URL . \
-    # && pip3 install -i $PIP_INDEX_URL ".[llama_cpp]" \
-    && (if [ "${LANGUAGE}" = "zh" ]; \
+    && pip3 install -i $PIP_INDEX_URL ".[$DB_GPT_INSTALL_MODEL]"
+
+RUN (if [ "${LANGUAGE}" = "zh" ]; \
         # language is zh, download zh_core_web_sm from github
         then wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
         && pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i $PIP_INDEX_URL \
@@ -58,4 +60,4 @@ RUN (if [ "${LOAD_EXAMPLES}" = "true" ]; \
 ENV PYTHONPATH "/app:$PYTHONPATH"
 EXPOSE 5000
 
-CMD ["python3", "pilot/server/dbgpt_server.py"]
+CMD ["dbgpt", "start", "webserver"]
diff --git a/docker/base/build_image.sh b/docker/base/build_image.sh
@@ -4,14 +4,21 @@ SCRIPT_LOCATION=$0
 cd "$(dirname "$SCRIPT_LOCATION")"
 WORK_DIR=$(pwd)
 
-BASE_IMAGE="nvidia/cuda:11.8.0-runtime-ubuntu22.04"
+BASE_IMAGE_DEFAULT="nvidia/cuda:11.8.0-runtime-ubuntu22.04"
+BASE_IMAGE_DEFAULT_CPU="ubuntu:22.04"
+
+BASE_IMAGE=$BASE_IMAGE_DEFAULT
 IMAGE_NAME="eosphorosai/dbgpt"
+IMAGE_NAME_ARGS=""
+
 # zh: https://pypi.tuna.tsinghua.edu.cn/simple
 PIP_INDEX_URL="https://pypi.org/simple"
 # en or zh
 LANGUAGE="en"
 BUILD_LOCAL_CODE="false"
 LOAD_EXAMPLES="true"
+BUILD_NETWORK=""
+DB_GPT_INSTALL_MODEL="default"
 
 usage () {
     echo "USAGE: $0 [--base-image nvidia/cuda:11.8.0-runtime-ubuntu22.04] [--image-name db-gpt]"
@@ -21,6 +28,8 @@ usage () {
     echo "  [--language en or zh] You language, default: en"
     echo "  [--build-local-code true or false] Whether to use the local project code to package the image, default: false"
     echo "  [--load-examples true or false] Whether to load examples to default database default: true"
+    echo "  [--network network name] The network of docker build"
+    echo "  [--install-mode mode name] Installation mode name, default: default, If you completely use openai's service, you can set the mode name to 'openai'"
     echo "  [-h|--help] Usage message"
 }
 
@@ -33,7 +42,7 @@ while [[ $# -gt 0 ]]; do
         shift # past value
         ;;
         -n|--image-name)
-        IMAGE_NAME="$2"
+        IMAGE_NAME_ARGS="$2"
         shift # past argument
         shift # past value
         ;;
@@ -57,6 +66,20 @@ while [[ $# -gt 0 ]]; do
         shift
         shift
         ;;
+        --network)
+        BUILD_NETWORK=" --network $2 "
+        shift # past argument
+        shift # past value
+        ;;
+        -h|--help)
+        help="true"
+        shift
+        ;;
+        --install-mode)
+        DB_GPT_INSTALL_MODEL="$2"
+        shift # past argument
+        shift # past value
+        ;;
         -h|--help)
         help="true"
         shift
@@ -73,11 +96,29 @@ if [[ $help ]]; then
     exit 0
 fi
 
-docker build \
+if [ "$DB_GPT_INSTALL_MODEL" != "default" ]; then
+    IMAGE_NAME="$IMAGE_NAME-$DB_GPT_INSTALL_MODEL"
+    echo "install mode is not 'default', set image name to: ${IMAGE_NAME}"
+fi
+
+if [ -z "$IMAGE_NAME_ARGS" ]; then
+    if [ "$DB_GPT_INSTALL_MODEL" == "openai" ]; then 
+        # Use cpu image
+        BASE_IMAGE=$BASE_IMAGE_DEFAULT_CPU
+    fi
+else
+    # User input image is not empty
+    BASE_IMAGE=$IMAGE_NAME_ARGS
+fi
+
+echo "Begin build docker image, base image: ${BASE_IMAGE}, target image name: ${IMAGE_NAME}"
+
+docker build $BUILD_NETWORK \
     --build-arg BASE_IMAGE=$BASE_IMAGE \
     --build-arg PIP_INDEX_URL=$PIP_INDEX_URL \
     --build-arg LANGUAGE=$LANGUAGE \
     --build-arg BUILD_LOCAL_CODE=$BUILD_LOCAL_CODE \
     --build-arg LOAD_EXAMPLES=$LOAD_EXAMPLES \
+    --build-arg DB_GPT_INSTALL_MODEL=$DB_GPT_INSTALL_MODEL \
     -f Dockerfile \
     -t $IMAGE_NAME $WORK_DIR/../../
diff --git a/docs/getting_started/install/cluster/vms/index.md b/docs/getting_started/install/cluster/vms/index.md
@@ -6,7 +6,7 @@ Local cluster deployment
 
 **Installing Command-Line Tool**
 
-All operations below are performed using the `dbgpt` command. To use the `dbgpt` command, you need to install the DB-GPT project with `pip install -e .`. Alternatively, you can use `python pilot/scripts/cli_scripts.py` as a substitute for the `dbgpt` command.
+All operations below are performed using the `dbgpt` command. To use the `dbgpt` command, you need to install the DB-GPT project with `pip install -e ".[default]"`. Alternatively, you can use `python pilot/scripts/cli_scripts.py` as a substitute for the `dbgpt` command.
 
 ### Launch Model Controller
 

diff --git a/docs/getting_started/install/deploy/deploy.md b/docs/getting_started/install/deploy/deploy.md
@@ -49,7 +49,7 @@ For the entire installation process of DB-GPT, we use the miniconda3 virtual env
 python>=3.10
 conda create -n dbgpt_env python=3.10
 conda activate dbgpt_env
-pip install -e .
+pip install -e ".[default]"
 ```
 Before use DB-GPT Knowledge
 ```bash

diff --git a/docs/getting_started/install/llm/llm.rst b/docs/getting_started/install/llm/llm.rst
@@ -6,7 +6,7 @@ DB-GPT provides a management and deployment solution for multiple models. This c
 
 
 Multi LLMs Support, Supports multiple large language models, currently supporting
-  - 🔥 InternLM(7b)
+  - 🔥 InternLM(7b,20b)
   - 🔥 Baichuan2(7b,13b)
   - 🔥 Vicuna-v1.5(7b,13b)
   - 🔥 llama-2(7b,13b,70b)
diff --git a/.gitignore b/.gitignore
@@ -28,6 +28,8 @@ sdist/
 var/
 wheels/
 models/
+# Soft link
+models
 plugins/
 
 pip-wheel-metadata/