Skip to content

Commit

Permalink
Rework Dockerfile.scratch (#2525)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Rework Dockerfile.scratch
- Multiple stage Dockerfile
- Removed conda
- Replaced pip with poetry
- Added missing dependencies and fixed package version conflicts
- Added deepdoc models

### Type of change

- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
  • Loading branch information
yuzhichang authored Sep 23, 2024
1 parent 4a6a2a0 commit d8a4341
Show file tree
Hide file tree
Showing 13 changed files with 25,328 additions and 28,048 deletions.
105 changes: 70 additions & 35 deletions Dockerfile.scratch
Original file line number Diff line number Diff line change
@@ -1,56 +1,91 @@
FROM ubuntu:22.04
# base stage
FROM ubuntu:24.04 AS base
USER root

WORKDIR /ragflow

RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
&& echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache

RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
bash ~/miniconda.sh -b -p /root/miniconda3 && \
rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt-get --no-install-recommends install -y ca-certificates

ENV PATH /root/miniconda3/bin:$PATH
# If you are located in China, you can use the Tsinghua mirror to speed up apt
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources

RUN conda create -y --name py11 python=3.11
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y curl libpython3-dev nginx openmpi-bin openmpi-common libopenmpi-dev libglib2.0-0 libglx-mesa0 \
&& rm -rf /var/lib/apt/lists/* \
&& curl -sSL https://install.python-poetry.org | python3 -

ENV CONDA_DEFAULT_ENV py11
ENV CONDA_PREFIX /root/miniconda3/envs/py11
ENV PATH $CONDA_PREFIX/bin:$PATH
# Do not write .pyc files, and put the OpenMPI libraries on the loader path.
# The directory must be absolute: a relative LD_LIBRARY_PATH entry is resolved
# against the process's current working directory, not the filesystem root.
ENV PYTHONDONTWRITEBYTECODE=1 LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH

RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
RUN apt-get install -y nodejs
# Configure Poetry
ENV POETRY_NO_INTERACTION=1
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
ENV POETRY_VIRTUALENVS_CREATE=true
ENV POETRY_REQUESTS_TIMEOUT=15

RUN apt-get install -y nginx
# builder stage
FROM base AS builder
USER root

WORKDIR /ragflow

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y nodejs npm && \
rm -rf /var/lib/apt/lists/*

# If you are located in China, you can use the npmmirror (formerly Taobao) registry to speed up npm and yarn
RUN npm config set registry https://registry.npmmirror.com/

# https://yarnpkg.com/getting-started/install
COPY web web
RUN cd web && npm install -g corepack && corepack enable && yarn install && yarn run build

ADD ./web ./web
ADD ./api ./api
ADD ./conf ./conf
ADD ./deepdoc ./deepdoc
ADD ./rag ./rag
ADD ./requirements.txt ./requirements.txt
ADD ./agent ./agent
ADD ./graphrag ./graphrag
# install dependencies from poetry.lock file
COPY pyproject.toml poetry.toml poetry.lock ./
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
/root/.local/bin/poetry install --sync --no-cache --no-root

RUN apt install openmpi-bin openmpi-common libopenmpi-dev
ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
RUN cd ./web && npm i --force && npm run build
RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
# production stage
FROM base AS production
USER root

WORKDIR /ragflow

RUN apt-get update && \
apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
# Install python packages' dependencies
# cv2 requires libGL.so.1
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
rm -rf /var/lib/apt/lists/*

RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
RUN conda run -n py11 python -m nltk.downloader punkt
RUN conda run -n py11 python -m nltk.downloader wordnet
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY pyproject.toml poetry.toml poetry.lock ./

# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist

# Copy Python environment and packages
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"

# Download nltk data
RUN python3 -m nltk.downloader wordnet punkt punkt_tab

# Copy models downloaded via download_deps.sh
COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

ADD docker/entrypoint.sh ./entrypoint.sh
COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
2 changes: 1 addition & 1 deletion api/ragflow_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def update_progress():


if __name__ == '__main__':
print("""
print(r"""
____ ______ __
/ __ \ ____ _ ____ _ / ____// /____ _ __
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
Expand Down
5 changes: 4 additions & 1 deletion docker/.env
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ REDIS_PASSWORD=infini_rag_flow

SVR_HTTP_PORT=9380

RAGFLOW_VERSION=dev
RAGFLOW_VERSION=poetry

TIMEZONE='Asia/Shanghai'

# Inside GFW, we need the following huggingface.co mirror:
HF_ENDPOINT=https://hf-mirror.com

######## OS setup for ES ###########
# sysctl vm.max_map_count
# sudo sysctl -w vm.max_map_count=262144
Expand Down
3 changes: 2 additions & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ services:
- ${SVR_HTTP_PORT}:9380
- 80:80
- 443:443
- 5678:5678
volumes:
- ./service_conf.yaml:/ragflow/conf/service_conf.yaml
- ./ragflow-logs:/ragflow/logs
Expand All @@ -23,7 +24,7 @@ services:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf
environment:
- TZ=${TIMEZONE}
- HF_ENDPOINT=https://huggingface.co
- HF_ENDPOINT=${HF_ENDPOINT}
- MACOS=${MACOS}
networks:
- ragflow
Expand Down
38 changes: 38 additions & 0 deletions download_deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

# download URL
#
# Fetch URL into the current directory with wget, unless a regular file whose
# name equals the last path component of URL already exists there.
#
# Arguments:
#   $1 - URL of the file to fetch.
# Returns:
#   0 when the file already exists or wget succeeds; otherwise wget's exit
#   status. On failure any partially written file is removed so that a later
#   run does not mistake a truncated download for a complete one.
download()
{
    local fn rc

    echo "download $1"
    # Strip everything up to and including the last '/' to get the file name.
    # https://stackoverflow.com/questions/3162385/how-to-split-a-string-in-shell-and-get-the-last-field
    fn=${1##*/}
    # Quoted so that names containing whitespace or glob characters are tested
    # as a single word instead of breaking the `[` expression.
    if [ ! -f "$fn" ] ; then
        # NOTE(review): --no-check-certificate disables TLS verification of the
        # download host; kept for compatibility, but confirm it is still needed.
        wget --no-check-certificate "$1"
        rc=$?
        if [ "$rc" -ne 0 ]; then
            rm -f -- "$fn"
            return "$rc"
        fi
    fi
}

# https://stackoverflow.com/questions/24628076/convert-multiline-string-to-array
# Model files to download, one URL per array element. Each file is stored in
# the current directory under the last path component of its URL.
names=(
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/det.onnx"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.laws.onnx"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.manual.onnx"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.onnx"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.paper.onnx"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/ocr.res"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/rec.onnx"
    "https://huggingface.co/InfiniFlow/deepdoc/resolve/main/tsr.onnx"
    "https://huggingface.co/InfiniFlow/text_concat_xgb_v1.0/resolve/main/updown_concat_xgb.model"
)

# Delete zero-byte copies of the files listed in `names`, so that an empty
# leftover from a failed transfer is fetched again instead of being skipped.
# Only the files this script manages are touched: a recursive
# `find . -size 0 | xargs rm -f` would also delete unrelated empty files under
# the current directory (for example empty __init__.py files).
remove_empty_downloads()
{
    local url fn
    for url in "${names[@]}"; do
        fn=${url##*/}
        if [ -f "$fn" ] && [ ! -s "$fn" ]; then
            rm -f -- "$fn"
        fi
    done
}

remove_empty_downloads
for url in "${names[@]}"; do
    # Stop at the first failed download. Exit statuses must lie in 0-255, so
    # use 1 rather than -1.
    if ! download "$url"; then
        echo "failed to download $url" >&2
        exit 1
    fi
done
remove_empty_downloads
Loading

0 comments on commit d8a4341

Please sign in to comment.