-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
### What problem does this PR solve? Rework Dockerfile.scratch - Multiple stage Dockerfile - Removed conda - Replaced pip with poetry - Added missing dependencies and fixed package version conflicts - Added deepdoc models ### Type of change - [x] Refactoring - [ ] Performance Improvement - [ ] Other (please describe):
- Loading branch information
1 parent
4a6a2a0
commit d8a4341
Showing
13 changed files
with
25,328 additions
and
28,048 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,91 @@ | ||
FROM ubuntu:22.04 | ||
# base stage | ||
FROM ubuntu:24.04 AS base | ||
USER root | ||
|
||
WORKDIR /ragflow | ||
|
||
RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev | ||
RUN rm -f /etc/apt/apt.conf.d/docker-clean \ | ||
&& echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache | ||
|
||
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ | ||
bash ~/miniconda.sh -b -p /root/miniconda3 && \ | ||
rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ | ||
echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \ | ||
echo "conda activate base" >> ~/.bashrc | ||
# Install CA certificates. apt-get is used for both steps because the `apt` | ||
# command-line interface is intended for interactive use, not for scripts. | ||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ | ||
apt-get update && apt-get --no-install-recommends install -y ca-certificates | ||
|
||
ENV PATH /root/miniconda3/bin:$PATH | ||
# If you are located in China, you can use the Tsinghua mirror to speed up apt | ||
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources | ||
|
||
RUN conda create -y --name py11 python=3.11 | ||
# Install the system libraries and tools, then install Poetry. | ||
# The Poetry installer is saved to a file before it is executed: with | ||
# `curl ... | python3 -` only the exit status of the last pipeline command is | ||
# checked, so a failed download would not fail the build. `-f` makes curl | ||
# return an error on HTTP failures instead of saving the error page. | ||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ | ||
apt-get update && apt-get install -y curl libpython3-dev nginx openmpi-bin openmpi-common libopenmpi-dev libglib2.0-0 libglx-mesa0 \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& curl -fsSL https://install.python-poetry.org -o /tmp/install-poetry.py \ | ||
&& python3 /tmp/install-poetry.py \ | ||
&& rm -f /tmp/install-poetry.py | ||
|
||
ENV CONDA_DEFAULT_ENV py11 | ||
ENV CONDA_PREFIX /root/miniconda3/envs/py11 | ||
ENV PATH $CONDA_PREFIX/bin:$PATH | ||
# Disable .pyc generation and add the OpenMPI library directory to the dynamic | ||
# linker search path. The directory must be absolute; without the leading '/' | ||
# the entry is relative and would be resolved against the current working directory. | ||
ENV PYTHONDONTWRITEBYTECODE=1 LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH | ||
|
||
RUN curl -sL https://deb.nodesource.com/setup_14.x | bash - | ||
RUN apt-get install -y nodejs | ||
# Configure Poetry | ||
ENV POETRY_NO_INTERACTION=1 | ||
ENV POETRY_VIRTUALENVS_IN_PROJECT=true | ||
ENV POETRY_VIRTUALENVS_CREATE=true | ||
ENV POETRY_REQUESTS_TIMEOUT=15 | ||
|
||
RUN apt-get install -y nginx | ||
# builder stage | ||
FROM base AS builder | ||
USER root | ||
|
||
WORKDIR /ragflow | ||
|
||
# Node.js and npm are installed in the builder stage, where the web front end is built. | ||
# apt-get is used instead of apt, whose command-line interface is not meant for scripts. | ||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ | ||
apt-get update && apt-get install -y nodejs npm && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# If you are located in China, you can use the Taobao registry mirror to speed up npm and yarn | ||
RUN npm config set registry https://registry.npmmirror.com/ | ||
|
||
# https://yarnpkg.com/getting-started/install | ||
COPY web web | ||
RUN cd web && npm install -g corepack && corepack enable && yarn install && yarn run build | ||
|
||
ADD ./web ./web | ||
ADD ./api ./api | ||
ADD ./conf ./conf | ||
ADD ./deepdoc ./deepdoc | ||
ADD ./rag ./rag | ||
ADD ./requirements.txt ./requirements.txt | ||
ADD ./agent ./agent | ||
ADD ./graphrag ./graphrag | ||
# install dependencies from poetry.lock file | ||
COPY pyproject.toml poetry.toml poetry.lock ./ | ||
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \ | ||
/root/.local/bin/poetry install --sync --no-cache --no-root | ||
|
||
RUN apt install openmpi-bin openmpi-common libopenmpi-dev | ||
ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH | ||
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld | ||
RUN cd ./web && npm i --force && npm run build | ||
RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt | ||
# production stage | ||
FROM base AS production | ||
USER root | ||
|
||
WORKDIR /ragflow | ||
|
||
RUN apt-get update && \ | ||
apt-get install -y libglib2.0-0 libgl1-mesa-glx && \ | ||
# Install python packages' dependencies | ||
# cv2 requires libGL.so.1 | ||
# apt-get is used instead of apt, whose command-line interface is not meant for scripts. | ||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ | ||
apt-get update && apt-get install -y --no-install-recommends nginx libgl1 vim less && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama | ||
RUN conda run -n py11 python -m nltk.downloader punkt | ||
RUN conda run -n py11 python -m nltk.downloader wordnet | ||
COPY web web | ||
COPY api api | ||
COPY conf conf | ||
COPY deepdoc deepdoc | ||
COPY rag rag | ||
COPY agent agent | ||
COPY graphrag graphrag | ||
COPY pyproject.toml poetry.toml poetry.lock ./ | ||
|
||
# Copy compiled web pages | ||
COPY --from=builder /ragflow/web/dist /ragflow/web/dist | ||
|
||
# Copy Python environment and packages | ||
ENV VIRTUAL_ENV=/ragflow/.venv | ||
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} | ||
ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}" | ||
|
||
# Download nltk data | ||
RUN python3 -m nltk.downloader wordnet punkt punkt_tab | ||
|
||
# Copy models downloaded via download_deps.sh | ||
COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/ | ||
|
||
ENV PYTHONPATH=/ragflow/ | ||
ENV HF_ENDPOINT=https://hf-mirror.com | ||
|
||
ADD docker/entrypoint.sh ./entrypoint.sh | ||
COPY docker/entrypoint.sh ./entrypoint.sh | ||
RUN chmod +x ./entrypoint.sh | ||
|
||
ENTRYPOINT ["./entrypoint.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Download the file at URL $1 into the current directory, skipping the | ||
# transfer when a file with the same basename already exists there. | ||
# Returns wget's exit status when a download is attempted. | ||
download() | ||
{ | ||
    local url=$1 | ||
    echo "download $url" | ||
    # Keep only the text after the last '/' (the file name). | ||
    # https://stackoverflow.com/questions/3162385/how-to-split-a-string-in-shell-and-get-the-last-field | ||
    local fn=${url##*/} | ||
    # Expansions are quoted so that a value containing whitespace or glob | ||
    # characters is still passed as a single word. | ||
    if [ ! -f "$fn" ] ; then | ||
        # NOTE(review): --no-check-certificate disables TLS certificate | ||
        # verification for the download; confirm this is still required. | ||
        wget --no-check-certificate "$url" | ||
    fi | ||
} | ||
|
||
# https://stackoverflow.com/questions/24628076/convert-multiline-string-to-array | ||
names="https://huggingface.co/InfiniFlow/deepdoc/resolve/main/det.onnx | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.laws.onnx | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.manual.onnx | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.onnx | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.paper.onnx | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/ocr.res | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/rec.onnx | ||
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/tsr.onnx | ||
https://huggingface.co/InfiniFlow/text_concat_xgb_v1.0/resolve/main/updown_concat_xgb.model" | ||
|
||
SAVEIFS=$IFS # Save current IFS (Internal Field Separator) | ||
IFS=$'\n' # Change IFS to newline char | ||
names=($names) # split the `names` string into an array by the same name | ||
IFS=$SAVEIFS # Restore original IFS | ||
|
||
# Remove zero-length files (presumably left behind by failed downloads) so that | ||
# download() fetches them again. Only regular files directly in the current | ||
# directory are considered: an unbounded `find .` would also delete | ||
# legitimately empty files in every subdirectory, and `-delete` avoids the | ||
# whitespace problems of piping file names through xargs. | ||
find . -maxdepth 1 -type f -size 0 -delete | ||
# https://stackoverflow.com/questions/15466808/shell-iterate-over-array | ||
for url in "${names[@]}"; do | ||
    # Stop at the first failed download and report failure with a non-zero | ||
    # exit status (exit codes must be in the range 0-255). | ||
    if ! download "$url"; then | ||
        exit 1 | ||
    fi | ||
done | ||
# Clean up again in case a download produced an empty file. | ||
find . -maxdepth 1 -type f -size 0 -delete |
Oops, something went wrong.