Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api v1alpha1 #17

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Build-context exclusions for the container image build.
#
# Patterns are matched relative to the root of the build context. A bare
# pattern such as "*.ext" only matches entries directly in the context root;
# prefix it with "**/" to match at any depth (including the root itself).

# Ignore Python cache files (at any depth, so that copied source packages
# never carry stale bytecode into the image)
__pycache__/
**/__pycache__/
**/*.pyc
**/*.pyo
**/*.pyd

# Ignore virtual environments
env/
venv/

# Ignore development artifacts
*.log
*.db
*.sqlite3

# Ignore configuration and sensitive files
# (".env" is excluded at any depth; the remaining patterns apply to the
# context root only)
**/.env
*.env
*.ini
*.cfg

# Ignore IDE and editor settings (swap files can appear next to any edited
# source file, so they are excluded at any depth)
.vscode/
.idea/
**/*.swp
**/*.swo

# Ignore Git files
.git/
.gitignore

# Ignore Docker files themselves (optional if not needed in the image)
.dockerignore
Dockerfile*

# Ignore build artifacts (if applicable)
build/
dist/
*.egg-info
63 changes: 45 additions & 18 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,59 @@
FROM python:3.11-slim-bookworm
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

FROM ${BASE_IMAGE}

ARG CPU_ONLY=false
WORKDIR /docling-serve

RUN apt-get update \
&& apt-get install -y libgl1 libglib2.0-0 curl wget git \
&& apt-get clean
USER 0

RUN pip install --no-cache-dir poetry
###################################################################################################
# OS Layer #
###################################################################################################

COPY pyproject.toml poetry.lock README.md /docling-serve/
RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
dnf config-manager --enable crb && \
dnf -y update && \
dnf install -y $(cat /tmp/os-packages.txt) && \
dnf -y clean all && \
rm -rf /var/cache/dnf

RUN if [ "$CPU_ONLY" = "true" ]; then \
poetry install --no-root --with cpu; \
else \
poetry install --no-root; \
fi
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

ENV HF_HOME=/tmp/
ENV TORCH_HOME=/tmp/
###################################################################################################
# Docling layer #
###################################################################################################

RUN poetry run python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True);'
USER 1001

WORKDIR /opt/app-root/src

# On container environments, always set a thread budget to avoid undesired thread congestion.
ENV OMP_NUM_THREADS=4

COPY ./docling_serve /docling-serve/docling_serve
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV PYTHONIOENCODING=utf-8

COPY --chown=1001:0 pyproject.toml poetry.lock models_download.py README.md ./

RUN pip install --no-cache-dir poetry && \
# We already are in a virtual environment, so we don't need to create a new one, only activate it.
poetry config virtualenvs.create false && \
source /opt/app-root/bin/activate && \
if [ "$CPU_ONLY" = "true" ]; then \
poetry install --no-root --no-cache --no-interaction --all-extras --with cpu --without dev; \
else \
poetry install --no-root --no-cache --no-interaction --all-extras --without dev; \
fi && \
echo "Downloading models..." && \
python models_download.py && \
chown -R 1001:0 /opt/app-root/src && \
chmod -R g=u /opt/app-root/src

COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve

EXPOSE 5000
EXPOSE 8080

CMD ["poetry", "run", "uvicorn", "--port", "5000", "--host", "0.0.0.0", "docling_serve.app:app"]
CMD ["python", "docling_serve/app.py"]
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ md-lint-file:
$(CMD_PREFIX) touch .markdown-lint

.PHONY: docling-serve-cpu-image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" continaer image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU ONLY]"
$(CMD_PREFIX) docker build --build-arg CPU_ONLY=true -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main

.PHONY: docling-serve-gpu-image
docling-serve-gpu-image: Containerfile ## Build docling-serve continaer image with GPU support
docling-serve-gpu-image: Containerfile ## Build docling-serve container image with GPU support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with GPU]"
$(CMD_PREFIX) docker build --build-arg CPU_ONLY=false -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
Expand Down
Loading