From d50b738df3a497eb3e50cc5d88afb198bc1451f9 Mon Sep 17 00:00:00 2001
From: Anil Vishnoi
Date: Fri, 6 Sep 2024 03:26:44 -0700
Subject: [PATCH] POC: Frontend docling library with fastapi and build the container image to deploy as a service

Signed-off-by: Anil Vishnoi
---
Note: the blob ids on the "index" lines below were generated for an earlier
revision of these files; regenerate the patch with git format-patch if exact
ids are required (for example for a 3-way apply).

 Makefile                       |  5 +++++
 duckbill/Containerfile         | 38 ++++++++++++++++++++++++++++++++++++++
 duckbill/apiserver/__init__.py |  0
 duckbill/apiserver/main.py     | 31 +++++++++++++++++++++++++++++++
 duckbill/requirements.txt      |  4 ++++
 5 files changed, 78 insertions(+)
 create mode 100644 duckbill/Containerfile
 create mode 100644 duckbill/apiserver/__init__.py
 create mode 100644 duckbill/apiserver/main.py
 create mode 100644 duckbill/requirements.txt

diff --git a/Makefile b/Makefile
index 841d7919..e69f219d 100644
--- a/Makefile
+++ b/Makefile
@@ -47,6 +47,11 @@ ps-image: Containerfile.ps ## Build continaer image for the pathservice
 	$(CMD_PREFIX) docker build -f Containerfile.ps -t ghcr.io/instructlab/ui/pathservice:$(TAG) .
 	$(CMD_PREFIX) docker tag ghcr.io/instructlab/ui/pathservice:$(TAG) ghcr.io/instructlab/ui/pathservice:main
 
+duckbill-image: duckbill/Containerfile ## Build container image for the duckbill (docling) service
+	$(ECHO_PREFIX) printf " %-12s duckbill/Containerfile\n" "[docker]"
+	$(CMD_PREFIX) docker build -f duckbill/Containerfile --platform linux/amd64 -t ghcr.io/instructlab/ui/duckbill:$(TAG) ./duckbill
+	$(CMD_PREFIX) docker tag ghcr.io/instructlab/ui/duckbill:$(TAG) quay.io/instructlab-ui/docling:main
+
 ##@ Local Dev - Run the stack (UI and PathService) on your local machine
 .PHONY: stop-dev-local
 stop-dev-local: ## Stop the npm and pathservice local instances
diff --git a/duckbill/Containerfile b/duckbill/Containerfile
new file mode 100644
index 00000000..281b7ba0
--- /dev/null
+++ b/duckbill/Containerfile
@@ -0,0 +1,38 @@
+# Container image for the duckbill service: a FastAPI frontend for the docling library.
+FROM python:3.11-slim-bookworm
+
+WORKDIR /duckbill
+COPY ./requirements.txt /duckbill/requirements.txt
+
+# Install the Python dependencies without leaving pip's download cache in the layers.
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir --upgrade -r /duckbill/requirements.txt
+
+ENV GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no"
+
+# Drop the apt package lists in the same layer that fetched them so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y libgl1 libglib2.0-0 curl wget git \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# This will install torch with *only* cpu support
+# Remove the --extra-index-url part if you want to install all the gpu requirements
+# For more details in the different torch distribution visit https://pytorch.org/.
+#RUN pip install --no-cache-dir docling --extra-index-url https://download.pytorch.org/whl/cpu
+#RUN pip install --no-cache-dir docling fastapi
+
+ENV HF_HOME=/tmp/
+ENV TORCH_HOME=/tmp/
+
+RUN python -c 'from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models; load_pretrained_nlp_models(verbose=True);'
+RUN python -c 'from docling.document_converter import DocumentConverter; artifacts_path = DocumentConverter.download_models_hf(force=True);'
+
+# On container environments, always set a thread budget to avoid undesired thread congestion.
+ENV OMP_NUM_THREADS=4
+
+COPY ./apiserver /duckbill/apiserver
+
+EXPOSE 5000
+
+CMD ["fastapi", "run", "apiserver/main.py", "--port", "5000"]
diff --git a/duckbill/apiserver/__init__.py b/duckbill/apiserver/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/duckbill/apiserver/main.py b/duckbill/apiserver/main.py
new file mode 100644
index 00000000..f388ee24
--- /dev/null
+++ b/duckbill/apiserver/main.py
@@ -0,0 +1,31 @@
+from typing import Union
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+from docling.document_converter import DocumentConverter
+
+
+app = FastAPI()
+
+class UrlRequest(BaseModel):
+    """Request body for /simpleconvert: the URL of the document to convert."""
+    url: str
+
+@app.get("/")
+def read_root():
+    """Status endpoint; responds with a one-element JSON array holding a status message."""
+    # Return a list rather than a set literal so the JSON array shape is explicit.
+    return ["Docling Service is up and running."]
+
+
+@app.post("/simpleconvert")
+def simpleconvert(request: UrlRequest):
+    """Convert the document at ``request.url`` with docling and return its Markdown in a one-element JSON array."""
+    # NOTE(review): the URL is taken straight from the request body; consider restricting allowed schemes/hosts.
+    url = request.url
+    converter = DocumentConverter()
+    doc = converter.convert_single(url)
+    # Render once and reuse the result for both the log output and the response.
+    markdown = doc.render_as_markdown()
+    print(markdown)
+    return [markdown]
diff --git a/duckbill/requirements.txt b/duckbill/requirements.txt
new file mode 100644
index 00000000..fec5b3ff
--- /dev/null
+++ b/duckbill/requirements.txt
@@ -0,0 +1,4 @@
+fastapi[standard]>=0.113.0,<0.114.0
+pydantic>=2.7.0,<3.0.0
+# apiserver/main.py uses the docling 1.x API, so cap below the next major release.
+docling>=1.9.0,<2.0.0