forked from badgerdoc/badgerdoc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
56 lines (41 loc) · 2.08 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Base image: CPython 3.8.
# NOTE(review): this tag does not pin a Debian release, while a later step adds
# an apt repository hard-coded to "buster" - confirm the base distribution
# actually matches (or pin the distro suffix in the tag).
FROM python:3.8
# Register the notesalexp.org tesseract-ocr apt repository and trust its key.
# - `apt-get update` runs first so the package index exists before installing
#   (an install against an empty index fails to locate the package).
# - `--yes` keeps the install non-interactive.
# - The key is downloaded to a file instead of piped, so a failed download
#   stops the build rather than being masked by the pipe's exit status.
# - The apt lists and the temporary key file are removed in the same layer.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends apt-transport-https && \
    rm -rf /var/lib/apt/lists/* && \
    echo "deb https://notesalexp.org/tesseract-ocr/buster/ buster main" >> /etc/apt/sources.list && \
    wget -O /tmp/alexp_key.asc https://notesalexp.org/debian/alexp_key.asc && \
    apt-key add /tmp/alexp_key.asc && \
    rm -f /tmp/alexp_key.asc
# Refresh the package index and install the OS-level toolchain and libraries
# (build tools, poppler, leptonica/tesseract, a JRE, LibreOffice Java support,
# plus a few utilities). One package per line, alphabetically sorted, so that
# diffs stay readable. The apt lists are dropped in the same layer.
RUN apt-get update && \
    apt-get install --yes \
        build-essential \
        cmake \
        curl \
        default-jre \
        libleptonica-dev \
        libpoppler-cpp-dev \
        libpoppler-qt5-1 \
        libreoffice-java-common \
        libtesseract-dev \
        locales \
        pkg-config \
        poppler-data \
        poppler-utils \
        python3-dev \
        python3-distutils \
        tesseract-ocr \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Configure and generate the en_US.UTF-8 locale:
#   - append LC_ALL to /etc/environment
#   - append the locale definition to /etc/locale.gen
#   - (over)write LANG in /etc/locale.conf
#   - run locale-gen to build it
# NOTE(review): these files do not set variables in the container's process
# environment by themselves - confirm whether an ENV LANG/LC_ALL is also wanted.
RUN printf '%s\n' "LC_ALL=en_US.UTF-8" >> /etc/environment && \
    printf '%s\n' "en_US.UTF-8 UTF-8" >> /etc/locale.gen && \
    printf '%s\n' "LANG=en_US.UTF-8" > /etc/locale.conf && \
    locale-gen en_US.UTF-8
# Install LibreOffice 6.4.6.2 from the Document Foundation download archive.
# The tarball is fetched, unpacked, and every .deb under DEBS/ is installed
# with dpkg. The archive and the unpacked tree are deleted in this same RUN so
# they do not remain in the image layer (a later `rm` would not shrink it).
# The .deb files are addressed by path, so no `cd` is needed.
RUN wget https://downloadarchive.documentfoundation.org/libreoffice/old/6.4.6.2/deb/x86_64/LibreOffice_6.4.6.2_Linux_x86-64_deb.tar.gz && \
    tar -xzf LibreOffice_6.4.6.2_Linux_x86-64_deb.tar.gz && \
    dpkg -i LibreOffice_6.4.6.2_Linux_x86-64_deb/DEBS/*.deb && \
    rm -rf LibreOffice_6.4.6.2_Linux_x86-64_deb.tar.gz LibreOffice_6.4.6.2_Linux_x86-64_deb
# Install Poetry, which is used below to resolve and install the project's
# Python dependencies. --no-cache-dir keeps pip's download cache out of the
# image layer.
# NOTE(review): the Poetry version is not pinned; consider pinning it to the
# version that produced poetry.lock for reproducible builds.
RUN pip install --no-cache-dir poetry
# Fetch the prebuilt CPU wheel of mmcv-full 1.2.1 (torch 1.7.0, cp38) into
# ./mmcv so that it can be installed from a local path afterwards.
RUN mkdir mmcv && \
    wget --directory-prefix=mmcv \
        https://download.openmmlab.com/mmcv/dist/1.2.1/torch1.7.0/cpu/mmcv_full-1.2.1%2Btorch1.7.0%2Bcpu-cp38-cp38-manylinux1_x86_64.whl
# Copy only the dependency manifests into / before installing, so the
# dependency layers are rebuilt only when these two files change.
# (poetry.lock was previously copied twice; one COPY covers both files.)
COPY poetry.lock pyproject.toml /
# Install the Python dependencies into the image's interpreter:
#   1. disable Poetry's virtualenv creation
#   2. add the previously downloaded local mmcv-full wheel to the project
#   3. install everything declared in pyproject.toml / poetry.lock
#   4. remove the manifests and the wheel directory from the filesystem
RUN poetry config virtualenvs.create false && \
    poetry add mmcv/mmcv_full-1.2.1+torch1.7.0+cpu-cp38-cp38-manylinux1_x86_64.whl && \
    poetry install --no-interaction && \
    rm -rf poetry.lock pyproject.toml mmcv/
# Install MMDetection straight from its Git repository at a fixed tag.
# NOTE(review): the original requirement string was mangled by e-mail
# obfuscation ("[email protected]") and was not a valid URL. The repository and
# tag below are a reconstruction (mmdetection v2.7.0, chosen to pair with
# mmcv-full 1.2.1) - confirm against the upstream project before merging.
RUN pip install --no-cache-dir 'git+https://github.com/open-mmlab/mmdetection.git@v2.7.0'
# Download the NLTK data packages, one downloader invocation per package
# (stopwords, words, punkt, wordnet, in that order); stop at the first
# invocation that exits with a non-zero status.
RUN for corpus in stopwords words punkt wordnet; do \
        python -m nltk.downloader "$corpus" || exit 1; \
    done
# Create /models and download the model weights from Google Drive with gdown,
# saving them as /models/3_cls_w18_e30.pth.
RUN mkdir /models && \
    gdown \
        -O /models/3_cls_w18_e30.pth \
        "https://drive.google.com/uc?id=1YmO5O8kBPI9XZWASTWqP1Qh4skqQu7US"
# Runtime configuration:
#   LIBRE_RUN           - name of the LibreOffice executable to invoke
#   CASCADE_MODEL_PATH  - location of the downloaded model weights
ENV LIBRE_RUN="libreoffice6.4" \
    CASCADE_MODEL_PATH="/models/3_cls_w18_e30.pth"
# Copy the entire build context into /table-extractor.
# NOTE(review): everything in the context is included - confirm a .dockerignore
# excludes VCS data, caches and local secrets.
COPY . /table-extractor
# Make Python write stdout/stderr unbuffered so container logs appear promptly.
ENV PYTHONUNBUFFERED=1
# Put the application directory on the module search path.
# ${PYTHONPATH:+...} prepends any existing value (with its ':' separator) only
# when PYTHONPATH is already set and non-empty; otherwise the result is just
# "/table-extractor" instead of ":/table-extractor", whose leading empty entry
# would be an unintended extra search-path element.
ENV PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/table-extractor"
# Use the application directory as the working directory for the container.
WORKDIR /table-extractor
# Default command (exec form): start a bash shell; override at `docker run`
# to launch a specific program.
CMD ["/bin/bash"]