-
Notifications
You must be signed in to change notification settings - Fork 3
/
Dockerfile
28 lines (22 loc) · 991 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Image for the Flask app in app.py, served by gunicorn on port 8080.
# The intfloat/e5-small-v2 model and tokenizer and the NLTK punkt data are
# fetched while the image is built, so they are already cached at startup.

# Use Python 3.8 as base image
# NOTE(review): Python 3.8 is end-of-life and the packages below are unpinned,
# so builds are not reproducible. Consider bumping the base image and pinning
# versions (e.g. via a requirements.txt) once the app has been verified.
FROM python:3.8

# Set working directory inside the container
WORKDIR /app

# Install the ML dependencies. Kept in their own layer so it stays cached when
# only the web dependencies below change.
RUN pip install --no-cache-dir \
        nltk \
        sentence_transformers \
        transformers

# Install the web-serving dependencies
RUN pip install --no-cache-dir \
        Flask \
        gunicorn \
        numpy

# Create an unprivileged user and hand it the working directory. Everything
# that needs root (the pip installs above) is done before this point.
# NOTE(review): assumes app.py needs no root privileges and writes only inside
# /app, the user's home, or /tmp - confirm against app.py.
RUN useradd --create-home --uid 10001 app \
    && chown app:app /app
USER app

# Download the model and tokenizer during build. This runs as the runtime user
# so the files land in that user's cache directory and are found at runtime.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/e5-small-v2')"
RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-small-v2')"

# Download the NLTK tokenizer data during build. Recent NLTK releases (3.9+)
# load 'punkt_tab' rather than 'punkt', and nltk is not pinned, so fetch both.
# nltk.download() returns False on failure instead of raising, so convert that
# into a non-zero exit status to fail the build rather than the running app.
RUN python -c "import sys, nltk; sys.exit(0 if all([nltk.download(pkg) for pkg in ('punkt', 'punkt_tab')]) else 1)"

# Copy the Flask app to the container
COPY --chown=app:app app.py .

# Expose the port the app runs on (documentation only; publish it with -p)
EXPOSE 8080

# Run the app
#
# For CPU based servers, 4 processes can over-saturate a Ryzen 5950x 32-cpu
# chip. htop shows load avg of 52, which means processes are waiting for CPU
# time. So, let's limit this to only 2 processes.
CMD ["gunicorn", "-w", "2", "-b", "0.0.0.0:8080", "app:app"]
# Alternative for debugging: run the app directly with unbuffered output.
# CMD ["python", "-u", "app.py"]