Skip to content

Commit 6b6db26

Browse files
author
Venkatesh Bingi
committed
Upgrading base image and moving mlio build to a separate stage
1 parent dc603db commit 6b6db26

File tree

2 files changed

+68
-67
lines changed

2 files changed

+68
-67
lines changed

docker/1.4-2/base/Dockerfile.cpu

Lines changed: 67 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
ARG UBUNTU_VERSION=20.04
2-
ARG UBUNTU_IMAGE_DIGEST=874aca52f79ae5f8258faff03e10ce99ae836f6e7d2df6ecd3da5c1cad3a912b
1+
ARG UBUNTU_VERSION=24.04
2+
ARG UBUNTU_IMAGE_DIGEST=b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b
33

44
# Build stage for SQLite compilation
55
# Stage name is referenced by the COPY --from=sqlite-builder step in the final image.
FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST} AS sqlite-builder
66
RUN apt-get update && apt-get install -y --no-install-recommends \
7-
build-essential \
8-
wget \
9-
ca-certificates \
10-
&& \
7+
build-essential wget ca-certificates && \
118
cd /tmp && \
129
wget https://www.sqlite.org/2025/sqlite-autoconf-3500200.tar.gz && \
1310
tar xzf sqlite-autoconf-3500200.tar.gz && \
@@ -21,39 +18,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
2118
apt-get clean && \
2219
rm -rf /var/lib/apt/lists/*
2320

24-
# Main image
25-
FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}
21+
# MLIO builder stage with Ubuntu 20.04
22+
# Stage name is referenced by the COPY --from=mlio-builder steps in the final image.
FROM ubuntu:20.04@sha256:874aca52f79ae5f8258faff03e10ce99ae836f6e7d2df6ecd3da5c1cad3a912b AS mlio-builder
2623

2724
ARG PYTHON_VERSION=3.10
28-
ARG PYARROW_VERSION=17.0.0
2925
ARG MLIO_VERSION=v0.9.0
26+
ARG PYARROW_VERSION=17.0.0
3027

3128
ENV DEBIAN_FRONTEND=noninteractive
3229

3330
# Install python and other scikit-learn runtime dependencies
3431
RUN apt-get update && \
35-
apt-get -y upgrade && \
3632
apt-get -y install --no-install-recommends \
37-
build-essential \
38-
curl \
39-
git \
40-
jq \
41-
libatlas-base-dev \
42-
nginx \
43-
openjdk-8-jdk-headless \
44-
unzip \
45-
wget \
46-
expat \
47-
tzdata \
48-
apparmor\
49-
libgstreamer1.0-0 \
50-
linux-libc-dev \
51-
libxml2 \
52-
libsqlite3-0 \
53-
software-properties-common \
54-
ca-certificates \
55-
lsb-release \
56-
&& \
33+
build-essential curl git wget ca-certificates lsb-release software-properties-common && \
5734
# Add Apache Arrow repository
5835
wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
5936
apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
@@ -63,37 +40,20 @@ RUN apt-get update && \
6340
add-apt-repository ppa:deadsnakes/ppa && \
6441
apt-get update && \
6542
apt-get -y install --no-install-recommends \
66-
python3.10 \
67-
python3.10-dev \
68-
python3.10-distutils \
69-
&& \
43+
python3.10 python3.10-dev python3.10-distutils && \
7044
# MLIO build dependencies
7145
wget http://es.archive.ubuntu.com/ubuntu/pool/main/libf/libffi/libffi7_3.3-4_amd64.deb && \
7246
dpkg -i libffi7_3.3-4_amd64.deb && \
7347
apt-get -y install --no-install-recommends \
74-
apt-transport-https \
75-
ca-certificates \
76-
gnupg \
77-
&& \
48+
apt-transport-https gnupg && \
7849
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \
7950
gpg --dearmor - | \
8051
tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \
8152
echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \
8253
apt-get update && \
8354
rm /usr/share/keyrings/kitware-archive-keyring.gpg && \
8455
apt-get install -y --no-install-recommends \
85-
autoconf \
86-
automake \
87-
cmake \
88-
cmake-data \
89-
doxygen \
90-
kitware-archive-keyring \
91-
libcurl4-openssl-dev \
92-
libssl-dev \
93-
libtool \
94-
ninja-build \
95-
zlib1g-dev \
96-
&& \
56+
autoconf automake cmake cmake-data doxygen kitware-archive-keyring libcurl4-openssl-dev libssl-dev libtool ninja-build zlib1g-dev && \
9757
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
9858
curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \
9959
apt-get clean && \
@@ -102,17 +62,9 @@ RUN apt-get update && \
10262
RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \
10363
dpkg-reconfigure --frontend noninteractive tzdata
10464

105-
# Install uv for fast Python package management
106-
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
107-
mv /root/.local/bin/uv /usr/local/bin/uv
108-
109-
ENV PATH=/usr/local/bin:${PATH}
11065
ENV PIP_ROOT_USER_ACTION=ignore
11166

112-
# Install MLIO with Apache Arrow integration
113-
# First install Arrow C++ libraries (needed for MLIO compilation)
114-
RUN uv pip install --system pyarrow==${PYARROW_VERSION}
115-
67+
# Build MLIO from scratch
11668
# Clone MLIO repository
11769
RUN cd /tmp && \
11870
git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio
@@ -147,15 +99,64 @@ RUN cd /tmp/mlio/build/release && \
14799
cmake --build . --target mlio-py && \
148100
cmake --build . --target mlio-arrow
149101

150-
# Build and install MLIO Python wheel
102+
# Build MLIO Python wheel
151103
RUN cd /tmp/mlio/src/mlio-py && \
152-
python3 setup.py bdist_wheel && \
153-
uv pip install --system dist/*.whl
104+
python3 setup.py bdist_wheel
154105

155-
# Copy TBB libraries and cleanup
156-
RUN cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \
157-
ldconfig && \
158-
rm -rf /tmp/mlio
106+
# Copy TBB libraries and MLIO shared libraries to a location we can copy from
107+
RUN mkdir -p /mlio-artifacts && \
108+
cp -r /tmp/mlio/build/third-party/lib/libtbb* /mlio-artifacts/ && \
109+
# Only this copy is best-effort. "&&" and "||" share precedence and associate left, so an
# unparenthesised "|| true" here would also hide a failed mkdir or libtbb copy above.
(cp /usr/local/lib/libmlio* /mlio-artifacts/ 2>/dev/null || true) && \
110+
cp /tmp/mlio/src/mlio-py/dist/*.whl /mlio-artifacts/
111+
112+
# Main image
113+
FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}
114+
115+
ARG PYTHON_VERSION=3.10
116+
ARG PYARROW_VERSION=17.0.0
117+
118+
ENV DEBIAN_FRONTEND=noninteractive
119+
120+
# Install runtime dependencies (this list still carries build-essential, linux-libc-dev and python3.10-dev)
121+
RUN apt-get update && \
122+
apt-get -y upgrade && \
123+
apt-get -y install --no-install-recommends \
124+
curl git jq libatlas-base-dev nginx openjdk-8-jdk-headless unzip wget expat tzdata apparmor \
125+
libgstreamer1.0-0 libxml2 libsqlite3-0 software-properties-common ca-certificates lsb-release \
126+
build-essential linux-libc-dev && \
127+
# Add Apache Arrow repository (the packages installed from it below are the 17.0.0-1 -dev variants)
128+
wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
129+
apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
130+
apt-get update && \
131+
apt-get install -y -V libarrow-dev=17.0.0-1 libarrow-dataset-dev=17.0.0-1 libparquet-dev=17.0.0-1 libarrow-acero-dev=17.0.0-1 && \
132+
# Add deadsnakes PPA for Python 3.10
133+
add-apt-repository ppa:deadsnakes/ppa && \
134+
apt-get update && \
135+
apt-get -y install --no-install-recommends \
136+
python3.10 python3.10-distutils python3.10-dev && \
137+
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
138+
curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \
139+
apt-get clean && \
140+
rm -rf /var/lib/apt/lists/*
141+
142+
RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \
143+
dpkg-reconfigure --frontend noninteractive tzdata
144+
145+
# Install uv for fast Python package management
146+
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
147+
mv /root/.local/bin/uv /usr/local/bin/uv
148+
149+
ENV PATH=/usr/local/bin:${PATH}
150+
ENV PIP_ROOT_USER_ACTION=ignore
151+
152+
# Copy MLIO wheel, TBB libraries, and MLIO shared libraries from builder stage
153+
COPY --from=mlio-builder /mlio-artifacts/*.whl /tmp/
154+
COPY --from=mlio-builder /mlio-artifacts/libtbb* /usr/local/lib/
155+
COPY --from=mlio-builder /mlio-artifacts/libmlio* /usr/local/lib/
156+
157+
# Install MLIO wheel
158+
RUN uv pip install --system /tmp/*.whl && \
159+
rm /tmp/*.whl && \
# Refresh the dynamic linker cache after libtbb*/libmlio* were copied into /usr/local/lib.
ldconfig
159160

160161
# Copy compiled SQLite from builder stage
161162
COPY --from=sqlite-builder /usr/local/bin/sqlite3 /usr/local/bin/sqlite3

test/unit/test_modules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ def test_pandas_version():
99

1010
def test_pyarrow_to_parquet_conversion_regression_issue_106():
1111
df = pd.DataFrame({'x': [1, 2]})
12-
df.to_parquet('test.parquet', engine='pyarrow')
12+
df.to_parquet('test.parquet', engine='pyarrow')

0 commit comments

Comments
 (0)