1- ARG UBUNTU_VERSION=20.04
2- ARG UBUNTU_IMAGE_DIGEST=874aca52f79ae5f8258faff03e10ce99ae836f6e7d2df6ecd3da5c1cad3a912b
1+ ARG UBUNTU_VERSION=24.04
2+ ARG UBUNTU_IMAGE_DIGEST=b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b
33
44# Build stage for SQLite compilation
55FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST} as sqlite-builder
66RUN apt-get update && apt-get install -y --no-install-recommends \
7- build-essential \
8- wget \
9- ca-certificates \
10- && \
7+ build-essential wget ca-certificates && \
118 cd /tmp && \
129 wget https://www.sqlite.org/2025/sqlite-autoconf-3500200.tar.gz && \
1310 tar xzf sqlite-autoconf-3500200.tar.gz && \
@@ -21,39 +18,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
2118 apt-get clean && \
2219 rm -rf /var/lib/apt/lists/*
2320
24- # Main image
25- FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}
21+ # MLIO builder stage with Ubuntu 20.04
22+ FROM ubuntu:20.04@sha256:874aca52f79ae5f8258faff03e10ce99ae836f6e7d2df6ecd3da5c1cad3a912b as mlio-builder
2623
2724ARG PYTHON_VERSION=3.10
28- ARG PYARROW_VERSION=17.0.0
2925ARG MLIO_VERSION=v0.9.0
26+ ARG PYARROW_VERSION=17.0.0
3027
3128ENV DEBIAN_FRONTEND=noninteractive
3229
3330# Install python and other scikit-learn runtime dependencies
3431RUN apt-get update && \
35- apt-get -y upgrade && \
3632 apt-get -y install --no-install-recommends \
37- build-essential \
38- curl \
39- git \
40- jq \
41- libatlas-base-dev \
42- nginx \
43- openjdk-8-jdk-headless \
44- unzip \
45- wget \
46- expat \
47- tzdata \
48- apparmor\
49- libgstreamer1.0-0 \
50- linux-libc-dev \
51- libxml2 \
52- libsqlite3-0 \
53- software-properties-common \
54- ca-certificates \
55- lsb-release \
56- && \
33+ build-essential curl git wget ca-certificates lsb-release software-properties-common && \
5734 # Add Apache Arrow repository
5835 wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
5936 apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
@@ -63,37 +40,20 @@ RUN apt-get update && \
6340 add-apt-repository ppa:deadsnakes/ppa && \
6441 apt-get update && \
6542 apt-get -y install --no-install-recommends \
66- python3.10 \
67- python3.10-dev \
68- python3.10-distutils \
69- && \
43+ python3.10 python3.10-dev python3.10-distutils && \
7044 # MLIO build dependencies
7145 wget http://es.archive.ubuntu.com/ubuntu/pool/main/libf/libffi/libffi7_3.3-4_amd64.deb && \
7246 dpkg -i libffi7_3.3-4_amd64.deb && \
7347 apt-get -y install --no-install-recommends \
74- apt-transport-https \
75- ca-certificates \
76- gnupg \
77- && \
48+ apt-transport-https gnupg && \
7849 wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \
7950 gpg --dearmor - | \
8051 tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \
8152 echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \
8253 apt-get update && \
8354 rm /usr/share/keyrings/kitware-archive-keyring.gpg && \
8455 apt-get install -y --no-install-recommends \
85- autoconf \
86- automake \
87- cmake \
88- cmake-data \
89- doxygen \
90- kitware-archive-keyring \
91- libcurl4-openssl-dev \
92- libssl-dev \
93- libtool \
94- ninja-build \
95- zlib1g-dev \
96- && \
56+ autoconf automake cmake cmake-data doxygen kitware-archive-keyring libcurl4-openssl-dev libssl-dev libtool ninja-build zlib1g-dev && \
9757 update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
9858 curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \
9959 apt-get clean && \
@@ -102,17 +62,9 @@ RUN apt-get update && \
10262RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \
10363 dpkg-reconfigure --frontend noninteractive tzdata
10464
105- # Install uv for fast Python package management
106- RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
107- mv /root/.local/bin/uv /usr/local/bin/uv
108-
109- ENV PATH=/usr/local/bin:${PATH}
11065ENV PIP_ROOT_USER_ACTION=ignore
11166
112- # Install MLIO with Apache Arrow integration
113- # First install Arrow C++ libraries (needed for MLIO compilation)
114- RUN uv pip install --system pyarrow==${PYARROW_VERSION}
115-
67+ # Build MLIO from scratch
11668# Clone MLIO repository
11769RUN cd /tmp && \
11870 git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio
@@ -147,15 +99,64 @@ RUN cd /tmp/mlio/build/release && \
14799 cmake --build . --target mlio-py && \
148100 cmake --build . --target mlio-arrow
149101
150- # Build and install MLIO Python wheel
102+ # Build MLIO Python wheel
151103RUN cd /tmp/mlio/src/mlio-py && \
152- python3 setup.py bdist_wheel && \
153- uv pip install --system dist/*.whl
104+ python3 setup.py bdist_wheel
154105
155- # Copy TBB libraries and cleanup
156- RUN cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \
157- ldconfig && \
158- rm -rf /tmp/mlio
106+ # Stage TBB libs, MLIO shared libs (best-effort: the braces scope `|| true` to that one copy) and the wheel in /mlio-artifacts
107+ RUN mkdir -p /mlio-artifacts && \
108+ cp -r /tmp/mlio/build/third-party/lib/libtbb* /mlio-artifacts/ && \
109+ { cp /usr/local/lib/libmlio* /mlio-artifacts/ 2>/dev/null || true; } && \
110+ cp /tmp/mlio/src/mlio-py/dist/*.whl /mlio-artifacts/
111+
112+ # Main image
113+ FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}
114+
115+ ARG PYTHON_VERSION=3.10
116+ ARG PYARROW_VERSION=17.0.0
117+
118+ ENV DEBIAN_FRONTEND=noninteractive
119+
120+ # Install runtime dependencies (this list also includes build-essential, linux-libc-dev and python3.10-dev)
121+ RUN apt-get update && \
122+ apt-get -y upgrade && \
123+ apt-get -y install --no-install-recommends \
124+ curl git jq libatlas-base-dev nginx openjdk-8-jdk-headless unzip wget expat tzdata apparmor \
125+ libgstreamer1.0-0 libxml2 libsqlite3-0 software-properties-common ca-certificates lsb-release \
126+ build-essential linux-libc-dev && \
127+ # Add Apache Arrow repository and install the pinned Arrow 17.0.0 -dev packages
128+ wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
129+ apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
130+ apt-get update && \
131+ apt-get install -y -V libarrow-dev=17.0.0-1 libarrow-dataset-dev=17.0.0-1 libparquet-dev=17.0.0-1 libarrow-acero-dev=17.0.0-1 && \
132+ # Add deadsnakes PPA for Python 3.10
133+ add-apt-repository ppa:deadsnakes/ppa && \
134+ apt-get update && \
135+ apt-get -y install --no-install-recommends \
136+ python3.10 python3.10-distutils python3.10-dev && \
137+ update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
138+ curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \
139+ apt-get clean && \
140+ rm -rf /var/lib/apt/lists/*
141+
142+ RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \
143+ dpkg-reconfigure --frontend noninteractive tzdata
144+
145+ # Install uv for fast Python package management
146+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
147+ mv /root/.local/bin/uv /usr/local/bin/uv
148+
149+ ENV PATH=/usr/local/bin:${PATH}
150+ ENV PIP_ROOT_USER_ACTION=ignore
151+
152+ # Copy MLIO wheel, TBB libraries, and MLIO shared libraries from builder stage
153+ COPY --from=mlio-builder /mlio-artifacts/*.whl /tmp/
154+ COPY --from=mlio-builder /mlio-artifacts/libtbb* /usr/local/lib/
155+ COPY --from=mlio-builder /mlio-artifacts/libmlio* /usr/local/lib/
156+
157+ # Install MLIO wheel
158+ RUN uv pip install --system /tmp/*.whl && \
159+ rm /tmp/*.whl
159160
160161# Copy compiled SQLite from builder stage
161162COPY --from=sqlite-builder /usr/local/bin/sqlite3 /usr/local/bin/sqlite3
0 commit comments