Add Dockerfiles for inference engines #217
Changes from all commits
@@ -0,0 +1,7 @@

```dockerfile
# syntax=docker/dockerfile:1

FROM scratch
ARG TARGETOS
ARG TARGETARCH
ARG ACCEL
COPY --from=release-artifacts /com.docker.llama-server.native.$TARGETOS.$ACCEL.$TARGETARCH /com.docker.llama-server.native.$TARGETOS.$ACCEL.$TARGETARCH
```
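This packaging stage is normally driven by Buildx, which sets `TARGETOS`/`TARGETARCH` per platform; the `release-artifacts` context has to be supplied by the caller. The PR does not show the invocation, so the following is only an illustrative sketch (the context path, `ACCEL` value, and tag are assumptions):

```bash
# Hypothetical invocation; the real CI pipeline may differ.
docker buildx build \
  --build-arg ACCEL=cpu \
  --build-context release-artifacts=./release-artifacts \
  --platform linux/amd64,linux/arm64 \
  -t llama-server-artifacts .
```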
@@ -0,0 +1,90 @@

```makefile
ifeq ($(OS),Windows_NT)
    DETECTED_OS := Windows
else
    UNAME_S := $(shell uname -s)
    ifeq ($(UNAME_S),Linux)
        DETECTED_OS := Linux
    endif
    ifeq ($(UNAME_S),Darwin)
        DETECTED_OS := macOS
    endif
endif

BUILD_DIR := build
INSTALL_DIR := install
NATIVE_DIR := native

.PHONY: build clean install-deps install-dir

build: install-deps
ifeq ($(DETECTED_OS),macOS)
	@echo "Building for macOS..."
	@echo "Configuring CMake..."
	cmake -B $(BUILD_DIR) \
		-DCMAKE_CXX_COMPILER=clang++ \
		-DCMAKE_C_COMPILER=clang \
		-DCMAKE_BUILD_TYPE=Release \
		-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3 \
		-DCMAKE_MACOSX_RPATH=ON \
		-DCMAKE_INSTALL_RPATH='@executable_path/../lib' \
		-DGGML_NATIVE=OFF \
		-DGGML_OPENMP=OFF \
		-DLLAMA_CURL=OFF \
		-GNinja \
		-S $(NATIVE_DIR)
	@echo "Building..."
	cmake --build $(BUILD_DIR) --config Release
	@echo "Installing..."
	cmake --install $(BUILD_DIR) \
		--config Release \
		--prefix $(INSTALL_DIR)
	@echo "Cleaning install directory..."
	rm -rf $(INSTALL_DIR)/lib/cmake
	rm -rf $(INSTALL_DIR)/lib/pkgconfig
	rm -rf $(INSTALL_DIR)/include
	@echo "Build complete! Binaries are in $(INSTALL_DIR)"
else ifeq ($(DETECTED_OS),Linux)
	@echo "Linux build not implemented yet"
	@exit 1
else ifeq ($(DETECTED_OS),Windows)
	@echo "Windows build not implemented yet"
	@exit 1
else
	@echo "Unsupported OS: $(DETECTED_OS)"
	@exit 1
endif

install-deps:
ifeq ($(DETECTED_OS),macOS)
	@echo "Installing build dependencies for macOS..."
	@if ! command -v ninja >/dev/null 2>&1; then \
		echo "Installing Ninja..."; \
		brew install ninja; \
	else \
		echo "Ninja already installed"; \
	fi
else ifeq ($(DETECTED_OS),Linux)
	@echo "Linux dependency installation not implemented yet"
	@exit 1
else ifeq ($(DETECTED_OS),Windows)
	@echo "Windows dependency installation not implemented yet"
	@exit 1
else
	@echo "Unsupported OS: $(DETECTED_OS)"
	@exit 1
endif

clean:
	rm -rf $(BUILD_DIR)
	rm -rf $(INSTALL_DIR)

install-dir:
	@echo "$(INSTALL_DIR)"

help:
	@echo "Available targets:"
	@echo "  build        - Build llama.cpp (macOS only for now)"
	@echo "  install-deps - Install build dependencies"
	@echo "  install-dir  - Print install directory path"
	@echo "  clean        - Clean build artifacts"
	@echo "  help         - Show this help"
```
@@ -0,0 +1,6 @@

```markdown
# llama.cpp inference runtime

This repo contains implementations of the llama.cpp inference runtime.

* native/ - contains an implementaion based on `llama.cpp`'s native server
```

issue (typo): 'implementaion' should be 'implementation' in the directory description.

Suggested change:

```diff
-* native/ - contains an implementaion based on `llama.cpp`'s native server
+* native/ - contains an implementation based on `llama.cpp`'s native server
```
@@ -0,0 +1 @@

```
build/
```
@@ -0,0 +1,25 @@

```cmake
cmake_minimum_required(VERSION 3.13)

project(
    com.docker.llama-server.native
    DESCRIPTION "DD inference server, based on llama.cpp native server"
    LANGUAGES C CXX
)

option(DDLLAMA_BUILD_SERVER "Build the DD llama.cpp server executable" ON)
option(DDLLAMA_BUILD_UTILS "Build utilities, e.g. nv-gpu-info" OFF)
set(DDLLAMA_PATCH_COMMAND "patch" CACHE STRING "patch command")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if (DDLLAMA_BUILD_SERVER)
    set(LLAMA_BUILD_COMMON ON)
    add_subdirectory(vendor/llama.cpp)
    add_subdirectory(vendor/llama.cpp/tools/mtmd)
    add_subdirectory(src/server)
endif()

if (WIN32 AND DDLLAMA_BUILD_UTILS)
    add_subdirectory(src/nv-gpu-info)
endif()
```
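The `DDLLAMA_BUILD_UTILS` option gates the Windows-only nv-gpu-info utility; presumably it is switched on at configure time along these lines (hypothetical invocation, not shown in the PR):

```bash
# Hypothetical: enable the nv-gpu-info utility on a Windows build host.
cmake -B build -DDDLLAMA_BUILD_UTILS=ON
cmake --build build --config Release
```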
@@ -0,0 +1,10 @@

```markdown
# Native llama-server for DD

## Building

    cmake -B build
    cmake --build build --parallel 8 --config Release

## Running

    DD_INF_UDS=<socket path> ./build/bin/com.docker.llama-server --model <path to model>
```
@@ -0,0 +1,53 @@

```dockerfile
# syntax=docker/dockerfile:1

ARG CUDA_VERSION=12.9.0
ARG CUDA_IMAGE_VARIANT=ubuntu24.04

FROM nvidia/cuda:${CUDA_VERSION}-devel-${CUDA_IMAGE_VARIANT} AS builder

ARG TARGETARCH
ARG CUDA_IMAGE_VARIANT

COPY native/install-clang.sh .
RUN ./install-clang.sh "${CUDA_IMAGE_VARIANT}"

WORKDIR /llama-server

COPY .git .git
COPY native/CMakeLists.txt .
COPY native/src src
COPY native/vendor vendor

# Fix submodule .git file to point to correct location in container
RUN echo "gitdir: ../../.git/modules/native/vendor/llama.cpp" > vendor/llama.cpp/.git && \
    sed -i 's|worktree = ../../../../../native/vendor/llama.cpp|worktree = /llama-server/vendor/llama.cpp|' .git/modules/native/vendor/llama.cpp/config

ENV CC=/usr/bin/clang-20
ENV CXX=/usr/bin/clang++-20
RUN echo "-B build \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=ON \
    -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON \
    -DGGML_NATIVE=OFF \
    -DGGML_OPENMP=OFF \
    -DGGML_CUDA=ON \
    -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
    -DLLAMA_CURL=OFF \
    -GNinja \
    -S ." > cmake-flags
RUN cmake $(cat cmake-flags)
RUN cmake --build build --config Release
RUN cmake --install build --config Release --prefix install
```
Comment on lines +27 to +41: To optimize the Docker image by reducing the number of layers, you can combine the CMake configure, build, and install commands into a single `RUN` instruction.
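A sketch of what that consolidation might look like (flag values are unchanged from the diff above; note it drops the `cmake-flags` file indirection, which the CPU variant appears to rely on for per-architecture flags):

```dockerfile
# Possible single-layer variant of the configure/build/install steps.
RUN cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_SHARED_LIBS=ON \
        -DGGML_BACKEND_DL=ON \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DGGML_NATIVE=OFF \
        -DGGML_OPENMP=OFF \
        -DGGML_CUDA=ON \
        -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
        -DLLAMA_CURL=OFF \
        -GNinja \
        -S . && \
    cmake --build build --config Release && \
    cmake --install build --config Release --prefix install
```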
```dockerfile
RUN rm install/bin/*.py
RUN rm -r install/lib/cmake
RUN rm -r install/lib/pkgconfig
RUN rm -r install/include
```
```dockerfile
FROM scratch AS final

ARG TARGETARCH
ARG CUDA_VERSION

COPY --from=builder /llama-server/install /com.docker.llama-server.native.linux.cuda$CUDA_VERSION.$TARGETARCH
```
@@ -0,0 +1,62 @@

```dockerfile
# syntax=docker/dockerfile:1

ARG BASE_IMAGE=ubuntu:25.10

FROM ${BASE_IMAGE} AS builder

ARG TARGETARCH

RUN apt-get update && apt-get install -y cmake ninja-build git build-essential curl
```
suggestion (performance): No cleanup of apt cache after installation. Add `rm -rf /var/lib/apt/lists/*` after installing packages to minimize image size and prevent outdated package lists.
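The attached suggestion block is not shown here; it presumably amounts to something like this sketch:

```diff
-RUN apt-get update && apt-get install -y cmake ninja-build git build-essential curl
+RUN apt-get update && apt-get install -y cmake ninja-build git build-essential curl && \
+    rm -rf /var/lib/apt/lists/*
```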
```dockerfile
COPY native/install-vulkan.sh .
RUN ./install-vulkan.sh

ENV VULKAN_SDK=/opt/vulkan
ENV PATH=$VULKAN_SDK/bin:$PATH
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig

WORKDIR /llama-server

COPY .git .git
COPY native/CMakeLists.txt .
COPY native/src src
COPY native/vendor vendor

# Fix submodule .git file to point to correct location in container
RUN echo "gitdir: ../../.git/modules/native/vendor/llama.cpp" > vendor/llama.cpp/.git && \
    sed -i 's|worktree = ../../../../../native/vendor/llama.cpp|worktree = /llama-server/vendor/llama.cpp|' .git/modules/native/vendor/llama.cpp/config

RUN echo "-B build \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_NATIVE=OFF \
    -DGGML_OPENMP=OFF \
    -DLLAMA_CURL=OFF \
    -DGGML_VULKAN=ON \
    -GNinja \
    -S ." > cmake-flags
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
        echo " -DBUILD_SHARED_LIBS=ON \
    -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON" >> cmake-flags; \
    elif [ "${TARGETARCH}" = "arm64" ]; then \
        echo " -DBUILD_SHARED_LIBS=OFF" >> cmake-flags; \
    else \
        echo "${TARGETARCH} is not supported"; \
        exit 1; \
    fi
RUN cmake $(cat cmake-flags)
RUN cmake --build build --config Release -j 4
RUN cmake --install build --config Release --prefix install
```
Comment on lines +31 to +51: To optimize the Docker image by reducing layers, the logic for setting CMake flags, configuring, building, and installing can be combined into a single `RUN` instruction.
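A sketch of how that might look while keeping the per-architecture flag selection (flag values are taken from the diff above; the shell structure is an assumption):

```dockerfile
# Possible single-layer variant: pick arch flags, configure, build, and install in one RUN.
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
        ARCH_FLAGS="-DBUILD_SHARED_LIBS=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON"; \
    elif [ "${TARGETARCH}" = "arm64" ]; then \
        ARCH_FLAGS="-DBUILD_SHARED_LIBS=OFF"; \
    else \
        echo "${TARGETARCH} is not supported"; exit 1; \
    fi && \
    cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=OFF \
        -DGGML_OPENMP=OFF \
        -DLLAMA_CURL=OFF \
        -DGGML_VULKAN=ON \
        -GNinja \
        -S . ${ARCH_FLAGS} && \
    cmake --build build --config Release -j 4 && \
    cmake --install build --config Release --prefix install
```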
```dockerfile
RUN rm install/bin/*.py
RUN rm -r install/lib/cmake
RUN rm -r install/lib/pkgconfig
RUN rm -r install/include
```
```dockerfile
FROM scratch AS final

ARG TARGETARCH

COPY --from=builder /llama-server/install /com.docker.llama-server.native.linux.cpu.$TARGETARCH
```
@@ -0,0 +1,24 @@

```bash
#!/bin/bash

main() {
    set -eux -o pipefail

    apt-get update && apt-get install -y cmake ninja-build git wget gnupg2
```
suggestion (bug_risk): Missing noninteractive flags may cause issues in CI environments. Add `DEBIAN_FRONTEND=noninteractive` to `apt-get install` to prevent prompts during automated builds.
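The suggestion block itself is not shown here; it presumably looks roughly like this:

```diff
-    apt-get update && apt-get install -y cmake ninja-build git wget gnupg2
+    apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y cmake ninja-build git wget gnupg2
```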
```bash
    wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc

    if [ "$1" = "ubuntu22.04" ]; then
        echo "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main" >> /etc/apt/sources.list
        echo "deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main" >> /etc/apt/sources.list
    elif [ "$1" = "ubuntu24.04" ]; then
        echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-20 main" >> /etc/apt/sources.list
        echo "deb-src http://apt.llvm.org/noble/ llvm-toolchain-noble-20 main" >> /etc/apt/sources.list
    else
        echo "distro variant not supported yet"
        exit 1
    fi

    apt-get update && apt-get install -y clang-20 lldb-20 lld-20
}

main "$@"
```
@@ -0,0 +1,10 @@

```bash
#!/bin/bash

main() {
    set -eux -o pipefail

    apt-get install -y glslc libvulkan-dev
```
suggestion (bug_risk): Missing `apt-get update` before install may cause package issues. Add `apt-get update` before installing packages to avoid outdated package lists.

It's a best practice to run `apt-get update` before `apt-get install` so that the package index is current.
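The suggestion blocks themselves are not shown here; the intended change is presumably along these lines:

```diff
-    apt-get install -y glslc libvulkan-dev
+    apt-get update && apt-get install -y glslc libvulkan-dev
```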
```bash
}

main "$@"
```
@@ -0,0 +1,12 @@

```cmake
set(TARGET com.docker.nv-gpu-info)

add_library(nvapi STATIC IMPORTED)
set_target_properties(nvapi PROPERTIES
    IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/vendor/nvapi/amd64/nvapi64.lib"
    INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/vendor/nvapi"
)

add_executable(${TARGET} nv-gpu-info.c)
install(TARGETS ${TARGET} RUNTIME)

target_link_libraries(${TARGET} nvapi)
```
@@ -0,0 +1,75 @@

```c
#include <stdio.h>
#include "nvapi.h"

#pragma comment(lib, "nvapi64.lib")
int main() {
    NvAPI_Status status = NVAPI_OK;
    NvAPI_ShortString error_str = { 0 };

    status = NvAPI_Initialize();
    if (status != NVAPI_OK) {
        NvAPI_GetErrorMessage(status, error_str);
        printf("Failed to initialise NVAPI: %s\n", error_str);
```
Suggested change:

```diff
-        printf("Failed to initialise NVAPI: %s\n", error_str);
+        printf("Failed to initialize NVAPI: %s\n", error_str);
```
The pattern for checking NvAPI_Status and printing an error message is repeated multiple times throughout the file. To improve code maintainability and reduce duplication, consider extracting this logic into a helper function. For example:

```c
#include <stdbool.h>  /* required for bool/true/false in C */

bool check_nvapi_status(NvAPI_Status status, const char* message) {
    if (status != NVAPI_OK) {
        NvAPI_ShortString error_str = { 0 };
        NvAPI_GetErrorMessage(status, error_str);
        printf("%s: %s\n", message, error_str);
        return false;
    }
    return true;
}
```
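A usage sketch of how main() might call such a helper (a hypothetical rewrite, not part of the reviewed file):

```c
#include <stdio.h>
#include <stdbool.h>
#include "nvapi.h"

/* Assumes check_nvapi_status() from the example above is in scope. */
int main(void) {
    if (!check_nvapi_status(NvAPI_Initialize(), "Failed to initialize NVAPI")) {
        return 1;
    }
    printf("NVAPI initialized\n");
    return 0;
}
```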
@@ -0,0 +1,5 @@

```dockerfile
# syntax=docker/dockerfile:1

ARG BASE_IMAGE

FROM ${BASE_IMAGE}
```
The destination path for the `COPY` command is the same as the source directory name. This will result in a deeply nested and cumbersome path for the executables inside the final image (e.g., /com.docker.llama-server.native.linux.cpu.amd64/bin/com.docker.llama-server). It's generally better to copy the contents of the artifact directory into a standard location like / to make the binaries easier to locate and execute. Adding a trailing slash to the source path will copy the directory's contents.
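A sketch of the shape this reviewer seems to be suggesting (the flat destination is an assumption; only the trailing slash and target differ from the PR's version):

```dockerfile
# Copy the artifact directory's contents to the image root instead of nesting them.
COPY --from=builder /llama-server/install/ /
```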
COPYcommand is the same as the source directory name. This will result in a deeply nested and cumbersome path for the executables inside the final image (e.g.,/com.docker.llama-server.native.linux.cpu.amd64/bin/com.docker.llama-server). It's generally better to copy the contents of the artifact directory into a standard location like/to make the binaries easier to locate and execute. Adding a trailing slash to the source path will copy the directory's contents.