From 0c6c92f8e35752bd057f3465beaca5f247d88ac9 Mon Sep 17 00:00:00 2001 From: Nelson Chen Date: Fri, 17 Nov 2023 10:23:49 -0800 Subject: [PATCH] Add ENV NVIDIA_DRIVER_CAPABILITIES=all to permit Video SDK transcoding, 3D rendering, and X11 OpenGL Display (#1288) * Add ENV NVIDIA_DRIVER_CAPABILITIES=all * Document NVIDIA_DRIVER_CAPABILITIES Signed-off-by: Nelson Chen --- docs/environment.md | 10 ++ pkg/dockerfile/generator.go | 3 +- pkg/dockerfile/generator_test.go | 270 ------------------------------- 3 files changed, 12 insertions(+), 271 deletions(-) delete mode 100644 pkg/dockerfile/generator_test.go diff --git a/docs/environment.md b/docs/environment.md index f45cec085b..099cb46b7a 100644 --- a/docs/environment.md +++ b/docs/environment.md @@ -106,3 +106,13 @@ This section lists the relevant environment variables for Cog's built-in queue w This determines whether to enable or disable the usage of OpenTelemetry. OpenTelemetry (OTEL) is an open-source technology used to capture and measure metrics, traces, and logs. It is used for Cog's queue worker. This can either be set / unset in order to determine whether or not to enable OpenTelemtry. If it is set, then Cog will handle the necessary setup for OpenTelemtry. Otherwise, OpenTelemetry calls will be treated as no-ops. If OpenTelemetry is enabled, the OTLP exporter may also need to be [configured via environment variables](https://opentelemetry-python.readthedocs.io/en/latest/sdk/environment_variables.html). + + +## Docker Image + +### `NVIDIA_DRIVER_CAPABILITIES` +This [controls which Nvidia driver libraries/binaries will be mounted inside the container](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#driver-capabilities). The generated Docker image will set this to `all` which will mount all Nvidia driver libraries/binaries inside the container beyond the default `utility` and `compute` capabilities. + +`graphics`, `video`, and `display` add additional interesting capabilities beyond the default that may be useful for some models running in the container. `graphics` is required for accelerated OpenGL support. `video` is required for accelerated video encoding/decoding. `display` is required for accelerated X11 support. + +This is set to `all`, is non-configurable, and is documented here as an environment variable of interest. Setting or changing this during runtime inside the image will have no effect. diff --git a/pkg/dockerfile/generator.go b/pkg/dockerfile/generator.go index 4ef2a2428d..b36c7e6796 100644 --- a/pkg/dockerfile/generator.go +++ b/pkg/dockerfile/generator.go @@ -133,7 +133,8 @@ func (g *Generator) baseImage() (string, error) { func (g *Generator) preamble() string { return `ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin` +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin +ENV NVIDIA_DRIVER_CAPABILITIES=all` } func (g *Generator) installTini() string { diff --git a/pkg/dockerfile/generator_test.go b/pkg/dockerfile/generator_test.go deleted file mode 100644 index f3b90272c9..0000000000 --- a/pkg/dockerfile/generator_test.go +++ /dev/null @@ -1,270 +0,0 @@ -package dockerfile - -import ( - "fmt" - "os" - "path" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/sieve-data/cog/pkg/config" -) - -func testTini() string { - return `RUN --mount=type=cache,target=/var/cache/apt set -eux; \ -apt-get update -qq; \ -apt-get install -qqy --no-install-recommends curl; \ -rm -rf /var/lib/apt/lists/*; \ -TINI_VERSION=v0.19.0; \ -TINI_ARCH="$(dpkg --print-architecture)"; \ -curl -sSL -o /sbin/tini "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TINI_ARCH}"; \ -chmod +x /sbin/tini -ENTRYPOINT ["/sbin/tini", "--"] -` -} - -func testInstallCog(relativeTmpDir string) string { - return fmt.Sprintf(`COPY %s/cog-0.0.1.dev-py3-none-any.whl /tmp/cog-0.0.1.dev-py3-none-any.whl -RUN --mount=type=cache,target=/root/.cache/pip pip install /tmp/cog-0.0.1.dev-py3-none-any.whl`, relativeTmpDir) -} - -func testInstallPython(version string) string { - return fmt.Sprintf(`ENV PATH="/root/.pyenv/shims:/root/.pyenv/bin:$PATH" -RUN --mount=type=cache,target=/var/cache/apt apt-get update -qq && apt-get install -qqy --no-install-recommends \ - make \ - build-essential \ - libssl-dev \ - zlib1g-dev \ - libbz2-dev \ - libreadline-dev \ - libsqlite3-dev \ - wget \ - curl \ - llvm \ - libncurses5-dev \ - libncursesw5-dev \ - xz-utils \ - tk-dev \ - libffi-dev \ - liblzma-dev \ - git \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* -RUN curl -s -S -L https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer | bash && \ - git clone https://github.com/momo-lab/pyenv-install-latest.git "$(pyenv root)"/plugins/pyenv-install-latest && \ - pyenv install-latest "%s" && \ - pyenv global $(pyenv install-latest --print "%s") && \ - pip install "wheel<1" -`, version, version) -} - -func TestGenerateEmptyCPU(t *testing.T) { - tmpDir := t.TempDir() - - conf, err := config.FromYAML([]byte(` -build: - gpu: false -predict: predict.py:Predictor -`)) - require.NoError(t, err) - require.NoError(t, conf.ValidateAndComplete("")) - - gen, err := NewGenerator(conf, tmpDir) - require.NoError(t, err) - actual, err := gen.Generate() - require.NoError(t, err) - - expected := `# syntax = docker/dockerfile:1.2 -FROM python:3.8 -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHONUNBUFFERED=1 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin -` + testTini() + testInstallCog(gen.relativeTmpDir) + ` -WORKDIR /src -EXPOSE 5000 -CMD ["python", "-m", "cog.server.http"] -COPY . /src` - - require.Equal(t, expected, actual) -} - -func TestGenerateEmptyGPU(t *testing.T) { - tmpDir := t.TempDir() - - conf, err := config.FromYAML([]byte(` -build: - gpu: true -predict: predict.py:Predictor -`)) - require.NoError(t, err) - require.NoError(t, conf.ValidateAndComplete("")) - gen, err := NewGenerator(conf, tmpDir) - require.NoError(t, err) - actual, err := gen.Generate() - require.NoError(t, err) - - expected := `# syntax = docker/dockerfile:1.2 -FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu20.04 -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHONUNBUFFERED=1 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin -` + testTini() + testInstallPython("3.8") + testInstallCog(gen.relativeTmpDir) + ` -WORKDIR /src -EXPOSE 5000 -CMD ["python", "-m", "cog.server.http"] -COPY . /src` - - require.Equal(t, expected, actual) -} - -func TestGenerateFullCPU(t *testing.T) { - tmpDir := t.TempDir() - - conf, err := config.FromYAML([]byte(` -build: - gpu: false - system_packages: - - ffmpeg - - cowsay - python_packages: - - torch==1.5.1 - - pandas==1.2.0.12 - run: - - "cowsay moo" -predict: predict.py:Predictor -`)) - require.NoError(t, err) - require.NoError(t, conf.ValidateAndComplete("")) - - gen, err := NewGenerator(conf, tmpDir) - require.NoError(t, err) - actual, err := gen.Generate() - require.NoError(t, err) - - expected := `# syntax = docker/dockerfile:1.2 -FROM python:3.8 -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHONUNBUFFERED=1 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin -` + testTini() + testInstallCog(gen.relativeTmpDir) + ` -RUN --mount=type=cache,target=/var/cache/apt apt-get update -qq && apt-get install -qqy ffmpeg cowsay && rm -rf /var/lib/apt/lists/* -COPY ` + gen.relativeTmpDir + `/requirements.txt /tmp/requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip pip install -r /tmp/requirements.txt -RUN cowsay moo -WORKDIR /src -EXPOSE 5000 -CMD ["python", "-m", "cog.server.http"] -COPY . /src` - require.Equal(t, expected, actual) - - requirements, err := os.ReadFile(path.Join(gen.tmpDir, "requirements.txt")) - require.NoError(t, err) - - require.Equal(t, `--find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.5.1+cpu -pandas==1.2.0.12`, string(requirements)) -} - -func TestGenerateFullGPU(t *testing.T) { - tmpDir := t.TempDir() - - conf, err := config.FromYAML([]byte(` -build: - gpu: true - system_packages: - - ffmpeg - - cowsay - python_packages: - - torch==1.5.1 - - pandas==1.2.0.12 - run: - - "cowsay moo" -predict: predict.py:Predictor -`)) - require.NoError(t, err) - require.NoError(t, conf.ValidateAndComplete("")) - - gen, err := NewGenerator(conf, tmpDir) - require.NoError(t, err) - actual, err := gen.Generate() - require.NoError(t, err) - - expected := `# syntax = docker/dockerfile:1.2 -FROM nvidia/cuda:10.2-cudnn8-devel-ubuntu18.04 -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHONUNBUFFERED=1 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin -` + testTini() + - testInstallPython("3.8") + - testInstallCog(gen.relativeTmpDir) + ` -RUN --mount=type=cache,target=/var/cache/apt apt-get update -qq && apt-get install -qqy ffmpeg cowsay && rm -rf /var/lib/apt/lists/* -COPY ` + gen.relativeTmpDir + `/requirements.txt /tmp/requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip pip install -r /tmp/requirements.txt -RUN cowsay moo -WORKDIR /src -EXPOSE 5000 -CMD ["python", "-m", "cog.server.http"] -COPY . /src` - - require.Equal(t, expected, actual) - - requirements, err := os.ReadFile(path.Join(gen.tmpDir, "requirements.txt")) - require.NoError(t, err) - require.Equal(t, `torch==1.5.1 -pandas==1.2.0.12`, string(requirements)) -} - -// pre_install is deprecated but supported for backwards compatibility -func TestPreInstall(t *testing.T) { - tmpDir := t.TempDir() - - conf, err := config.FromYAML([]byte(` -build: - system_packages: - - cowsay - pre_install: - - "cowsay moo" -`)) - require.NoError(t, err) - require.NoError(t, conf.ValidateAndComplete("")) - - gen, err := NewGenerator(conf, tmpDir) - require.NoError(t, err) - actual, err := gen.Generate() - require.NoError(t, err) - - expected := `# syntax = docker/dockerfile:1.2 -FROM python:3.8 -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHONUNBUFFERED=1 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin -` + testTini() + testInstallCog(gen.relativeTmpDir) + ` -RUN --mount=type=cache,target=/var/cache/apt apt-get update -qq && apt-get install -qqy cowsay && rm -rf /var/lib/apt/lists/* -RUN cowsay moo -WORKDIR /src -EXPOSE 5000 -CMD ["python", "-m", "cog.server.http"] -COPY . /src` - require.Equal(t, expected, actual) - -} - -func TestPythonRequirements(t *testing.T) { - tmpDir := t.TempDir() - err := os.WriteFile(path.Join(tmpDir, "my-requirements.txt"), []byte("torch==1.0.0"), 0o644) - require.NoError(t, err) - conf, err := config.FromYAML([]byte(` -build: - python_requirements: "my-requirements.txt" -`)) - require.NoError(t, err) - require.NoError(t, conf.ValidateAndComplete(tmpDir)) - - gen, err := NewGenerator(conf, tmpDir) - require.NoError(t, err) - actual, err := gen.Generate() - require.NoError(t, err) - fmt.Println(actual) - require.Contains(t, actual, `pip install -r /tmp/requirements.txt`) -}