add Docker images and run GPU tests on Beaker (allenai#14)
* add image for gantry

* add make target for gantry testing

* GPU tests

* fix

* setup beaker

* fixes

* fix

* try fix

* try fix again

* try with direct clone

* try https

* try with custom GH token

* try again

* try again

* try again with default gh token

* fix

* fix gpu tests

* fix

* fix

* clean up
epwalsh authored Mar 2, 2023
1 parent ba20a85 commit 7b3a1a7
Showing 12 changed files with 250 additions and 8 deletions.
7 changes: 7 additions & 0 deletions .dockerignore
@@ -0,0 +1,7 @@
.git
.github
.mypy_cache
.pytest_cache
.venv
__pycache__
*.egg-info
48 changes: 48 additions & 0 deletions .github/workflows/main.yml
@@ -101,6 +101,54 @@ jobs:
          . .venv/bin/activate
          pip uninstall -y dolma
  gpu_tests:
    name: GPU Tests
    runs-on: ubuntu-latest
    env:
      BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
      BEAKER_IMAGE: dolma-test
      BEAKER_WORKSPACE: ai2/llm-testing
    steps:
      - name: Determine current commit SHA (pull request)
        if: github.event_name == 'pull_request'
        run: |
          echo "COMMIT_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
      - name: Determine current commit SHA (push)
        if: github.event_name != 'pull_request'
        run: |
          echo "COMMIT_SHA=$GITHUB_SHA" >> $GITHUB_ENV
      - name: GPU Tests
        uses: allenai/[email protected]
        if: env.BEAKER_TOKEN != ''
        with:
          spec: |
            version: v2
            description: GPU Tests
            tasks:
              - name: tests
                image:
                  beaker: ${{ env.BEAKER_IMAGE }}
                context:
                  priority: preemptible
                resources:
                  gpuCount: 1
                envVars:
                  - name: COMMIT_SHA
                    value: ${{ env.COMMIT_SHA }}
                  - name: GITHUB_TOKEN
                    value: ${{ secrets.GITHUB_TOKEN }}
                  - name: CUDA_LAUNCH_BLOCKING
                    value: "1"
                  - name: TOKENIZERS_PARALLELISM
                    value: "false"
                command: ["/entrypoint.sh", "pytest", "-v", "-m", "gpu", "tests/"]
                result:
                  path: /unused
          token: ${{ env.BEAKER_TOKEN }}
          workspace: ${{ env.BEAKER_WORKSPACE }}

  release:
    name: Release
    runs-on: ubuntu-latest
14 changes: 14 additions & 0 deletions Dockerfile.gantry
@@ -0,0 +1,14 @@
# Defines a CUDA-enabled Docker image suitable for running this project's experiments
# via beaker-gantry.
#
# To build and push the image to Beaker, run 'make gantry-image'.
# To test the image after pushing to Beaker, run 'make gantry-test'.

FROM ghcr.io/allenai/pytorch:1.13.1-cuda11.7-python3.10

WORKDIR /stage

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

WORKDIR /app/dolma
14 changes: 14 additions & 0 deletions Dockerfile.test
@@ -0,0 +1,14 @@
# Defines a CUDA-enabled Docker image suitable for running GPU tests on Beaker
# via the GitHub Action 'beaker-run-action'.
# The image needs to exist on Beaker for the tests to work.
#
# To build and push the image to Beaker, run 'make test-image'.

FROM ghcr.io/allenai/pytorch:1.13.1-cuda11.7-python3.10

COPY scripts/test_entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

WORKDIR /testing

ENTRYPOINT ["/entrypoint.sh"]
67 changes: 67 additions & 0 deletions Makefile
@@ -1,7 +1,74 @@
# If you update this, also update BEAKER_IMAGE in .github/workflows/main.yml
IMAGE_NAME_BASE = dolma
# If you update this, also update BEAKER_WORKSPACE in .github/workflows/main.yml
BEAKER_WORKSPACE = "ai2/llm-testing"

BEAKER_USER = $(shell beaker account whoami --format=json | jq -r '.[0].name')
GANTRY_IMAGE = $(shell beaker workspace images $(BEAKER_WORKSPACE) --format=json | jq -r -c '.[] | select( .name == "$(IMAGE_NAME_BASE)-gantry" ) | .fullName')
TEST_IMAGE = $(shell beaker workspace images $(BEAKER_WORKSPACE) --format=json | jq -r -c '.[] | select( .name == "$(IMAGE_NAME_BASE)-test" ) | .fullName')

.PHONY : run-checks
run-checks :
	isort --check .
	black --check .
	flake8 .
	mypy .
	CUDA_VISIBLE_DEVICES='' pytest -v --color=yes tests/

.PHONY : beaker-info
beaker-info :
	@echo "Beaker user: $(BEAKER_USER)"
	@echo "Gantry image: $(GANTRY_IMAGE)"
	@echo "Testing image: $(TEST_IMAGE)"

.PHONY : gantry-image
gantry-image :
	docker build -f Dockerfile.gantry -t $(IMAGE_NAME_BASE)-gantry .
	beaker image create $(IMAGE_NAME_BASE)-gantry --name $(IMAGE_NAME_BASE)-gantry-tmp --workspace $(BEAKER_WORKSPACE)
	beaker image delete $(GANTRY_IMAGE) || true
	beaker image rename $(BEAKER_USER)/$(IMAGE_NAME_BASE)-gantry-tmp $(IMAGE_NAME_BASE)-gantry

.PHONY : test-image
test-image :
	docker build -f Dockerfile.test -t $(IMAGE_NAME_BASE)-test .
	beaker image create $(IMAGE_NAME_BASE)-test --name $(IMAGE_NAME_BASE)-test-tmp --workspace $(BEAKER_WORKSPACE)
	beaker image delete $(TEST_IMAGE) || true
	beaker image rename $(BEAKER_USER)/$(IMAGE_NAME_BASE)-test-tmp $(IMAGE_NAME_BASE)-test

.PHONY : show-test-image
show-test-image :
	@echo $(TEST_IMAGE)

.PHONY : show-beaker-workspace
show-beaker-workspace :
	@echo $(BEAKER_WORKSPACE)

.PHONY : gantry-test
gantry-test :
	gantry run \
		--workspace "$(BEAKER_WORKSPACE)" \
		--priority "preemptible" \
		--beaker-image "$(GANTRY_IMAGE)" \
		--gpus 1 \
		--description "Test run" \
		--cluster ai2/allennlp-cirrascale \
		--cluster ai2/aristo-cirrascale \
		--cluster ai2/mosaic-cirrascale \
		--cluster ai2/mosaic-cirrascale-a100 \
		--cluster ai2/prior-cirrascale \
		--cluster ai2/s2-cirrascale \
		--cluster ai2/general-cirrascale \
		--cluster ai2/general-cirrascale-a100-80g-ib \
		--allow-dirty \
		--venv base \
		--timeout -1 \
		--yes \
		-- make check-cuda-install

.PHONY : check-cpu-install
check-cpu-install :
	@python -c 'from dolma import check_install; check_install(cuda=False)'

.PHONY : check-cuda-install
check-cuda-install :
	@python -c 'from dolma import check_install; check_install(cuda=True)'
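
Taken together, the new targets give a simple image-management workflow. A sketch of typical usage (assuming the `docker`, `beaker`, `gantry`, and `jq` CLIs are installed and authenticated against the `ai2/llm-testing` workspace):

```
# Show the Beaker user and the image names the Makefile resolves to.
make beaker-info

# Rebuild and push both Beaker images, e.g. after changing requirements.txt
# or one of the Dockerfiles.
make gantry-image
make test-image

# Submit a throwaway gantry job that runs `make check-cuda-install` on a GPU node.
make gantry-test

# Local CPU-only sanity check of the install.
make check-cpu-install
```

The `*-image` targets push under a temporary name, delete the old image, and then rename, so the stable names `dolma-gantry` and `dolma-test` always point at the latest build.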
8 changes: 8 additions & 0 deletions README.md
@@ -1 +1,9 @@
# DOLMA: Delightful Open Language Model from AI2

## Setup

After cloning this repository, first install the latest [PyTorch](https://pytorch.org) according to the official instructions for your environment. Then install this package and its remaining dependencies by running:

```
pip install -e .[dev] --config-settings editable_mode=compat
```
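
To confirm the install worked, the `check_install` helper added in this commit can be run directly; it is the same check the new `make check-cpu-install` target wraps:

```
python -c 'from dolma import check_install; check_install(cuda=False)'
```

On a machine with a GPU, passing `cuda=True` additionally asserts that CUDA is visible to PyTorch.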
14 changes: 13 additions & 1 deletion dolma/__init__.py
@@ -2,4 +2,16 @@
from .model import DolmaGPT, DolmaGPTOutput
from .tokenizer import Tokenizer, TruncationDirection

__all__ = ["Config", "Tokenizer", "TruncationDirection", "DolmaGPT", "DolmaGPTOutput"]
__all__ = ["Config", "Tokenizer", "TruncationDirection", "DolmaGPT", "DolmaGPTOutput", "check_install"]


def check_install(cuda: bool = False):
    import torch

    from .version import VERSION

    if cuda:
        assert torch.cuda.is_available(), "CUDA is not available!"
        print("CUDA available")

    print(f"DOLMA v{VERSION} installed")
2 changes: 1 addition & 1 deletion dolma/model.py
@@ -146,7 +146,7 @@ def __init__(self, config: Config):
        self.transformer.update(
            {"wpe": nn.Embedding(config.max_sequence_length, config.d_model, device=config.init_device)}
        )
        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False, device=config.init_device)
        if self.config.init_device != "meta":
            self.apply(self.param_init_fn)

1 change: 1 addition & 0 deletions pytest.ini
@@ -4,4 +4,5 @@ python_classes = Test* *Test
log_format = %(asctime)s - %(levelname)s - %(name)s - %(message)s
log_level = DEBUG
markers =
    gpu: marks tests that need GPUs
filterwarnings =
3 changes: 3 additions & 0 deletions requirements.txt
@@ -1,3 +1,5 @@
# NOTE: when upgrading requirements here you may have to rebuild and push some
# Docker images. See each Dockerfile for details on how to do that.
numpy
torch
mosaicml
@@ -6,3 +8,4 @@ tokenizers
click
rich
cached-path
beaker-gantry
38 changes: 38 additions & 0 deletions scripts/test_entrypoint.sh
@@ -0,0 +1,38 @@
#!/bin/bash

# Exit script if any commands fail.
set -e
set -o pipefail

# Check that the environment variables have been set correctly
for env_var in GITHUB_TOKEN COMMIT_SHA; do
    if [[ -z "${!env_var}" ]]; then
        echo >&2 "error: required environment variable $env_var is empty"
        exit 1
    fi
done

# Initialize conda for bash.
# See https://stackoverflow.com/a/58081608/4151392
eval "$(command conda 'shell.bash' 'hook' 2> /dev/null)"

# Install GitHub CLI.
conda install gh --channel conda-forge

# Configure git to use GitHub CLI as a credential helper so that we can clone private repos.
gh auth setup-git

# Clone the repository and check out the commit under test.
mkdir LLM && cd LLM
gh repo clone allenai/LLM .
git checkout --quiet "$COMMIT_SHA"

# Install dependencies.
pip install --upgrade pip
pip install --no-cache-dir '.[dev]'

# Create directory for results.
mkdir -p /results

# Execute the arguments to this script as commands themselves, piping output into a log file.
exec "$@" 2>&1 | tee /results/out.log
42 changes: 36 additions & 6 deletions tests/model_test.py
@@ -6,11 +6,38 @@
from dolma.data import DataCollator, PaddingDirection


@pytest.mark.parametrize("alibi", [pytest.param(True, id="alibi-emb"), pytest.param(False, id="posit-emb")])
def test_forward(config: Config, tokenizer: Tokenizer, alibi: bool):
@pytest.mark.parametrize(
    "alibi, cuda",
    [
        pytest.param(True, False, id="alibi-emb-cpu"),
        pytest.param(False, False, id="posit-emb-cpu"),
        pytest.param(
            True,
            True,
            id="alibi-emb-cuda",
            marks=(
                pytest.mark.gpu,
                pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Requires CUDA devices"),
            ),
        ),
        pytest.param(
            False,
            True,
            id="posit-emb-cuda",
            marks=(
                pytest.mark.gpu,
                pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Requires CUDA devices"),
            ),
        ),
    ],
)
def test_forward(config: Config, tokenizer: Tokenizer, alibi: bool, cuda: bool):
    torch.manual_seed(0)

    config.alibi = alibi
    if cuda:
        config.init_device = "cuda"

    model = DolmaGPT(config).eval()

    input1 = tokenizer.encode("My name is DOLMA!")
@@ -21,11 +48,14 @@ def test_forward(config: Config, tokenizer: Tokenizer, alibi: bool):
            {"input_ids": input2, "attention_mask": [1.0] * len(input2)},
        ]
    )
    batch_inputs = {  # type: ignore
        k: v.to(device=config.device) if isinstance(v, torch.Tensor) else v for k, v in batch_inputs.items()
    }

    # Check that logits from individual inputs are equal to logits from batch.
    with torch.inference_mode():
        output1 = model(torch.tensor(input1).unsqueeze(0))
        output2 = model(torch.tensor(input2).unsqueeze(0))
        output1 = model(torch.tensor(input1, device=config.device).unsqueeze(0))
        output2 = model(torch.tensor(input2, device=config.device).unsqueeze(0))
        batch_output = model(**batch_inputs)

    torch.testing.assert_close(output1.logits[0][: len(input1)], batch_output.logits[0][: len(input1)])
@@ -40,7 +70,7 @@ def test_backward(config: Config, tokenizer: Tokenizer, alibi: bool):
    model = DolmaGPT(config).train()

    # Forward pass to get logits.
    input_ids = torch.tensor(tokenizer.encode("My name is DOLMA!")).unsqueeze(0)
    input_ids = torch.tensor(tokenizer.encode("My name is DOLMA!"), device=config.device).unsqueeze(0)
    logits = model(input_ids).logits

    # Compute loss.
@@ -55,7 +85,7 @@
    for name, parameter in model.named_parameters():
        if parameter.requires_grad:
            assert parameter.grad is not None
            zeros = torch.zeros(parameter.size())
            zeros = torch.zeros(parameter.size(), device=config.device)
            if (parameter.grad == zeros).all():
                raise RuntimeError(f"{name} has a zero gradient!")
            else:
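
With the `gpu` marker registered in `pytest.ini`, the new CUDA parametrizations can also be exercised or excluded locally without going through Beaker. A sketch, assuming a checkout with the `dev` extras installed:

```
# Run only the GPU-marked tests -- the same selection the Beaker task uses.
# They skip themselves automatically when no CUDA device is visible.
pytest -v -m gpu tests/

# Run everything except the GPU tests, e.g. on a laptop.
pytest -v -m "not gpu" tests/
```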
