diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..c7c3e97d4
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,7 @@
+.git
+.github
+.mypy_cache
+.pytest_cache
+.venv
+__pycache__
+*.egg-info
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d043de74e..d00e4c790 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -101,6 +101,54 @@ jobs:
           . .venv/bin/activate
           pip uninstall -y dolma
 
+  gpu_tests:
+    name: GPU Tests
+    runs-on: ubuntu-latest
+    env:
+      BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
+      BEAKER_IMAGE: dolma-test
+      BEAKER_WORKSPACE: ai2/llm-testing
+    steps:
+      - name: Determine current commit SHA (pull request)
+        if: github.event_name == 'pull_request'
+        run: |
+          echo "COMMIT_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
+
+      - name: Determine current commit SHA (push)
+        if: github.event_name != 'pull_request'
+        run: |
+          echo "COMMIT_SHA=$GITHUB_SHA" >> $GITHUB_ENV
+
+      - name: GPU Tests
+        uses: allenai/beaker-run-action@v1.2
+        if: env.BEAKER_TOKEN != ''
+        with:
+          spec: |
+            version: v2
+            description: GPU Tests
+            tasks:
+              - name: tests
+                image:
+                  beaker: ${{ env.BEAKER_IMAGE }}
+                context:
+                  priority: preemptible
+                resources:
+                  gpuCount: 1
+                envVars:
+                  - name: COMMIT_SHA
+                    value: ${{ env.COMMIT_SHA }}
+                  - name: GITHUB_TOKEN
+                    value: ${{ secrets.GITHUB_TOKEN }}
+                  - name: CUDA_LAUNCH_BLOCKING
+                    value: "1"
+                  - name: TOKENIZERS_PARALLELISM
+                    value: "false"
+                command: ["/entrypoint.sh", "pytest", "-v", "-m", "gpu", "tests/"]
+                result:
+                  path: /unused
+          token: ${{ env.BEAKER_TOKEN }}
+          workspace: ${{ env.BEAKER_WORKSPACE }}
+
   release:
     name: Release
     runs-on: ubuntu-latest
diff --git a/Dockerfile.gantry b/Dockerfile.gantry
new file mode 100644
index 000000000..c4ec30402
--- /dev/null
+++ b/Dockerfile.gantry
@@ -0,0 +1,14 @@
+# Defines a CUDA-enabled Docker image suitable for running this project's experiments
+# via beaker-gantry.
+#
+# To build and push the image to Beaker, run 'make gantry-image'.
+# To test the image after pushing to Beaker, run 'make gantry-test'.
+
+FROM ghcr.io/allenai/pytorch:1.13.1-cuda11.7-python3.10
+
+WORKDIR /stage
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+WORKDIR /app/dolma
diff --git a/Dockerfile.test b/Dockerfile.test
new file mode 100644
index 000000000..7cf8adc36
--- /dev/null
+++ b/Dockerfile.test
@@ -0,0 +1,14 @@
+# Defines a CUDA-enabled Docker image suitable for running GPU tests on Beaker
+# via the GitHub Action 'beaker-run-action'.
+# The image needs to exist on Beaker for the tests to work.
+#
+# To build and push the image to Beaker, run 'make test-image'.
+
+FROM ghcr.io/allenai/pytorch:1.13.1-cuda11.7-python3.10
+
+COPY scripts/test_entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+WORKDIR /testing
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/Makefile b/Makefile
index cfbcfe76a..bc477bb7e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,12 @@
+# If you update this, also update BEAKER_IMAGE in .github/workflows/main.yml
+IMAGE_NAME_BASE = dolma
+# If you update this, also update BEAKER_WORKSPACE in .github/workflows/main.yml
+BEAKER_WORKSPACE = "ai2/llm-testing"
+
+BEAKER_USER = $(shell beaker account whoami --format=json | jq -r '.[0].name')
+GANTRY_IMAGE = $(shell beaker workspace images $(BEAKER_WORKSPACE) --format=json | jq -r -c '.[] | select( .name == "$(IMAGE_NAME_BASE)-gantry" ) | .fullName')
+TEST_IMAGE = $(shell beaker workspace images $(BEAKER_WORKSPACE) --format=json | jq -r -c '.[] | select( .name == "$(IMAGE_NAME_BASE)-test" ) | .fullName')
+
 .PHONY : run-checks
 run-checks :
 	isort --check .
@@ -5,3 +14,61 @@ run-checks :
 	flake8 .
 	mypy .
 	CUDA_VISIBLE_DEVICES='' pytest -v --color=yes tests/
+
+.PHONY : beaker-info
+beaker-info :
+	@echo "Beaker user: $(BEAKER_USER)"
+	@echo "Gantry image: $(GANTRY_IMAGE)"
+	@echo "Testing image: $(TEST_IMAGE)"
+
+.PHONY : gantry-image
+gantry-image :
+	docker build -f Dockerfile.gantry -t $(IMAGE_NAME_BASE)-gantry .
+	beaker image create $(IMAGE_NAME_BASE)-gantry --name $(IMAGE_NAME_BASE)-gantry-tmp --workspace $(BEAKER_WORKSPACE)
+	beaker image delete $(GANTRY_IMAGE) || true
+	beaker image rename $(BEAKER_USER)/$(IMAGE_NAME_BASE)-gantry-tmp $(IMAGE_NAME_BASE)-gantry
+
+.PHONY : test-image
+test-image :
+	docker build -f Dockerfile.test -t $(IMAGE_NAME_BASE)-test .
+	beaker image create $(IMAGE_NAME_BASE)-test --name $(IMAGE_NAME_BASE)-test-tmp --workspace $(BEAKER_WORKSPACE)
+	beaker image delete $(TEST_IMAGE) || true
+	beaker image rename $(BEAKER_USER)/$(IMAGE_NAME_BASE)-test-tmp $(IMAGE_NAME_BASE)-test
+
+.PHONY : show-test-image
+show-test-image :
+	@echo $(TEST_IMAGE)
+
+.PHONY : show-beaker-workspace
+show-beaker-workspace :
+	@echo $(BEAKER_WORKSPACE)
+
+.PHONY : gantry-test
+gantry-test :
+	gantry run \
+		--workspace "$(BEAKER_WORKSPACE)" \
+		--priority "preemptible" \
+		--beaker-image "$(GANTRY_IMAGE)" \
+		--gpus 1 \
+		--description "Test run" \
+		--cluster ai2/allennlp-cirrascale \
+		--cluster ai2/aristo-cirrascale \
+		--cluster ai2/mosaic-cirrascale \
+		--cluster ai2/mosaic-cirrascale-a100 \
+		--cluster ai2/prior-cirrascale \
+		--cluster ai2/s2-cirrascale \
+		--cluster ai2/general-cirrascale \
+		--cluster ai2/general-cirrascale-a100-80g-ib \
+		--allow-dirty \
+		--venv base \
+		--timeout -1 \
+		--yes \
+		-- make check-cuda-install
+
+.PHONY : check-cpu-install
+check-cpu-install :
+	@python -c 'from dolma import check_install; check_install(cuda=False)'
+
+.PHONY : check-cuda-install
+check-cuda-install :
+	@python -c 'from dolma import check_install; check_install(cuda=True)'
diff --git a/README.md b/README.md
index 565393793..2f8ebe53c 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,9 @@
 # DOLMA: Delightful Open Language Model from AI2
+
+## Setup
+
+After cloning this repository, first install the latest [PyTorch](https://pytorch.org) according to the official instructions relevant to your environment. Then install the remaining dependencies and code base by running:
+
+```
+pip install -e .[dev] --config-settings editable_mode=compat
+```
diff --git a/dolma/__init__.py b/dolma/__init__.py
index b938ea0ce..71d595abb 100644
--- a/dolma/__init__.py
+++ b/dolma/__init__.py
@@ -2,4 +2,16 @@
 from .model import DolmaGPT, DolmaGPTOutput
 from .tokenizer import Tokenizer, TruncationDirection
 
-__all__ = ["Config", "Tokenizer", "TruncationDirection", "DolmaGPT", "DolmaGPTOutput"]
+__all__ = ["Config", "Tokenizer", "TruncationDirection", "DolmaGPT", "DolmaGPTOutput", "check_install"]
+
+
+def check_install(cuda: bool = False):
+    import torch
+
+    from .version import VERSION
+
+    if cuda:
+        assert torch.cuda.is_available(), "CUDA is not available!"
+        print("CUDA available")
+
+    print(f"DOLMA v{VERSION} installed")
diff --git a/dolma/model.py b/dolma/model.py
index ea7acbf3a..b7d74d0d5 100644
--- a/dolma/model.py
+++ b/dolma/model.py
@@ -146,7 +146,7 @@ def __init__(self, config: Config):
         self.transformer.update(
             {"wpe": nn.Embedding(config.max_sequence_length, config.d_model, device=config.init_device)}
         )
-        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
+        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False, device=config.init_device)
 
         if self.config.init_device != "meta":
             self.apply(self.param_init_fn)
diff --git a/pytest.ini b/pytest.ini
index b9702a151..bbf35b764 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -4,4 +4,5 @@ python_classes = Test* *Test
 log_format = %(asctime)s - %(levelname)s - %(name)s - %(message)s
 log_level = DEBUG
 markers =
+    gpu: marks tests that need GPUs
 filterwarnings =
diff --git a/requirements.txt b/requirements.txt
index 490904af1..886fdccc8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+# NOTE: when upgrading requirements here you may have to rebuild and push some
+# Docker images. See each Dockerfile for details on how to do that.
 numpy
 torch
 mosaicml
@@ -6,3 +8,4 @@ tokenizers
 click
 rich
 cached-path
+beaker-gantry
diff --git a/scripts/test_entrypoint.sh b/scripts/test_entrypoint.sh
new file mode 100644
index 000000000..4f289eba1
--- /dev/null
+++ b/scripts/test_entrypoint.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Exit script if any commands fail.
+set -e
+set -o pipefail
+
+# Check that the required environment variables have been set.
+for env_var in GITHUB_TOKEN COMMIT_SHA; do
+    if [[ -z "${!env_var}" ]]; then
+        echo >&2 "error: required environment variable $env_var is empty"
+        exit 1
+    fi
+done
+
+# Initialize conda for bash.
+# See https://stackoverflow.com/a/58081608/4151392
+eval "$(command conda 'shell.bash' 'hook' 2> /dev/null)"
+
+# Install GitHub CLI.
+conda install gh --channel conda-forge
+
+# Configure git to use GitHub CLI as a credential helper so that we can clone private repos.
+gh auth setup-git
+
+# Clone the repository and check out the commit under test.
+mkdir LLM && cd LLM
+gh repo clone allenai/LLM .
+git checkout --quiet "$COMMIT_SHA"
+
+# Install dependencies.
+pip install --upgrade pip
+pip install --no-cache-dir '.[dev]'
+
+# Create directory for results.
+mkdir -p /results
+
+# Execute the arguments to this script as commands themselves, piping output into a log file.
+exec "$@" 2>&1 | tee /results/out.log
diff --git a/tests/model_test.py b/tests/model_test.py
index fb4a1d079..8a5601755 100644
--- a/tests/model_test.py
+++ b/tests/model_test.py
@@ -6,11 +6,38 @@
 from dolma.data import DataCollator, PaddingDirection
 
 
-@pytest.mark.parametrize("alibi", [pytest.param(True, id="alibi-emb"), pytest.param(False, id="posit-emb")])
-def test_forward(config: Config, tokenizer: Tokenizer, alibi: bool):
+@pytest.mark.parametrize(
+    "alibi, cuda",
+    [
+        pytest.param(True, False, id="alibi-emb-cpu"),
+        pytest.param(False, False, id="posit-emb-cpu"),
+        pytest.param(
+            True,
+            True,
+            id="alibi-emb-cuda",
+            marks=(
+                pytest.mark.gpu,
+                pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Requires CUDA devices"),
+            ),
+        ),
+        pytest.param(
+            False,
+            True,
+            id="posit-emb-cuda",
+            marks=(
+                pytest.mark.gpu,
+                pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Requires CUDA devices"),
+            ),
+        ),
+    ],
+)
+def test_forward(config: Config, tokenizer: Tokenizer, alibi: bool, cuda: bool):
     torch.manual_seed(0)
     config.alibi = alibi
+    if cuda:
+        config.init_device = "cuda"
+
     model = DolmaGPT(config).eval()
 
     input1 = tokenizer.encode("My name is DOLMA!")
@@ -21,11 +48,14 @@
             {"input_ids": input2, "attention_mask": [1.0] * len(input2)},
         ]
     )
+    batch_inputs = {  # type: ignore
+        k: v.to(device=config.device) if isinstance(v, torch.Tensor) else v for k, v in batch_inputs.items()
+    }
 
     # Check that logits from individual inputs are equal to logits from batch.
     with torch.inference_mode():
-        output1 = model(torch.tensor(input1).unsqueeze(0))
-        output2 = model(torch.tensor(input2).unsqueeze(0))
+        output1 = model(torch.tensor(input1, device=config.device).unsqueeze(0))
+        output2 = model(torch.tensor(input2, device=config.device).unsqueeze(0))
         batch_output = model(**batch_inputs)
 
     torch.testing.assert_close(output1.logits[0][: len(input1)], batch_output.logits[0][: len(input1)])
@@ -40,7 +70,7 @@ def test_backward(config: Config, tokenizer: Tokenizer, alibi: bool):
     model = DolmaGPT(config).train()
 
     # Forward pass to get logits.
-    input_ids = torch.tensor(tokenizer.encode("My name is DOLMA!")).unsqueeze(0)
+    input_ids = torch.tensor(tokenizer.encode("My name is DOLMA!"), device=config.device).unsqueeze(0)
     logits = model(input_ids).logits
 
     # Compute loss.
@@ -55,7 +85,7 @@
     for name, parameter in model.named_parameters():
         if parameter.requires_grad:
             assert parameter.grad is not None
-            zeros = torch.zeros(parameter.size())
+            zeros = torch.zeros(parameter.size(), device=config.device)
             if (parameter.grad == zeros).all():
                 raise RuntimeError(f"{name} has zero a gradient!")
         else:
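As a rough usage sketch (not part of the patch itself): a developer with Docker, `jq`, and an authenticated `beaker` CLI could exercise the new targets and markers from a local checkout roughly as follows. Every command below is defined by the Makefile, `check_install` helper, or CI workflow introduced above; the surrounding shell session is illustrative only.

```bash
# Build and push the two Beaker images defined by Dockerfile.gantry and Dockerfile.test,
# then print the resolved Beaker user and image names.
make gantry-image
make test-image
make beaker-info

# Sanity-check the local (CPU-only) install, then launch a remote GPU check via gantry.
make check-cpu-install
make gantry-test

# Run only the GPU-marked tests, which is what the new CI job runs inside Beaker.
pytest -v -m gpu tests/
```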