Adding HF CI (deepchem#4173)
* Added HF workflow

* Moved prot bert tests to HF marker

* Resolved??

* Moved all HF tests to new CI
Shiva-sankaran authored Nov 12, 2024
1 parent be83685 commit f44e45b
Showing 7 changed files with 172 additions and 33 deletions.
139 changes: 139 additions & 0 deletions .github/workflows/hf_setup.yml
@@ -0,0 +1,139 @@
name: Test for DeepChem HuggingFace
on:
  push: # run CI on pushes to the master branch
    branches:
      - master
  pull_request: # run CI on pull requests targeting the master branch
    branches:
      - master
jobs:
  HF-build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.8', '3.10', '3.11']
        include:
          - os: windows-latest
            python-version: '3.9'
    steps:
      - uses: actions/checkout@v4
      - name: Cache pip modules for Linux
        if: runner.os == 'Linux'
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/torch/**') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Build DeepChem
        run: |
          python -m pip install --upgrade pip
          pip install -e '.[torch]'
      - name: Import checking
        run: python -c "import deepchem; import torch;"

  HF-tests:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.8', '3.10', '3.11']
        include:
          - os: windows-latest
            python-version: '3.9'
    env:
      OS: ${{ matrix.os }}
      PYTHON_VERSION: ${{ matrix.python-version }}
    steps:
      - name: Maximize build space
        if: matrix.os == 'ubuntu-latest'
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          sudo rm -rf /usr/local/lib/android
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # https://github.com/galaxyproject/tools-iuc/blob/master/.github/workflows/pr.yaml
      # The range of commits to check for changes is:
      # - for events on the master branch, compare against the sha before the event
      #   (this does not work for feature-branch events, since we want all commits
      #   on the feature branch, not just the commits of the last event)
      # - for pull requests, compare against the first ancestor, given that the
      #   current HEAD is the merge of the PR branch and the base branch
      - name: Set commit range (push to the master branch, e.g. merge)
        if: github.ref == 'refs/heads/master' && github.event_name == 'push'
        run: echo "COMMIT_RANGE=${{ github.event.before }}.." >> $GITHUB_ENV
      - name: Set commit range (pull request)
        if: github.event_name == 'pull_request'
        run: |
          git fetch origin master
          echo "COMMIT_RANGE=origin/master..." >> $GITHUB_ENV
      - name: Cache pip packages for Linux
        if: runner.os == 'Linux'
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/torch/**') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Cache pip packages for macOS
        if: runner.os == 'macOS'
        uses: actions/cache@v4
        with:
          path: ~/Library/Caches/pip
          key: ${{ matrix.os }}-pip-${{ hashFiles('requirements/torch/**') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Cache pip packages for Windows
        if: runner.os == 'Windows'
        uses: actions/cache@v4
        with:
          path: ~\AppData\Local\pip\Cache
          key: ${{ matrix.os }}-pip-${{ hashFiles('requirements/torch/**') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Create env.yml for Python 3.9+
        shell: bash
        if: ${{ matrix.python-version != '3.8' }}
        run: |
          python -m pip install --upgrade pip
          pip install conda-merge
          conda-merge requirements/env_common.yml requirements/torch/env_torch.cpu.yml requirements/env_test.yml > env.yml
      - name: Create env.yml for Python 3.8
        shell: bash
        # A special case of environment creation for Python 3.8, which includes an older version of matminer
        if: ${{ matrix.python-version == '3.8' }}
        run: |
          python -m pip install --upgrade pip
          pip install conda-merge
          conda-merge requirements/env_common_3_8.yml requirements/torch/env_torch.cpu.yml requirements/env_test.yml > env.yml
      - name: Install all dependencies
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniconda-version: "latest"
          auto-update-conda: true
          activate-environment: deepchem
          channels: conda-forge,defaults
          python-version: ${{ matrix.python-version }}
          environment-file: env.yml
      - name: Install DeepChem
        id: install
        shell: bash -l {0}
        run: pip install -e .
      - name: PyTest
        # Run even if an earlier step failed, as long as the install step actually ran
        if: ${{ (success() || failure()) && (steps.install.outcome == 'failure' || steps.install.outcome == 'success') }}
        shell: bash -l {0}
        run: pytest -v -m 'hf' deepchem
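
The workflow's final step selects tests by marker (pytest -v -m 'hf' deepchem). For that expression to run without a PytestUnknownMarkWarning, the hf marker has to be registered with pytest; the registration presumably lives in one of the changed files not shown in this view. The snippet below is only a minimal sketch of such a registration, assuming a conftest.py hook rather than whatever file the commit actually touches.

# conftest.py -- minimal sketch only; the commit's actual marker
# registration is in a changed file not shown in this view.
def pytest_configure(config):
    # Append to pytest's 'markers' ini option so that `pytest -m hf`
    # selects HuggingFace-backed tests without unknown-marker warnings.
    config.addinivalue_line(
        "markers",
        "hf: tests that exercise HuggingFace-backed DeepChem models")

With the marker registered, the CI selection can be reproduced locally by running the same command as the workflow's PyTest step.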
14 changes: 7 additions & 7 deletions deepchem/models/torch_models/tests/test_antibody_modeling.py
@@ -15,7 +15,7 @@ def igbert_tokenizer():
return tokenizer


-@pytest.mark.torch
+@pytest.mark.hf
def test_init(igbert_tokenizer):
from deepchem.models.torch_models.antibody_modeling import DeepAbLLM
from deepchem.models.torch_models.hf_model import HuggingFaceModel
@@ -25,7 +25,7 @@ def test_init(igbert_tokenizer):
assert anti_model.n_tasks == 1


-@pytest.mark.torch
+@pytest.mark.hf
def test_load_from_pretrained(tmpdir):
pretrain_model_dir = os.path.join(tmpdir, 'pretrain')
finetune_model_dir = os.path.join(tmpdir, 'finetune')
@@ -57,7 +57,7 @@ def test_load_from_pretrained(tmpdir):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_initialize_new_config():
model_path = 'Rostlab/prot_bert'
config = {"num_attention_heads": 8, "num_hidden_layers": 6}
@@ -72,7 +72,7 @@ def test_initialize_new_config():
assert model.model.config['num_hidden_layers'] == 6


-@pytest.mark.torch
+@pytest.mark.hf
def test_save_reload(tmpdir):
model_path = 'Exscientia/IgBert'
anti_model = DeepAbLLM(task='mlm',
@@ -99,7 +99,7 @@ def test_save_reload(tmpdir):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_mask_seq_pos(igbert_tokenizer):
from deepchem.models.torch_models.antibody_modeling import DeepAbLLM
anti_model = DeepAbLLM(model_path='facebook/esm2_t6_8M_UR50D',
@@ -114,7 +114,7 @@ def test_mask_seq_pos(igbert_tokenizer):
assert masked_test_string.split(' ')[10] == anti_model.tokenizer.mask_token


-@pytest.mark.torch
+@pytest.mark.hf
def test_redesign_residue():
from Levenshtein import distance
from deepchem.models.torch_models.antibody_modeling import DeepAbLLM
@@ -139,7 +139,7 @@ def test_redesign_residue():
assert abs(item[2]) <= 1


-@pytest.mark.torch
+@pytest.mark.hf
def test_optimize_sequence():
from Levenshtein import distance
from deepchem.models.torch_models.antibody_modeling import DeepAbLLM
10 changes: 5 additions & 5 deletions deepchem/models/torch_models/tests/test_chemberta.py
@@ -10,7 +10,7 @@
pass


-@pytest.mark.torch
+@pytest.mark.hf
def test_chemberta_pretraining(smiles_regression_dataset,
smiles_multitask_regression_dataset):
# Pretraining in MLM mode
@@ -28,7 +28,7 @@ def test_chemberta_pretraining(smiles_regression_dataset,
assert loss


-@pytest.mark.torch
+@pytest.mark.hf
def test_chemberta_finetuning(smiles_regression_dataset,
smiles_multitask_regression_dataset):
# test regression
@@ -69,7 +69,7 @@ def test_chemberta_finetuning(smiles_regression_dataset,
assert prediction.shape == (dataset.y.shape[0], 2)


-@pytest.mark.torch
+@pytest.mark.hf
def test_chemberta_load_from_pretrained(tmpdir, smiles_regression_dataset):
pretrain_model_dir = os.path.join(tmpdir, 'pretrain')
finetune_model_dir = os.path.join(tmpdir, 'finetune')
@@ -100,7 +100,7 @@ def test_chemberta_load_from_pretrained(tmpdir, smiles_regression_dataset):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_chemberta_save_reload(tmpdir):
tokenizer_path = 'seyonec/PubChem10M_SMILES_BPE_60k'
model = Chemberta(task='regression',
@@ -125,7 +125,7 @@ def test_chemberta_save_reload(tmpdir):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_chemberta_load_weights_from_hf_hub():
pretrained_model_path = 'DeepChem/ChemBERTa-77M-MLM'
tokenizer_path = 'DeepChem/ChemBERTa-77M-MLM'
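
The first hunk of this file (the bare pass under @@ -10,7 +10,7 @@) is the tail of a module-level try/except that guards the optional transformers-backed import, per the convention in these test files. A hedged alternative for new hf-marked test modules, not something this commit does, is pytest.importorskip, which skips the whole module at collection time when the dependency is missing rather than leaving the imported name undefined:

# Sketch only: skip an hf test module cleanly when the optional
# HuggingFace dependency is absent (an alternative to a bare try/except).
import pytest

transformers = pytest.importorskip("transformers")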
18 changes: 9 additions & 9 deletions deepchem/models/torch_models/tests/test_hf_models.py
@@ -38,7 +38,7 @@ def hf_tokenizer(tmpdir):
return tokenizer


-@pytest.mark.torch
+@pytest.mark.hf
def test_pretraining(hf_tokenizer, smiles_regression_dataset):
from deepchem.models.torch_models.hf_models import HuggingFaceModel
from transformers.models.roberta import RobertaConfig, RobertaForMaskedLM
@@ -55,7 +55,7 @@ def test_pretraining(hf_tokenizer, smiles_regression_dataset):
assert loss


-@pytest.mark.torch
+@pytest.mark.hf
def test_hf_model_regression(hf_tokenizer, smiles_regression_dataset):
from transformers.models.roberta import (RobertaConfig,
RobertaForSequenceClassification)
@@ -77,7 +77,7 @@ def test_hf_model_regression(hf_tokenizer, smiles_regression_dataset):
assert score


-@pytest.mark.torch
+@pytest.mark.hf
def test_hf_model_classification(hf_tokenizer, smiles_regression_dataset):
y = np.random.choice([0, 1], size=smiles_regression_dataset.y.shape)
dataset = dc.data.NumpyDataset(X=smiles_regression_dataset.X,
@@ -102,7 +102,7 @@ def test_hf_model_classification(hf_tokenizer, smiles_regression_dataset):
assert score


-@pytest.mark.torch
+@pytest.mark.hf
def test_load_from_pretrained(tmpdir, hf_tokenizer):
# Create pretrained model
from transformers.models.roberta import (RobertaConfig, RobertaForMaskedLM,
@@ -147,7 +147,7 @@ def test_load_from_pretrained(tmpdir, hf_tokenizer):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_model_save_reload(tmpdir, hf_tokenizer):
from transformers.models.roberta import (RobertaConfig,
RobertaForSequenceClassification)
@@ -181,7 +181,7 @@ def test_model_save_reload(tmpdir, hf_tokenizer):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_load_from_hf_checkpoint():
from transformers.models.t5 import T5Config, T5Model
config = T5Config()
@@ -203,7 +203,7 @@ def test_load_from_hf_checkpoint():
assert all(not_matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_fill_mask_IO(tmpdir, hf_tokenizer):
from transformers import (RobertaConfig, RobertaForMaskedLM)

@@ -232,7 +232,7 @@ def test_fill_mask_IO(tmpdir, hf_tokenizer):
assert isinstance(results[0][0], dict)


-@pytest.mark.torch
+@pytest.mark.hf
def test_fill_mask_fidelity(tmpdir, hf_tokenizer):
from transformers import (RobertaConfig, RobertaForMaskedLM)

@@ -268,7 +268,7 @@ def test_fill_mask_fidelity(tmpdir, hf_tokenizer):
assert filled['sequence'].startswith(f'<s>{filled["token_str"]}')


-@pytest.mark.torch
+@pytest.mark.hf
def test_load_from_pretrained_with_diff_task(tmpdir):
# Tests loading a pretrained model where the weight shape in last layer
# (the final projection layer) of the pretrained model does not match
8 changes: 4 additions & 4 deletions deepchem/models/torch_models/tests/test_molformer.py
@@ -10,7 +10,7 @@
pass


-@pytest.mark.torch
+@pytest.mark.hf
def test_molformer_pretraining(smiles_regression_dataset,
smiles_multitask_regression_dataset):
# Pretraining in MLM mode
@@ -26,7 +26,7 @@ def test_molformer_pretraining(smiles_regression_dataset,
assert loss


-@pytest.mark.torch
+@pytest.mark.hf
def test_molformer_finetuning(smiles_regression_dataset,
smiles_multitask_regression_dataset):

@@ -70,7 +70,7 @@ def test_molformer_finetuning(smiles_regression_dataset,
assert prediction.shape == (dataset.y.shape[0], 2)


-@pytest.mark.torch
+@pytest.mark.hf
def test_molformer_load_from_pretrained(tmpdir, smiles_regression_dataset):
pretrain_model_dir = os.path.join(tmpdir, 'pretrain')
finetune_model_dir = os.path.join(tmpdir, 'finetune')
@@ -95,7 +95,7 @@ def test_molformer_load_from_pretrained(tmpdir, smiles_regression_dataset):
assert all(matches)


-@pytest.mark.torch
+@pytest.mark.hf
def test_molformer_save_reload(tmpdir):
model = MoLFormer(task='regression', model_dir=tmpdir)
model._ensure_built()
(The remaining two changed files are not shown in this view.)
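
Across the four test files shown, the change is mechanical: every HuggingFace-backed test swaps @pytest.mark.torch for @pytest.mark.hf, moving it out of the torch CI lane and into the new HF lane. A new test would opt in the same way. The sketch below is illustrative only: the constructor and fit calls mirror the MoLFormer usage visible above, but the test itself is not part of this commit.

# Sketch: how a new HuggingFace-backed test would opt into the HF lane.
import pytest


@pytest.mark.hf
def test_molformer_fit_smoke(smiles_regression_dataset):
    # Construction and training mirror the MoLFormer calls shown above;
    # a single epoch keeps CI runtime low.
    from deepchem.models.torch_models import MoLFormer
    model = MoLFormer(task='regression')
    loss = model.fit(smiles_regression_dataset, nb_epoch=1)
    assert loss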
