-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
add runner
Signed-off-by: Zhiyuan Chen <[email protected]>
1 parent
303d1d9
commit 8ee697a
Showing
19 changed files
with
1,118 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
authors: | ||
- Zhiyuan Chen | ||
date: 2024-05-04 | ||
--- | ||
|
||
# MultiTask | ||
|
||
::: multimolecule.data.multitask |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
authors: | ||
- Zhiyuan Chen | ||
date: 2024-05-04 | ||
--- | ||
|
||
# MultiMoleculeConfig | ||
|
||
::: multimolecule.runners.MultiMoleculeConfig |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
authors: | ||
- Zhiyuan Chen | ||
date: 2024-05-04 | ||
--- | ||
|
||
# runners | ||
|
||
--8<-- "multimolecule/runners/README.md:8:" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
authors: | ||
- Zhiyuan Chen | ||
date: 2024-05-04 | ||
--- | ||
|
||
# MultiMoleculeRunner | ||
|
||
::: multimolecule.runners.base_runner.BaseRunner |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from .run import evaluate, infer, train | ||
|
||
__all__ = ["train", "evaluate", "infer"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
# mypy: disable-error-code="attr-defined" | ||
|
||
import atexit | ||
import os | ||
import warnings | ||
from typing import Type | ||
|
||
import danling as dl | ||
import torch | ||
|
||
from multimolecule.runners import MultiMoleculeConfig, MultiMoleculeRunner | ||
|
||
try: | ||
import nni | ||
except ImportError: | ||
nni = None | ||
|
||
|
||
def train(
    config: MultiMoleculeConfig = None,  # type: ignore
    runner_cls: Type[MultiMoleculeRunner] = MultiMoleculeRunner,
):
    """Parse the configuration, build a runner and run training.

    Args:
        config: Training configuration; a fresh ``MultiMoleculeConfig`` is
            created when omitted.
        runner_cls: Runner class used to execute the experiment.

    Returns:
        Whatever ``runner.train()`` returns.

    Raises:
        ValueError: If NNI parameters are requested but nni is not installed.
    """
    cfg = MultiMoleculeConfig() if config is None else config
    cfg = cfg.parse(default_config="config", no_default_config_action="warn")
    cfg.interpolate(unsafe_eval=True)
    cfg.training = True
    # Opt into faster (lower-precision) matmul/conv kernels when allowed.
    if cfg.allow_tf32:
        torch.backends.cudnn.allow_tf32 = True
        torch.backends.cuda.matmul.allow_tf32 = True
    if cfg.reduced_precision_reduction:
        torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
        torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True
    # Merge hyper-parameters from an active NNI tuner, if requested.
    if cfg.get("nni", False):
        if nni is None:
            raise ValueError("Unable to retrieve nni parameters, since nni is not installed.")
        cfg.merge(nni.get_next_parameter())
    with dl.debug(cfg.get("debug", False)):
        runner = runner_cls(cfg)
        # atexit handlers run LIFO: the checkpoint is saved first, the
        # result is saved next, and the result is printed last.
        for hook in (runner.print_result, runner.save_result, runner.save_checkpoint):
            atexit.register(hook)
        return runner.train()
|
||
|
||
def evaluate(
    config: MultiMoleculeConfig = None,  # type: ignore
    runner_cls: Type[MultiMoleculeRunner] = MultiMoleculeRunner,
):
    """Parse the configuration, build a runner and evaluate a checkpoint.

    Args:
        config: Evaluation configuration; an empty ``MultiMoleculeConfig`` is
            created when omitted.
        runner_cls: Runner class used to execute the evaluation.

    Returns:
        The metrics returned by ``runner.evaluate_epoch("evaluation")``.

    Raises:
        RuntimeError: If ``checkpoint`` is missing, or any dataset lacks an
            ``evaluation`` split.
    """
    if config is None:
        config = MultiMoleculeConfig.empty()
    config = config.parse(default_config="config", no_default_config_action="warn")
    config.interpolate(unsafe_eval=True)
    config.training = False
    # Opt into faster (lower-precision) matmul/conv kernels when allowed.
    if config.allow_tf32:
        torch.backends.cudnn.allow_tf32 = True
        torch.backends.cuda.matmul.allow_tf32 = True
    if config.reduced_precision_reduction:
        torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
        torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True
    if "checkpoint" not in config or not isinstance(config.checkpoint, str):
        raise RuntimeError("Please specify `checkpoint` to run evaluate")
    for name, data in config.datas.items():
        # BUG FIX: previously read `data.evaluate`, which is never set; the
        # key checked (and reported in the error) is `evaluation`, matching
        # the parallel `data.inference` check in `infer`.
        if "evaluation" not in data or not isinstance(data.evaluation, str):
            raise RuntimeError(f"Please specify `evaluation` to run evaluate in datas.{name}")
    runner = runner_cls(config)
    result = runner.evaluate_epoch("evaluation")
    print(result)
    return result
|
||
|
||
def infer(
    config: MultiMoleculeConfig = None,  # type: ignore
    runner_cls: Type[MultiMoleculeRunner] = MultiMoleculeRunner,
):
    """Parse the configuration, build a runner and run inference.

    Args:
        config: Inference configuration; an empty ``MultiMoleculeConfig`` is
            created when omitted.
        runner_cls: Runner class used to execute the inference.

    Returns:
        The predictions returned by ``runner.infer()`` (also saved to
        ``config.result_path``).

    Raises:
        RuntimeError: If ``checkpoint`` is missing, or any dataset lacks an
            ``inference`` split.
    """
    if config is None:
        config = MultiMoleculeConfig.empty()
    config = config.parse(default_config="config", no_default_config_action="warn")
    config.interpolate(unsafe_eval=True)
    config.training = False
    # Opt into faster (lower-precision) matmul/conv kernels when allowed.
    if config.allow_tf32:
        torch.backends.cudnn.allow_tf32 = True
        torch.backends.cuda.matmul.allow_tf32 = True
    if config.reduced_precision_reduction:
        torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
        torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True
    if not ("checkpoint" in config and isinstance(config.checkpoint, str)):
        raise RuntimeError("Please specify `checkpoint` to run infer.")
    for name, data in config.datas.items():
        if not ("inference" in data and isinstance(data.inference, str)):
            raise RuntimeError(f"Please specify `inference` to run infer in datas.{name}")
    if not ("result_path" in config and isinstance(config.result_path, str)):
        # Fall back to ./result.json when no output location was given.
        config.result_path = os.path.join(os.getcwd(), "result.json")
        warnings.warn("`result_path` is not specified, default to `result.json`.", RuntimeWarning, stacklevel=2)
    runner = runner_cls(config)
    predictions = runner.infer()
    runner.save(predictions, config.result_path)
    return predictions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import os | ||
import shutil | ||
from statistics import mean | ||
from typing import List | ||
|
||
import chanfig | ||
import pandas as pd | ||
from chanfig import NestedDict | ||
from tqdm import tqdm | ||
|
||
|
||
class Result(NestedDict):
    # Flattened record of one finished run, used to build the summary CSV.
    pretrained: str  # pretrained model name (last path component of config.pretrained)
    id: str  # experiment id recorded in best.json
    seed: int  # random seed of the run
    epoch: int  # epoch index of the best checkpoint (best.json "index")
    validation: NestedDict  # validation metrics, formatted as ".8f" strings
    test: NestedDict  # test metrics, formatted as ".8f" strings
||
|
||
def get_result_stat(experiment_root: str, remove_empty: bool = True) -> List[Result]:
    """Collect the best result of every finished run under *experiment_root*.

    A run directory is recognised by the presence of ``run.log``; its metrics
    are read from ``best.json`` and its settings from ``trainer.yaml``.

    Args:
        experiment_root: Directory containing experiment run directories.
        remove_empty: Remove run directories without (complete) results, then
            prune every directory left empty.

    Returns:
        Results sorted by ``(pretrained, seed, id)``.
    """
    results = []
    for root, _, files in tqdm(os.walk(experiment_root)):
        if "run.log" not in files:
            continue
        # A run without best.json (or without a best index) never finished;
        # optionally delete it.
        if "best.json" not in files:
            if remove_empty:
                shutil.rmtree(root)
            continue
        best = NestedDict.from_json(os.path.join(root, "best.json"))
        if "index" not in best:
            if remove_empty:
                shutil.rmtree(root)
            continue
        config = NestedDict.from_yaml(os.path.join(root, "trainer.yaml"))
        pretrained = config.pretrained.split("/")[-1]
        result = Result(id=best.id, pretrained=pretrained, seed=config.seed)
        # Metric values stored as lists are per-step values; collapse them to
        # their mean before formatting.
        result.validation = NestedDict(
            {k: format(mean(v) if isinstance(v, list) else v, ".8f") for k, v in best.validation.all_items()}
        )
        result.test = NestedDict(
            {k: format(mean(v) if isinstance(v, list) else v, ".8f") for k, v in best.test.all_items()}
        )
        result.epoch = best.index
        # Drop bookkeeping entries that are noise in the summary.
        for key in ("validation.time", "test.time", "validation.loss", "test.loss", "validation.lr", "test.lr"):
            result.pop(key, None)
        results.append(result)
    if remove_empty:
        # BUG FIX: the previous two top-down passes removed at most two levels
        # of nested empty directories. Walking bottom-up and checking the live
        # directory contents prunes arbitrarily deep empty trees in one pass.
        for root, _, _ in os.walk(experiment_root, topdown=False):
            if not os.listdir(root):
                os.rmdir(root)
    results.sort(key=lambda x: (x.pretrained, x.seed, x.id))
    return results
|
||
|
||
def write_result_stat(results: List["Result"], path: str):
    """Write result records to a CSV file at *path*.

    An empty ``comment`` column is inserted before the last column for manual
    annotations; missing values are written as empty strings.

    Args:
        results: Result records; each must provide ``all_items()`` yielding
            flattened (key, value) pairs.
        path: Destination CSV file path.
    """
    rows = [dict(result.all_items()) for result in results]
    df = pd.DataFrame.from_dict(rows)
    df.insert(len(df.keys()) - 1, "comment", "")
    # BUG FIX: `DataFrame.fillna` returns a new frame; the original call
    # discarded the result, making the statement a no-op.
    df = df.fillna("")
    df.to_csv(path, index=False)
|
||
|
||
class Config(chanfig.Config):
    # Command-line configuration for the result-collection script.
    experiment_root: str = "experiments"  # directory scanned for run results
    out_path: str = "result.csv"  # destination of the summary CSV
    remove_empty: bool = True  # prune unfinished runs and empty directories
|
||
|
||
if __name__ == "__main__":
    # Collect the per-run best results and dump them into a single CSV.
    config = Config().parse()
    result_stat = get_result_stat(config.experiment_root, config.remove_empty)
    if not len(result_stat) > 0:
        raise ValueError("No results found")
    write_result_stat(result_stat, config.out_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,246 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from __future__ import annotations | ||
|
||
from bisect import bisect_right | ||
from collections.abc import Iterator, Mapping, Sequence | ||
from copy import deepcopy | ||
from random import choices | ||
|
||
import torch | ||
from chanfig import NestedDict | ||
from torch import distributed as dist | ||
from torch.utils import data | ||
|
||
from .dataset import Dataset | ||
|
||
|
||
class MultiTaskDataset(data.ConcatDataset):
    """Concatenation of several named datasets that remembers which
    sub-dataset every batch was drawn from."""

    datasets: Mapping  # name -> Dataset
    dataset_keys: Sequence[str]  # dataset names, aligned with dataset_values
    dataset_values: Sequence[Dataset]  # datasets, in insertion order

    def __init__(self, datasets: Mapping) -> None:
        for key, dataset in datasets.items():
            if not isinstance(dataset, Dataset):
                raise TypeError(f"Dataset {key} should be an instance of Dataset")
        self.datasets = datasets
        if not len(self.datasets) > 0:
            raise ValueError("MultiTaskDataset should contain at least one dataset")
        self.dataset_keys, self.dataset_values = zip(*self.datasets.items())
        # ConcatDataset-style prefix sums, used to map a global index to a
        # (sub-dataset, local index) pair.
        self.cumulative_sizes = self.cumsum(self.dataset_values)

    def __getitems__(self, key: Sequence[int]) -> Mapping:
        # Fetch a whole batch at once. Only the first index determines the
        # sub-dataset — all indices in `key` are assumed to fall into the same
        # sub-dataset (guaranteed by MultiTaskSampler).
        dataset_idx = bisect_right(self.cumulative_sizes, key[0])
        if dataset_idx == 0:
            sample_idx = key
        else:
            # Translate global indices back into the sub-dataset's local range.
            sample_idx = [i - self.cumulative_sizes[dataset_idx - 1] for i in key]
        batch = self.dataset_values[dataset_idx][sample_idx]
        # Tag the batch with the originating dataset's name.
        batch["dataset"] = self.dataset_keys[dataset_idx]
        return batch

    @property
    def tasks(self) -> NestedDict:
        """Union of the tasks of all sub-datasets.

        Raises:
            ValueError: If two sub-datasets define the same task differently.
        """
        tasks = NestedDict()
        for dataset in self.dataset_values:
            for n, t in dataset.tasks.items():
                if n not in tasks:
                    tasks[n] = t
                elif tasks[n] != t:
                    raise ValueError(f"Task {n} has different configurations across datasets")
        return tasks

    @property
    def dataset_tasks(self) -> NestedDict:
        """Tasks of every sub-dataset, keyed by dataset name."""
        return NestedDict({k: v.tasks for k, v in self.datasets.items()})

    def __repr__(self) -> str:
        # NOTE(review): iterating a Mapping yields keys, so this prints only
        # the dataset names — confirm this is intended.
        return f"MultiTaskDataset({', '.join([str(d) for d in self.datasets])})"
|
||
|
||
class MultiTaskSampler(data.BatchSampler):
    r"""
    Ensure all items in a batch come from the same dataset.

    Arguments:
        sampler (Sampler): Base sampler.
        batch_size (int): Size of mini-batch.
        drop_last (bool): If ``True``, the sampler will drop the last batch if
            its size would be less than ``batch_size``
    """

    datasets: Sequence[Dataset]

    def __init__(  # pylint: disable=super-init-not-called
        self,
        dataset: MultiTaskDataset,
        batch_size: int,
        shuffle: bool = True,
        drop_last: bool = False,
        sampler_cls: type[data.Sampler] | None = None,
        weights: list[int] | None = None,
    ) -> None:
        self.datasets = dataset.dataset_values
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.shuffle = shuffle
        # One independent sampler per sub-dataset; random or sequential
        # depending on `shuffle`, unless a sampler class is given explicitly.
        if sampler_cls is None:
            sampler_cls = data.RandomSampler if shuffle else data.SequentialSampler
        self.samplers = [sampler_cls(d) for d in self.datasets]  # type: ignore
        self.dataset_sizes = [len(d) for d in self.datasets]  # type: ignore
        self.cumulative_sizes = dataset.cumulative_sizes
        self.num_datasets = len(self.datasets)
        # Sub-datasets are drawn proportionally to their size unless custom
        # weights are provided.
        self.weights = weights if weights is not None else self.dataset_sizes

    def __iter__(self):
        sampler_iters = [(i, iter(s)) for i, s in enumerate(self.samplers)]
        # Copied so exhausted samplers can be dropped without mutating self.weights.
        sampler_weights = deepcopy(self.weights)
        sampler_idx = 0
        # Implemented based on the benchmarking in https://github.com/pytorch/pytorch/pull/76951
        if self.drop_last:
            while sampler_iters:
                # Pick which sub-dataset the next batch comes from
                # (fixed index 0 when not shuffling).
                if self.shuffle:
                    sampler_idx = choices(range(len(sampler_iters)), weights=sampler_weights)[0]
                sampler_id, sampler_iter = sampler_iters[sampler_idx]
                # Offset local indices into the concatenated index space.
                cumulative_size = self.cumulative_sizes[sampler_id - 1] if sampler_id > 0 else 0
                try:
                    batch = [next(sampler_iter) + cumulative_size for _ in range(self.batch_size)]
                    yield batch
                except StopIteration:
                    # Sub-dataset exhausted: drop it (its partial batch is discarded).
                    sampler_iters.pop(sampler_idx)
                    sampler_weights.pop(sampler_idx)
        else:
            while sampler_iters:
                if self.shuffle:
                    sampler_idx = choices(range(len(sampler_iters)), weights=sampler_weights)[0]
                sampler_id, sampler_iter = sampler_iters[sampler_idx]
                cumulative_size = self.cumulative_sizes[sampler_id - 1] if sampler_id > 0 else 0
                batch = [0] * self.batch_size
                idx_in_batch = 0
                try:
                    for _ in range(self.batch_size):
                        batch[idx_in_batch] = next(sampler_iter) + cumulative_size
                        idx_in_batch += 1
                    yield batch
                    idx_in_batch = 0  # noqa: SIM113
                    batch = [0] * self.batch_size
                except StopIteration:
                    # Sub-dataset exhausted: drop it, then emit its partial batch below.
                    sampler_iters.pop(sampler_idx)
                    sampler_weights.pop(sampler_idx)
                if idx_in_batch > 0:
                    yield batch[:idx_in_batch]

    def __len__(self):
        batch_size = self.batch_size
        if self.drop_last:
            return sum(len(d) // batch_size for d in self.datasets)
        # Ceil-divide each sub-dataset: partial batches are kept.
        return sum((len(d) + batch_size - 1) // batch_size for d in self.datasets)
|
||
|
||
class DistributedMultiTaskSampler(MultiTaskSampler):  # pylint: disable=too-few-public-methods
    r"""
    Distributed version of MultiTaskSampler, which ensures that all GPUs sample data from the
    same sub-dataset in each step without requiring additional communication.

    The dataset selection is based on a random seed mechanism that is synchronized across epochs.

    See Also:
        [MultiTaskSampler][MultiTaskSampler]
    """

    def __init__(
        self,
        dataset: MultiTaskDataset,
        batch_size: int,
        shuffle: bool = True,
        drop_last: bool = False,
        sampler_cls: type[data.Sampler] = data.RandomSampler,
        weights: list[int] | None = None,
        seed: int = 0,
    ) -> None:
        super().__init__(dataset, batch_size, shuffle, drop_last, sampler_cls, weights)
        # Replace the per-dataset samplers with DistributedSamplers so each
        # rank iterates a disjoint shard of every sub-dataset.
        self.samplers = [data.DistributedSampler(d, shuffle=shuffle, drop_last=drop_last) for d in self.datasets]
        self.seed = seed
        self.epoch = 0

    def set_epoch(self, epoch: int):
        """
        Sets the epoch for deterministic shuffling.
        """
        self.epoch = epoch
        for sampler in self.samplers:
            sampler.set_epoch(epoch)

    def _get_sampler_idx(self, high: int) -> int:
        """
        Determines which sampler (i.e., sub-dataset) to use based on the seed and epoch.
        """
        # The generator is reseeded with (seed + epoch) on every call, so the
        # chosen index is identical on all ranks without communication — and
        # identical across calls within an epoch until `high` (the number of
        # live samplers) changes.
        # NOTE(review): unlike MultiTaskSampler, `self.weights` is not used
        # here; selection is uniform over the remaining samplers — confirm
        # this is intended.
        g = torch.Generator()
        g.manual_seed(self.seed + self.epoch)
        sampler_idx = torch.randint(low=0, high=high, size=(1,), generator=g).item()
        return sampler_idx

    def __iter__(self) -> Iterator:
        sampler_iters = [(i, iter(s)) for i, s in enumerate(self.samplers)]
        # Copied so exhausted samplers can be dropped without mutating self.weights.
        sampler_weights = deepcopy(self.weights)

        if self.drop_last:
            while sampler_iters:
                # Sample the same sub-dataset across all GPUs using the seeded index
                sampler_idx = self._get_sampler_idx(len(sampler_iters))
                sampler_id, sampler_iter = sampler_iters[sampler_idx]
                # Offset local indices into the concatenated index space.
                cumulative_size = self.cumulative_sizes[sampler_id - 1] if sampler_id > 0 else 0
                try:
                    batch = [next(sampler_iter) + cumulative_size for _ in range(self.batch_size)]
                    yield batch
                except StopIteration:
                    # Sub-dataset exhausted: drop it (its partial batch is discarded).
                    sampler_iters.pop(sampler_idx)
                    sampler_weights.pop(sampler_idx)
        else:
            while sampler_iters:
                # Sample the same sub-dataset across all GPUs using the seeded index
                sampler_idx = self._get_sampler_idx(len(sampler_iters))
                sampler_id, sampler_iter = sampler_iters[sampler_idx]
                cumulative_size = self.cumulative_sizes[sampler_id - 1] if sampler_id > 0 else 0
                batch = [0] * self.batch_size
                idx_in_batch = 0
                try:
                    for _ in range(self.batch_size):
                        batch[idx_in_batch] = next(sampler_iter) + cumulative_size
                        idx_in_batch += 1
                    yield batch
                    idx_in_batch = 0  # noqa: SIM113
                    batch = [0] * self.batch_size
                except StopIteration:
                    # Sub-dataset exhausted: drop it, then emit its partial batch below.
                    sampler_iters.pop(sampler_idx)
                    sampler_weights.pop(sampler_idx)
                if idx_in_batch > 0:
                    yield batch[:idx_in_batch]

    def __len__(self) -> int:
        # Each rank only yields its shard, so the effective batch size per
        # step is batch_size * world_size.
        batch_size = self.batch_size * self.world_size
        if self.drop_last:
            return sum(len(d) // batch_size for d in self.datasets)
        return sum((len(d) + batch_size - 1) // batch_size for d in self.datasets)

    @property
    def world_size(self) -> int:
        r"""Return the number of processes in the current process group."""
        if dist.is_available() and dist.is_initialized():
            return dist.get_world_size()
        return 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
authors: | ||
- Zhiyuan Chen | ||
date: 2024-05-04 | ||
--- | ||
|
||
# runners | ||
|
||
`runners` provide an easy-to-use interface for running experiments. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from .config import MultiMoleculeConfig | ||
from .runner import MultiMoleculeRunner | ||
|
||
__all__ = ["MultiMoleculeConfig", "MultiMoleculeRunner"] |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from __future__ import annotations | ||
|
||
import os | ||
from pathlib import Path | ||
from typing import List | ||
|
||
from chanfig import Config | ||
from transformers import PretrainedConfig | ||
|
||
|
||
class DataConfig(Config):
    # Configuration of a single dataset.
    root: str = "."  # base directory the split paths are relative to
    train: str | None  # path of the training split
    validation: str | None  # path of the validation split
    test: str | None  # path of the test split
    feature_cols: List | None = None  # input columns; presumably inferred when None — confirm
    label_cols: List | None = None  # label columns; presumably inferred when None — confirm
    truncation: bool = True  # truncate over-long inputs
|
||
|
||
class OptimConfig(Config):
    # Optimizer settings.
    name: str = "AdamW"  # optimizer class name
    lr: float = 1e-3  # base learning rate
    weight_decay: float = 1e-2
    pretrained_ratio: float = 1e-2  # presumably the lr ratio applied to pretrained parameters — confirm
|
||
|
||
class EmaConfig(Config):
    # Exponential-moving-average settings for model weights.
    enabled: bool = False
    beta: float = 0.999  # EMA decay factor
    update_after_step: int = 0  # start updating after this many steps
    update_every: int = 10  # update frequency in steps
|
||
|
||
class MultiMoleculeConfig(Config):
    """Top-level configuration for MultiMolecule experiments."""

    name: str  # run name; derived in `post` via `get_name`
    seed: int = 1016

    balance: str = "ew"  # multi-task balancing strategy (presumably "ew" = equal weighting — confirm)
    platform: str = "torch"  # training platform: torch / deepspeed / accelerate
    training: bool = True

    pretrained: str | None  # pretrained model name or local path
    use_pretrained: bool = True  # whether to load pretrained weights
    transformers: PretrainedConfig
    epoch_end: int = 20

    data: DataConfig  # single-dataset config (mutually exclusive with `datas`)

    tensorboard: bool = True
    save_interval: int = 10  # epochs between checkpoint saves

    art: bool = True
    allow_tf32: bool = True  # enable TensorFloat-32 matmul/conv kernels
    reduced_precision_reduction: bool = False  # allow fp16/bf16 reduced-precision reductions

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # `datas` lazily creates one DataConfig per accessed key, supporting
        # multiple named datasets.
        self.datas = Config(default_factory=DataConfig)
        self.dataloader.batch_size = 32
        self.optim = OptimConfig()
        self.ema = EmaConfig()
        self.sched.final_lr = 0

    def post(self):
        """Validate and derive settings after parsing.

        Raises:
            ValueError: If neither `pretrained` nor `checkpoint` is given, or
                if both `data` and `datas` are specified.
        """
        if "pretrained" not in self and "checkpoint" not in self:
            raise ValueError("Either one of `pretrained` or `checkpoint` must be specified")
        # `data` (single dataset) and `datas` (multiple) are mutually exclusive.
        if "data" in self:
            if self.datas:
                raise ValueError("Only one of `data` or `datas` can be specified, but not both")
            del self.datas
        # NOTE(review): indentation reconstructed — the name/use_pretrained
        # derivation is assumed to apply only when `pretrained` is present
        # (get_name would fail on a None pretrained); confirm.
        if "pretrained" in self:
            self["network.backbone.sequence.name"] = self.get("pretrained")
            self.name = self.get_name()
            self["network.backbone.sequence.use_pretrained"] = self.use_pretrained

    def get_name(self) -> str:
        """Build a run name from the pretrained model, optimizer and seed."""
        pretrained = self.get("pretrained")
        if os.path.exists(pretrained):
            # Local path: use "<parent>/<file>" without suffix for files, or
            # the directory stem for directories.
            path = Path(pretrained)
            if os.path.isfile(pretrained):
                pretrained = str(path.relative_to(path.parents[1]).with_suffix(""))
            else:
                pretrained = path.stem
        name = pretrained.replace("/", "--")
        if "optim" in self:
            optim_name = self.optim.get("name", "no")
            name += f"-{self.optim.lr}@{optim_name}"
        return name + f"-{self.seed}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from chanfig import Registry as Registry_ | ||
from danling.metrics import binary_metrics, multiclass_metrics, multilabel_metrics, regression_metrics | ||
|
||
|
||
class Registry(Registry_):

    def build(self, type, num_labels: int | None = None, **kwargs):
        """Construct the metric factory registered under *type*.

        The ``num_labels`` hint is forwarded under whichever keyword the
        factory expects (``num_labels``, ``num_classes`` or ``num_outputs``);
        other types receive no label-count argument.
        """
        label_keywords = {
            "multilabel": "num_labels",
            "multiclass": "num_classes",
            "regression": "num_outputs",
        }
        keyword = label_keywords.get(type)
        if keyword is not None:
            kwargs[keyword] = num_labels
        return self.init(self.lookup(type), **kwargs)
|
||
|
||
# Global registry mapping problem types to danling metric factories.
MetricRegistry = Registry(key="type")
MetricRegistry.register(binary_metrics, "binary")
MetricRegistry.register(multiclass_metrics, "multiclass")
MetricRegistry.register(multilabel_metrics, "multilabel")
MetricRegistry.register(regression_metrics, "regression")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import danling as dl | ||
|
||
from .base_runner import BaseRunner | ||
|
||
|
||
class MultiMoleculeRunner(type):
    # NOTE(review): this is a factory disguised as a class — "instantiating"
    # it returns an instance of a platform-specific runner, never an instance
    # of MultiMoleculeRunner itself. Subclassing `type` presumably only makes
    # `Type[MultiMoleculeRunner]` annotations acceptable; confirm before
    # refactoring into a plain factory function.
    def __new__(cls, config):
        # Dispatch on the configured platform; defaults to plain PyTorch.
        if config.get("platform", "torch") == "torch":
            return TorchRunner(config)
        if config.platform == "deepspeed":
            return DeepSpeedRunner(config)
        if config.platform == "accelerate":
            return AccelerateRunner(config)
        raise ValueError(f"Unsupported platform: {config.platform}")
|
||
|
||
class TorchRunner(BaseRunner, dl.TorchRunner):
    """BaseRunner specialised for the plain PyTorch platform."""

    pass


class DeepSpeedRunner(BaseRunner, dl.DeepSpeedRunner):
    """BaseRunner specialised for the DeepSpeed platform."""

    pass


class AccelerateRunner(BaseRunner, dl.AccelerateRunner):
    """BaseRunner specialised for the HuggingFace Accelerate platform."""

    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# MultiMolecule | ||
# Copyright (C) 2024-Present MultiMolecule | ||
|
||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
|
||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
|
||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from .apis import train | ||
|
||
if __name__ == "__main__":
    # `python -m multimolecule` starts training with the CLI-provided config.
    train()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters