From 5dbd34c61c263916c11491dd3504fe0b6de369e4 Mon Sep 17 00:00:00 2001
From: Rinchin
Date: Tue, 10 Dec 2024 14:03:52 +0000
Subject: [PATCH] update exp tracking

---
 scripts/experiments/run.py         | 40 ++++++++++++++++++++++--------
 scripts/experiments/run_tabular.py |  6 ++---
 tox.ini                            |  8 +++---
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/scripts/experiments/run.py b/scripts/experiments/run.py
index ef666ac9..6aff5db8 100644
--- a/scripts/experiments/run.py
+++ b/scripts/experiments/run.py
@@ -7,18 +7,22 @@ def main(  # noqa D103
+    task_name: str,
     dataset_name: str,
     queue: str,
     image: str,
     project: str,
     cpu_limit: int,
+    min_num_obs: int,
     memory_limit: int,
     tags: list,
     dataset_project: str = None,
     dataset_partial_name: str = None,
     n_datasets: int = -1,
 ):
-    if (dataset_project is not None) or (dataset_partial_name is not None) or len(tags) > 0:
+    if dataset_name is not None:
+        dataset_list = [dataset_name]
+    else:
         dataset_list = pd.DataFrame(
             clearml.Dataset.list_datasets(
                 dataset_project=dataset_project,
@@ -34,29 +38,42 @@ def main(  # noqa D103
             dataset_list.sort_values("version", ascending=False).drop_duplicates(subset=["name"]).to_dict("records")
         )
-    else:
-        dataset_list = [dataset_name]
+        if min_num_obs is not None:
+            # filter in one pass: popping from the list while enumerating it
+            # would skip the element that follows each removed item
+            dataset_list = [
+                dataset
+                for dataset in dataset_list
+                if clearml.Dataset.get(dataset_id=None, dataset_name=dataset["name"]).get_metadata()["num_obs"].iloc[0]
+                >= min_num_obs
+            ]
 
-    print(f"Running {len(dataset_list)} datasets:")
+    if len(dataset_list) == 0:
+        raise ValueError("No dataset was found with the passed parameters.")
 
-    np.random.shuffle(dataset_list)
+    np.random.shuffle(dataset_list)
+    dataset_list = dataset_list[:n_datasets]
 
-    for dataset in dataset_list[:n_datasets]:
+    print(f"Running {len(dataset_list)} datasets:")
+
+    for dataset in dataset_list:
         if isinstance(dataset, str):
             dataset_name = dataset
-            tags = ""
+            tags = []
         else:
             dataset_name = dataset["name"]
-            tags = f"--tags {' '.join(dataset['tags'])}" if len(tags) else ""
+            tags = dataset["tags"]
+
+        curr_task_name = f"{task_name}@{dataset_name}" if task_name is not None else dataset_name
+
+        tags.append(queue)
+        tags = f"--tags {' '.join(tags)}" if len(tags) else ""
 
         os.system(
-            f'clearml-task --project {project} --name {dataset_name} --script scripts/experiments/run_tabular.py --queue {queue} {tags} --docker {image} --docker_args "--cpus={cpu_limit} --memory={memory_limit}g" --args dataset={dataset_name}'
+            f'clearml-task --project {project} --name {curr_task_name} --script scripts/experiments/run_tabular.py --queue {queue} {tags} --docker {image} --docker_args "--cpus={cpu_limit} --memory={memory_limit}g" --args dataset={dataset_name}'
         )
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="")
-    parser.add_argument("--dataset", type=str, help="dataset name or id", default="sampled_app_train")
+    parser.add_argument("--name", type=str, help="task name", default=None)
+    parser.add_argument("--dataset", type=str, help="dataset name or id", default=None)
     parser.add_argument("--dataset_project", type=str, help="dataset_project", default="Datasets_with_metadata")
     parser.add_argument("--dataset_partial_name", type=str, help="dataset_partial_name", default=None)
     parser.add_argument("--tags", nargs="+", default=[], help="tags")
@@ -66,9 +83,11 @@ def main(  # noqa D103
     parser.add_argument("--project", type=str, help="clearml project", default="junk")
     parser.add_argument("--image", type=str, help="docker image", default="for_clearml:latest")
     parser.add_argument("--n_datasets", type=int, help="number of datasets", default=-1)
+    parser.add_argument("--min_num_obs", type=int, help="min number of samples", default=None)
 
     args = parser.parse_args()
     main(
+        task_name=args.name,
         dataset_name=args.dataset,
         cpu_limit=args.cpu_limit,
         memory_limit=args.memory_limit,
@@ -79,4 +98,5 @@ def main(  # noqa D103
         project=args.project,
         image=args.image,
         n_datasets=args.n_datasets,
+        min_num_obs=args.min_num_obs,
     )
diff --git a/scripts/experiments/run_tabular.py b/scripts/experiments/run_tabular.py
index 85b8b45b..eaa59aa3 100644
--- a/scripts/experiments/run_tabular.py
+++ b/scripts/experiments/run_tabular.py
@@ -49,9 +49,9 @@ def main(dataset_name: str, cpu_limit: int, memory_limit: int):  # noqa D103
         cpu_limit=cpu_limit,
         memory_limit=memory_limit,
         timeout=10 * 60 * 60,
-        # general_params={
-        #     "use_algos": [["mlp"]]
-        # },  # ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint', 'fttransformer'] or custom torch model
+        general_params={
+            # "use_algos": [["mlp"]]
+        },  # ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint', 'fttransformer'] or custom torch model
         # nn_params={"n_epochs": 10, "bs": 512, "num_workers": 0, "path_to_save": None, "freeze_defaults": True},
         # nn_pipeline_params={"use_qnt": True, "use_te": False},
         reader_params={
diff --git a/tox.ini b/tox.ini
index 683245fb..1952da82 100644
--- a/tox.ini
+++ b/tox.ini
@@ -81,11 +81,13 @@ deps =
 commands =
     codespell
 
-# example: tox -e exp -- --dataset_project=Datasets_with_metadata --tags=binary openml
-# args [--tags=binary openml] means tag is binary OR tag is openml
+# examples:
+#   tox -e exp -- --dataset_project=Datasets_with_metadata --tags=binary openml
+#   tox -e exp -- --dataset_project=Datasets_with_metadata --dataset=CIFAR_10_openml --queue=gpu_queue
+#   tox -e exp -- --dataset_project=Datasets_with_metadata --tags=multiclass --queue=gpu_queue --n_datasets=5 --name=mlp --min_num_obs=100000
+# Note: args [--tags=binary openml] means tag is binary OR tag is openml
 [testenv:exp]
deps =
     clearml
-
 commands =
     python scripts/experiments/run.py {posargs}