From cb207f3ff9f6e1bcefb25d6e580b4dc1725602f8 Mon Sep 17 00:00:00 2001 From: Ben Hearsum Date: Mon, 29 Apr 2024 14:50:00 -0400 Subject: [PATCH] Wrap train-taskcluster.sh in train_taskcluster.py --- Taskfile.yml | 2 +- taskcluster/kinds/finetune-student/kind.yml | 4 +- taskcluster/kinds/train-backwards/kind.yml | 3 +- taskcluster/kinds/train-student/kind.yml | 4 +- taskcluster/kinds/train-teacher/kind.yml | 3 +- taskcluster/scripts/pipeline/__init__.py | 0 .../scripts/pipeline/train_taskcluster.py | 15 ++++ tests/test_train_taskcluster.py | 80 +++++++++++++++++++ 8 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 taskcluster/scripts/pipeline/__init__.py create mode 100755 taskcluster/scripts/pipeline/train_taskcluster.py create mode 100644 tests/test_train_taskcluster.py diff --git a/Taskfile.yml b/Taskfile.yml index 17d405872..bbc89b426 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -101,7 +101,7 @@ tasks: task test -- tests/test_alignments.py deps: [poetry-install-tests] cmds: - - PYTHONPATH=$(pwd) poetry run pytest -vv {{.CLI_ARGS}} + - PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline" poetry run pytest -vv {{.CLI_ARGS}} test-docker: desc: Run the unit tests in the docker image. Some tests require the pre-built Linux executables. diff --git a/taskcluster/kinds/finetune-student/kind.yml b/taskcluster/kinds/finetune-student/kind.yml index 3800561fe..256b6936c 100644 --- a/taskcluster/kinds/finetune-student/kind.yml +++ b/taskcluster/kinds/finetune-student/kind.yml @@ -48,6 +48,8 @@ tasks: - pipeline/train/configs/opustrainer/student.yml - pipeline/train/configs/training/student.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py + - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-student-finetuned @@ -88,7 +90,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py student finetune {src_locale} diff --git a/taskcluster/kinds/train-backwards/kind.yml b/taskcluster/kinds/train-backwards/kind.yml index e54ce51e0..0c7b4a46e 100644 --- a/taskcluster/kinds/train-backwards/kind.yml +++ b/taskcluster/kinds/train-backwards/kind.yml @@ -31,6 +31,7 @@ tasks: type: train-backwards resources: - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py - taskcluster/scripts/pipeline/train-taskcluster.sh - pipeline/train/configs/model/backward.yml - pipeline/train/configs/opustrainer/backward.yml @@ -98,7 +99,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py backward train {trg_locale} diff --git a/taskcluster/kinds/train-student/kind.yml b/taskcluster/kinds/train-student/kind.yml index 964dda46a..678f9cc90 100644 --- a/taskcluster/kinds/train-student/kind.yml +++ b/taskcluster/kinds/train-student/kind.yml @@ -49,6 +49,8 @@ tasks: - pipeline/train/configs/opustrainer/student.yml - pipeline/train/configs/training/student.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py + - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-student worker-type: @@ -94,7 +96,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py student train {src_locale} diff --git a/taskcluster/kinds/train-teacher/kind.yml b/taskcluster/kinds/train-teacher/kind.yml index fbb9849a5..a272b3cdb 100644 --- a/taskcluster/kinds/train-teacher/kind.yml +++ b/taskcluster/kinds/train-teacher/kind.yml @@ -70,6 +70,7 @@ tasks: - pipeline/train/configs/opustrainer/teacher.yml - pipeline/train/configs/training/teacher.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-teacher @@ -119,7 +120,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py teacher train {src_locale} diff --git a/taskcluster/scripts/pipeline/__init__.py b/taskcluster/scripts/pipeline/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/taskcluster/scripts/pipeline/train_taskcluster.py b/taskcluster/scripts/pipeline/train_taskcluster.py new file mode 100755 index 000000000..921cce709 --- /dev/null +++ b/taskcluster/scripts/pipeline/train_taskcluster.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import os.path +import subprocess +import sys + +TRAINING_SCRIPT = os.path.join(os.path.dirname(__file__), "train-taskcluster.sh") + + +def main(args): + subprocess.run([TRAINING_SCRIPT, *args], check=True) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/tests/test_train_taskcluster.py b/tests/test_train_taskcluster.py new file mode 100644 index 000000000..8c9eec829 --- /dev/null +++ b/tests/test_train_taskcluster.py @@ -0,0 +1,80 @@ +import os +from unittest import mock + +import pytest +import train_taskcluster + +TRAIN_TASKCLUSTER_SH = os.path.normpath( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "..", + "taskcluster", + "scripts", + "pipeline", + "train-taskcluster.sh", + ) +) + + +@pytest.mark.parametrize( + "args", + ( + pytest.param( + [ + "model_type", + "type", + "src", + "trg", + "train_set_prefix", + "valid_set_prefix", + "model_dir", + "best_model_metric", + "alignments", + "seed", + ], + id="required_only", + ), + pytest.param( + [ + "model_type", + "type", + "src", + "trg", + "train_set_prefix", + "valid_set_prefix", + "model_dir", + "best_model_metric", + "alignments", + "seed", + "pretrained_model_mode", + "pretrained_model_type", + ], + id="with_pretrained_model", + ), + pytest.param( + [ + "model_type", + "type", + "src", + "trg", + "train_set_prefix", + "valid_set_prefix", + "model_dir", + "best_model_metric", + "alignments", + "seed", + "pretrained_model_mode", + "pretrained_model_type", + "--foo", + "--bar", + ], + id="with_extra_params", + ), + ), +) +def test_all_args_forwarded(args): + with mock.patch("train_taskcluster.subprocess") as mocked_subprocess: + train_taskcluster.main(args) + assert mocked_subprocess.run.call_args_list == [ + mock.call([TRAIN_TASKCLUSTER_SH] + args, check=True), + ]