diff --git a/Taskfile.yml b/Taskfile.yml index 17d405872..bbc89b426 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -101,7 +101,7 @@ tasks: task test -- tests/test_alignments.py deps: [poetry-install-tests] cmds: - - PYTHONPATH=$(pwd) poetry run pytest -vv {{.CLI_ARGS}} + - PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline" poetry run pytest -vv {{.CLI_ARGS}} test-docker: desc: Run the unit tests in the docker image. Some tests require the pre-built Linux executables. diff --git a/taskcluster/kinds/finetune-student/kind.yml b/taskcluster/kinds/finetune-student/kind.yml index 3800561fe..256b6936c 100644 --- a/taskcluster/kinds/finetune-student/kind.yml +++ b/taskcluster/kinds/finetune-student/kind.yml @@ -48,6 +48,8 @@ tasks: - pipeline/train/configs/opustrainer/student.yml - pipeline/train/configs/training/student.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py + - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-student-finetuned @@ -88,7 +90,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py student finetune {src_locale} diff --git a/taskcluster/kinds/train-backwards/kind.yml b/taskcluster/kinds/train-backwards/kind.yml index e54ce51e0..0c7b4a46e 100644 --- a/taskcluster/kinds/train-backwards/kind.yml +++ b/taskcluster/kinds/train-backwards/kind.yml @@ -31,6 +31,7 @@ tasks: type: train-backwards resources: - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py - taskcluster/scripts/pipeline/train-taskcluster.sh - pipeline/train/configs/model/backward.yml - pipeline/train/configs/opustrainer/backward.yml @@ -98,7 +99,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && 
- $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py backward train {trg_locale} diff --git a/taskcluster/kinds/train-student/kind.yml b/taskcluster/kinds/train-student/kind.yml index 964dda46a..678f9cc90 100644 --- a/taskcluster/kinds/train-student/kind.yml +++ b/taskcluster/kinds/train-student/kind.yml @@ -49,6 +49,8 @@ tasks: - pipeline/train/configs/opustrainer/student.yml - pipeline/train/configs/training/student.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py + - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-student worker-type: @@ -94,7 +96,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py student train {src_locale} diff --git a/taskcluster/kinds/train-teacher/kind.yml b/taskcluster/kinds/train-teacher/kind.yml index fbb9849a5..a272b3cdb 100644 --- a/taskcluster/kinds/train-teacher/kind.yml +++ b/taskcluster/kinds/train-teacher/kind.yml @@ -70,6 +70,7 @@ tasks: - pipeline/train/configs/opustrainer/teacher.yml - pipeline/train/configs/training/teacher.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train_taskcluster.py - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-teacher @@ -119,7 +120,7 @@ tasks: pip3 install $VCS_PATH/tracking && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh + $VCS_PATH/taskcluster/scripts/pipeline/train_taskcluster.py teacher train {src_locale} diff --git a/taskcluster/scripts/pipeline/__init__.py b/taskcluster/scripts/pipeline/__init__.py new file mode 100644 index 000000000..e69de29bb 
diff --git a/taskcluster/scripts/pipeline/train_taskcluster.py b/taskcluster/scripts/pipeline/train_taskcluster.py
new file mode 100755
index 000000000..f3ced8993
--- /dev/null
+++ b/taskcluster/scripts/pipeline/train_taskcluster.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+"""Python entry point for the Taskcluster training tasks.
+
+Forwards its command-line arguments unchanged to the train-taskcluster.sh
+script located in the same directory as this file.
+"""
+
+# NOTE(review): json and requests are not used anywhere in this file as added here - confirm they are needed.
+import json
+import os.path
+import requests
+import subprocess
+import sys
+
+# Built from this file's own directory rather than the current working directory.
+TRAINING_SCRIPT = os.path.join(os.path.dirname(__file__), "train-taskcluster.sh")
+
+
+def main(args):
+    """Run the training shell script with ``args`` as its arguments.
+
+    Args:
+        args: Iterable of command-line argument strings, passed through as-is.
+
+    Raises:
+        subprocess.CalledProcessError: If the script exits with a non-zero status.
+    """
+    subprocess.run([TRAINING_SCRIPT, *args], check=True)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/tests/test_train_taskcluster.py b/tests/test_train_taskcluster.py
new file mode 100644
index 000000000..b5290d875
--- /dev/null
+++ b/tests/test_train_taskcluster.py
@@ -0,0 +1,43 @@
+import pytest
+from unittest import mock
+
+# Importable because the `test` task puts taskcluster/scripts/pipeline on PYTHONPATH.
+import train_taskcluster
+
+
+@pytest.mark.parametrize("args",
+    (
+        (
+            "foo",
+            "bar",
+            "blah",
+        ),
+        (
+            "foo",
+            "bar",
+            "blah",
+            "and",
+            "many",
+            "more",
+            "arguments",
+            "way",
+            "way",
+            "way",
+            "way",
+            "way",
+            "way",
+            "more",
+            "than",
+            "we",
+            "currently",
+            "use",
+        ),
+    ),
+)
+def test_all_args_forwarded(args):
+    """main() passes every argument, in order, to the training script."""
+    with mock.patch("train_taskcluster.subprocess") as mocked_subprocess:
+        train_taskcluster.main(args)
+        # call_args[0][0] is the argv list handed to subprocess.run; element 0 is
+        # the script path, so everything after it must equal the forwarded args.
+        assert mocked_subprocess.run.call_args[0][0][1:] == list(args)