-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproject.yml
77 lines (67 loc) · 3.11 KB
/
project.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Human-readable project metadata.
title: "Weights & Biases for xlm-roberta model for ner-uk"
# Folded scalar: the lines below are joined with single spaces into one string.
description: >-
  Use [Weights & Biases](https://www.wandb.com/) for logging of training
  experiments. This project template uses the NER_UK dataset and includes two
  workflows: `log` for training a named entity recognition model and logging
  the results to Weights & Biases (works out-of-the-box and only requires the
  `[training.logger]` to be set in the config) and `parameter-search` for
  running a hyperparameter search using
  [Weights & Biases Sweeps](https://docs.wandb.ai/guides/sweeps), running the
  experiments and logging the results.
# Project-wide variables. Reference one anywhere in this file as
# ${vars.var_name}.
vars:
  # File name of the training config; the `train` and `train-search` commands
  # look it up under configs/.
  default_config: "ner_uk_xlm_roberta_wandb.config"
# Directories the project relies on. The project CLI makes sure each of them
# exists.
directories:
  - "scripts"
  - "assets"
  - "training"
  - "configs"
# Assets that should be downloaded or available in the directory. You can replace
# this with your own input data. The entry below is an inactive example; the
# `data` command reads its input files from the paths listed in its `deps`.
# assets:
#   - dest: "assets/aclImdb_v1.tar.gz"
#     url: "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
#     description: "Movie Review Dataset for sentiment analysis by Maas et al., ACL 2011."
# Named workflows: each one lists the commands it runs, in order.
workflows:
  # Convert the data, then do one training run with the default config.
  log:
    - "data"
    - "train"
  # Convert the data, then start the hyperparameter search.
  parameter-search:
    - "data"
    - "train-search"
# Project commands, specified in a style similar to CI config files (e.g. Azure
# pipelines). The name is the command name that lets you trigger the command
# via "spacy project run [command] [path]". The help message is optional and
# shown when executing "spacy project run [optional command] [path] --help".
commands:
# One-time environment setup: Python packages plus the Weights & Biases login.
- name: "install"
  help: "Install dependencies and log in to Weights & Biases"
  deps:
    - "requirements.txt"
  script:
    - "pip install -r requirements.txt"
    # Runs after the requirements install, so spacy and spacy-transformers end
    # up at their latest versions.
    - "pip install --upgrade spacy spacy-transformers"
    - "wandb login"
# Runs `spacy convert` on the test and train .iob files and writes the results
# into assets/NER_UK/.
- name: "data"
  help: "Extract the gold-standard annotations"
  # Input files are read from absolute paths outside the project directory.
  deps:
    - "/gdrive/MyDrive/NER_UK/fixed-split/test.iob"
    - "/gdrive/MyDrive/NER_UK/fixed-split/train.iob"
  outputs:
    - "assets/NER_UK/train/"
    - "assets/NER_UK/test/"
  script:
    # Create both target directories before converting into them.
    - "mkdir -p assets/NER_UK/test/"
    - "mkdir -p assets/NER_UK/train/"
    - "python -m spacy convert /gdrive/MyDrive/NER_UK/fixed-split/test.iob assets/NER_UK/test/"
    - "python -m spacy convert /gdrive/MyDrive/NER_UK/fixed-split/train.iob assets/NER_UK/train/"
# Single training run with the default config; results are written to training/.
- name: "train"
  help: "Train a model using the default config"
  script:
    # --gpu-id 0 trains on the first GPU.
    - "python -m spacy train configs/${vars.default_config} --output training/ --gpu-id 0"
  deps:
    - "assets/NER_UK/train/"
    - "assets/NER_UK/test/"
    # Track the config the script reads, so that editing it causes the step to
    # rerun instead of being skipped as unchanged.
    - "configs/${vars.default_config}"
  outputs:
    # `spacy train --output training/` stores the best-scoring pipeline here.
    - "training/model-best"
# Hyperparameter search: runs the sweep script with the default config and the
# sweep definition, using training/ as its output location.
- name: "train-search"
  help: "Run customized training runs for hyperparameter search using [Weights & Biases Sweeps](https://docs.wandb.ai/guides/sweeps)"
  script:
    - "python ./scripts/sweeps_using_config.py configs/${vars.default_config} scripts/sweep.yml training/"
  deps:
    - "assets/NER_UK/train/"
    - "assets/NER_UK/test/"
    - "configs/"
    # The script and sweep definition are required inputs of the command above;
    # declaring them lets the CLI verify they exist and notice when they change.
    - "scripts/sweeps_using_config.py"
    - "scripts/sweep.yml"
# Housekeeping: empties training/ and wandb/ but keeps the directories.
- name: "clean"
  help: "Remove intermediate files."
  script:
    # Trained pipelines written by the training commands.
    - "rm -rf training/*"
    # Contents of the wandb/ directory.
    - "rm -rf wandb/*"