diff --git a/.gitignore b/.gitignore index b2b7ff4bce1..16d98cdded6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,19 +2,12 @@ # python bytecode *.pyc -# Generated documentation files. -# - User Guide. -doc/src/cylc-user-guide/pdf -doc/src/cylc-user-guide/html -doc/src/cylc-user-guide/commands.tex -doc/src/cylc-user-guide/cylc-version.txt -# - Suite Design Guide. -doc/src/suite-design-guide/*.aux -doc/src/suite-design-guide/*.out -doc/src/suite-design-guide/*.pdf -doc/src/suite-design-guide/*.log -doc/src/suite-design-guide/*.toc -# Installed docs. +# Generated documentation files (from 'cylc make-docs') +# - Command reference from called script 'custom/make-commands.sh'. +doc/src/appendices/command-ref.rst +# - Main directory of generated sphinx docs (guides) files. +doc/built-sphinx/ +# - Installed docs doc/install/ # VERSION FILE diff --git a/bin/cylc-check-software b/bin/cylc-check-software index 2e2ea3b2064..d150c110af5 100755 --- a/bin/cylc-check-software +++ b/bin/cylc-check-software @@ -55,20 +55,11 @@ opt_spec = { 'requests': [(2, 4, 2), 'HTTPSCOMMS', 'PY'], 'OpenSSL': [None, 'HTTPSCOMMS', 'PY'], 'urllib3': [None, 'HTTPSCOMMS', 'PY'], + 'sphinx': [(1, 5, 3), 'HTMLDOCS', 'PY'], 'pygtk': [(2, 0), 'GUIORGRAPH', 'PY'], 'pygraphviz': [None, 'GUIORGRAPH', 'PY'], - 'texlive': [None, 'LATEXGUIDE', 'TEX'], - 'tocloft': [None, 'LATEXGUIDE', 'TEX'], - 'framed': [None, 'LATEXGUIDE', 'TEX'], - 'preprint': [None, 'LATEXGUIDE', 'TEX'], - 'tex4ht': [None, 'LATEXGUIDE', 'TEX'], - 'TeX': [(3, 0), 'LATEXGUIDE', 'OTHER', - (['tex'], '-v', r'TeX ([^\s]+)')], 'graphviz': [None, 'GUIORGRAPH', 'OTHER', (['dot'], '-V', r'graphviz version ([^\s]+)', 2)], - 'ImageMagick': [None, 'HTMLUGUIDE', 'OTHER', - (['magick convert', 'convert'], - '-version', r'ImageMagick ([^\s]+)')] } # All possible module reqs to accept as arguments, as above or all lower case. 
@@ -81,8 +72,7 @@ func_tags_and_text = { 'TEMPLATING': 'configuration templating', 'HTTPSCOMMS': 'HTTPS communications layer', 'GUIORGRAPH': 'GUI & dependency graph visualisation', - 'LATEXGUIDE': 'LaTeX User Guide', - 'HTMLUGUIDE': 'HTML User Guide' + 'HTMLDOCS': 'HTML documentation', } # Initialise results dict diff --git a/bin/cylc-documentation b/bin/cylc-documentation index 4f7206811ea..10ddbc97918 100755 --- a/bin/cylc-documentation +++ b/bin/cylc-documentation @@ -18,7 +18,7 @@ """cylc [info] documentation|browse [OPTIONS] [SUITE] -View documentation in browser or PDF viewer, as per Cylc global config. +View documentation in the browser, as per Cylc global config. % cylc doc [OPTIONS] View local or internet [--www] Cylc documentation URLs. @@ -53,8 +53,9 @@ def main(): parser = OptionParser(__doc__) parser.add_option( - "-p", "--pdf", help="Open the PDF User Guide directly.", - action="store_true", default=False, dest="pdf") + "-g", "--guides", + help="Open the HTML (User & Suite Design) Guides directly.", + action="store_true", default=False, dest="guides") parser.add_option( "-w", "--www", help="Open the cylc internet homepage", @@ -96,16 +97,13 @@ def main(): intranet_url = glbl_cfg().get(['documentation', 'urls', 'local index']) internet_url = glbl_cfg().get(['documentation', 'urls', 'internet homepage']) - html_file = glbl_cfg().get(['documentation', 'files', 'html index']) + html_file = glbl_cfg().get(['documentation', 'files', 'html user guides']) html_viewer = glbl_cfg().get(['document viewers', 'html']) - pdf_file = glbl_cfg().get(['documentation', 'files', 'pdf user guide']) - pdf_viewer = glbl_cfg().get(['document viewers', 'pdf']) if len(args) == 0: # Cylc documentation. - if options.pdf: - # Force PDF. 
- viewer = pdf_viewer - target = pdf_file + if options.guides: + viewer = html_viewer + target = html_file elif options.url: viewer = html_viewer target = options.url @@ -129,9 +127,9 @@ def main(): elif len(args) == 1: # Suite or task documentation. - if options.pdf or options.www: - print >> sys.stderr, ( - "(Note: --pdf and --www are ignored for suite documentation).") + if options.guides or options.www: + print >> sys.stderr, ("(Note: --guides and --www are ignored for " + "suite documentation).") suite = args[0] if options.task_name: # Task documentation. @@ -156,7 +154,7 @@ def main(): else: parser.error("Too many arguments.") - if target in [pdf_file, html_file] and not os.path.isfile(target): + if target in html_file and not os.path.isfile(target): sys.exit("ERROR, file not found: %s (see your cylc admin)" % target) # viewer may have spaces (e.g. 'firefox --no-remote'): diff --git a/bin/cylc-help b/bin/cylc-help index 556ae595399..dc40f02c3ec 100755 --- a/bin/cylc-help +++ b/bin/cylc-help @@ -294,6 +294,7 @@ admin_commands['profile-battery'] = ['profile-battery'] admin_commands['import-examples'] = ['import-examples'] admin_commands['upgrade-run-dir'] = ['upgrade-run-dir'] admin_commands['check-software'] = ['check-software'] +admin_commands['make-docs'] = ['make-docs'] license_commands = {} license_commands['warranty'] = ['warranty'] @@ -365,6 +366,7 @@ comsum['profile-battery'] = 'Run a battery of profiling tests' comsum['import-examples'] = 'Import example suites your suite run directory' comsum['upgrade-run-dir'] = 'Upgrade a pre-cylc-6 suite run directory' comsum['check-software'] = 'Check required software is installed' +comsum['make-docs'] = 'Build the HTML documentation with Sphinx.' 
# license comsum['warranty'] = 'Print the GPLv3 disclaimer of warranty' comsum['conditions'] = 'Print the GNU General Public License v3.0' diff --git a/bin/cylc-make-docs b/bin/cylc-make-docs new file mode 100755 index 00000000000..d3198635c9b --- /dev/null +++ b/bin/cylc-make-docs @@ -0,0 +1,48 @@ +#!/bin/bash +# +# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +usage() { + echo "Usage: cylc [admin] make-docs [--help]" + echo "This command builds the HTML documentation, which is auto-generated" + echo "with the tool Sphinx. It wraps the 'sphinx-build' command." + echo "" + echo "Options:" + echo " --help Print this usage message." +} + +if [[ $# != 0 ]]; then + usage + if [[ $1 == "--help" ]]; then + exit 0 + else + echo "ERROR: illegal command line arguments" + exit 1 + fi +fi + +echo >&2 +echo "Building the HTML Cylc Documentation with Sphinx:" +echo >&2 +cd "$CYLC_DIR"/doc/ +echo "... Generating the command reference ..." +./src/custom/make-commands.sh +echo >&2 + +echo "... Auto-generating the HTML with Sphinx ..." +sphinx-build -n -b html ./src built-sphinx/ +echo >&2 +echo "Done." diff --git a/doc/Makefile b/doc/Makefile index 58b1e622377..38d98886349 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,52 +1,177 @@ -#!/usr/bin/make -f - -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. 
-# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Makefile for Sphinx documentation # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -.PHONY: all cug sdg cug-pdf cug-html clean install installclean +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build -all: sdg cug install +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif -sdg: src/suite-design-guide/document.pdf +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
-cug: cug-pdf cug-html - -cug-pdf: src/cylc-user-guide/pdf/cug-pdf.pdf +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext -cug-html: src/cylc-user-guide/html/single/cug-html.html \ - src/cylc-user-guide/html/multi/cug-html.html +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" -src/suite-design-guide/document.pdf: - cd src/suite-design-guide && $(MAKE) +clean: + rm -rf $(BUILDDIR)/* -src/cylc-user-guide/pdf/cug-pdf.pdf: - cd src/cylc-user-guide && $(MAKE) pdf +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
-src/cylc-user-guide/html/single/cug-html.html: - cd src/cylc-user-guide && $(MAKE) html-single +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." -src/cylc-user-guide/html/multi/cug-html.html: - cd src/cylc-user-guide && $(MAKE) html-multi +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." -install: - ./src/make-index.sh +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." -clean: - cd src/suite-design-guide && $(MAKE) clean - cd src/cylc-user-guide && $(MAKE) clean +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cylc.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cylc.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/cylc" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/cylc" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
+ +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." 
+ +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." -installclean: - rm -r install +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/doc/README b/doc/README deleted file mode 100644 index 359780380f9..00000000000 --- a/doc/README +++ /dev/null @@ -1,23 +0,0 @@ ------------------------------------------- -Cylc document generation from LaTeX source ------------------------------------------- - -cug = Cylc User Guide -sdg = Suite Design Guide - -make all - create cug and sdg, and install to 'doc/install/' - -make sdg - just create sdg (PDF) - -make cug - just create cug (PDF and HTML) -make cug-pdf - just create cug-pdf -make cug-html - just create cug-html (single and multi-page versions) - -make install - install created docs to 'doc/install/', create index.html -make clean - remove all generated files (except those in 'doc/install/') -make installclean - remove the 'doc/install/' directory - ------------------------------------------------------------------------ -NOTE 'doc/install/' can be moved wholesale to another location (add the -new location to your global.rc so that 'cylc doc' works properly). 
------------------------------------------------------------------------ diff --git a/doc/src/appendices/appendices-master.rst b/doc/src/appendices/appendices-master.rst new file mode 100644 index 00000000000..b75734576e2 --- /dev/null +++ b/doc/src/appendices/appendices-master.rst @@ -0,0 +1,42 @@ +.. Appendices Label: + +********** +Appendices +********** + +.. toctree:: + :maxdepth: 2 + + suiterc-config-ref + site-user-config-ref + gcylc-config-ref + gscan-config-ref + remote-job-management + command-ref + gcylc-graph-view + readme-file + install-file + dev-history-major-changes + cylc-6-migration-ref + known-issues + licensing + + +.. insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/appendices/cylc-6-migration-ref.rst b/doc/src/appendices/cylc-6-migration-ref.rst new file mode 100644 index 00000000000..84f58f5e169 --- /dev/null +++ b/doc/src/appendices/cylc-6-migration-ref.rst @@ -0,0 +1,247 @@ +.. _cylc-6-migration: + +Cylc 6 Migration Reference +========================== + +Cylc 6 introduced new date-time-related syntax for the suite.rc file. In +some places, this is quite radically different from the earlier syntax. + + +.. _cylc-6-migration-timeout-delays: + +Timeouts and Delays +------------------- + +Timeouts and delays such as ``[cylc][[events]]timeout`` or +``[runtime][[my_task]][[[job]]]execution retry delays`` were written in +a purely numeric form before cylc 6, in seconds, minutes (most common), or +hours, depending on the setting. + +They are now written in an ISO 8601 duration form, which has the benefit +that the units are user-selectable (use 1 day instead of 1440 minutes) +and explicit. + +Nearly all timeouts and delays in cylc were in minutes, except for: + +.. code-block:: none + + [runtime][[my_task]][[[suite state polling]]]interval + +.. 
code-block:: none + + [runtime][[my_task]][[[simulation mode]]]run time range + +which were in seconds, and + +.. code-block:: none + + [scheduling]runahead limit + +which was in hours (this is a special case discussed below +in :ref:`cylc-6-migration-runahead-limit`). + +See :ref:`Table X <cylc-6-migration-timeout-delays-table>`. + +.. _cylc-6-migration-timeout-delays-table: + +.. table:: Timeout/Delay Syntax Change Examples + + ========================================================= =============== =============== + Setting Pre-Cylc-6 Cylc-6+ + ========================================================= =============== =============== + ``[cylc][[events]]timeout`` 180 PT3H + ``[runtime][[my_task]][[[job]]]execution retry delays`` 2*30, 360, 1440 2*PT30M, PT6H, P1D + ``[runtime][[my_task]][[[suite state polling]]]interval`` 2 PT2S + ========================================================= =============== =============== + + +.. _cylc-6-migration-runahead-limit: + +Runahead Limit +-------------- + +See :ref:`runahead limit`. + +The ``[scheduling]runahead limit`` setting was written as a number of +hours in pre-cylc-6 suites. This is now in ISO 8601 format for date-time +cycling suites, so ``[scheduling]runahead limit=36`` would be written +``[scheduling]runahead limit=PT36H``. + +There is a new preferred alternative to ``runahead limit``, +``[scheduling]max active cycle points``. This allows the user to +configure how many cycle points can run at once (default ``3``). See +:ref:`max active cycle points`. + + +.. _cylc-6-migration-cycle-point: + +Cycle Time/Cycle Point +---------------------- + +See :ref:`initial cycle point`. + +The following suite.rc settings have changed name +(:ref:`Table X <cylc-6-migration-cycle-point-time-table>`): + +.. _cylc-6-migration-cycle-point-time-table: + +..
table:: Cycle Point Renaming + + ======================================= ================================== + Pre-Cylc-6 Cylc-6+ + ======================================= ================================== + ``[scheduling]initial cycle time`` ``[scheduling]initial cycle point`` + ``[scheduling]final cycle time`` ``[scheduling]final cycle point`` + ``[visualization]initial cycle time`` ``[visualization]initial cycle point`` + ``[visualization]final cycle time`` ``[visualization]final cycle point`` + ======================================= ================================== + + +This change is to reflect the fact that cycling in cylc 6+ can now be over +e.g. integers instead of being purely based on date-time. + +Date-times written in ``initial cycle time`` and +``final cycle time`` were in a cylc-specific 10-digit (or less) +``CCYYMMDDhh`` format, such as ``2014021400`` for 00:00 on +the 14th of February 2014. + +Date-times are now required to be ISO 8601 compatible. This can be achieved +easily enough by inserting a ``T`` between the day and the hour digits. + +.. _cylc-6-migration-cycle-point-syntax-table: + +.. table:: Cycle Point Syntax Example + + ================================== =============== =============== + Setting Pre-Cylc-6 Cylc-6+ + ================================== =============== =============== + ``[scheduling]initial cycle time`` 2014021400 20140214T00 + ================================== =============== =============== + + +.. _cylc-6-migration-cycling: + +Cycling +------- + +Special *start-up* and *cold-start* tasks have been removed from cylc 6. +Instead, use the initial/run-once notation as detailed +in :ref:`initial-non-repeating-r1-tasks` and :ref:`AdvancedStartingUp`. + +*Repeating asynchronous tasks* have also been removed because non date-time +workflows can now be handled more easily with integer cycling. See for instance +the satellite data processing example documented in :ref:`IntegerCycling`. 
+ +For repeating tasks with hour-based cycling the syntax has only minor changes: + +Pre-cylc-6: + +.. code-block:: cylc + + [scheduling] + # ... + [[dependencies]] + [[[0,12]]] + graph = foo[T-12] => foo & bar => baz + +Cylc-6+: + +.. code-block:: cylc + + [scheduling] + # ... + [[dependencies]] + [[[T00,T12]]] + graph = foo[-PT12H] => foo & bar => baz + + +Hour-based cycling section names are easy enough to convert, as seen in +:ref:`Table X <cylc-6-migration-cycling-hours-table>`. + +.. _cylc-6-migration-cycling-hours-table: + +.. table:: Hourly Cycling Sections + + ======================================== ================================== + Pre-Cylc-6 Cylc-6+ + ======================================== ================================== + ``[scheduling][[dependencies]][[[0]]]`` ``[scheduling][[dependencies]][[[T00]]]`` + ``[scheduling][[dependencies]][[[6]]]`` ``[scheduling][[dependencies]][[[T06]]]`` + ``[scheduling][[dependencies]][[[12]]]`` ``[scheduling][[dependencies]][[[T12]]]`` + ``[scheduling][[dependencies]][[[18]]]`` ``[scheduling][[dependencies]][[[T18]]]`` + ======================================== ================================== + + +The graph text in hour-based cycling is also easy to convert, as seen in +:ref:`Table X <cylc-6-migration-cycling-hours-offset-table>`. + +.. _cylc-6-migration-cycling-hours-offset-table: + +.. table:: Hourly Cycling Offsets + + ================= ============================================= + Pre-Cylc-6 Cylc-6+ + ================= ============================================= + ``my_task[T-6]`` ``my_task[-PT6H]`` + ``my_task[T-12]`` ``my_task[-PT12H]`` + ``my_task[T-24]`` ``my_task[-PT24H]`` or even ``my_task[-P1D]`` + ================= ============================================= + + +.. _cylc-6-migration-implicit-cycling: + +No Implicit Creation of Tasks by Offset Triggers +------------------------------------------------ + +Prior to cylc-6 intercycle offset triggers implicitly created task instances at +the offset cycle points.
For example, this pre cylc-6 suite automatically +creates instances of task ``foo`` at the offset hours +``3,9,15,21`` each day, for task ``bar`` to trigger off at ``0,6,12,18``: + +.. code-block:: cylc + + # Pre cylc-6 implicit cycling. + [scheduling] + initial cycle time = 2014080800 + [[dependencies]] + [[[00,06,12,18]]] + # This creates foo instances at 03,09,15,21: + graph = foo[T-3] => bar + +Here's the direct translation to cylc-6+ format: + +.. code-block:: cylc + + # In cylc-6+ this suite will stall. + [scheduling] + initial cycle point = 20140808T00 + [[dependencies]] + [[[T00,T06,T12,T18]]] + # This does NOT create foo instances at 03,09,15,21: + graph = foo[-PT3H] => bar + + +This suite fails validation with +``ERROR: No cycling sequences defined for foo``, +and at runtime it would stall with ``bar`` instances waiting on +non-existent offset ``foo`` instances (note that these +appear as ghost nodes in graph visualisations). + +To fix this, explicitly define the cycling of ``foo`` with an offset cycling +sequence: + +.. code-block:: cylc + + # Cylc-6+ requires explicit task instance creation. + [scheduling] + initial cycle point = 20140808T00 + [[dependencies]] + [[[T03,T09,T15,T21]]] + graph = foo + [[[T00,T06,T12,T18]]] + graph = foo[-PT3H] => bar + +Implicit task creation by offset triggers is no longer allowed because it is +error prone: a mistaken task cycle point offset should cause a failure +rather than automatically creating task instances on the wrong cycling +sequence. diff --git a/doc/src/appendices/dev-history-major-changes.rst b/doc/src/appendices/dev-history-major-changes.rst new file mode 100644 index 00000000000..ad6fc290c98 --- /dev/null +++ b/doc/src/appendices/dev-history-major-changes.rst @@ -0,0 +1,48 @@ +Cylc Development History - Major Changes +======================================== + +- **pre-cylc-3** + + - early versions focused on the new +scheduling algorithm.
A suite was a collection of "task definition files" + that encoded the prerequisites and outputs of each task, + exposing cylc's self-organising nature. Tasks could be transferred + from one suite to another by simply copying their taskdef files over + and checking prerequisite and output consistency. Global suite + structure was not easy to discern until run time (although cylc-2 + could generate resolved run time dependency graphs). + +- **cylc-3** + + - a new suite design interface: dependency graph and task runtime properties + defined in a single structured, validated, configuration file - the + suite.rc file + - graphical user interface + - suite graphing. + +- **cylc-4** + + - refined and organized the suite.rc file structure + - task runtime properties defined by an efficient inheritance hierarchy + - support for the Jinja2 template processor in suite configurations. + +- **cylc-5** + + - multi-threading for continuous network request handling and job submission + - more task states to distinguish job submission from execution + - dependence between suites via new suite run databases + - polling and killing of real task jobs + - polling as task communications option. + +- **cylc-6** + + - specification of all date-times and cycling workflows via the ISO8601 + date-times, durations, and recurrence expressions + - integer cycling + - a multi-process pool to execute job submissions, event handlers, and poll + and kill commands. + +- **cylc-7** + + - Replaced the Pyro communications layer with RESTful HTTPS + - Removed deprecated pre cylc-6 syntax and features. diff --git a/doc/src/appendices/gcylc-config-ref.rst b/doc/src/appendices/gcylc-config-ref.rst new file mode 100644 index 00000000000..8a7dae09e4a --- /dev/null +++ b/doc/src/appendices/gcylc-config-ref.rst @@ -0,0 +1,270 @@ +.. 
_GcylcRCReference: + +Gcylc GUI (cylc gui) Config File Reference +========================================== + +This section defines all legal items and values for the gcylc user config file, +which should be located in ``$HOME/.cylc/gcylc.rc``. Current settings +can be printed with the ``cylc get-gui-config`` command. + + +Top Level Items +--------------- + +dot icon size +^^^^^^^^^^^^^ + +Set the size of the task state dot icons displayed in the text and dot +views. + +- *type*: string +- *legal values*: ``small`` (10px), ``medium`` (14px), ``large`` (20px), + ``extra large`` (30px) +- *default*: ``medium`` + + +initial side-by-side views +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set the suite view panels initial orientation when the GUI starts. +This can be changed later using the "View" menu "Toggle views side-by-side" +option. + +- *type*: boolean (False or True) +- *default*: ``False`` + + +initial views +^^^^^^^^^^^^^ + +Set the suite view panel(s) displayed initially, when the GUI starts. +This can be changed later using the tool bar. + +- *type*: string (a list of one or two view names) +- *legal values*: ``text``, ``dot``, ``graph`` +- *default*: ``text`` +- *example*: ``initial views = graph, dot`` + + +maximum update interval +^^^^^^^^^^^^^^^^^^^^^^^ + +Set the maximum (longest) time interval between calls to the suite for data +update. + +The update frequency of the GUI is variable. It is determined by considering +the time of last update and the mean duration of the last 10 main loops of the +suite. + +In general, the GUI will use an update frequency that matches the mean duration +of the suite's main loop. In quiet time (or if the suite is not contactable), +it will gradually increase the update interval (i.e. reduce the update +frequency) to a maximum determined by this setting. + +Increasing this setting will reduce the network traffic and hits on the suite +process. 
However, if a quiet suite starts to pick up activity, the GUI may +initially appear out of sync with what is happening in the suite for the +duration of this interval. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT15S + + +sort by definition order +^^^^^^^^^^^^^^^^^^^^^^^^ + +If this is not turned off the default sort order for task names and +families in the dot and text views will be the order they appear in the +suite definition. Clicking on the task name column in the treeview will +toggle to alphanumeric sort, and a View menu item does the same for the +dot view. If turned off, the default sort order is alphanumeric and +definition order is not available at all. + +- *type*: boolean +- *default*: ``True`` + + +sort column +^^^^^^^^^^^ + +If ``text`` is in ``initial views`` then ``sort column`` sets +the column that will be sorted initially when the GUI launches. Sorting can be +changed later by clicking on the column headers. + +- *type*: string +- *legal values*: ``task``, ``state``, ``host``, ``job system``, + ``job ID``, ``T-submit``, ``T-start``, ``T-finish``, ``dT-mean``, + ``latest message``, ``none`` +- *default*: ``none`` +- *example*: ``sort column = T-start`` + + +sort column ascending +^^^^^^^^^^^^^^^^^^^^^ + +For use in combination with ``sort column``, sets whether the column will +be sorted using ascending or descending order. + +- *type*: boolean +- *default*: ``True`` +- *example*: ``sort column ascending = False`` + + +sub-graphs on +^^^^^^^^^^^^^ + +Set the sub-graphs view to be enabled by default. +This can be changed later using the toggle options for the graph view. + +- *type*: boolean (False or True) +- *default*: ``False`` + + +task filter highlight color +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The color used to highlight active task filters in gcylc. It must be a name +from the X11 ``rgb.txt`` file, e.g. ``SteelBlue``; or a +*quoted* hexadecimal color code, e.g.
``"#ff0000"`` for red (quotes +are required to prevent the hex code being interpreted as a comment). + +- *type*: string +- *default*: ``PowderBlue`` + + +task states to filter out +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set the initial filtering options when the GUI starts. Later this can be +changed by using the "View" menu "Task Filtering" option. + +- *type*: string list +- *legal values*: waiting, held, queued, ready, expired, submitted, + submit-failed, submit-retrying, running, succeeded, failed, retrying, + runahead +- *default*: runahead + + +transpose dot +^^^^^^^^^^^^^ + +Transposes the content in dot view so that it displays from left to right +rather than from top to bottom. Can be changed later using the options +submenu available via the view menu. + +- *type*: boolean +- *default*: ``False`` +- *example*: ``transpose dot = True`` + + +transpose graph +^^^^^^^^^^^^^^^ + +Transposes the content in graph view so that it displays from left to right +rather than from top to bottom. Can be changed later using the options submenu +via the view menu. + +- *type*: boolean +- *default*: ``False`` +- *example*: ``transpose graph = True`` + + +ungrouped views +^^^^^^^^^^^^^^^ + +List suite views, if any, that should be displayed initially in an +ungrouped state. Namespace family grouping can be changed later +using the tool bar. + +- *type*: string (a list of zero or more view names) +- *legal values*: ``text``, ``dot``, ``graph`` +- *default*: (none) +- *example*: ``ungrouped views = text, dot`` + + +use theme +^^^^^^^^^ + +Set the task state color theme, common to all views, to use initially. The +color theme can be changed later using the tool bar. See +``etc/gcylc.rc.eg`` and ``etc/gcylc-themes.rc`` in the Cylc +installation directory for how to modify existing themes or define your own. +Use ``cylc get-gui-config`` to list available themes. 
+ +- *type*: string (theme name) +- *legal values*: ``default``, ``solid``, ``high-contrast``, + ``color-blind``, and any custom or user-modified themes. +- *default*: ``default`` + + +window size +^^^^^^^^^^^ + +Sets the size (in pixels) of the cylc GUI at startup. + +- *type*: integer list: x, y +- *legal values*: positive integers +- *default*: 800, 500 +- *example*: ``window size = 1000, 700`` + + +[themes] +-------- + +This section may contain task state color theme definitions. + + +[themes] ``->`` [[THEME]] +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The name of the task state color-theme to be defined in this section. + +- *type*: string + + +[themes] ``->`` [[THEME]] ``->`` inherit +"""""""""""""""""""""""""""""""""""""""" + +You can inherit from another theme in order to avoid defining all states. + +- *type*: string (parent theme name) +- *default*: ``default`` + + +[themes] ``->`` [[THEME]] ``->`` defaults +""""""""""""""""""""""""""""""""""""""""" + +Set default icon attributes for all state icons in this theme. + +- *type*: string list (icon attributes) +- *legal values*: ``"color=COLOR"``, ``"style=STYLE"``, + ``"fontcolor=FONTCOLOR"`` +- *default*: (none) + +For the attribute values, ``COLOR`` and ``FONTCOLOR`` can be color names from +the X11 ``rgb.txt`` file, e.g. ``SteelBlue``; or hexadecimal color codes, e.g. +``#ff0000`` for red; and ``STYLE`` can be ``filled`` or ``unfilled``. +See ``etc/gcylc.rc.eg`` and ``etc/gcylc-themes.rc`` in +the Cylc installation directory for examples. + + +[themes] ``->`` [[THEME]] ``->`` STATE +"""""""""""""""""""""""""""""""""""""" + +Set icon attributes for all task states in ``THEME``, or for a subset of them +if you have used theme inheritance and/or defaults. Legal values of ``STATE`` +are any of the cylc task proxy states: *waiting, runahead, held, queued, +ready, submitted, submit-failed, running, succeeded, failed, retrying, +submit-retrying*. 
+ +- *type*: string list (icon attributes) +- *legal values*: ``"color=COLOR"``, ``"style=STYLE"``, + ``"fontcolor=FONTCOLOR"`` +- *default:* (none) + +For the attribute values, ``COLOR`` and ``FONTCOLOR`` can be color names from +the X11 ``rgb.txt`` file, e.g. ``SteelBlue``; or hexadecimal color codes, e.g. +``#ff0000`` for red; and ``STYLE`` can be ``filled`` or ``unfilled``. +See ``etc/gcylc.rc.eg`` and ``etc/gcylc-themes.rc`` in +the Cylc installation directory for examples. diff --git a/doc/src/appendices/gcylc-graph-view.rst b/doc/src/appendices/gcylc-graph-view.rst new file mode 100644 index 00000000000..95b2827aea4 --- /dev/null +++ b/doc/src/appendices/gcylc-graph-view.rst @@ -0,0 +1,44 @@ +.. _TheGraphBasedcontrolGUI: + +The gcylc Graph View +==================== + +The graph view in the gcylc GUI shows the structure of the suite as it +evolves. It can work well even for large suites, but be aware that the +Graphviz layout engine has to do a new global layout every time a task +proxy appears in or disappears from the task pool. The following may help +mitigate any jumping layout problems: + +- The disconnect button can be used to temporarily prevent the + graph from changing as the suite evolves. +- The greyed-out base nodes, which are only present to fill out + the graph structure, can be toggled off (but this will split the + graph into disconnected sub-trees). +- Right-click on a task and choose the "Focus" option to restrict + the graph display to that task's cycle point. Anything interesting + happening in other cycle points will show up as disconnected + rectangular nodes to the right of the graph (and you can click on + those to instantly refocus to their cycle points). +- Task filtering is the ultimate quick route to focusing on just + the tasks you're interested in, but this will destroy the graph + structure. + + +.. 
insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/appendices/gscan-config-ref.rst b/doc/src/appendices/gscan-config-ref.rst new file mode 100644 index 00000000000..3bea0c962fc --- /dev/null +++ b/doc/src/appendices/gscan-config-ref.rst @@ -0,0 +1,105 @@ +.. _GscanRCReference: + +Gscan GUI (cylc gscan) Config File Reference +============================================ + +This section defines all legal items and values for the gscan config +file which should be located in ``$HOME/.cylc/gscan.rc``. Some items +also affect the gpanel panel app. + +The main menubar can be hidden to maximise the display area. Its visibility +can be toggled via the mouse right-click menu, or by typing ``Alt-m``. When +visible, the main View menu allows you to change properties such as the columns +that are displayed, which hosts to scan for running suites, and the task state +icon theme. + +At startup, the task state icon theme and icon size are taken from the gcylc +config file ``$HOME/.cylc/gcylc.rc``. + + +Top Level Items +--------------- + + +activate on startup +^^^^^^^^^^^^^^^^^^^ + +Set whether ``cylc gpanel`` will activate automatically when the GUI is +loaded or not. + +- *type*: boolean (True or False) +- *legal values*: ``True``, ``False`` +- *default*: ``False`` +- *example*: ``activate on startup = True`` + + +columns +^^^^^^^ + +Set the columns to display when the ``cylc gscan`` GUI starts. This can +be changed later with the View menu. The order in which the columns are +specified here does not affect the display order. 
+ +- *type*: string (a list of one or more view names) +- *legal values*: ``host``, ``owner``, ``status``, ``suite``, + ``title``, ``updated`` +- *default*: ``status``, ``suite`` +- *example*: ``columns = suite, title, status`` + + +suite listing update interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set the time interval between refreshing the suite listing (by file system or +port range scan). + +Increasing this setting will reduce the frequency of gscan looking for running +suites. Scanning for suites by port range scan can be a hit on the network and +the running suite processes, while scanning for suites by walking the file +system can hit the file system (especially if the file system is a network file +system). Therefore, this is normally set with a lower frequency than the status +update interval. Increasing this setting will make gscan friendlier to the +network and/or the file system, but gscan may appear out of sync if +many suites start up or shut down between successive scans. + +- *type*: ISO 8601 duration/interval representation (e.g. ``PT10S``, + 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT1M + + +suite status update interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set the time interval between calls to known running suites (suites that are +known via the latest suite listing) for data updates. + +Increasing this setting will reduce the network traffic and hits on the suite +processes. However, gscan may appear out of sync with what may be happening +in very busy suites. + +- *type*: ISO 8601 duration/interval representation (e.g. ``PT10S``, + 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT15S + + +window size +^^^^^^^^^^^ + +Sets the size in pixels of the ``cylc gscan`` GUI window at startup. + +- *type*: integer list: x, y +- *legal values*: positive integers +- *default*: 300, 200 +- *example*: ``window size = 1000, 700`` + + +hide main menubar +^^^^^^^^^^^^^^^^^ + +Hide the main menubar of the ``cylc gscan`` GUI window at startup. 
By +default, the menubar is not hidden. Either way, you can toggle its +visibility with ``Alt-m`` or via the right-click menu. + +- *type*: boolean (True or False) +- *default*: False +- *example*: ``hide main menubar = True`` diff --git a/doc/src/appendices/install-file.rst b/doc/src/appendices/install-file.rst new file mode 100644 index 00000000000..e8e8d3f9aca --- /dev/null +++ b/doc/src/appendices/install-file.rst @@ -0,0 +1,4 @@ +Cylc INSTALL File +================= + +.. literalinclude:: ../../../INSTALL.md diff --git a/doc/src/appendices/known-issues.rst b/doc/src/appendices/known-issues.rst new file mode 100644 index 00000000000..a3eab879b09 --- /dev/null +++ b/doc/src/appendices/known-issues.rst @@ -0,0 +1,59 @@ +.. _KnownIssues: + +Known Issues +============ + + +.. _CurrentKnownIssues: + +Current Known Issues +-------------------- + +The best place to find current known issues is on +`GitHub <https://github.com/cylc/cylc/issues>`_. + + +.. _NotableKnownIssues: + +Notable Known Issues +-------------------- + + +.. _PipeInJobScripts: + +Use of pipes in job scripts +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In bash, the return status of a pipeline is normally the exit status of the +last command. This is unsafe, because if any command in the pipeline fails, the +script will continue nevertheless. + +For safety, a cylc task job script running in bash will have the +``set -o pipefail`` option turned on automatically. If a pipeline +exists in a task's ``script``, etc section, the failure of any part of +a pipeline will cause the command to return a non-zero code at the end, which +will be reported as a task job failure. Due to the unique nature of a pipeline, +the job file will trap the failure of the individual commands, as well as the +whole pipeline, and will attempt to report a failure back to the suite twice. +The second message is ignored by the suite, and so the behaviour can be safely +ignored. (You should probably still investigate the failure, however!) + + +.. 
insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/appendices/licensing.rst b/doc/src/appendices/licensing.rst new file mode 100644 index 00000000000..e12581e819d --- /dev/null +++ b/doc/src/appendices/licensing.rst @@ -0,0 +1,24 @@ +GNU GENERAL PUBLIC LICENSE v3.0 +=============================== + +See the `GNU GENERAL PUBLIC LICENSE v3.0 <https://www.gnu.org/licenses/gpl-3.0.html>`_. + + +.. insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/appendices/readme-file.rst b/doc/src/appendices/readme-file.rst new file mode 100644 index 00000000000..b15184c8b37 --- /dev/null +++ b/doc/src/appendices/readme-file.rst @@ -0,0 +1,24 @@ +Cylc README File +================ + +.. literalinclude:: ../../../README.md + + +.. insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/appendices/remote-job-management.rst b/doc/src/appendices/remote-job-management.rst new file mode 100644 index 00000000000..cc8369da905 --- /dev/null +++ b/doc/src/appendices/remote-job-management.rst @@ -0,0 +1,355 @@ +Remote Job Management +===================== + +Managing tasks in a workflow requires more than just job execution: Cylc +performs additional actions with ``rsync`` for file transfer, and +direct execution of ``cylc`` sub-commands over non-interactive +SSH [4]_. + +SSH-free Job Management? +------------------------ + +Some sites may want to restrict access to job hosts by whitelisting SSH +connections to allow only ``rsync`` for file transfer, and allowing job +execution only via a local batch system that sees the job hosts [5]_ . 
+We are investigating the feasibility of SSH-free job management when a local +batch system is available, but this is not yet possible unless your suite +and job hosts also share a filesystem, which allows Cylc to treat jobs as +entirely local [6]_ . + +SSH-based Job Management +------------------------ + +Cylc does not have persistent agent processes running on job hosts to act on +instructions received over the network [7]_ so instead we execute job +management commands directly on job hosts over SSH. Reasons for this include: + +- it works equally for batch system and background jobs +- SSH is *required* for background jobs, and for batch jobs if the + batch system is not available on the suite host +- *querying the batch system alone is not sufficient for full job + polling functionality* because jobs can complete (and then be forgotten by + the batch system) while the network, suite host, or suite server program is + down (e.g. between suite shutdown and restart) + + - to handle this we get the automatic job wrapper code to write + job messages and exit status to *job status files* that are + interrogated by suite server programs during job polling operations + - job status files reside on the job host, so the interrogation + is done over SSH + +- job status files also hold batch system name and job ID; this is + written by the job submit command, and read by job poll and kill commands + (all over SSH) + +A Concrete Example +------------------ + +The following suite, registered as ``suitex``, is used to illustrate +our current SSH-based remote job management. It submits two jobs to a remote, +and a local task views a remote job log then polls and kills the remote jobs. + +.. 
code-block:: cylc + + # suite.rc + [scheduling] + [[dependencies]] + graph = "delayer => master & REMOTES" + [runtime] + [[REMOTES]] + script = "sleep 30" + [[[remote]]] + host = wizard + owner = hobo + [[remote-a, remote-b]] + inherit = REMOTES + [[delayer]] + script = "sleep 10" + [[master]] + script = """ + sleep 5 + cylc cat-log -m c -f o $CYLC_SUITE_NAME remote-a.1 + sleep 2 + cylc poll $CYLC_SUITE_NAME REMOTES.1 + sleep 2 + cylc kill $CYLC_SUITE_NAME REMOTES.1 + sleep 2 + cylc remove $CYLC_SUITE_NAME REMOTES.1""" + + +The *delayer* task just separates suite start-up from remote job +submission, for clarity when watching the job host (e.g. with +``watch -n 1 find ~/cylc-run/suitex``). + +Global config specifies the path to the remote Cylc executable, says +to retrieve job logs, and not to use a remote login shell: + +.. code-block:: cylc + + # global.rc + [hosts] + [[wizard]] + cylc executable = /opt/bin/cylc + retrieve job logs = True + use login shell = False + +On running the suite, remote job host actions were captured in the transcripts +below by wrapping the ``ssh``, ``scp``, and ``rsync`` +executables in scripts that log their command lines before taking action. + +Create suite run directory and install source files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Done by ``rose suite-run`` before suite start-up (the command will be +migrated to Cylc soon though). + +- with ``--new`` it invokes bash over SSH and a raw shell + expression, to delete previous-run files +- it invokes itself over SSH to create top level suite directories + and install source files + + - skips installation if server UUID file is found on the job host + (indicates a shared filesystem) + +- uses ``rsync`` for suite source file installation + +.. note:: + + The same directory structure is used on suite and job hosts, for + consistency and simplicity, and because the suite host can also be a job + host. + +.. 
code-block:: bash + + # rose suite-run --new only: initial clean-out + ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard bash -l -O extglob -c 'cd; echo '"'"'673d7a0d-7816-42a4-8132-4b1ab394349c'"'"'; ls -d -r cylc-run/suitex/work cylc-run/suitex/share/cycle cylc-run/suitex/share cylc-run/suitex; rm -fr cylc-run/suitex/work cylc-run/suitex/share/cycle cylc-run/suitex/share cylc-run/suitex; (cd ; rmdir -p cylc-run/suitex/work cylc-run/suitex/share/cycle cylc-run/suitex/share cylc-run 2>/dev/null || true)' + + # rose suite-run: test for shared filesystem and create share/cycle directories + ssh -oBatchMode=yes -oConnectTimeout=10 -n hobo@wizard env ROSE_VERSION=2018.02.0 CYLC_VERSION=7.6.x bash -l -c '"$0" "$@"' rose suite-run -vv -n suitex --run=run --remote=uuid=231cd6a1-6d61-476d-96e1-4325ef9216fc,now-str=20180416T042319Z + + # rose suite-run: install suite source directory to job host + rsync -a --exclude=.* --timeout=1800 --rsh=ssh -oBatchMode=yes -oConnectTimeout=10 --exclude=231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=log/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=share/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=share/cycle/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=work/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=/.* --exclude=/cylc-suite.db --exclude=/log --exclude=/log.* --exclude=/state --exclude=/share --exclude=/work ./ hobo@wizard:cylc-run/suitex + # (internal rsync) + ssh -oBatchMode=yes -oConnectTimeout=10 -l hobo wizard rsync --server -logDtpre.iLsfx --timeout=1800 . cylc-run/suitex + # (internal rsync, back from hobo@wizard) + rsync --server -logDtpre.iLsfx --timeout=1800 . cylc-run/suitex + +Result: + +.. todo:: + + Nicer dirtree display via sphinx or custom extension? + +.. 
code-block:: bash + + ~/cylc-run/suitex + |__log->log.20180418T025047Z # LOG DIRECTORIES + |__log.20180418T025047Z # log directory for current suite run + |__suite.rc + |__xxx # any suite source sub-dirs or file + |__work # JOB WORK DIRECTORIES + |__share # SUITE SHARE DIRECTORY + |__cycle + + +Server installs service directory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- server address and credentials, so that clients such as + ``cylc message`` executed by jobs can connect +- done just before the first job is submitted to a remote, and at + suite restart for the remotes of jobs running when the suite went + down (server host, port, etc. may change at restart) +- uses SSH to invoke ``cylc remote-init`` on job hosts. If the remote command + does not find a server-side UUID file (which would indicate a shared + filesystem) it reads a tar archive of the service directory from stdin, and + unpacks it to install. + +.. code-block:: bash + + # cylc remote-init: install suite service directory + ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc remote-init '066592b1-4525-48b5-b86e-da06eb2380d9' '$HOME/cylc-run/suitex' + +Result: + +.. todo:: + + Nicer dirtree display via sphinx or custom extension? + +.. code-block:: bash + + ~/cylc-run/suitex + |__.service # SUITE SERVICE DIRECTORY + | |__contact # server address information + | |__passphrase # suite passphrase + | |__ssl.cert # suite SSL certificate + |__log->log.20180418T025047Z # LOG DIRECTORIES + |__log.20180418T025047Z # log directory for current suite run + |__suite.rc + |__xxx # any suite source sub-dirs or file + |__work # JOB WORK DIRECTORIES + |__share # SUITE SHARE DIRECTORY + |__cycle + + +Server submits jobs +^^^^^^^^^^^^^^^^^^^ + +- done when tasks are ready to run, for multiple jobs at once +- uses SSH to invoke ``cylc jobs-submit`` on the remote - to read job + scripts from stdin, write them to disk, and submit them to run + +.. 
code-block:: bash + + # cylc jobs-submit: submit two jobs + ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc jobs-submit '--remote-mode' '--' '$HOME/cylc-run/suitex/log/job' '1/remote-a/01' '1/remote-b/01' + +Result: + +.. todo:: + + Nicer dirtree display via sphinx or custom extension? + +.. code-block:: bash + + ~/cylc-run/suitex + |__.service # SUITE SERVICE DIRECTORY + | |__contact # server address information + | |__passphrase # suite passphrase + | |__ssl.cert # suite SSL certificate + |__log->log.20180418T025047Z # LOG DIRECTORIES + |__log.20180418T025047Z # log directory for current suite run + | |__job # job logs (to be distinguished from log/suite/ on the suite host) + | |__1 # cycle point + | |__remote-a # task name + | | |__01 # job submit number + | | | |__job # job script + | | | |__job.out # job stdout + | | | |__job.err # job stderr + | | | |__job.status # job status + | | |__NN->01 # symlink to latest submit number + | |__remote-b # task name + | |__01 # job submit number + | | |__job # job script + | | |__job.out # job stdout + | | |__job.err # job stderr + | | |__job.status # job status + | |__NN->01 # symlink to latest submit number + |__suite.rc + |__xxx # any suite source sub-dirs or file + |__work # JOB WORK DIRECTORIES + | |__1 # cycle point + | |__remote-a # task name + | | |__xxx # (any files written by job to PWD) + | |__remote-b # task name + | |__xxx # (any files written by job to PWD) + |__share # SUITE SHARE DIRECTORY + |__cycle + |__xxx # (any job-created sub-dirs and files) + + +Server tracks job progress +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- jobs send messages back to the server program on the suite host + + - directly: client-server HTTPS over the network (requires service + files installed - see above) + - indirectly: re-invoke clients on the suite host (requires reverse SSH) + +- OR server polls jobs at intervals (requires job polling - see below) + + +User views job logs +^^^^^^^^^^^^^^^^^^^ 
+ +- command ``cylc cat-log`` via CLI or GUI, invokes itself over SSH to the + remote +- suites will serve job logs in future, but this will still be needed + (e.g. if the suite is down) + +.. code-block:: bash + + # cylc cat-log: view a job log + ssh -oBatchMode=yes -oConnectTimeout=10 -n hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc cat-log --remote-arg='$HOME/cylc-run/suitex/log/job/1/remote-a/NN/job.out' --remote-arg=cat --remote-arg='tail -n +1 -F %(filename)s' suitex + + +Server cancels or kills jobs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- done automatically or via user command ``cylc kill``, for + multiple jobs at once +- uses SSH to invoke ``cylc jobs-kill`` on the + remote, with job log paths on the command line. Reads job ID from the + job status file. + +.. code-block:: bash + + # cylc jobs-kill: kill two jobs + ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc jobs-kill '--' '$HOME/cylc-run/suitex/log/job' '1/remote-a/01' '1/remote-b/01' + + +Server polls jobs +^^^^^^^^^^^^^^^^^ + +- done automatically or via user command ``cylc poll``, for + multiple jobs at once +- uses SSH to invoke ``cylc jobs-poll`` on the + remote, with job log paths on the command line. Reads job ID from the + job status file. + +.. code-block:: bash + + # cylc jobs-poll: poll two jobs + ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc jobs-poll '--' '$HOME/cylc-run/suitex/log/job' '1/remote-a/01' '1/remote-b/01' + + +Server retrieves jobs logs +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- done at job completion, according to global config +- uses ``rsync`` + +.. 
code-block:: bash + + # rsync: retrieve two job logs + rsync -a --rsh=ssh -oBatchMode=yes -oConnectTimeout=10 --include=/1 --include=/1/remote-a --include=/1/remote-a/01 --include=/1/remote-a/01/** --include=/1/remote-b --include=/1/remote-b/01 --include=/1/remote-b/01/** --exclude=/** hobo@wizard:$HOME/cylc-run/suitex/log/job/ /home/vagrant/cylc-run/suitex/log/job/ + # (internal rsync) + ssh -oBatchMode=yes -oConnectTimeout=10 -l hobo wizard rsync --server --sender -logDtpre.iLsfx . $HOME/cylc-run/suitex/log/job/ + # (internal rsync, back from hobo@wizard) + rsync --server --sender -logDtpre.iLsfx . /home/hobo/cylc-run/suitex/log/job/ + + +Server tidies job remote at shutdown +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- removes ``.service/contact`` so that clients won't repeatedly + try to connect + +.. code-block:: bash + + # cylc remote-tidy: remove the remote suite contact file + ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc remote-tidy '$HOME/cylc-run/suitex' + + +Other Use of SSH in Cylc +------------------------ + +- see if a suite is running on another host with a shared + filesystem - see ``detect_old_contact_file()`` in + ``lib/cylc/suite_srv_files_mgr.py`` +- cat content of a remote service file over SSH, if possible, for + clients that do not have suite credentials installed - see + ``_load_remote_item()`` in ``suite_srv_files_mgr.py`` + + +.. [4] Cylc used to run bare shell expressions over SSH, which required + a bash shell and made whitelisting difficult. +.. [5] A malicious script could be ``rsync``'d and run from a batch + job, but batch jobs are considered easier to audit. +.. [6] The job ID must also be valid to query and kill the job via the local + batch system. This is not the case for Slurm, unless the ``--cluster`` + option is explicitly used in job query and kill commands, otherwise + the job ID is not recognized by the local Slurm instance. +.. 
[7] This would be a more complex solution, in terms of implementation, + administration, and security. diff --git a/doc/src/appendices/site-user-config-ref.rst b/doc/src/appendices/site-user-config-ref.rst new file mode 100644 index 00000000000..09af6c80d75 --- /dev/null +++ b/doc/src/appendices/site-user-config-ref.rst @@ -0,0 +1,1260 @@ +.. _SiteRCReference: + +Global (Site, User) Config File Reference +========================================= + +This section defines all legal items and values for cylc site and +user config files. See :ref:`SiteAndUserConfiguration` for file locations, +intended usage, and how to generate the files using the +``cylc get-site-config`` command. + +*As for suite configurations, Jinja2 expressions can be embedded in +site and user config files to generate the final result parsed by cylc.* +Use of Jinja2 in suite configurations is documented in :ref:`Jinja`. + + +Top Level Items +--------------- + + +temporary directory +^^^^^^^^^^^^^^^^^^^ + +A temporary directory is needed by a few cylc commands, and is cleaned +automatically on exit. Leave unset for the default (usually ``$TMPDIR``). + +- *type*: string (directory path) +- *default*: (none) +- *example*: ``temporary directory = /tmp/$USER/cylc`` + + +.. _process pool size: + +process pool size +^^^^^^^^^^^^^^^^^ + +Maximum number of concurrent processes used to execute external job +submission, event handlers, and job poll and kill commands - see +:ref:`Managing External Command Execution`. + +- *type*: integer +- *default*: 4 + + +.. _process pool timeout: + +process pool timeout +^^^^^^^^^^^^^^^^^^^^ + +Interval after which long-running commands in the process pool will be killed - +see :ref:`Managing External Command Execution`. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT10M - note this is set quite high to avoid killing + important processes when the system is under load. 
+ + +disable interactive command prompts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Commands that intervene in running suites can be made to ask for +confirmation before acting. Some find this annoying and ineffective as a +safety measure, however, so command prompts are disabled by default. + +- *type*: boolean +- *default*: True + + +enable run directory housekeeping +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The suite run directory tree is created anew with every suite start +(not restart) but output from the most recent previous runs can be +retained in a rolling archive. Set length to 0 to keep no backups. +**This is incompatible with current Rose suite housekeeping** (see +:ref:`SuiteStorageEtc` for more on Rose) so it is disabled by +default, in which case new suite run files will overwrite existing ones +in the same run directory tree. Rarely, this can result in incorrect +polling results due to the presence of old task status files. + +- *type*: boolean +- *default*: False + + +run directory rolling archive length +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The number of old run directory trees to retain if run directory +housekeeping is enabled. + +- *type*: integer +- *default*: 2 + + +task host select command timeout +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When a task host in a suite is a shell command string, cylc calls the shell to +determine the task host. This call is invoked by the main process, and may +cause the suite to hang while waiting for the command to finish. This setting +sets a timeout for such a command to ensure that the suite can continue. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT10S + + +[task messaging] +---------------- + +This section contains configuration items that affect task-to-suite +communications. 
+ + +[task messaging] ``->`` retry interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If a send fails, the messaging code will retry after a configured +delay interval. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT5S + + +[task messaging] ``->`` maximum number of tries +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If successive sends fail, the messaging code will give up after a +configured number of tries. + +- *type*: integer +- *minimum*: 1 +- *default*: 7 + + +[task messaging] ``->`` connection timeout +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This is the same as the ``--comms-timeout`` option in cylc +commands. Without a timeout remote connections to unresponsive +suites can hang indefinitely (suites suspended with Ctrl-Z for instance). + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT30S + + +[suite logging] +--------------- + +The suite event log, held under the suite run directory, is maintained +as a rolling archive. Logs are rolled over (backed up and started anew) +when they reach a configurable limit size. + + +[suite logging] ``->`` rolling archive length +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +How many rolled logs to retain in the archive. + +- *type*: integer +- *minimum*: 1 +- *default*: 5 + + +[suite logging] ``->`` maximum size in bytes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Suite event logs are rolled over when they reach this file size. + +- *type*: integer +- *default*: 1000000 + + +[documentation] +--------------- + +Documentation locations for the ``cylc doc`` command and gcylc +Help menus. 
+ + +[documentation] ``->`` [[files]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +File locations of documentation held locally on the cylc host server. + + +[documentation] ``->`` [[files]] ``->`` html index +"""""""""""""""""""""""""""""""""""""""""""""""""" + +File location of the main cylc documentation index. + +- *type*: string +- *default*: ``/doc/index.html`` + + +[documentation] ``->`` [[files]] ``->`` pdf user guide +"""""""""""""""""""""""""""""""""""""""""""""""""""""" + +File location of the cylc User Guide, PDF version. + +- *type*: string +- *default*: ``/doc/cug-pdf.pdf`` + + +[documentation] ``->`` [[files]] ``->`` multi-page html user guide +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +File location of the cylc User Guide, multi-page HTML version. + +- *type*: string +- *default*: ``/doc/html/multi/cug-html.html`` + + +[documentation] ``->`` [[files]] ``->`` single-page html user guide +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +File location of the cylc User Guide, single-page HTML version. + +- *type*: string +- *default*: ``/doc/html/single/cug-html.html`` + + +[documentation] ``->`` [[urls]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Online documentation URLs. + + +[documentation] ``->`` [[urls]] ``->`` internet homepage +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +URL of the cylc internet homepage, with links to documentation for the +latest official release. + +- *type*: string +- *default*: http://cylc.github.com/cylc/ + + +[documentation] ``->`` [[urls]] ``->`` local index +"""""""""""""""""""""""""""""""""""""""""""""""""" + +Local intranet URL of the main cylc documentation index. + +- *type*: string +- *default*: (none) + + +[document viewers] +------------------ + +PDF and HTML viewers can be launched by cylc to view the documentation. + + +[document viewers] ``->`` pdf +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Your preferred PDF viewer program. 
+ +- *type*: string +- *default*: evince + + +[document viewers] ``->`` html +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Your preferred web browser. + +- *type*: string +- *default*: firefox + + +[editors] +--------- + +Choose your favourite text editor for editing suite configurations. + + +[editors] ``->`` terminal +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The editor to be invoked by the cylc command line interface. + +- *type*: string +- *default*: ``vim`` +- *examples*: + - ``terminal = emacs -nw`` (emacs non-GUI) + - ``terminal = emacs`` (emacs GUI) + - ``terminal = gvim -f`` (vim GUI) + + +[editors] ``->`` gui +^^^^^^^^^^^^^^^^^^^^ + +The editor to be invoked by the cylc GUI. + +- *type*: string +- *default*: ``gvim -f`` +- *examples*: + - ``gui = emacs`` + - ``gui = xterm -e vim`` + + +[communication] +--------------- + +This section covers options for network communication between cylc +clients (suite-connecting commands and GUIs) and servers (running suites). +Each suite listens on a dedicated network port, binding on the first +available starting at the configured base port. + +By default, the communication method is HTTPS secured with HTTP Digest +Authentication. If the system does not support SSL, you should configure +this section to use HTTP. Cylc will not automatically fall back to HTTP +if HTTPS is not available. + + +[communication] ``->`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The choice of client-server communication method - currently only HTTPS +and HTTP are supported, although others could be developed and plugged in. +Cylc defaults to HTTPS if this setting is not explicitly configured. + +- *type*: string +- *options*: + - **https** + - **http** +- *default*: https + + +[communication] ``->`` base port +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first port that Cylc is allowed to use. This item (and +``maximum number of ports``) is deprecated; please use +``run ports`` under ``[suite servers]`` instead. 
+ +- *type*: integer +- *default*: ``43001`` + + +[communication] ``->`` maximum number of ports +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This setting (and ``base port``) is deprecated; please use +``run ports`` under ``[suite servers]`` instead. + +- *type*: integer +- *default*: ``100`` + + +[communication] ``->`` proxies on +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Enable or disable proxy servers for HTTPS - disabled by default. + +- *type*: boolean +- *localhost default*: False + + +[communication] ``->`` options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Option flags for the communication method. Currently only 'SHA1' is +supported for HTTPS, which alters HTTP Digest Auth to use the SHA1 hash +algorithm rather than the standard MD5. This is more secure but is also +less well supported by third party web clients including web browsers. +You may need to add the 'SHA1' option if you are running on platforms +where MD5 is discouraged (e.g. under FIPS). + +- *type*: string\_list +- *default*: ``[]`` +- *options*: + - **SHA1** + + +[monitor] +--------- + +Configurable settings for the command line ``cylc monitor`` tool. + + +[monitor] ``->`` sort order +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The sort order for tasks in the monitor view. + +- *type*: string +- *options*: + + - **alphanumeric** + - **definition** - the order that tasks appear under + ``[runtime]`` in the suite configuration. + +- *default*: definition + + +[hosts] +------- + +The [hosts] section configures some important host-specific settings for +the suite host ("localhost") and remote task hosts. + +.. note:: + + Remote task behaviour is determined by the site/user config on the + suite host, not on the task host. + +Suites can specify task hosts that +are not listed here, in which case local settings will be assumed, +with the local home directory path, if present, replaced by +``$HOME`` in items that configure directory locations. 
+ + +[hosts] ``->`` [[HOST]] +^^^^^^^^^^^^^^^^^^^^^^^ + +The default task host is the suite host, **localhost**, with default +values as listed below. Use an explicit ``[hosts][[localhost]]`` +section if you need to override the defaults. Localhost settings are +then also used as defaults for other hosts, with the local home +directory path replaced as described above. This applies to items +omitted from an explicit host section, and to hosts that are not listed +at all in the site and user config files. Explicit host sections are only +needed if the automatically modified local defaults are not sufficient. + +Host section headings can also be *regular expressions* to match +multiple hostnames. + +.. note:: + + The general regular expression wildcard + is ``'.*'`` (zero or more of any character), not ``'*'``. + Hostname matching regular expressions are used as-is in the Python + ``re.match()`` function. + +As such they match from the beginning +of the hostname string (as specified in the suite configuration) and they +do not have to match through to the end of the string (use the +string-end matching character ``'$'`` in the expression to force this). + +A hierarchy of host match expressions from specific to general can be +used because config items are processed in the order specified in the +file. + +- *type*: string (hostname or regular expression) +- *examples*: + - ``server1.niwa.co.nz`` - explicit host name + - ``server\d.niwa.co.nz`` - regular expression + + +[hosts] ``->`` [[HOST]] ``->`` run directory +"""""""""""""""""""""""""""""""""""""""""""" + +The top level for suite logs and service files, etc. Can contain +``$HOME`` or ``$USER`` but not other environment variables (the +item cannot actually be evaluated by the shell on HOST before use, but the +remote home directory is where ``rsync`` and ``ssh`` naturally +land, and the remote username is known by the suite server program). 
+ +- *type*: string (directory path) +- *default*: ``$HOME/cylc-run`` +- *example*: ``/nfs/data/$USER/cylc-run`` + + +.. _workdirectory: + +[hosts] ``->`` [[HOST]] ``->`` work directory +""""""""""""""""""""""""""""""""""""""""""""" + +The top level for suite work and share directories. Can contain +``$HOME`` or ``$USER`` but not other environment variables +(the item cannot actually be evaluated by the shell on HOST before use, but the +remote home directory is where ``rsync`` and ``ssh`` naturally +land, and the remote username is known by the suite server program). + +- *type*: string (directory path) +- *localhost default*: ``$HOME/cylc-run`` +- *example*: ``/nfs/data/$USER/cylc-run`` + + +.. _task_comms_method: + +[hosts] ``->`` [[HOST]] ``->`` task communication method +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The means by which task progress messages are reported back to the running suite. +See above for default polling intervals for the poll method. + +- *type*: string (must be one of the following three options) +- *options*: + - **default** - direct client-server communication via network ports + - **ssh** - use ssh to re-invoke the messaging commands on the suite server + - **poll** - the suite polls for the status of tasks (no task messaging) +- *localhost default*: default + + +.. _execution_polling: + +[hosts] ``->`` [[HOST]] ``->`` execution polling intervals +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Cylc can poll running jobs to catch problems that prevent task messages +from being sent back to the suite, such as hard job kills, network +outages, or unplanned task host shutdown. Routine polling is done only +for the polling *task communication method* (above) unless +suite-specific polling is configured in the suite configuration. 
+A list of interval values can be specified, with the last value used +repeatedly until the task is finished - this allows more frequent +polling near the beginning and end of the anticipated task run time. +Multipliers can be used as shorthand as in the example below. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: +- *example*: ``execution polling intervals = 5*PT1M, 10*PT5M, 5*PT1M`` + + +.. _submission_polling: + +[hosts] ``->`` [[HOST]] ``->`` submission polling intervals +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Cylc can also poll submitted jobs to catch problems that prevent the +submitted job from executing at all, such as deletion from an external +batch scheduler queue. Routine polling is done only for the polling +*task communication method* (above) unless suite-specific polling +is configured in the suite configuration. A list of interval +values can be specified as for execution polling (above) but a single +value is probably sufficient for job submission polling. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: +- *example*: (see the execution polling example above) + + +[hosts] ``->`` [[HOST]] ``->`` scp command +"""""""""""""""""""""""""""""""""""""""""" + +A string for the command used to copy files to a remote host. This is not used +on the suite host unless you run local tasks under another user account. The +value is assumed to be ``scp`` with some initial options or a command +that implements a similar interface to ``scp``. + +- *type*: string +- *localhost default*: ``scp -oBatchMode=yes -oConnectTimeout=10`` + + +[hosts] ``->`` [[HOST]] ``->`` ssh command +"""""""""""""""""""""""""""""""""""""""""" + +A string for the command used to invoke commands on this host. This is not +used on the suite host unless you run local tasks under another user account. 
+The value is assumed to be ``ssh`` with some initial options or a +command that implements a similar interface to ``ssh``. + +- *type*: string +- *localhost default*: ``ssh -oBatchMode=yes -oConnectTimeout=10`` + + +[hosts] ``->`` [[HOST]] ``->`` use login shell +"""""""""""""""""""""""""""""""""""""""""""""" + +Whether to use a login shell or not for remote command invocation. By +default cylc runs remote ssh commands using a login shell: + +.. code-block:: bash + + ssh user@host 'bash --login cylc ...' + +which will source ``/etc/profile`` and +``~/.profile`` to set up the user environment. However, for +security reasons some institutions do not allow unattended commands to +start login shells, so you can turn off this behaviour to get: + +.. code-block:: bash + + ssh user@host 'cylc ...' + +which will use the default shell on the remote machine, +sourcing ``~/.bashrc`` (or ``~/.cshrc``) to set up the +environment. + +- *type*: boolean +- *localhost default*: True + + +[hosts] ``->`` [[HOST]] ``->`` cylc executable +"""""""""""""""""""""""""""""""""""""""""""""" + +The ``cylc`` executable on a remote host. + +.. note:: + + This should normally point to the cylc multi-version wrapper + (see :ref:`CUI`) on the host, not ``bin/cylc`` for a specific + installed version. + +Specify a full path if ``cylc`` is not in ``$PATH`` when it is +invoked via ``ssh`` on this host. + +- *type*: string +- *localhost default*: ``cylc`` + + +.. _GlobalInitScript: + +[hosts] ``->`` [[HOST]] ``->`` global init-script +""""""""""""""""""""""""""""""""""""""""""""""""" + +If specified, the value of this setting will be inserted just before the +``init-script`` section of all job scripts that are to be +submitted to the specified remote host. 
+ +- *type*: string +- *localhost default*: ``""`` + + +[hosts] ``->`` [[HOST]] ``->`` copyable environment variables +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A list containing the names of the environment variables that can and/or need +to be copied from the suite server program to a job. + +- *type*: string\_list +- *localhost default*: ``[]`` + + +[hosts] ``->`` [[HOST]] ``->`` retrieve job logs +"""""""""""""""""""""""""""""""""""""""""""""""" + +Global default for the :ref:`runtime-remote-retrieve-job-logs` setting for +the specified host. + + +[hosts] ``->`` [[HOST]] ``->`` retrieve job logs command +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +If ``rsync -a`` is unavailable or insufficient to retrieve job logs +from a remote host, you can use this setting to specify a suitable command. + +- *type*: string +- *default*: rsync -a + + +[hosts] ``->`` [[HOST]] ``->`` retrieve job logs max size +""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Global default for the :ref:`runtime-remote-retrieve-job-logs-max-size` +setting for the specified host. + + +[hosts] ``->`` [[HOST]] ``->`` retrieve job logs retry delays +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Global default for the :ref:`runtime-remote-retrieve-job-logs-retry-delays` +setting for the specified host. + + +[hosts] ``->`` [[HOST]] ``->`` task event handler retry delays +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Host specific default for the :ref:`runtime-events-handler-retry-delays` +setting. + + +.. _tail-command-template: + +[hosts] ``->`` [[HOST]] ``->`` tail command template +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +A command template (with ``%(filename)s`` substitution) to tail-follow +job logs on HOST, by the GUI log viewer and ``cylc cat-log``. You are +unlikely to need to override this. 
+ +- *type*: string +- *default*: ``tail -n +1 -F %(filename)s`` + + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] +"""""""""""""""""""""""""""""""""""""""""""""""""" + +Settings for particular batch systems on HOST. In the subsections below, SYSTEM +should be replaced with the cylc batch system handler name that represents the +batch system (see :ref:`RuntimeJobSubMethods`). + + +.. _err-tailer: + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] ``->`` [[[[SYSTEM]]]] ``->`` err tailer +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A command template (with ``%(job_id)s`` substitution) that can be used +to tail-follow the stderr stream of a running job if SYSTEM does +not use the normal log file location while the job is running. This setting +overrides :ref:`tail-command-template` above. + +- *type*: string +- *default*: (none) +- *example*: For PBS: + +.. code-block:: cylc + + [hosts] + [[ myhpc*]] + [[[batch systems]]] + [[[[pbs]]]] + err tailer = qcat -f -e %(job_id)s + out tailer = qcat -f -o %(job_id)s + err viewer = qcat -e %(job_id)s + out viewer = qcat -o %(job_id)s + + +.. _out-tailer: + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] ``->`` [[[[SYSTEM]]]] ``->`` out tailer +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A command template (with ``%(job_id)s`` substitution) that can be used +to tail-follow the stdout stream of a running job if SYSTEM does +not use the normal log file location while the job is running. This setting +overrides :ref:`tail-command-template` above. 
+ +- *type*: string +- *default*: (none) +- *example*: see :ref:`err-tailer` + + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] ``->`` [[[[SYSTEM]]]] ``->`` err viewer +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A command template (with ``%(job_id)s`` substitution) that can be used +to view the stderr stream of a running job if SYSTEM does +not use the normal log file location while the job is running. + +- *type*: string +- *default*: (none) +- *example*: see :ref:`err-tailer` + + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] ``->`` [[[[SYSTEM]]]] ``->`` out viewer +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A command template (with ``%(job_id)s`` substitution) that can be used +to view the stdout stream of a running job if SYSTEM does +not use the normal log file location while the job is running. + +- *type*: string +- *default*: (none) +- *example*: see :ref:`err-tailer` + + +.. _JobNameLengthMaximum: + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] ``->`` [[[[SYSTEM]]]] ``->`` job name length maximum +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The maximum length for job name acceptable by a batch system on a given host. +Currently, this setting is only meaningful for PBS jobs. For example, PBS 12 +or older will fail a job submit if the job name has more than 15 characters, +which is the default setting. If you have PBS 13 or above, you may want to +modify this setting to a larger value. + +- *type*: integer +- *default*: (none) +- *example*: For PBS: + +.. code-block:: cylc + + [hosts] + [[myhpc*]] + [[[batch systems]]] + [[[[pbs]]]] + # PBS 13 + job name length maximum = 236 + + +.. 
_ExecutionTimeLimitPollingIntervals: + +[hosts] ``->`` [[HOST]] ``->`` [[[batch systems]]] ``->`` [[[[SYSTEM]]]] ``->`` execution time limit polling intervals +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The intervals between polling after a task job (submitted to the relevant batch +system on the relevant host) exceeds its execution time limit. The default +setting is PT1M, PT2M, PT7M. The accumulated times (in minutes) for these +intervals will be roughly 1, 1 + 2 = 3 and 1 + 2 + 7 = 10 after a task job +exceeds its execution time limit. + + - *type*: Comma-separated list of ISO 8601 duration/interval + representations, optionally *preceded* by multipliers. + - *default*: PT1M, PT2M, PT7M + - *example*: + +.. code-block:: cylc + + [hosts] + [[myhpc*]] + [[[batch systems]]] + [[[[pbs]]]] + execution time limit polling intervals = 5*PT2M + + +.. _global-suite-servers: + +[suite servers] +--------------- + +Configure allowed suite hosts and ports for starting up (running or +restarting) suites and enabling them to be detected whilst running via +utilities such as ``cylc gscan``. Additionally configure host +selection settings specifying how to determine the most suitable run host at +any given time from those configured. + + +[suite servers] ``->`` auto restart delay +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Relates to Cylc's auto stop-restart mechanism (see :ref:`auto-stop-restart`). +When a host is set to automatically shutdown/restart it will first wait a +random period of time between zero and ``auto restart delay`` +seconds before beginning the process. This is to prevent large numbers +of suites from restarting simultaneously. + +- *type*: integer +- *default*: ``0`` + + +[suite servers] ``->`` condemned hosts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Hosts specified in ``condemned hosts`` will not be considered as suite +run hosts. 
If suites are already running on ``condemned hosts`` they +will be automatically shutdown and restarted (see :ref:`auto-stop-restart`). + +- *type*: comma-separated list of host names and/or IP addresses. +- *default*: (none) + + +[suite servers] ``->`` run hosts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A list of allowed suite run hosts. One of these hosts will be appointed for +a suite to start up on if an explicit host is not provided as an option to +a ``run`` or ``restart`` command. + +- *type*: comma-separated list of host names and/or IP addresses. +- *default*: ``localhost`` + + +[suite servers] ``->`` scan hosts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A list of hosts to scan for running suites. + +- *type*: comma-separated list of host names and/or IP addresses. +- *default*: ``localhost`` + + +[suite servers] ``->`` run ports +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A list of allowed ports for Cylc to use to run suites. + +.. note:: + + Only one suite can run per port for a given host, so the length + of this list determines the maximum number of suites that can run + at once per suite host. + +This config item supersedes the deprecated settings ``base port`` +and ``maximum number of ports``, where the base port is equivalent to +the first port, and the maximum number of ports to the length, of this list. + +- *type*: string in the format ``X .. Y`` for + ``X <= Y`` where ``X`` and ``Y`` are integers. +- *default*: ``43001 .. 43100`` (equivalent to the list + ``43001, 43002, ... , 43099, 43100``) + + +[suite servers] ``->`` scan ports +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A list of ports to scan for running suites on each host set in scan hosts. + +- *type*: string in the format ``X .. Y`` for + ``X <= Y`` where ``X`` and ``Y`` are integers. +- *default*: ``43001 .. 43100`` (equivalent to the list + ``43001, 43002, ... 
, 43099, 43100``) + + +[suite servers] ``->`` [[run host select]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Configure thresholds for excluding insufficient hosts and a method for +ranking the remaining hosts to be applied in selection of the most suitable +``run host``, from those configured, at start-up whenever a set host +is not specified on the command line via the ``--host=`` option. + + +[suite servers] ``->`` [[run host select]] ``->`` rank +"""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The method to use to rank the ``run host`` list in order of +suitability. + +- *type*: string (which must be one of the options outlined below) +- *default*: ``random`` +- *options*: + + - **random** - shuffle the hosts to select a host at random + - **load:1** - rank and select for the lowest load average over + 1 minute (as given by the ``uptime`` command) + - **load:5** - as for ``load:1`` above, but over 5 minutes + - **load:15** - as for ``load:1`` above, but over 15 minutes + - **memory** - rank and select for the highest usable memory i.e. + free memory plus memory in the buffer cache ('buffers') and in the + page cache ('cache'), as specified under ``/proc/meminfo`` + - **disk-space:PATH** - rank and select for the highest free disk + space for a given mount directory path ``PATH`` as given by + the ``df`` command, where multiple paths may be specified + individually i.e. via ``disk-space:PATH_1`` and + ``disk-space:PATH_2``, etc. + +- *default*: (none) + + +[suite servers] ``->`` [[run host select]] ``->`` thresholds +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A list of thresholds i.e. cutoff values which run hosts must meet in order +to be considered as a possible run host. 
Each threshold is a minimum or a +maximum requirement depending on the context of the measure; usable +memory (``memory``) and free disk space +(``disk-space:PATH``) threshold values set a *minimum* value, +which must be exceeded, whereas load average (``load:1``, +``load:5`` and ``load:15``) threshold values set a +*maximum*, which must not be. Failure to meet a threshold results in +exclusion from the list of hosts that undergo ranking to +determine the best host which becomes the run host. + +- *type*: string in format ``MEASURE_1 CUTOFF_1; ... ;MEASURE_n CUTOFF_n`` + (etc), where each ``MEASURE_N`` is one of the options below (note + these correspond to all the rank methods accepted under the rank setting + except for ``random`` which does not make sense as a threshold + measure). Spaces delimit corresponding measures and their values, while + semi-colons (optionally with subsequent spaces) delimit each measure-value + pair. +- *options*: + + - **load:1** - load average over 1 minute (as given by + the ``uptime`` command) + - **load:5** - as for ``load:1`` above, but over 5 minutes + - **load:15** - as for ``load:1`` above, but over 15 minutes + - **memory** - usable memory i.e. free memory plus memory in the + buffer cache ('buffers') and in the page cache ('cache'), in KB, as + specified under ``/proc/meminfo`` + - **disk-space:PATH** - free disk space for a given mount + directory path ``PATH``, in KB, as given by the ``df`` + command, where multiple paths may be specified individually i.e. via + ``disk-space:PATH_1`` and ``disk-space:PATH_2``, etc. + +- *default*: (none) +- *examples*: + + - ``thresholds = memory 2000`` (set a minimum of 2000 KB in usable + memory for possible run hosts) + - ``thresholds = load:5 0.5; load:15 1.0; disk-space:/ 5000`` (set a maximum + of 0.5 and 1.0 for load averages over 5 + and 15 minutes respectively and a minimum of 5000 KB of free disk-space on + the ``/`` mount directory. 
If any of these thresholds are not met + by a host, it will be excluded for running a suite on.) + + +[suite host self-identification] +-------------------------------- + +The suite host's identity must be determined locally by cylc and passed +to running tasks (via ``$CYLC_SUITE_HOST``) so that task messages +can target the right suite on the right host. + +.. todo:: + + Is it conceivable that different remote task hosts at the same + site might see the suite host differently? If so we would need to be + able to override the target in suite configurations. + + +[suite host self-identification] ``->`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This item determines how cylc finds the identity of the suite host. For +the default *name* method cylc asks the suite host for its host +name. This should resolve on remote task hosts to the IP address of the +suite host; if it doesn't, adjust network settings or use one of the +other methods. For the *address* method, cylc attempts to use a +special external "target address" to determine the IP address of the +suite host as seen by remote task hosts (in-source documentation in +``/lib/cylc/hostuserutil.py`` explains how this works). +And finally, as a last resort, you can choose the *hardwired* method +and manually specify the host name or IP address of the suite host. + +- *type*: string +- *options*: + + - name - self-identified host name + - address - automatically determined IP address (requires *target*, + below) + - hardwired - manually specified host name or IP address (requires + *host*, below) + +- *default*: name + + +[suite host self-identification] ``->`` target +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This item is required for the *address* self-identification method. +If your suite host sees the internet, a common address such as +``google.com`` will do; otherwise choose a host visible on your +intranet. 
+ +- *type*: string (an inter- or intranet URL visible from the suite host) +- *default*: ``google.com`` + + +[suite host self-identification] ``->`` host +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use this item to explicitly set the name or IP address of the suite host +if you have to use the *hardwired* self-identification method. + +- *type*: string (host name or IP address) +- *default*: (none) + + +[task events] +------------- + +Global site/user defaults for :ref:`TaskEventHandling`. + + +[test battery] +-------------- + +Settings for the automated development tests. + +.. note:: + + The test battery reads + ``/etc/global-tests.rc`` instead of the normal site/user + global config files. + + +[test battery] ``->`` remote host with shared fs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The name of a remote host that sees the same HOME file system as the host +running the test battery. + + +[test battery] ``->`` remote host +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Host name of a remote account that does not see the same home directory as +the account running the test battery (see also "remote owner" below). + + +[test battery] ``->`` remote owner +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +User name of a remote account that does not see the same home directory as the +account running the test battery (see also "remote host" above). + + +[test battery] ``->`` [[batch systems]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Settings for testing supported batch systems (job submission methods). The +tests for a batch system are only performed if the batch system is available on +the test host or a remote host accessible via SSH from the test host. + + +[test battery] ``->`` [[batch systems]] ``->`` [[[SYSTEM]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +SYSTEM is the name of a supported batch system with automated tests. +This can currently be "loadleveler", "lsf", "pbs", "sge" and/or "slurm". 
+ + +[test battery] ``->`` [[batch systems]] ``->`` [[[SYSTEM]]] ``->`` host +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The name of a host where commands for this batch system are available. Use +"localhost" if the batch system is available on the host running the test +battery. Any specified remote host should be accessible via SSH from the host +running the test battery. + + +[test battery] ``->`` [[batch systems]] ``->`` [[[SYSTEM]]] ``->`` err viewer +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The command template (with ``%(job_id)s`` substitution) for testing +the run time stderr viewer functionality for this batch system. + + +[test battery] ``->`` [[batch systems]] ``->`` [[[SYSTEM]]] ``->`` out viewer +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The command template (with ``%(job_id)s`` substitution) for testing +the run time stdout viewer functionality for this batch system. + + +[test battery] ``->`` [[batch systems]] ``->`` [[[SYSTEM]]] ``->`` [[[[directives]]]] +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The minimum set of directives that must be supplied to the batch system on the +site to initiate jobs for the tests. + + +[cylc] +------ + +Default values for entries in the suite.rc ``[cylc]`` section. + + +.. _SiteUTCMode: + +[cylc] ``->`` UTC mode +^^^^^^^^^^^^^^^^^^^^^^ + +Allows you to set a default value for UTC mode in a suite at the site level. +See :ref:`UTC-mode` for details. + + +[cylc] ``->`` health check interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Site default suite health check interval. +See :ref:`health-check-interval` for details. + + +[cylc] ``->`` task event mail interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Site default task event mail interval. +See :ref:`task-event-mail-interval` for details. + + +.. 
_SiteCylcHooks: + +[cylc] ``->`` [[events]] +^^^^^^^^^^^^^^^^^^^^^^^^ + +You can define site defaults for each of the following options, details +of which can be found under :ref:`SuiteEventHandling`: + + +[cylc] ``->`` [[events]] ``->`` handlers +"""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` handler events +"""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` startup handler +""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` shutdown handler +"""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` mail events +""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` mail footer +""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` mail from +""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` mail smtp +""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` mail to +""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` timeout handler +""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` timeout +""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` abort on timeout +"""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` stalled handler +""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` abort on stalled +"""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` inactivity handler +"""""""""""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` inactivity +"""""""""""""""""""""""""""""""""""""""""" + + +[cylc] ``->`` [[events]] ``->`` abort on inactivity +""""""""""""""""""""""""""""""""""""""""""""""""""" + + +.. 
_GlobalAuth: + +[authentication] +---------------- + +Authentication of client programs with suite server programs can be configured +here, and overridden in suites if necessary (see :ref:`SuiteAuth`). + +The suite-specific passphrase must be installed on a user's account to +authorize full control privileges (see :ref:`tutPassphrases` +and :ref:`ConnectionAuthentication`). In the future we plan to move to a more +traditional user account model so that each authorized user can have their own +password. + + +[authentication] ``->`` public +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This sets the client privilege level for public access - i.e. no +suite passphrase required. + +- *type*: string (must be one of the following options) +- *options*: + + - *identity* - only suite and owner names revealed + - *description* - identity plus suite title and description + - *state-totals* - identity, description, and task state totals + - *full-read* - full read-only access for monitor and GUI + - *shutdown* - full read access plus shutdown, but no other + control. + +- *default*: state-totals diff --git a/doc/src/appendices/suiterc-config-ref.rst b/doc/src/appendices/suiterc-config-ref.rst new file mode 100644 index 00000000000..65dd2c0c214 --- /dev/null +++ b/doc/src/appendices/suiterc-config-ref.rst @@ -0,0 +1,2522 @@ +.. _SuiteRCReference: + +Suite.rc Reference +================== + +This appendix defines all legal suite configuration items. +Embedded Jinja2 code (see :ref:`Jinja`) must process to a valid +raw suite.rc file. See also :ref:`SuiteRCFile` for a descriptive +overview of suite.rc files, including syntax (:ref:`Syntax`). + + +Top Level Items +--------------- + +The only top level configuration items at present are the suite title +and description. + + +[meta] +------ + +Section containing metadata items for this suite. Several items +(title, description, URL) are pre-defined and are used by the GUI. 
Others +can be user-defined and passed to suite event handlers to be interpreted +according to your needs. For example, the value of a "suite-priority" item +could determine how an event handler responds to failure events. + + +[meta] ``->`` title +^^^^^^^^^^^^^^^^^^^ + +A single line description of the suite. It is displayed in the GUI +"Open Another Suite" window and can be retrieved at run time with the +``cylc show`` command. + +- *type*: single line string +- *default*: (none) + + +[meta] ``->`` description +^^^^^^^^^^^^^^^^^^^^^^^^^ + +A multi-line description of the suite. It can be retrieved at run time +with the ``cylc show`` command. + +- *type*: multi-line string +- *default*: (none) + + +.. _SuiteURL: + +[meta] ``->`` URL +^^^^^^^^^^^^^^^^^ + +A web URL to suite documentation. If present it can be browsed with the +``cylc doc`` command, or from the gcylc Suite menu. The string +template ``%(suite_name)s`` will be replaced with the actual suite +name. See also :ref:`TaskURL`. + +- *type*: string (URL) +- *default*: (none) +- *example*: ``http://my-site.com/suites/%(suite_name)s/index.html`` + + +[meta] ``->`` group +^^^^^^^^^^^^^^^^^^^ + +A group name for a suite. In the gscan GUI, suites with the same group name can +be collapsed into a single state summary when the "group" column is displayed. + +- *type*: single line string +- *default*: (none) + + +[meta] ``->`` \_\_MANY\_\_ +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Replace \_\_MANY\_\_ with any user-defined metadata item. These, like +title, URL, etc. can be passed to suite event handlers to be interpreted +according to your needs. For example, "suite-priority". + +- *type*: String or integer +- *default*: (none) +- *example*: + + .. code-block:: cylc + + [meta] + suite-priority = high + + +[cylc] +------ + +This section is for configuration that is not specifically task-related. 
+ + +[cylc] ``->`` required run mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If this item is set cylc will abort if the suite is not started in the +specified mode. This can be used for demo suites that have to be +run in simulation mode, for example, because they have been taken out of +their normal operational context; or to prevent accidental submission of +expensive real tasks during suite development. + +- *type*: string +- *legal values*: live, dummy, dummy-local, simulation +- *default*: None + + +.. _UTC-mode: + +[cylc] ``->`` UTC mode +^^^^^^^^^^^^^^^^^^^^^^ + +Cylc runs off the suite host's system clock by default. This item allows +you to run the suite in UTC even if the system clock is set to local time. +Clock-trigger tasks will trigger when the current UTC time is equal to +their cycle point date-time plus offset; other time values used, reported, or +logged by the suite server program will usually also be in UTC. The default for +this can be set at the site level (see :ref:`SiteUTCMode`). + +- *type*: boolean +- *default*: False, unless overridden at site level. + + +.. _cycle-point-format: + +[cylc] ``->`` cycle point format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To just alter the timezone used in the date-time cycle point format, see +:ref:`cycle-point-time-zone`. To just alter the number of expanded year digits +(for years below 0 or above 9999), see +:ref:`cycle-point-num-expanded-year-digits`. + +Cylc usually uses a ``CCYYMMDDThhmmZ`` (``Z`` in the special +case of UTC) or ``CCYYMMDDThhmm+hhmm`` format (``+`` standing +for ``+`` or ``-`` here) for writing down date-time cycle +points, which follows one of the basic formats outlined in the ISO 8601 +standard. For example, a cycle point on the 3rd of February 2001 at 4:50 in +the morning, UTC (+0000 timezone), would be written +``20010203T0450Z``. Similarly, for the 3rd of February 2001 at +4:50 in the morning, +1300 timezone, cylc would write +``20010203T0450+1300``. 
+ +You may use the isodatetime library's syntax to write dates and times in ISO +8601 formats - ``CC`` for century, ``YY`` for decade and +decadal year, ``+X`` for expanded year digits and their positive or +negative sign, thereafter following the ISO 8601 standard example notation +except for fractional digits, which are represented as ``,ii`` for +``hh``, ``,nn`` for ``mm``, etc. For example, to write +date-times as week dates with fractional hours, set cycle point format to +``CCYYWwwDThh,iiZ`` e.g. ``1987W041T08,5Z`` for 08:30 UTC on +Monday on the fourth ISO week of 1987. + +You can also use a subset of the strptime/strftime POSIX standard - supported +tokens are ``%F``, ``%H``, ``%M``, ``%S``, +``%Y``, ``%d``, ``%j``, ``%m``, ``%s``, ``%z``. + +The ISO8601 extended date-time format can be used +(``%Y-%m-%dT%H:%M``) but +note that the "-" and ":" characters end up in job log directory paths. + +The pre cylc-6 legacy 10-digit date-time format YYYYMMDDHH is not ISO8601 +compliant and can no longer be used as the cycle point format. For job +scripts that still require the old format, use the +``cylc cyclepoint`` utility to translate the ISO8601 cycle point +inside job scripts, e.g.: + +.. code-block:: cylc + + [runtime] + [[root]] + [[[environment]]] + CYCLE_TIME = $(cylc cyclepoint --template=%Y%m%d%H) + + +.. _cycle-point-num-expanded-year-digits: + +[cylc] ``->`` cycle point num expanded year digits +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For years below 0 or above 9999, the ISO 8601 standard specifies that an +extra number of year digits and a sign should be used. This extra number needs +to be written down somewhere (here). + +For example, if this extra number is set to 2, 00Z on the 1st of January in +the year 10040 will be represented as ``+0100400101T0000Z`` (2 extra +year digits used). With this number set to 3, 06Z on the 4th of May 1985 would +be written as ``+00019850504T0600Z``. 
+ +This number defaults to 0 (no sign or extra digits used). + + +.. _cycle-point-time-zone: + +[cylc] ``->`` cycle point time zone +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you set UTC mode to True (:ref:`UTC-mode`) then this will default to +``Z``. If you use a custom cycle point format +(:ref:`cycle-point-format`), you should specify the timezone choice (or null +timezone choice) here as well. + +You may set your own time zone choice here, which will be used for all +date-time cycle point dumping. Time zones should be expressed as ISO 8601 time +zone offsets from UTC, such as ``+13``, ``+1300``, +``-0500`` or ``+0645``, with ``Z`` representing the +special ``+0000`` case. Cycle points will be converted to the time +zone you give and will be represented with this string at the end. + +Cycle points that are input without time zones (e.g. as an initial cycle +point +setting) will use this time zone if set. If this isn't set (and UTC mode is +also not set), then they will default to the current local time zone. + +.. note:: + + The ISO standard also allows writing the hour and minute separated + by a ":" (e.g. ``+13:00``) - however, this is not recommended, given + that the time zone is used as part of task output filenames. + + +[cylc] ``->`` abort if any task fails +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Cylc does not normally abort if tasks fail, but if this item is turned +on it will abort with exit status 1 if any task fails. + +- *type*: boolean +- *default*: False + + +.. _health-check-interval: + +[cylc] ``->`` health check interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Specify the time interval on which a running cylc suite will check that its run +directory exists and that its contact file contains the expected information. +If not, the suite will shut itself down automatically. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT5M``, 5 minutes (note: by contrast, ``P5M`` means 5 + months, so remember the ``T``!)). 
+- *default*: PT10M + + +.. _task-event-mail-interval: + +[cylc] ``->`` task event mail interval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Group together all the task event mail notifications into a single email within +a given interval. This is useful to prevent flooding users' mail boxes when +many task events occur within a short period of time. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT5M + + +[cylc] ``->`` disable automatic shutdown +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This has the same effect as the ``--no-auto-shutdown`` flag for +the suite run commands: it prevents the suite server program from shutting down +normally when all tasks have finished (a suite timeout can still be used to +stop the daemon after a period of inactivity, however). This option can +make it easier to re-trigger tasks manually near the end of a suite run, +during suite development and debugging. + +- *type*: boolean +- *default*: False + + +[cylc] ``->`` log resolved dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If this is turned on cylc will write the resolved dependencies of each +task to the suite log as it becomes ready to run (a list of the IDs of +the tasks that actually satisfied its prerequisites at run time). Mainly +used for cylc testing and development. + +- *type*: boolean +- *default*: False + + +[cylc] ``->`` [[parameters]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Define parameter values here for use in expanding *parameterized tasks* - +see :ref:`Parameterized Tasks Label`. + +- *type*: list of strings, or an integer range + ``LOWER..UPPER..STEP`` (two dots, inclusive bounds, "STEP" optional) +- *default*: (none) +- *examples*: + - ``run = control, test1, test2`` + - ``mem = 1..5`` (equivalent to ``1, 2, 3, 4, 5``). + - ``mem = -11..-7..2`` (equivalent to ``-11, -9, -7``). + + +.. 
_RefParameterTemplates: + +[cylc] ``->`` [[parameter templates]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parameterized task names (see previous item, and +:ref:`Parameterized Tasks Label`) are expanded, for each parameter value, +using string templates. You can assign templates to parameter names here, +to override the default templates. + +- *type*: a Python-style string template +- *default for integer parameters* ``p``: + ``_p%(p)0Nd`` + where ``N`` is the number of digits of the maximum integer value, + e.g. ``foo`` becomes ``foo_run3`` for ``run`` value ``3``. +- *default for non-integer parameters* ``p``: + ``_%(p)s`` e.g. ``foo`` becomes ``foo_top`` for + ``run`` value ``top``. +- *example*: ``run = -R%(run)s`` e.g. ``foo`` becomes ``foo-R3`` for + ``run`` value ``3``. + +.. note:: + + The values of a parameter named ``p`` are substituted for ``%(p)s``. + In ``_run%(run)s`` the first "run" is a string literal, and the second + gets substituted with each value of the parameter. + + +.. _SuiteEventHandling: + +[cylc] ``->`` [[events]] +^^^^^^^^^^^^^^^^^^^^^^^^ + +Cylc has internal "hooks" to which you can attach handlers that are +called by the suite server program whenever certain events occur. This section +configures suite event hooks; see :ref:`TaskEventHandling` for +task event hooks. + +Event handler commands can send an email or an SMS, call a pager, intervene in +the operation of their own suite, or whatever. +They can be held in the suite bin directory, otherwise it is up to you +to ensure their location is in ``$PATH`` (in the shell in which +cylc runs, on the suite host). The commands should require +very little resource to run and should return quickly. + +Each event handler can be specified as a list of command lines or command +line templates.
+ +A command line template may have any or all of these patterns which will be +substituted with actual values: + +- \%(event)s: event name (see below) +- \%(suite)s: suite name +- \%(suite\_url)s: suite URL +- \%(suite\_uuid)s: suite UUID string +- \%(message)s: event message, if any +- any suite [meta] item, e.g.: + - \%(title)s: suite title + - \%(importance)s: example custom suite metadata + +Otherwise the command line will be called with the following default +arguments: + +.. code-block:: none + + %(event)s %(suite)s %(message)s + +.. note:: + + Substitution patterns should not be quoted in the template strings. + This is done automatically where required. + +Additional information can be passed to event handlers via +[cylc] ``->`` [[environment]]. + + +[cylc] ``->`` [[events]] ``->`` EVENT handler +""""""""""""""""""""""""""""""""""""""""""""" + +A comma-separated list of one or more event handlers to call when one of the +following EVENTs occurs: + +- **startup** - the suite has started running +- **shutdown** - the suite is shutting down +- **timeout** - the suite has timed out +- **stalled** - the suite has stalled +- **inactivity** - the suite is inactive + +Default values for these can be set at the site level via the siterc file +(see :ref:`SiteCylcHooks`). + +Item details: + +- *type*: string (event handler script name) +- *default*: None, unless defined at the site level. +- *example*: ``startup handler = my-handler.sh`` + + +[cylc] ``->`` [[events]] ``->`` handlers +"""""""""""""""""""""""""""""""""""""""""" + +Specify the general event handlers as a list of command lines or command line +templates. + +- *type*: Comma-separated list of strings (event handler command line or + command line templates). +- *default*: (none) +- *example*: ``handlers = my-handler.sh`` + + +[cylc] ``->`` [[events]] ``->`` handler events +"""""""""""""""""""""""""""""""""""""""""""""" + +Specify the events for which the general event handlers should be invoked.
+ +- *type*: Comma-separated list of events +- *default*: (none) +- *example*: ``handler events = timeout, shutdown`` + + +[cylc] ``->`` [[events]] ``->`` mail events +""""""""""""""""""""""""""""""""""""""""""" + +Specify the suite events for which notification emails should be sent. + +- *type*: Comma-separated list of events +- *default*: (none) +- *example*: ``mail events = startup, shutdown, timeout`` + + +[cylc] ``->`` [[events]] ``->`` mail footer +""""""""""""""""""""""""""""""""""""""""""" + +Specify a string or string template to insert into the footers of notification +emails for both suite events and task events. + +A template string may have any or all of these patterns which will be +substituted with actual values: + +- \%(host)s: suite host name +- \%(port)s: suite port number +- \%(owner)s: suite owner name +- \%(suite)s: suite name + +Item details: + +- *type*: string +- *default*: (none) +- *example*: + ``mail footer = see: http://localhost/%(owner)s/notes-on/%(suite)s/`` + + +[cylc] ``->`` [[events]] ``->`` mail from +""""""""""""""""""""""""""""""""""""""""" + +Specify an alternate ``from:`` email address for suite event notifications. + +- *type*: string +- *default*: None, (notifications@HOSTNAME) +- *example*: ``mail from = no-reply@your-org`` + + +[cylc] ``->`` [[events]] ``->`` mail smtp +""""""""""""""""""""""""""""""""""""""""" + +Specify the SMTP server for sending suite event email notifications. + +- *type*: string +- *default*: None, (localhost:25) +- *example*: ``mail smtp = smtp.yourorg`` + + +[cylc] ``->`` [[events]] ``->`` mail to +""""""""""""""""""""""""""""""""""""""" + +A list of email addresses to which suite event notifications are sent. The +list can be anything accepted by the ``mail`` command.
+ +- *type*: string +- *default*: None, (USER@HOSTNAME) +- *example*: ``mail to = your.colleague`` + + +[cylc] ``->`` [[events]] ``->`` timeout +""""""""""""""""""""""""""""""""""""""" + +If a timeout is set and the timeout event is handled, the timeout event +handler(s) will be called if the suite stays in a stalled state for some period +of time. The timer is set initially at suite start up. It is possible to set a +default for this at the site level (see :ref:`SiteCylcHooks`). + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT5S``, 5 seconds, ``PT1S``, 1 second) - minimum 0 seconds. +- *default*: (none), unless set at the site level. + + +[cylc] ``->`` [[events]] ``->`` inactivity +"""""""""""""""""""""""""""""""""""""""""" + +If inactivity is set and the inactivity event is handled, the inactivity event +handler(s) will be called if there is no activity in the suite for some period +of time. The timer is set initially at suite start up. It is possible to set a +default for this at the site level (see :ref:`SiteCylcHooks`). + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT5S``, 5 seconds, ``PT1S``, 1 second) - minimum 0 seconds. +- *default*: (none), unless set at the site level. + + +[cylc] ``->`` [[events]] ``->`` abort on stalled +"""""""""""""""""""""""""""""""""""""""""""""""" + +If this is set to True it will cause the suite to abort with error status +if it stalls. A suite is considered "stalled" if there are no active, +queued or submitting tasks or tasks waiting for clock triggers to be met. It is +possible to set a default for this at the site level +(see :ref:`SiteCylcHooks`). + +- *type*: boolean +- *default*: False, unless set at the site level. + + +[cylc] ``->`` [[events]] ``->`` abort on timeout +"""""""""""""""""""""""""""""""""""""""""""""""" + +If a suite timer is set (above) this will cause the suite to abort with +error status if the suite times out while still running. 
It is possible to set +a default for this at the site level (see :ref:`SiteCylcHooks`). + +- *type*: boolean +- *default*: False, unless set at the site level. + + +[cylc] ``->`` [[events]] ``->`` abort on inactivity +""""""""""""""""""""""""""""""""""""""""""""""""""" + +If a suite inactivity timer is set (above) this will cause the suite to abort +with error status if the suite is inactive for some period while still running. +It is possible to set a default for this at the site level +(see :ref:`SiteCylcHooks`). + +- *type*: boolean +- *default*: False, unless set at the site level. + + +[cylc] ``->`` [[events]] ``->`` abort if EVENT handler fails +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Cylc does not normally care whether an event handler succeeds or fails, +but if this is turned on the EVENT handler will be executed in the +foreground (which will block the suite while it is running) and the +suite will abort if the handler fails. + +- *type*: boolean +- *default*: False + + +[cylc] ``->`` [[environment]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Environment variables defined in this section are passed to suite and +task event handlers. + +- These variables are not passed to tasks - use task runtime + variables for that. Similarly, task runtime variables are not + available to event handlers - which are executed by the suite server + program (not by running tasks) in response to task events. +- Cylc-defined environment variables such as + ``$CYLC_SUITE_RUN_DIR`` are not passed to task event + handlers by default, but you can make them available by + extracting them to the cylc environment like this: + + ..
code-block:: cylc + + [cylc] + [[environment]] + CYLC_SUITE_RUN_DIR = $CYLC_SUITE_RUN_DIR + +- These variables - unlike task execution environment variables + which are written to job scripts and interpreted by the shell at + task run time - are not interpreted by the shell prior to use + so shell variable expansion expressions cannot be used here. + + +[cylc] ``->`` [[environment]] ``->`` \_\_VARIABLE\_\_ +""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Replace ``__VARIABLE__`` with any number of environment variable +assignment expressions. +Values may refer to other local environment variables (order of +definition is preserved) and are not evaluated or manipulated by +cylc, so any variable assignment expression that is legal in the +shell in which cylc is running can be used (but see the warning +above on variable expansions, which will not be evaluated). +White space around the ``=`` is allowed (as far as cylc's file +parser is concerned these are just suite configuration items). + +- *type*: string +- *default*: (none) +- *examples*: ``FOO = $HOME/foo`` + + +.. _ReferenceTestConfig: + +[cylc] ``->`` [[reference test]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Reference tests are finite-duration suite runs that abort with non-zero +exit status if cylc fails, if any task fails, if the suite times +out, or if a shutdown event handler that (by default) compares the test +run with a reference run reports failure. See :ref:`AutoRefTests`. + + +[cylc] ``->`` [[reference test]] ``->`` suite shutdown event handler +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A shutdown event handler that should compare the test run with the +reference run, exiting with zero exit status only if the test run +verifies.
+ +- *type*: string (event handler command name or path) +- *default*: ``cylc hook check-triggering`` + +As for any event handler, the full path can be omitted if the script is +located somewhere in ``$PATH`` or in the suite bin directory. + + +[cylc] ``->`` [[reference test]] ``->`` required run mode +""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +If your reference test is only valid for a particular run mode, this +setting will cause cylc to abort if a reference test is attempted +in another run mode. + +- *type*: string +- *legal values*: live, dummy, dummy-local, simulation +- *default*: None + + +[cylc] ``->`` [[reference test]] ``->`` allow task failures +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A reference test run will abort immediately if any task fails, unless +this item is set, or a list of *expected task failures* is provided +(below). + +- *type*: boolean +- *default*: False + + +[cylc] ``->`` [[reference test]] ``->`` expected task failures +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A reference test run will abort immediately if any task fails, unless +*allow task failures* is set (above) or the failed task is found +in the list of IDs of tasks that are expected to fail. + +- *type*: Comma-separated list of strings (task IDs: ``name.cycle_point``). +- *default*: (none) +- *example*: ``foo.20120808, bar.20120908`` + + +[cylc] ``->`` [[reference test]] ``->`` live mode suite timeout +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The timeout value, expressed as an ISO 8601 duration/interval, after which the +test run should be aborted if it has not finished, in live mode. Test runs +cannot be done in live mode unless you define a value for this item, because +it is not possible to arrive at a sensible default for all suites. + +- *type*: ISO 8601 duration/interval representation, e.g. + ``PT5M`` is 5 minutes (note: by contrast ``P5M`` means 5 + months, so remember the ``T``!).
+- *default*: PT1M (1 minute) + + +[cylc] ``->`` [[reference test]] ``->`` simulation mode suite timeout +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The timeout value, expressed as an ISO 8601 duration/interval, after which the +test run should be aborted if it has not finished, in simulation mode. Test +runs cannot be done in simulation mode unless you define a value for this item, +because it is not possible to arrive at a sensible default for all suites. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT5M``, 5 minutes (note: by contrast, ``P5M`` means 5 + months, so remember the ``T``!)). +- *default*: PT1M (1 minute) + + +[cylc] ``->`` [[reference test]] ``->`` dummy mode suite timeout +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The timeout value, expressed as an ISO 8601 duration/interval, after which the +test run should be aborted if it has not finished, in dummy mode. Test runs +cannot be done in dummy mode unless you define a value for this item, because +it is not possible to arrive at a sensible default for all suites. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT5M``, 5 minutes (note: by contrast, ``P5M`` means 5 + months, so remember the ``T``!)). +- *default*: PT1M (1 minute) + + +.. _SuiteAuth: + +[cylc] ``->`` [[authentication]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Authentication of client programs with suite server programs can be set in the +global site/user config files and overridden here if necessary. +See :ref:`GlobalAuth` for more information. + + +[cylc] ``->`` [[authentication]] ``->`` public +"""""""""""""""""""""""""""""""""""""""""""""" + +The client privilege level granted for public access - i.e. no suite passphrase +required. See :ref:`GlobalAuth` for legal values. + + +[cylc] ``->`` [[simulation]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Suite-level configuration for the *simulation* and *dummy* run modes +described in :ref:`SimulationMode`.
+ + +[cylc] ``->`` [[simulation]] ``->`` disable suite event handlers +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +If this is set to ``True`` configured suite event handlers will not be +called in simulation or dummy modes. + +- *type*: boolean +- *default*: ``True`` + + +[scheduling] +------------ + +This section allows cylc to determine when tasks are ready to run. + + +.. _cycling-mode: + +[scheduling] ``->`` cycling mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Cylc runs using the proleptic Gregorian calendar by default. This item allows +you to either run the suite using the 360 day calendar (12 months of 30 days +in a year) or using integer cycling. It also supports use of the 365 (never a +leap year) and 366 (always a leap year) calendars. + +- *type*: string +- *legal values*: gregorian, 360day, 365day, 366day, integer +- *default*: gregorian + + +.. _initial cycle point: + +[scheduling] ``->`` initial cycle point +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a cold start each cycling task (unless specifically excluded under +``[special tasks]``) will be loaded into the suite with this cycle point, +or with the closest subsequent valid cycle point for the task. This item can +be overridden on the command line or in the gcylc suite start panel. + +In date-time cycling, if you do not provide time zone information for this, +it will be assumed to be local time, or in UTC if :ref:`UTC-mode` is set, or in +the time zone determined by :ref:`cycle-point-time-zone` if that is set. + +- *type*: ISO 8601 date-time point representation (e.g. + ``CCYYMMDDThhmm``, 19951231T0630) or "now". +- *default*: (none) + +The string "now" converts to the current date-time on the suite host (adjusted +to UTC if the suite is in UTC mode but the host is not) to minute resolution. +Minutes (or hours, etc.) may be ignored depending on your cycle point format +(:ref:`cycle-point-format`). 
+ + +[scheduling] ``->`` [[initial cycle point]] ``->`` initial cycle point relative to current time +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This can be used to set the initial cycle point time relative to the +current time. + +Two additional commands, ``next`` and ``previous``, can be used when setting +the initial cycle point. + +The syntax uses truncated ISO8601 time representations, and is of the style: +``next(Thh:mmZ)``, ``previous(T-mm)``; e.g. + +* ``initial cycle point = next(T15:00Z)`` +* ``initial cycle point = previous(T09:00)`` +* ``initial cycle point = next(T12)`` +* ``initial cycle point = previous(T-20)`` + +Examples of interpretation are given in +:numref:`fig-relative-initial-cycle-point-time-syntax-interpretation`. + +A list of times, separated by semicolons, can be provided, e.g. +``next(T-00;T-15;T-30;T-45)``. At least one time is required within the +brackets, and if more than one is given, the major time unit in each (hours +or minutes) should all be of the same type. + +If an offset from the specified date or time is required, this should be +used in the form: ``previous(Thh:mm) +/- PxTy`` in the same way as is used +for determining cycle periods, e.g. + +* ``initial cycle point = previous(T06) +P1D`` +* ``initial cycle point = next(T-30) -PT1H`` + +The section in the bracket attached to the next/previous command is +interpreted first, and then the offset is applied. + +The offset can also be used independently without a ``next`` or ``previous`` +command, and will be interpreted as an offset from "now". + +.. _fig-relative-initial-cycle-point-time-syntax-interpretation: + +.. 
table:: Examples of setting relative initial cycle point for times and offsets using ``now = 2018-03-14T15:12Z`` (and UTC mode) + + ==================================== ================== + Syntax Interpretation + ==================================== ================== + ``next(T-00)`` 2018-03-14T16:00Z + ``previous(T-00)`` 2018-03-14T15:00Z + ``next(T-00; T-15; T-30; T-45)`` 2018-03-14T15:15Z + ``previous(T-00; T-15; T-30; T-45)`` 2018-03-14T15:00Z + ``next(T00)`` 2018-03-15T00:00Z + ``previous(T00)`` 2018-03-14T00:00Z + ``next(T06:30Z)`` 2018-03-15T06:30Z + ``previous(T06:30) -P1D`` 2018-03-13T06:30Z + ``next(T00; T06; T12; T18)`` 2018-03-14T18:00Z + ``previous(T00; T06; T12; T18)`` 2018-03-14T12:00Z + ``next(T00; T06; T12; T18) +P1W`` 2018-03-21T18:00Z + ``PT1H`` 2018-03-14T16:12Z + ``-P1M`` 2018-02-14T15:12Z + ==================================== ================== + +The relative initial cycle point also works with truncated dates, including +weeks and ordinal date, using ISO8601 truncated date representations. +Note that day-of-week should always be specified when using weeks. If a time +is not included, the calculation of the next or previous corresponding +point will be done from midnight of the current day. +Examples of interpretation are given in +:numref:`fig-relative-initial-cycle-point-date-syntax-interpretation`. + +.. _fig-relative-initial-cycle-point-date-syntax-interpretation: + +.. 
table:: Examples of setting relative initial cycle point for dates using ``now = 2018-03-14T15:12Z`` (and UTC mode) + + ==================================== ================== + Syntax Interpretation + ==================================== ================== + ``next(-00)`` 2100-01-01T00:00Z + ``previous(--01)`` 2018-01-01T00:00Z + ``next(---01)`` 2018-04-01T00:00Z + ``previous(--1225)`` 2017-12-25T00:00Z + ``next(-2006)`` 2020-06-01T00:00Z + ``previous(-W101)`` 2018-03-05T00:00Z + ``next(-W-1; -W-3; -W-5)`` 2018-03-14T00:00Z + ``next(-001; -091; -181; -271)`` 2018-04-01T00:00Z + ``previous(-365T12Z)`` 2017-12-31T12:00Z + ==================================== ================== + + +[scheduling] ``->`` final cycle point +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Cycling tasks are held once they pass the final cycle point, if one is +specified. Once all tasks have achieved this state the suite will shut +down. If this item is provided you can override it on the command line +or in the gcylc suite start panel. + +In date-time cycling, if you do not provide time zone information for this, +it will be assumed to be local time, or in UTC if :ref:`UTC-mode` is set, or in +the :ref:`cycle-point-time-zone` if that is set. + +- *type*: ISO 8601 date-time point representation (e.g. + ``CCYYMMDDThhmm``, 19951231T1230) or ISO 8601 date-time offset + (e.g. +P1D+PT6H) +- *default*: (none) + + +.. _initial cycle point constraints: + +[scheduling] ``->`` initial cycle point constraints +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a cycling suite it is possible to restrict the initial cycle point by +defining a list of truncated time points under the initial cycle point +constraints. + +- *type*: Comma-separated list of ISO 8601 truncated time point + representations (e.g. T00, T06, T-30). +- *default*: (none) + + +.. 
_final cycle point constraints: + +[scheduling] ``->`` final cycle point constraints +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a cycling suite it is possible to restrict the final cycle point by +defining a list of truncated time points under the final cycle point +constraints. + +- *type*: Comma-separated list of ISO 8601 truncated time point + representations (e.g. T00, T06, T-30). +- *default*: (none) + + +[scheduling] ``->`` hold after point +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Cycling tasks are held once they pass the hold after cycle point, if one is +specified. Unlike with the final cycle point, the suite will not shut down once +all tasks have passed this point. If this item is provided you can override it +on the command line or in the gcylc suite start panel. + + +.. _runahead limit: + +[scheduling] ``->`` runahead limit +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Runahead limiting prevents the fastest tasks in a suite from getting too far +ahead of the slowest ones, as documented in :ref:`RunaheadLimit`. + +This config item specifies a hard limit as a cycle interval between the +slowest and fastest tasks. It is deprecated in favour of the newer default +limiting by ``max active cycle points`` (:ref:`max active cycle points`). + +- *type*: Cycle interval string e.g. ``PT12H`` + for a 12 hour limit under ISO 8601 cycling. +- *default*: (none) + + +.. _max active cycle points: + +[scheduling] ``->`` max active cycle points +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Runahead limiting prevents the fastest tasks in a suite from getting too far +ahead of the slowest ones, as documented in :ref:`RunaheadLimit`. + +This config item supersedes the deprecated hard ``runahead limit`` +(:ref:`runahead limit`). It allows up to ``N`` (default 3) consecutive +cycle points to be active at any time, adjusted up if necessary for +any future triggering. + +- *type*: integer +- *default*: 3 + + +..
_spawn to max active cycle points: + +[scheduling] ``->`` spawn to max active cycle points +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Allows tasks to spawn out to ``max active cycle points`` +(:ref:`max active cycle points`), removing the restriction that a task has to +have submitted before its successor can be spawned. + +*Important*: This should be used with care given the potential impact of +additional task proxies both in terms of memory and cpu for the cylc daemon as +well as overheads in rendering all the additional tasks in gcylc. Also, use +of the setting may highlight any issues with suite design relying on the +default behaviour where downstream tasks would otherwise be waiting on ones +upstream submitting and the suite would have stalled, e.g. a housekeeping task +at a later cycle deleting an earlier cycle's data before that cycle has had +a chance to run, where previously the task would not have been spawned until +its predecessor had been submitted. + +- *type*: boolean +- *default*: False + + +[scheduling] ``->`` [[queues]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Configuration of internal queues, by which the number of simultaneously +active tasks (submitted or running) can be limited, per queue. By +default a single queue called *default* is defined, with all tasks +assigned to it and no limit. To use a single queue for the whole suite +just set the limit on the *default* queue as required. +See also :ref:`InternalQueues`. + + +[scheduling] ``->`` [[queues]] ``->`` [[[\_\_QUEUE\_\_]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Section heading for configuration of a single queue. Replace +``__QUEUE__`` with a queue name, and repeat the section as required. + +- *type*: string +- *default*: "default" + + +[scheduling] ``->`` [[queues]] ``->`` [[[\_\_QUEUE\_\_]]] ``->`` limit +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The maximum number of active tasks allowed at any one time, for this queue.
+ +- *type*: integer +- *default*: 0 (i.e. no limit) + + +[scheduling] ``->`` [[queues]] ``->`` [[[\_\_QUEUE\_\_]]] ``->`` members +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A list of member tasks, or task family names, to assign to this queue +(assigned tasks will automatically be removed from the default queue). + +- *type*: Comma-separated list of strings (task or family names). +- *default*: none for user-defined queues; all tasks for the "default" queue + + +[scheduling] ``->`` [[xtriggers]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This section is for *External Trigger* function declarations - +see :ref:`External Triggers`. + + +[scheduling] ``->`` [[xtriggers]] ``->`` \_\_MANY\_\_ +""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Replace ``\_\_MANY\_\_`` with any user-defined event trigger function +declarations and corresponding labels for use in the graph: + +- *type*: string: function signature followed by optional call interval +- *example*: ``trig_1 = my_trigger(arg1, arg2, kwarg1, kwarg2):PT10S`` + +(See :ref:`External Triggers` for details). + + +[scheduling] ``->`` [[special tasks]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This section is used to identify tasks with special behaviour. Family names can +be used in special task lists as shorthand for listing all member tasks. + + +[scheduling] ``->`` [[special tasks]] ``->`` clock-trigger +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +.. note:: + + Please read :ref:`External Triggers` before + using the older clock triggers described in this section. + +Clock-trigger tasks (see :ref:`ClockTriggerTasks`) wait on a wall clock time +specified as an offset from their own cycle point. + +- *type*: Comma-separated list of task or family names with + associated date-time offsets expressed as ISO8601 interval strings, + positive or negative, e.g. ``PT1H`` for 1 hour. The offset + specification may be omitted to trigger right on the cycle point. 
+- *default*: (none) +- *example*: + + .. code-block:: cylc + + clock-trigger = foo(PT1H30M), bar(PT1.5H), baz + + +.. _ClockExpireRef: + +[scheduling] ``->`` [[special tasks]] ``->`` clock-expire +""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Clock-expire tasks enter the ``expired`` state and skip job submission if too +far behind the wall clock when they become ready to run. The expiry time is +specified as an offset from wall-clock time; typically it should be negative - +see :ref:`ClockExpireTasks`. + +- *type*: Comma-separated list of task or family names with + associated date-time offsets expressed as ISO8601 interval strings, + positive or negative, e.g. ``PT1H`` for 1 hour. The offset + may be omitted if it is zero. +- *default*: (none) +- *example*: + + .. code-block:: cylc + + clock-expire = foo(-P1D) + + +[scheduling] ``->`` [[special tasks]] ``->`` external-trigger +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +.. note:: + + Please read :ref:`External Triggers` before + using the older mechanism described in this section. + +Externally triggered tasks (see :ref:`Old-Style External Triggers`) wait on +external events reported via the ``cylc ext-trigger`` command. To +constrain triggers to a specific cycle point, include +``$CYLC_TASK_CYCLE_POINT`` in the trigger message string and pass the +cycle point to the ``cylc ext-trigger`` command. + +- *type*: Comma-separated list of task names with associated + external trigger message strings. +- *default*: (none) +- *example*: (note the comma and line-continuation character) + + .. code-block:: none + + external-trigger = get-satx("new sat-X data ready"), + get-saty("new sat-Y data ready for $CYLC_TASK_CYCLE_POINT") + + +[scheduling] ``->`` [[special tasks]] ``->`` sequential +""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Sequential tasks automatically depend on their own previous-cycle instance. 
+This declaration is deprecated in favour of explicit inter-cycle triggers - +see :ref:`SequentialTasks`. + +- *type*: Comma-separated list of task or family names. +- *default*: (none) +- *example*: ``sequential = foo, bar`` + + +.. _EASU: + +[scheduling] ``->`` [[special tasks]] ``->`` exclude at start-up +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Any task listed here will be excluded from the initial task pool (this +goes for suite restarts too). If an *inclusion* list is also +specified, the initial pool will contain only included tasks that have +not been excluded. Excluded tasks can still be inserted at run time. +Other tasks may still depend on excluded tasks if they have not been +removed from the suite dependency graph, in which case some manual +triggering, or insertion of excluded tasks, may be required. + +- *type*: Comma-separated list of task or family names. +- *default*: (none) + + +.. _IASU: + +[scheduling] ``->`` [[special tasks]] ``->`` include at start-up +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +If this list is not empty, any task *not* listed in it will be +excluded from the initial task pool (this goes for suite restarts too). +If an *exclusion* list is also specified, the initial pool will +contain only included tasks that have not been excluded. Excluded tasks +can still be inserted at run time. Other tasks may still depend on +excluded tasks if they have not been removed from the suite dependency +graph, in which case some manual triggering, or insertion of excluded +tasks, may be required. + +- *type*: Comma-separated list of task or family names. +- *default*: (none) + + +[scheduling] ``->`` [[dependencies]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The suite dependency graph is defined under this section. You can plot +the dependency graph as you work on it, with ``cylc graph`` or +by right clicking on the suite in the db viewer. See +also :ref:`ConfiguringScheduling`. 
+ + +[scheduling] ``->`` [[dependencies]] ``->`` graph +""""""""""""""""""""""""""""""""""""""""""""""""" + +The dependency graph for a completely non-cycling suite can go here. +See also :ref:`GraphDescrip` below and :ref:`ConfiguringScheduling`, for graph +string syntax. + +- *type*: string +- *example*: (see :ref:`GraphDescrip` below) + + +[scheduling] ``->`` [[dependencies]] ``->`` [[[\_\_RECURRENCE\_\_]]] +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +``\_\_RECURRENCE\_\_`` section headings define the sequence of cycle points for +which the subsequent graph section is valid. These should be specified in +our ISO 8601 derived sequence syntax, or similar for integer cycling: + +- *examples*: + - date-time cycling: ``[[[T00,T06,T12,T18]]]`` or ``[[[PT6H]]]`` + - integer cycling (stepped by 2): ``[[[P2]]]`` +- *default*: (none) + + +See :ref:`GraphTypes` for more on recurrence expressions, and how multiple +graph sections combine. + + +.. _GraphDescrip: + +[scheduling] ``->`` [[dependencies]] ``->`` [[[\_\_RECURRENCE\_\_]]] ``->`` graph +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The dependency graph for a given recurrence section goes here. Syntax examples +follow; see also :ref:`ConfiguringScheduling` and :ref:`TriggerTypes`. + +- *type*: string +- *examples*: + + .. 
code-block:: cylc + + graph = """ + foo => bar => baz & waz # baz and waz both trigger off bar + foo[-P1D-PT6H] => bar # bar triggers off foo[-P1D-PT6H] + baz:out1 => faz # faz triggers off a message output of baz + X:start => Y # Y triggers if X starts executing + X:fail => Y # Y triggers if X fails + foo[-PT6H]:fail => bar # bar triggers if foo[-PT6H] fails + X => !Y # Y suicides if X succeeds + X | X:fail => Z # Z triggers if X succeeds or fails + X:finish => Z # Z triggers if X succeeds or fails + (A | B & C ) | D => foo # general conditional triggers + foo:submit => bar # bar triggers if foo is successfully submitted + foo:submit-fail => bar # bar triggers if submission of foo fails + # comment + """ + +- *default*: (none) + + +[runtime] +--------- + +This section is used to specify how, where, and what to execute when +tasks are ready to run. Common +configuration can be factored out in a multiple-inheritance hierarchy of +runtime namespaces that culminates in the tasks of the suite. Order of +precedence is determined by the C3 linearization algorithm as used to +find the *method resolution order* in Python language class +hierarchies. For details and examples see :ref:`NIORP`. + + +[runtime] ``->`` [[\_\_NAME\_\_]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Replace ``\_\_NAME\_\_`` with a namespace name, or a comma-separated list of +names, and repeat as needed to define all tasks in the suite. Names may +contain letters, digits, underscores, and hyphens. A namespace +represents a group or family of tasks if other namespaces inherit from +it, or a task if no others inherit from it. + +.. todo:: + following para here is a '%' thing i.e. not meant to be seen /comment: + + Names may not contain colons (which would preclude use of directory paths + involving the registration name in ``$PATH`` variables). They + may not contain the "." character (it will be interpreted as the + namespace hierarchy delimiter, separating groups and names -huh?). 
+ +- *legal values*: + - ``[[foo]]`` + - ``[[foo, bar, baz]]`` + +If multiple names are listed the subsequent settings apply to each. + +All namespaces inherit initially from *root*, which can be +explicitly configured to provide or override default settings +for all tasks in the suite. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` extra log files +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A list of user-defined log files associated with a task. Files defined here +will appear alongside the default log files in the cylc GUI. Log files +must reside in the job log directory ``$CYLC_TASK_LOG_DIR`` and ideally +should be named using the ``$CYLC_TASK_LOG_ROOT`` prefix +(see :ref:`Task Job Script Variables`). + +- *type*: Comma-separated list of strings (log file names). +- *default*: (none) +- *example*: (job.custom-log-name) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` inherit +"""""""""""""""""""""""""""""""""""""""""""""""" + +A list of the immediate parent(s) this namespace inherits from. If no +parents are listed ``root`` is assumed. + +- *type*: Comma-separated list of strings (parent namespace names). +- *default*: ``root`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` init-script +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +Custom script invoked by the task job script before the task execution +environment is configured - so it does not have access to any suite or task +environment variables. It can be an external command or script, or inlined +scripting. The original intention for this item was to allow remote tasks to +source login scripts to configure their access to cylc, but this should no +longer be necessary (see :ref:`HowTasksGetAccessToCylc`). See also +``env-script``, ``err-script``, ``exit-script``, +``pre-script``, ``script``, and ``post-script``. 
+ +- *type*: string +- *default*: (none) +- *example*: ``init-script = "echo Hello World"`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` env-script +""""""""""""""""""""""""""""""""""""""""""""""""""" + +Custom script invoked by the task job script between the cylc-defined environment +(suite and task identity, etc.) and the user-defined task runtime environment - +so it has access to the cylc environment (and the task environment has +access to variables defined by this scripting). It can be an external command +or script, or inlined scripting. See also ``init-script``, +``err-script``, ``exit-script``, ``pre-script``, +``script``, and ``post-script``. + +- *type*: string +- *default*: (none) +- *example*: ``env-script = "echo Hello World"`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` exit-script +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +Custom script invoked at the very end of *successful* job execution, just +before the job script exits. It should execute very quickly. Companion of +``err-script``, which is executed on job failure. It can be an external +command or script, or inlined scripting. See also ``init-script``, +``env-script``, ``err-script``, ``pre-script``, +``script``, and ``post-script``. + +- *type*: string +- *default*: (none) +- *example*: ``exit-script = "rm -f $TMP_FILES"`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` err-script +""""""""""""""""""""""""""""""""""""""""""""""""""" + +Custom script to be invoked at the end of the error trap, which is triggered +due to failure of a command in the task job script or trappable job kill. The +output of this will always be sent to STDERR and ``$1`` is set to the +name of the signal caught by the error trap. The script should be fast and use +very little system resource to ensure that the error trap can return quickly. +Companion of ``exit-script``, which is executed on job success. +It can be an external command or script, or inlined scripting. 
See also +``init-script``, ``env-script``, ``exit-script``, +``pre-script``, ``script``, and ``post-script``. + +- *type*: string +- *default*: (none) +- *example*: ``err-script = "printenv FOO"`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` pre-script +""""""""""""""""""""""""""""""""""""""""""""""""""" + +Custom script invoked by the task job script immediately before the ``script`` +item (just below). It can be an external command or script, or inlined scripting. +See also ``init-script``, ``env-script``, +``err-script``, ``exit-script``, ``script``, and +``post-script``. + +- *type*: string +- *default*: (none) +- *example*: + + .. code-block:: cylc + + pre-script = """ + . $HOME/.profile + echo Hello from suite ${CYLC_SUITE_NAME}!""" + + +.. _ScriptItem: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` script +""""""""""""""""""""""""""""""""""""""""""""""" + +The main custom script invoked from the task job script. It can be an +external command or script, or inlined scripting. See also +``init-script``, ``env-script``, ``err-script``, +``exit-script``, ``pre-script``, and ``post-script``. + +- *type*: string +- *root default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` post-script +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +Custom script invoked by the task job script immediately after the +``script`` item (just above). It can be an external command or script, +or inlined scripting. See also +``init-script``, ``env-script``, ``err-script``, +``exit-script``, ``pre-script``, and ``script``. + +- *type*: string +- *default*: (none) + + +.. _worksubdirectory: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` work sub-directory +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Task job scripts are executed from within *work directories* created +automatically under the suite run directory. A task can get its own work +directory from ``$CYLC_TASK_WORK_DIR`` (or simply ``$PWD`` if +it does not ``cd`` elsewhere at runtime). 
The default directory +path contains task name and cycle point, to provide a unique workspace for +every instance of every task. If several tasks need to exchange files and +simply read and write from their current working directory, this item +can be used to override the default to make them all use the same workspace. + +The top level share and work directory location can be changed (e.g. to a +large data area) by a global config setting (see :ref:`workdirectory`). + +- *type*: string (directory path, can contain environment variables) +- *default*: ``$CYLC_TASK_CYCLE_POINT/$CYLC_TASK_NAME`` +- *example*: ``$CYLC_TASK_CYCLE_POINT/shared/`` + +.. note:: + + If you omit cycle point from the work sub-directory path successive + instances of the task will share the same workspace. Consider the effect + on cycle point offset housekeeping of work directories before doing this. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[meta]]] +""""""""""""""""""""""""""""""""""""""""""""""""""" + +Section containing metadata items for this task or family namespace. +Several items (title, description, URL) are pre-defined and are used by +the GUI. Others can be user-defined and passed to task event handlers to be +interpreted according to your needs. For example, the value of an +"importance" item could determine how an event handler responds to task +failure events. + +Any suite meta item can now be passed to task event handlers by prefixing the +string template item name with "suite\_", for example: + +.. code-block:: cylc + + [runtime] + [[root]] + [[[events]]] + failed handler = send-help.sh %(suite_title)s %(suite_importance)s %(title)s + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[meta]]] ``->`` title +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A single line description of this namespace. It is displayed by the +``cylc list`` command and can be retrieved from running tasks +with the ``cylc show`` command. 
+ +- *type*: single line string +- *root default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[meta]]] ``->`` description +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A multi-line description of this namespace, retrievable from running tasks with the +``cylc show`` command. + +- *type*: multi-line string +- *root default*: (none) + + +.. _TaskURL: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[meta]]] ``->`` URL +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A web URL to task documentation for this suite. If present it can be browsed +with the ``cylc doc`` command, or by right-clicking on the task in +gcylc. The string templates ``%(suite_name)s`` and +``%(task_name)s`` will be replaced with the actual suite and task names. +See also :ref:`SuiteURL`. + +- *type*: string (URL) +- *default*: (none) +- *example*: you can set URLs to all tasks in a suite by putting + something like the following in the root namespace: + + .. code-block:: cylc + + [runtime] + [[root]] + [[[meta]]] + URL = http://my-site.com/suites/%(suite_name)s/%(task_name)s.html + +.. note:: + + URLs containing the comment delimiter ``#`` must be protected by quotes. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[meta]]] ``->`` \_\_MANY\_\_ +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replace ``\_\_MANY\_\_`` with any user-defined metadata item. These, like title, +URL, etc. can be passed to task event handlers to be interpreted according to your +needs. For example, the value of an "importance" item could determine how an event +handler responds to task failure events. + +- *type*: String or integer +- *default*: (none) +- *example*: + + .. 
code-block:: cylc + + [runtime] + [[root]] + [[[meta]]] + importance = high + color = red + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] +"""""""""""""""""""""""""""""""""""""""""""""""""" + +This section configures the means by which cylc submits task job scripts +to run. + + +.. _RuntimeJobSubMethods: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` batch system +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +See :ref:`TaskJobSubmission` for how job submission works, and how to define +new handlers for different batch systems. Cylc has a number of built in batch +system handlers: + +- *type*: string +- *legal values*: + + - ``background`` - invoke a child process + - ``at`` - the rudimentary Unix ``at`` scheduler + - ``loadleveler`` - IBM LoadLeveler ``llsubmit``, with directives + defined in the suite.rc file + - ``lsf`` - IBM Platform LSF ``bsub``, with directives defined in the + suite.rc file + - ``pbs`` - PBS ``qsub``, with directives defined in the suite.rc file + - ``sge`` - Sun Grid Engine ``qsub``, with directives defined in the + suite.rc file + - ``slurm`` - Simple Linux Utility for Resource Management ``sbatch``, with + directives defined in the suite.rc file + - ``moab`` - Moab workload manager ``msub``, with directives defined in the + suite.rc file + +- *default*: ``background`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` execution time limit +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify the execution wall clock limit for a job of the task. +For ``background`` and ``at``, the job script will be invoked using the ``timeout`` +command. For other batch systems, the specified time will be automatically +translated into the equivalent directive for wall clock limit. + +Tasks are polled multiple times, where necessary, when they exceed their +execution time limits. 
(See :ref:`ExecutionTimeLimitPollingIntervals` for +how to configure the polling intervals). + +- *type*: ISO 8601 duration/interval representation +- *example*: ``PT5M``, 5 minutes, ``PT1H``, 1 hour +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` batch submit command template +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +This allows you to override the actual command used by the chosen batch +system. The template's ``%(job)s`` will be substituted by the +job file path. + +- *type*: string +- *legal values*: a string template +- *example*: ``llsubmit %(job)s`` + + +.. _JobSubShell: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` shell +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Location of the command used to interpret the job script submitted by the suite +server program when a task is ready to run. This can be set to the location of +``bash`` in the job host if the shell is not installed in the standard +location. + +.. note:: + + It has no bearing on any sub-shells that may be called by the job script. + +Setting this to the path of a ksh93 interpreter is deprecated; support for it +will be withdrawn in a future cylc release. Setting this to any other shell is +not supported. + +- *type*: string +- *root default*: ``/bin/bash`` + + +.. _JobSubRefRetries: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` submission retry delays +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A list of durations (in ISO 8601 syntax), after which to resubmit if job +submission fails. + +- *type*: Comma-separated list of ISO 8601 duration/interval + representations, optionally *preceded* by multipliers. +- *example*: ``PT1M,3*PT1H, P1D`` is equivalent to + ``PT1M, PT1H, PT1H, PT1H, P1D`` - 1 minute, 1 hour, 1 hour, 1 + hour, 1 day. +- *default*: (none) + + +.. 
_RefRetries: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` execution retry delays +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +See also :ref:`TaskRetries`. + +A list of ISO 8601 time duration/intervals after which to resubmit the task +if it fails. The variable ``$CYLC_TASK_TRY_NUMBER`` in the task +execution environment is incremented each time, starting from 1 for the +first try - this can be used to vary task behaviour by try number. + +- *type*: Comma-separated list of ISO 8601 duration/interval representations, + optionally *preceded* by multipliers. +- *example*: ``PT1.5M,3*PT10M`` is equivalent to + ``PT1.5M, PT10M, PT10M, PT10M`` - 1.5 minutes, 10 minutes, 10 minutes, 10 minutes. +- *default*: (none) + + +.. _SubmissionPollingIntervals: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` submission polling intervals +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A list of intervals, expressed as ISO 8601 duration/intervals, with optional +multipliers, after which cylc will poll for status while the task is in the +submitted state. + +For the polling task communication method this overrides the default +submission polling interval in the site/user config files +(:ref:`SiteAndUserConfiguration`). For default and ssh task communications, +polling is not done by default but it can still be configured here as a +regular check on the health of submitted tasks. + +Each list value is used in turn until the last, which is used repeatedly +until finished. + +- *type*: Comma-separated list of ISO 8601 duration/interval + representations, optionally *preceded* by multipliers. +- *example*: ``PT1M,3*PT1H, PT1M`` is equivalent to + ``PT1M, PT1H, PT1H, PT1H, PT1M`` - 1 minute, 1 hour, 1 hour, 1 + hour, 1 minute. +- *default*: (none) + +A single interval value is probably appropriate for submission polling. + + +.. 
_ExecutionPollingIntervals: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[job]]] ``->`` execution polling intervals +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A list of intervals, expressed as ISO 8601 duration/intervals, with optional +multipliers, after which cylc will poll for status while the task is in the +running state. + +For the polling task communication method this overrides the default +execution polling interval in the site/user config files +(:ref:`SiteAndUserConfiguration`). For default and ssh task communications, +polling is not done by default but it can still be configured here as a +regular check on the health of submitted tasks. + +Each list value is used in turn until the last, which is used repeatedly +until finished. + +- *type*: Comma-separated list of ISO 8601 duration/interval + representations, optionally *preceded* by multipliers. +- *example*: ``PT1M,3*PT1H, PT1M`` is equivalent to + ``PT1M, PT1H, PT1H, PT1H, PT1M`` - 1 minute, 1 hour, 1 hour, 1 + hour, 1 minute. +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Configure host and username, for tasks that do not run on the suite host +account. Non-interactive ssh is used to submit the task by the configured +batch system, so you must distribute your ssh key to allow +this. Cylc must be installed on task remote accounts, but no external +software dependencies are required there. + + +.. _DynamicHostSelection: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] ``->`` host +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The remote host for this namespace. This can be a static hostname, an +environment variable that holds a hostname, or a command that prints a +hostname to stdout. Host selection commands are executed just prior to +job submission. 
The host (static or dynamic) may have an entry in the +cylc site or user config file to specify parameters such as the location +of cylc on the remote machine; if not, the corresponding local settings +(on the suite host) will be assumed to apply on the remote host. + +- *type*: string (a valid hostname on the network) +- *default*: (none) +- *examples*: + + - static host name: ``host = foo`` + - fully qualified: ``host = foo.bar.baz`` + - dynamic host selection: + + - shell command (1): ``host = $(host-selector.sh)`` + - shell command (2): ``host = \`host-selector.sh\``` + - environment variable: ``host = $MY_HOST`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] ``->`` owner +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The username of the task host account. This is (only) used in the +non-interactive ssh command invoked by the suite server program to submit the +remote task; consequently it may be defined using local environment variables +(i.e. the shell in which cylc runs, and ``[cylc] -> [[environment]]``). + +If you use dynamic host selection and have different usernames on +the different selectable hosts, you can configure your +``$HOME/.ssh/config`` to handle username translation. + +- *type*: string (a valid username on the remote host) +- *default*: (none) + + +.. _runtime-remote-retrieve-job-logs: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] ``->`` retrieve job logs +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Remote task job logs are saved to the suite run directory on the task host, not +on the suite host. If you want the job logs pulled back to the suite host +automatically, you can set this item to ``True``. The suite will +then attempt to ``rsync`` the job logs once from the remote host each +time a task job completes. E.g. 
if the job file is +``~/cylc-run/tut.oneoff.remote/log/job/1/hello/01/job``, anything under +``~/cylc-run/tut.oneoff.remote/log/job/1/hello/01/`` will be retrieved. + +- *type*: boolean +- *default*: False + + +.. _runtime-remote-retrieve-job-logs-max-size: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] ``->`` retrieve job logs max size +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If the disk space of the suite host is limited, you may want to set the maximum +sizes of the job log files to retrieve. The value can be anything that is +accepted by the ``--max-size=SIZE`` option of the ``rsync`` command. + +- *type*: string +- *default*: None + + +.. _runtime-remote-retrieve-job-logs-retry-delays: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] ``->`` retrieve job logs retry delays +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Some batch systems have considerable delays between the time when the job +completes and when it writes the job logs in its normal location. If this is +the case, you can configure an initial delay and some retry delays between +subsequent attempts. The default behaviour is to attempt once without any delay. + +- *type*: Comma-separated list of ISO 8601 duration/interval representations, optionally + *preceded* by multipliers. +- *default*: (none) +- *example*: ``retrieve job logs retry delays = PT10S, PT1M, PT5M`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[remote]]] ``->`` suite definition directory +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The path to the suite configuration directory on the remote account, needed if +remote tasks require access to files stored there (via +``$CYLC_SUITE_DEF_PATH``) or in the suite bin directory (via +``$PATH``). 
If this item is not defined, the local suite +configuration directory path will be assumed, with the suite owner's home +directory, if present, replaced by ``'$HOME'`` for +interpretation on the remote account. + +- *type*: string (a valid directory path on the remote account) +- *default*: (local suite configuration path with ``$HOME`` replaced) + + +.. _TaskEventHandling: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Cylc can call nominated event handlers when certain task events occur. This +section configures specific task event handlers; see :ref:`SuiteEventHandling` +for suite events. + +Event handlers can be located in the suite ``bin/`` directory, +otherwise it is up to you to ensure their location is in ``$PATH`` (in +the shell in which the suite server program runs). They should require little +resource to run and return quickly. + +Each task event handler can be specified as a list of command lines or command +line templates. They can contain any or all of the following patterns, which +will be substituted with actual values: + +- \%(event)s: event name +- \%(suite)s: suite name +- \%(suite\_uuid)s: suite UUID string +- \%(point)s: cycle point +- \%(name)s: task name +- \%(submit\_num)s: submit number +- \%(try\_num)s: try number +- \%(id)s: task ID (i.e. \%(name)s.\%(point)s) +- \%(batch\_sys\_name)s: batch system name +- \%(batch\_sys\_job\_id)s: batch system job ID +- \%(message)s: event message, if any +- any task [meta] item, e.g.: + - \%(title)s: task title + - \%(URL)s: task URL + - \%(importance)s - example custom task metadata +- any suite [meta] item, prefixed with "suite\_", e.g.: + - \%(suite\_title)s: suite title + - \%(suite\_URL)s: suite URL + - \%(suite\_rating)s - example custom suite metadata + +Otherwise, the command line will be called with the following default +arguments: + +.. code-block:: none + + %(event)s %(suite)s %(id)s %(message)s + +.. 
note:: + + Substitution patterns should not be quoted in the template strings. + This is done automatically where required. + +For an explanation of the substitution syntax, see +`String Formatting Operations in the Python +documentation <https://docs.python.org/2/library/stdtypes.html#string-formatting>`_. + +Additional information can be passed to event handlers via the +``[cylc] -> [[environment]]`` (but not via task +runtime environments - event handlers are not called by tasks). + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` EVENT handler +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A list of one or more event handlers to call when one of the following EVENTs occurs: + +- **submitted** - the job submit command was successful +- **submission failed** - the job submit command failed, or the + submitted job was killed before it started executing +- **submission retry** - job submit failed, but cylc will resubmit it + after a configured delay +- **submission timeout** - the submitted job timed out without commencing execution +- **started** - the task reported commencement of execution +- **succeeded** - the task reported that it completed successfully +- **failed** - the task reported that it failed to complete successfully +- **retry** - the task failed, but cylc will resubmit it + after a configured delay +- **execution timeout** - the task timed out after execution commenced +- **warning** - the task reported a WARNING severity message +- **critical** - the task reported a CRITICAL severity message +- **custom** - the task reported a CUSTOM severity message +- **late** - the task is never active and is late + +Item details: +- *type*: Comma-separated list of strings (event handler scripts). +- *default*: None +- *example*: ``failed handler = my-failed-handler.sh`` + + +.. 
_runtime-event-hooks-submission-timeout: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` submission timeout +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If a task has not started after the specified ISO 8601 duration/interval, the +*submission timeout* event handler(s) will be called. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT30M``, 30 minutes or ``P1D``, 1 day). +- *default*: (none) + + +.. _runtime-event-hooks-execution-timeout: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` execution timeout +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If a task has not finished after the specified ISO 8601 duration/interval, the +*execution timeout* event handler(s) will be called. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT4H``, 4 hours or ``P1D``, 1 day). +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` handlers +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify a list of command lines or command line templates as task event handlers. + +- *type*: Comma-separated list of strings (event handler command line or command + line templates). +- *default*: (none) +- *example*: ``handlers = my-handler.sh`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` handler events +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify the events for which the general task event handlers should be invoked. + +- *type*: Comma-separated list of events +- *default*: (none) +- *example*: ``handler events = submission failed, failed`` + + +.. 
_runtime-events-handler-retry-delays: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` handler retry delays +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify an initial delay before running an event handler command and any retry +delays in case the command returns a non-zero code. The default behaviour is to +run an event handler command once without any delay. + +- *type*: Comma-separated list of ISO 8601 duration/interval representations, + optionally *preceded* by multipliers. +- *default*: (none) +- *example*: ``handler retry delays = PT10S, PT1M, PT5M`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` mail events +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify the events for which notification emails should be sent. + +- *type*: Comma-separated list of events +- *default*: (none) +- *example*: ``mail events = submission failed, failed`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` mail from +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify an alternate ``from:`` email address for event notifications. + +- *type*: string +- *default*: None, (notifications@HOSTNAME) +- *example*: ``mail from = no-reply@your-org`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` mail retry delays +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify an initial delay before running the mail notification command and any +retry delays in case the command returns a non-zero code. The default behaviour +is to run the mail notification command once without any delay. + +- *type*: Comma-separated list of ISO 8601 duration/interval representations, + optionally *preceded* by multipliers. 
+- *default*: (none) +- *example*: ``mail retry delays = PT10S, PT1M, PT5M`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` mail smtp +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Specify the SMTP server for sending email notifications. + +- *type*: string +- *default*: None, (localhost:25) +- *example*: ``mail smtp = smtp.yourorg`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[events]]] ``->`` mail to +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +A list of email addresses to send task event notifications. The list can be +anything accepted by the ``mail`` command. + +- *type*: string +- *default*: None, (USER@HOSTNAME) +- *example*: ``mail to = your.colleague`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[environment]]] +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The user defined task execution environment. Variables defined here can +refer to cylc suite and task identity variables, which are exported +earlier in the task job script, and variable assignment expressions can +use cylc utility commands because access to cylc is also configured +earlier in the script. See also :ref:`TaskExecutionEnvironment`. + + +.. _AppendixTaskExecutionEnvironment: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[environment]]] ``->`` \_\_VARIABLE\_\_ +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replace ``\_\_VARIABLE\_\_`` with any number of environment variable +assignment expressions. Order of definition is preserved so values can +refer to previously defined variables. Values are passed through to the task +job script without evaluation or manipulation by cylc, so any variable assignment +expression that is legal in the job submission shell can be used. +White space around the ``=`` is allowed (as far as cylc's suite.rc +parser is concerned these are just normal configuration items). 
+ +- *type*: string +- *default*: (none) +- *legal values*: depends to some extent on the task job + submission shell (:ref:`JobSubShell`). +- *examples*, for the bash shell: + + - ``FOO = $HOME/bar/baz`` + - ``BAR = ${FOO}$GLOBALVAR`` + - ``BAZ = $( echo "hello world" )`` + - ``WAZ = ${FOO%.jpg}.png`` + - ``NEXT_CYCLE = $( cylc cycle-point --offset=PT6H )`` + - ``PREV_CYCLE = \`cylc cycle-point --offset=-PT6H``` + - ``ZAZ = "${FOO#bar}" # <-- QUOTED to escape the suite.rc comment character`` + + +.. _EnvironmentFilter: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[environment filter]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +This section contains environment variable inclusion and exclusion +lists that can be used to filter the inherited environment. *This is +not intended as an alternative to a well-designed inheritance hierarchy +that provides each task with just the variables it needs.* Filters can, +however, improve suites with tasks that inherit a lot of environment +they don't need, by making it clear which tasks use which variables. +They can optionally be used routinely as explicit "task environment +interfaces" too, at some cost to brevity, because they guarantee that +variables filtered out of the inherited task environment are not used. + +.. note:: + + Environment filtering is done after inheritance is completely + worked out, not at each level on the way, so filter lists in higher-level + namespaces only have an effect if they are not overridden by descendants. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[environment filter]]] ``->`` include +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If given, only variables named in this list will be included from the +inherited environment, others will be filtered out. Variables may also +be explicitly excluded by an ``exclude`` list. + +- *type*: Comma-separated list of strings (variable names). 
+- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[environment filter]]] ``->`` exclude +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Variables named in this list will be filtered out of the inherited +environment. Variables may also be implicitly excluded by +omission from an ``include`` list. + +- *type*: Comma-separated list of strings (variable names). +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[parameter environment templates]]] +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The user defined task execution parameter environment templates. This is only +relevant for *parameterized tasks* - see :ref:`Parameterized Tasks Label`. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[parameter environment templates]]] ``->`` \_\_VARIABLE\_\_ +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replace ``\_\_VARIABLE\_\_`` with pairs of environment variable +name and Python string template for parameter substitution. This is only +relevant for *parameterized tasks* - see :ref:`Parameterized Tasks Label`. + +If specified, in addition to the standard ``CYLC\_TASK\_PARAM\_`` +variables, the job script will also export the named variables specified +here, with the template strings substituted with the parameter values. + +- *type*: string +- *default*: (none) +- *legal values*: name=string template pairs +- *examples*, for the bash shell: + + - ``MYNUM=%(i)d`` + - ``MYITEM=%(item)s`` + - ``MYFILE=/path/to/%(i)03d/%(item)s`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[directives]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Batch queue scheduler directives. Whether or not these are used depends +on the batch system. 
For the built-in methods that support directives +(``loadleveler``, ``lsf``, ``pbs``, ``sge``, +``slurm``, ``moab``), directives are written to the top of the +task job script in the correct format for the method. Specifying directives +individually like this allows use of default directives that can be +individually overridden at lower levels of the runtime namespace hierarchy. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[directives]]] ``->`` \_\_DIRECTIVE\_\_ +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replace ``\_\_DIRECTIVE\_\_`` with each directive assignment, e.g. +``class = parallel``. + +- *type*: string +- *default*: (none) + +Example directives for the built-in batch system handlers are shown +in :ref:`AvailableMethods`. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[outputs]]] +"""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Register custom task outputs for use in message triggering in this section +(:ref:`MessageTriggers`) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[outputs]]] ``->`` \_\_OUTPUT\_\_ +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replace ``\_\_OUTPUT\_\_`` with one or more custom task output messages +(:ref:`MessageTriggers`). The item name is used to select the custom output +message in graph trigger notation. + +- *type*: string +- *default*: (none) +- *examples*: + + .. code-block:: cylc + + out1 = "sea state products ready" + out2 = "NWP restart files completed" + + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Configure automatic suite polling tasks as described +in :ref:`SuiteStatePolling`. The +items in this section reflect the options and defaults of the +``cylc suite-state`` command, except that the target suite name and the +``--task``, ``--cycle``, and ``--status`` options are +taken from the graph notation. 
+ + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` run-dir +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +For your own suites the run database location is determined by your +site/user config. For other suites, e.g. those owned by others, or +mirrored suite databases, use this item to specify the location +of the top level cylc run directory (the database should be a +suite-name sub-directory of this location). + +- *type*: string (a directory path on the target suite host) +- *default*: as configured by site/user config (for your own suites) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` interval +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Polling interval expressed as an ISO 8601 duration/interval. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT1M + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` max-polls +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The maximum number of polls before timing out and entering the "failed" state. + +- *type*: integer +- *default*: 10 + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` user +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Username of an account on the suite host to which you have access. The +polling ``cylc suite-state`` command will be invoked +on the remote account. + +- *type*: string (username) +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` host +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The hostname of the target suite. The polling ``cylc suite-state`` command +will be invoked on the remote account. 
+ +- *type*: string (hostname) +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` message +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Wait for the target task in the target suite to receive a specified message +rather than achieve a state. + +- *type*: string (the message) +- *default*: (none) + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[suite state polling]]] ``->`` verbose +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Run the polling ``cylc suite-state`` command in verbose output mode. + +- *type*: boolean +- *default*: False + + +.. _suiterc-sim-config: + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] +""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Task configuration for the suite *simulation* and *dummy* run modes +described in :ref:`SimulationMode`. + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] ``->`` default run length +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The default simulated job run length, if ``[job]execution time limit`` +and ``[simulation]speedup factor`` are not set. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: ``PT10S`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] ``->`` speedup factor +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If ``[job]execution time limit`` is set, the task simulated run length +is computed by dividing it by this factor. + +- *type*: float +- *default*: (none) - i.e. 
do not use proportional run length +- *example*: ``10.0`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] ``->`` time limit buffer +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +For dummy jobs, a new ``[job]execution time limit`` is set to the +simulated task run length plus this buffer interval, to avoid job kill due to +exceeding the time limit. + +- *type*: ISO 8601 duration/interval representation (e.g. + ``PT10S``, 10 seconds, or ``PT1M``, 1 minute). +- *default*: PT10S + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] ``->`` fail cycle points +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Configure simulated or dummy jobs to fail at certain cycle points. + +- *type*: list of strings (cycle points), or *all* +- *default*: (none) - no instances of the task will fail +- *examples*: + - ``all`` - all instances of the task will fail + - ``2017-08-12T06, 2017-08-12T18`` - these instances of the task will fail + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] ``->`` fail try 1 only +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If this is set to ``True`` only the first run of the task instance will +fail, otherwise retries will fail too. + +- *type*: boolean +- *default*: ``True`` + + +[runtime] ``->`` [[\_\_NAME\_\_]] ``->`` [[[simulation]]] ``->`` disable task event handlers +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If this is set to ``True`` configured task event handlers will not be called +in simulation or dummy modes. + +- *type*: boolean +- *default*: ``True`` + + +[visualization] +--------------- + +Configuration of suite graphing for the ``cylc graph`` command (graph +extent, styling, and initial family-collapsed state) and the gcylc graph view +(initial family-collapsed state). See the +`Graphviz documentation of node shapes <https://www.graphviz.org/doc/info/shapes.html>`_. 
+ + +[visualization] ``->`` initial cycle point +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The initial cycle point for graph plotting. + +- *type*: ISO 8601 date-time representation (e.g. CCYYMMDDThhmm) +- *default*: the suite initial cycle point + +The visualization initial cycle point gets adjusted up if necessary to the +suite initial cycle point. + + +[visualization] ``->`` final cycle point +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An explicit final cycle point for graph plotting. If used, this overrides the +preferred *number of cycle points* (below). + +- *type*: ISO 8601 date-time representation (e.g. CCYYMMDDThhmm) +- *default*: (none) + +The visualization final cycle point gets adjusted down if necessary to the +suite final cycle point. + + +[visualization] ``->`` number of cycle points +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The number of cycle points to graph starting from the visualization initial +cycle point. This is the preferred way of defining the graph end point, but +it can be overridden by an explicit *final cycle point* (above). + +- *type*: integer +- *default*: 3 + + +[visualization] ``->`` collapsed families +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A list of family (namespace) names to be shown in the collapsed state +(i.e. the family members will be replaced by a single family node) when +the suite is first plotted in the graph viewer or the gcylc graph view. +If this item is not set, the default is to collapse all families at first. +Interactive GUI controls can then be used to group and ungroup family +nodes at will. + +- *type*: Comma-separated list of family names. +- *default*: (none) + + +[visualization] ``->`` use node color for edges +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Plot graph edges (dependency arrows) with the same color as the upstream +node, otherwise default to black. 
+ +- *type*: boolean +- *default*: False + + +[visualization] ``->`` use node fillcolor for edges +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Plot graph edges (i.e. dependency arrows) with the same fillcolor as the +upstream node, if it is filled, otherwise default to black. + +- *type*: boolean +- *default*: False + + +[visualization] ``->`` node penwidth +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Line width of node shape borders. + +- *type*: integer +- *default*: 2 + + +[visualization] ``->`` edge penwidth +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Line width of graph edges (dependency arrows). + +- *type*: integer +- *default*: 2 + + +[visualization] ``->`` use node color for labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Graph node labels can be printed in the same color as the node outline. + +- *type*: boolean +- *default*: False + + +[visualization] ``->`` default node attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set the default attributes (color and style etc.) of graph nodes (tasks and families). +Attribute pairs must be quoted to hide the internal ``=`` character. + +- *type*: Comma-separated list of quoted ``'attribute=value'`` pairs. +- *legal values*: see graphviz or pygraphviz documentation +- *default*: ``'style=filled', 'fillcolor=yellow', 'shape=box'`` + + +[visualization] ``->`` default edge attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set the default attributes (color and style etc.) of graph edges +(dependency arrows). Attribute pairs must be quoted to hide the +internal ``=`` character. + +- *type*: Comma-separated list of quoted ``'attribute=value'`` pairs. +- *legal values*: see graphviz or pygraphviz documentation +- *default*: ``'color=black'`` + + +[visualization] ``->`` [[node groups]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Define named groups of graph nodes (tasks and families) which can be styled +en masse, by name, in ``[visualization] -> [[node attributes]]``. 
+Node groups are automatically defined for all task families, including +root, so you can style family and member nodes at once by family name. + + +[visualization] ``->`` [[node groups]] ``->`` \_\_GROUP\_\_ +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Replace ``__GROUP__`` with each named group of tasks or families. + +- *type*: Comma-separated list of task or family names. +- *default*: (none) +- *example*: + + - PreProc = foo, bar + - PostProc = baz, waz + + +[visualization] ``->`` [[node attributes]] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Here you can assign graph node attributes to specific nodes, or to all +members of named groups defined in ``[visualization] -> [[node groups]]``. +Task families are automatically node groups. Styling of a +family node applies to all member nodes (tasks and sub-families), but +precedence is determined by ordering in the suite configuration. For +example, if you style a family red and then one of its members green, +cylc will plot a red family with one green member; but if you style one +member green and then the family red, the red family styling will +override the earlier green styling of the member. + + +[visualization] ``->`` [[node attributes]] ``->`` \_\_NAME\_\_ +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Replace ``__NAME__`` with each node or node group for style attribute +assignment. + +- *type*: Comma-separated list of quoted ``'attribute=value'`` pairs. +- *legal values*: see the Graphviz or PyGraphviz documentation +- *default*: (none) +- *example* (with reference to the node groups defined above): + + - PreProc = 'style=filled', 'fillcolor=orange' + - PostProc = 'color=red' + - foo = 'style=filled' diff --git a/doc/src/conf.py b/doc/src/conf.py new file mode 100644 index 00000000000..df64e80c0f9 --- /dev/null +++ b/doc/src/conf.py @@ -0,0 +1,157 @@ +# THIS FILE IS PART OF THE CYLC SUITE ENGINE. 
+# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import sys +import os +from cylc.version import CYLC_VERSION + + +# -- General configuration ------------------------------------------------ + +# minimal Sphinx version required. +needs_sphinx = '1.5.3' + +# Sphinx extension module names. +sys.path.append(os.path.abspath('custom')) # path to custom extensions. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'cylc_lang', +] + +# Add any paths that contain templates. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'The Cylc Suite Engine' +copyright = u'2008-2019 NIWA & British Crown (Met Office) & Contributors' + +# Versioning information. Sphinx advises version strictly meaning X.Y. +version = '.'.join(CYLC_VERSION.split('.')[:2]) # The short X.Y version. +release = CYLC_VERSION # The full version, including alpha/beta/rc tags. + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'manni' + +# Enable automatic numbering of any captioned figures, tables & code blocks. +numfig = True +numfig_secnum_depth = 0 + + +# -- Options for HTML output ---------------------------------------------- + +# The builtin HTML theme to build upon, with customisations to it. Notably +# customise with a white 'sticky' sidebar; make headings & links text the Cylc +# logo colours & make code block background the logo green made much lighter. +html_theme = "classic" +html_theme_options = { + "stickysidebar": True, + "sidebarwidth": 250, + + "relbarbgcolor": "black", + "footerbgcolor": "black", + "sidebarbgcolor": "white", + "sidebartextcolor": "black", + "sidebarlinkcolor": "#00B3FD", + "headbgcolor": "white", + "headtextcolor": "#FF5966", + "linkcolor": "#00C697", + "visitedlinkcolor": "#00C697", + "headlinkcolor": "#00C697", + "codebgcolor": "#ebf9f6", +} + +# Custom sidebar templates, maps document names to template names. +html_sidebars = { + '**': ['globaltoc.html', 'searchbox.html', 'sourcelink.html'], + 'using/windows': ['windowssidebar.html', 'searchbox.html'], +} + +# Logo and favicon to display. +html_logo = "graphics/png/orig/cylc-logo.png" +# sphinx specifies this should be .ico format +html_favicon = "graphics/cylc-favicon.ico" + +# Disable timestamp otherwise inserted at bottom of every page. +html_last_updated_fmt = '' + +# Remove "Created using Sphinx" text in footer. +html_show_sphinx = False + +# Output file base name for HTML help builder. +htmlhelp_basename = 'cylcdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +'papersize': 'a4paper', +'pointsize': '11pt', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + ('index', 'cylc.tex', u'Cylc Documentation', + u'2008-2019 NIWA & British Crown (Met Office) & Contributors', 'manual'), +] + +# Image file to place at the top of the title page. +latex_logo = "graphics/png/orig/cylc-logo.png" + +# If true, show URL addresses after external links. +latex_show_urls = "footnote" + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'cylc', u'cylc Documentation', + u'2008-2019 NIWA & British Crown (Met Office) & Contributors', 1), +] + +# If true, show URL addresses after external links. +man_show_urls = True + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'cylc', u'Cylc Documentation', + u'2008-2019 NIWA & British Crown (Met Office) & Contributors', + 'cylc', 'The Cylc Suite Engine', 'Miscellaneous'), +] + +# How to display URL addresses. +texinfo_show_urls = 'footnote' diff --git a/doc/src/custom/cylc_lang.py b/doc/src/custom/cylc_lang.py new file mode 100644 index 00000000000..a0add0e46f6 --- /dev/null +++ b/doc/src/custom/cylc_lang.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python2 + +# THIS FILE IS PART OF THE CYLC SUITE ENGINE. +# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""An extension providing pygments lexers for suite.rc files and Cylc graph +strings.""" + +from pygments.lexer import RegexLexer, bygroups, include +from pygments.token import (Name, Comment, Text, Operator, String, + Punctuation, Error, Keyword, Other) + + +class CylcLexer(RegexLexer): + """Pygments lexer for the Cylc suite.rc language.""" + + # Pygments tokens for Cylc suite.rc elements which have no direct + # translation. + HEADING_TOKEN = Name.Tag + SETTING_TOKEN = Name.Variable + GRAPH_TASK_TOKEN = Keyword.Declaration + GRAPH_XTRIGGER_TOKEN = Keyword.Type + PARAMETERISED_TASK_TOKEN = Name.Builtin + EXTERNAL_SUITE_TOKEN = Name.Builtin.Pseudo + INTERCYCLE_OFFSET_TOKEN = Name.Builtin + + EMPY_BLOCK_REGEX = r'@\{([^\{\}]+|\{[^\}]+\})+\}' + EMPY_BLOCK_REGEX = ( + r'@\%(open)s(' # open empy block + r'[^\%(open)s\%(close)s]+|' # either not a close character + r'\%(open)s([^\%(close)s]+)?\%(close)s)+' # or permit 1 level nesting + r'\%(close)s') # close empy block + + # Pygments values. + name = 'Cylc' + aliases = ['cylc', 'suiterc'] + filenames = ['suite.rc'] + # mimetypes = ['text/x-ini', 'text/inf'] + + # Patterns, rules and tokens. + tokens = { + 'root': [ + # Jinja2 opening braces: {{ {% {# + include('preproc'), + + # Cylc comments: # ... + include('comment'), + + # Leading whitespace. + (r'^[\s\t]+', Text), + + # Cylc headings: [] + (r'([\[]+)', HEADING_TOKEN, 'heading'), + + # Multi-line graph sections: graph = """ ... + (r'(graph)(\s+)?(=)([\s+])?(\"\"\")', + bygroups(SETTING_TOKEN, + Text, + Operator, + Text, + String.Double), 'multiline-graph'), + + # Inline graph sections: graph = ... 
+ (r'(graph)(\s+)?(=)', + bygroups(SETTING_TOKEN, + String, + Operator), 'inline-graph'), + + # Multi-line settings: key = """ ... + (r'([^=\n]+)(=)([\s+])?(\"\"\")', + bygroups(SETTING_TOKEN, + Operator, + Text, + String.Double), 'multiline-setting'), + + # Inline settings: key = ... + (r'([^=\n]+)(=)', + bygroups(SETTING_TOKEN, + Operator), 'setting'), + + # Include files + (r'(%include)( )(.*)', bygroups(Operator, Text, String)), + + # Arbitrary whitespace + (r'\s', Text) + ], + + 'heading': [ + (r'[\]]+', HEADING_TOKEN, '#pop'), + include('preproc'), + include('parameterisation'), + (r'(\\\n|.)', HEADING_TOKEN), # Allow line continuation chars. + ], + + # Cylc comments. + 'comment': [ + # Allow whitespace so this will work for comments following + # headings. + # NOTE: Does not highlight `${#`. + (r'(\s+)?(? bar & baz + 'graph': [ + include('preproc'), + include('comment'), + include('inter-suite-trigger'), + include('parameterisation'), + (r'@\w+', GRAPH_XTRIGGER_TOKEN), + (r'\w+', GRAPH_TASK_TOKEN), + (r'\!\w+', Other), + (r'\s', Text), + (r'=>', Operator), + (r'[\&\|]', Operator), + (r'[\(\)]', Punctuation), + (r'\[', Text, 'intercycle-offset'), + (r'.', Comment) + ], + + 'inter-suite-trigger': [ + (r'(\<)' + r'([^\>]+)' # foreign suite + r'(::)' + r'([^\>]+)' # foreign task + r'(\>)', + bygroups(Text, EXTERNAL_SUITE_TOKEN, Text, + PARAMETERISED_TASK_TOKEN, Text)), + ], + + # Parameterised syntax, e.g. <foo=1> + 'parameterisation': [ + (r'(\<)' # Opening angle bracket (less-than sign). + r'(\s?\w+\s?' # Parameter name (permit whitespace). + r'(?:[+-=]\s?\w+)?' # [+-=] for selecting parameters. + r'\s?' # Permit whitespace. + r'(?:' # BEGIN optional extra parameter groups... + r'(?:\s?,\s?\w+\s?' # Comma separated parameters. + r'(?:[+-=]\s?\w+)?' # [+-=] for selecting parameters. + r'\s?)' # Permit whitespace. + r'+)?' # ...END optional extra parameter groups. + r')(\>)', # Closing angle bracket (greater-than sign).
+ bygroups(Text, PARAMETERISED_TASK_TOKEN, Text)), + (r'(\<)(.*)(\>)', bygroups(Text, Error, Text)) + ], + + # Task inter-cycle offset for graphing: foo[-P1DT1M] + 'intercycle-offset': [ + include('integer-duration'), + include('iso8601-duration'), + (r'[\^\$]', INTERCYCLE_OFFSET_TOKEN), + (r'\]', Text, '#pop') + ], + + # An integer duration: +P1 + 'integer-duration': [ + (r'[+-]P\d+(?![\w-])', INTERCYCLE_OFFSET_TOKEN) + ], + + # An ISO8601 duration: +P1DT1H + 'iso8601-duration': [ + # Basic format. + (r'([+-])?P' + r'(?![\]\s])' # Require something to follow. + r'(' + + # Weekly format (ISO8601-1:4.4.4.5): + r'\d{1,2}W' + + r'|' # OR + + # Extended Format (ISO8601-1:4.4.4.4): + r'(' + r'\d{8}T\d{6}' + r'|' + r'\d{4}\-\d{2}\-\d{2}T\d{2}\:\d{2}\:\d{2}' + r')' + + r'|' # OR + + # Basic format (ISO8601-1:4.4.4.4): + # ..Year + r'(\d{1,4}Y)?' + # ..Month + r'(\d{1,2}M)?' + # ..Day + r'(\d{1,2}D)?' + r'(T' + # ..Hours. + r'(\d{1,2}H)?' + # ..Minutes. + r'(\d{1,2}M)?' + # ..Seconds. + r'(\d{1,2}S)?' + r')?' + + r')', + INTERCYCLE_OFFSET_TOKEN), + ], + + # Wrapper for multi-line graph strings. + 'multiline-graph': [ + (r'\"\"\"', String.Double, '#pop'), + include('graph'), + ], + + # Wrapper for inline graph strings. + 'inline-graph': [ + (r'\n', Text, '#pop'), + include('graph') + ], + + 'empy': [ + (r'#![Ee]mpy', Comment.Hashbang), # #!empy + (r'@@', Text), # @@ + # @(...) + (EMPY_BLOCK_REGEX % {'open': '(', 'close': ')'}, Comment.Preproc), + # @{...} + (EMPY_BLOCK_REGEX % {'open': '{', 'close': '}'}, Comment.Preproc), + # @[...] + (EMPY_BLOCK_REGEX % {'open': '[', 'close': ']'}, Comment.Preproc), + (r'@empy\.[\w]+[^\n]+', Comment.Preproc), # @empy... + (r'(\s+)?@#.*', Comment.Multi), # @# ... + (r'@[\w.]+', Comment.Preproc) # @...
+ ], + + 'jinja2': [ + (r'#![Jj]inja2', Comment.Hashbang), # #!jinja2 + (r'\{\{((.|\n)+?)(?=\}\})\}\}', Comment.Preproc), # {{...}} + (r'\{\%((.|\n)+?)(?=\%\})\%\}', Comment.Preproc), # {%...%} + (r'\{\#((.|\n)+?)(?=\#\})\#\}', Comment.Multi), # {#...#} + ], + + 'preproc': [ + include('empy'), + include('jinja2') + ] + + } + + +class CylcGraphLexer(CylcLexer): + """Pygments lexer for Cylc graph strings.""" + + tokens = dict(CylcLexer.tokens) + tokens['root'] = list(tokens['graph']) + + name = 'Cylc Graph' + aliases = ['cylc-graph'] + filenames = [] + + +def setup(app): + """Sphinx plugin setup function.""" + app.add_lexer('cylc', CylcLexer()) + app.add_lexer('cylc-graph', CylcGraphLexer()) diff --git a/doc/src/index.css b/doc/src/custom/index.css similarity index 96% rename from doc/src/index.css rename to doc/src/custom/index.css index e96adb81165..4c925beda33 100644 --- a/doc/src/index.css +++ b/doc/src/custom/index.css @@ -23,7 +23,7 @@ div.uberpage { div.page { color:#00b3fd; background:white; - background-image:url(graphics/cylc-logo.png); + background-image:url(graphics/png/orig/cylc-logo.png); background-repeat:no-repeat; background-position: top right; margin:0 auto; diff --git a/doc/src/custom/make-commands.sh b/doc/src/custom/make-commands.sh new file mode 100755 index 00000000000..7ee3093576c --- /dev/null +++ b/doc/src/custom/make-commands.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# THIS FILE IS PART OF THE CYLC SUITE ENGINE. +# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Create appendices/command-ref.rst for inclusion in HTML doc. + +# All paths relative to 'doc/src/custom/' directory: +COMMAND_REF_FILE="$(dirname $0)/../appendices/command-ref.rst" +CYLC=$(dirname $0)/../../../bin/cylc + +$(cat > "$COMMAND_REF_FILE" <> "$COMMAND_REF_FILE" <> "$COMMAND_REF_FILE" <> "$COMMAND_REF_FILE" <. - -.PHONY: all pdf html html-multi html-single clean - -DEPS := $(shell ./scripts/get-deps.sh) -CYLC = ../../../bin/cylc - -all: $(DEPS) - -pdf: pdf/cug-pdf.pdf - -html: html-multi html-single - -html-multi: html/multi/cug-html.html - -html-single: html/single/cug-html.html - -commands.tex: $(CYLC)-* - ./scripts/make-commands.sh - -html/multi/cug-html.html: commands.tex cug-html.tex cug.tex suiterc.tex siterc.tex gcylcrc.tex cug-html.cfg - - ./scripts/make-html.sh multi - -html/single/cug-html.html: commands.tex cug-html.tex cug.tex suiterc.tex siterc.tex gcylcrc.tex cug-html.cfg - - ./scripts/make-html.sh single - -pdf/cug-pdf.pdf: commands.tex cug-pdf.tex cug.tex suiterc.tex siterc.tex gcylcrc.tex - - ./scripts/make-pdf.sh - -clean: - rm -rf pdf html commands.tex cylc-version.txt diff --git a/doc/src/cylc-user-guide/README b/doc/src/cylc-user-guide/README deleted file mode 100644 index d14fb644850..00000000000 --- a/doc/src/cylc-user-guide/README +++ /dev/null @@ -1,15 +0,0 @@ - -To generate pdf and html (single and multi-page) Cylc User Guides: - - | cd /doc - | make - -The following make targets are also available: - | make pdf - | make html - | make html-single - | make html-multi - -This will generate the documents under pdf/ and html/ sub-directories, -and an index.html file at the top level that links to them. 
- diff --git a/doc/src/cylc-user-guide/abstract.tex b/doc/src/cylc-user-guide/abstract.tex deleted file mode 100644 index 4a0e21ff1c9..00000000000 --- a/doc/src/cylc-user-guide/abstract.tex +++ /dev/null @@ -1,49 +0,0 @@ -\begin{abstract} - - {\em Cylc} (``silk'') is a - metascheduler\footnote{A metascheduler determines when dependent - jobs are {\em ready to run} and then submits them to run by other - means, usually a batch queue scheduler. The - term can also refer to an aggregate view of multiple distributed - resource managers, but that is not the topic of this document. We - drop the ``meta'' prefix from here on because a metascheduler is - also a type of scheduler.} for - cycling environmental forecasting suites containing many forecast - models and associated processing tasks. Cylc has a novel - self-organising scheduling algorithm: a - pool of task proxy objects, that each know just their own inputs and - outputs, negotiate dependencies so that correct scheduling emerges - naturally at run time. Cylc does not group tasks - artificially by forecast cycle\footnote{A {\em forecast cycle} - comprises all tasks with a common {\em cycle time} (later referred - to here as {\em cycle point}) i.e.\ the analysis time or nominal - start time of a forecast model, or that of the associated forecast - model(s) for other tasks.} (each task has a private cycle time and - is self-spawning - there is no suite-wide cycle time) and handles - dependencies within and between cycles equally so that tasks from - multiple cycles can run at once to the maximum possible extent. 
This - matters in particular whenever the external driving data - \footnote{Forecast suites are typically driven by real time - observational data or timely model fields from an external - forecasting system.} for upcoming cycles are available in advance: - cylc suites can catch up from delays very quickly, parallel test - suites can be started behind the main operation to catch up quickly, - and one can likewise achieve greater throughput in historical case - studies; the usual sequence of distinct forecast cycles emerges - naturally if a suite catches up to real time operation. Cylc can - easily use existing tasks and can run suites distributed across a - heterogenous network. Suites can be stopped and restarted in any state - of operation, and they dynamically adapt to insertion and removal of - tasks, and to delays or failures in particular tasks or in the - external environment: tasks not directly affected will carry on - cycling as normal while the problem is addressed, and then the - affected tasks will catch up as quickly as possible. Cylc has - comprehensive command line and graphical interfaces, including a - dependency graph based suite control GUI. Other notable features - include suite databases; a fast simulation mode; a structured, - validated suite definition file format; dependency graph plotting; - task event hooks for centralized alerting; and cryptographic suite - security. 
-\end{abstract} - - diff --git a/doc/src/cylc-user-guide/cug-html.cfg b/doc/src/cylc-user-guide/cug-html.cfg deleted file mode 100644 index 069a4395e84..00000000000 --- a/doc/src/cylc-user-guide/cug-html.cfg +++ /dev/null @@ -1,11 +0,0 @@ -\Preamble{html} -\begin{document} - \Css{ - .lstlisting { color : crimson; } - .lstinputlisting { color : crimson; } - .lstinline { color : crimson; } - body { width : 700px; border:1px solid grey; padding:50px; margin: 0 auto; margin-top:20px; margin-bottom:80px; background:white } - html { background: LightSteelBlue } - } -\TocAt{section,subsection} -\EndPreamble diff --git a/doc/src/cylc-user-guide/cug-html.tex b/doc/src/cylc-user-guide/cug-html.tex deleted file mode 100644 index b28811e4f8e..00000000000 --- a/doc/src/cylc-user-guide/cug-html.tex +++ /dev/null @@ -1,238 +0,0 @@ -%\documentclass[11pt,a4paper]{report} -%\documentclass[11pt,a4paper,titlepage]{article} -%\documentclass[11pt,a4paper]{amsart} - -\documentclass[titlepage]{article} - -\usepackage{parskip} % don't indent paragraphs -% 1 inch margins -\usepackage{textcomp} -\usepackage{fullpage} -\usepackage{textcomp} % \textgreater, \textless -\usepackage{framed} -\usepackage{listings} -\usepackage{courier} -\usepackage{dirtree} -\usepackage{amsmath} -\usepackage{verbatim} -\usepackage{graphicx} % latex, eps -%\usepackage[pdftex]{graphicx} % pdflatex, png, jpg, pdf -\usepackage[usenames]{color} % dvips here screws up graphicx png version, above -\usepackage{hyperref} -%\usepackage{titletoc} - -\usepackage{fancyhdr} -\pagestyle{fancy} -\fancyhf{} -\renewcommand{\headheight}{14pt} -\renewcommand{\headsep}{10pt} -\lhead{\leftmark} -\rhead{\rightmark} -\lfoot{Copyright (C) 2008-2018 NIWA \& British Crown (Met Office) \& Contributors.} -\rfoot{\thepage} - -\usepackage{titlepic} % off CTAN, held locally in cylc doc dir. 
- -\usepackage{tocloft} -% prevent double digit sub-sections crowding the toc line -\addtolength\cftsubsecnumwidth{0.5em} % see tocloft manual - -\definecolor{codeblock}{rgb}{0.95,0.95,1.0} -%\definecolor{keywords}{rgb}{1.0,0.3,0.0} -\definecolor{keywords}{rgb}{0.7,0.1,1.0} -%\definecolor{comments}{rgb}{0.0,0.7,0.8} -\definecolor{comments}{rgb}{1.0,0.4,0.0} -\definecolor{identifiers}{rgb}{0.0,0.2,0.5} -\definecolor{strings}{rgb}{0.0,0.6,0.0} -\definecolor{basic}{rgb}{0.1,0.1,0.2} -\definecolor{command}{rgb}{0.0,0.2,0.1} -\definecolor{transcr}{rgb}{0.0,0.2,0.4} -\definecolor{level1}{rgb}{1.0,0.2,1.0} -\definecolor{level2}{rgb}{0.6,0.0,0.6} -\definecolor{level3}{rgb}{0.2,0.0,0.2} - -% hyperlink color: -%\definecolor{linkc}{rgb}{0,0.2,0.68} -% colored hyperlink instead of boxed -%\hypersetup{colorlinks=true, linkcolor=linkc} -\hypersetup{colorlinks=true, linkcolor=blue} - -\definecolor{shadecolor}{rgb}{0.9,0.9,0.1} - -\lstset{ -language=, -%%xleftmargin=2em, -%%frame=single, -backgroundcolor=\color{codeblock}, -basicstyle=\color{basic}, -%identifierstyle=\color{identifiers}, -%keywordstyle=\color{keywords}, -%commentstyle=\color{comments}, -%stringstyle=\color{strings}, -%showstringspaces=false, -numbers=left, -%%numberstyle=\color{Gray} -} - -\lstdefinelanguage{jobhosts} -{ -string=[b]{'}, -sensitive=true, -comment=[l]{\#}, -keywords={ssh, rm, mkdir, bash, scp, rsync, hobo@otherhost, vagrant@localhost}, -} - -\lstdefinelanguage{transcript} -{ -showstringspaces=false, -string=[b]{"}, -comment=[l][\color{comments}]{\#}, -morecomment=[l][\color{command}]{\%}, -} - -%\lstset{ -%language=bash, -%basicstyle=\color{blue}\ttfamily, -%stringstyle=\color{black}, -%} - -\lstdefinelanguage{suiterc} -{ -showstringspaces=false, -string=[b]{"}, -sensitive=true, -comment=[l]{\#}, -morecomment=[s][\color{level1}]{[}{]}, -morecomment=[s][\color{level2}]{[[}{]]}, -morecomment=[s][\color{level3}]{[[[}{]]]}, -} - -\lstdefinelanguage{usage} -{ -%string=[b]{"}, -%sensitive=false, 
-%morecomment=[l]{Usage:}, -%morecomment=[l]{USAGE:}, -%morecomment=[l]{usage:}, -%morecomment=[l]{HELP:}, -%morecomment=[l]{CATEGORY:}, -%morecomment=[l]{COMMANDs:}, -%morecomment=[l]{Arguments:}, -%morecomment=[l]{Options:}, -%morecomment=[l]{arguments:}, -%morecomment=[l]{command-options:}, -%morecomment=[l]{COMMANDS:}, -%morecomment=[l]{options:}, -%%morecomment=[l]{\#}, -numbers=none, -} - -\lstset{ -language=usage, -basicstyle=\color{basic}\ttfamily, -} - -% allow \paragraph as subsubsubsection -% and \subparagraph as subsubsubsubsection -\setcounter{secnumdepth}{5} -\setcounter{tocdepth}{5} - -% the follow makes \paragraph{} be followed -% by a newline, as for section headings. -\makeatletter -\renewcommand\paragraph{% - \@startsection{paragraph}{4}{0mm}% - {-\baselineskip}% - {.5\baselineskip}% - {\normalfont\normalsize\bfseries}} -\makeatother -% and similarly for \subparagraph{} -\makeatletter -\renewcommand\subparagraph{% - \@startsection{subparagraph}{4}{0mm}% - {-\baselineskip}% - {.5\baselineskip}% - {\normalfont\normalsize\bfseries}} -\makeatother - -% define a more compact itemized list environment -\newenvironment{myitemize} { -\begin{itemize} - \setlength{\itemsep}{1pt} - \setlength{\parskip}{0pt} - \setlength{\parsep}{0pt} - \setlength{\topsep}{0pt} - }{\end{itemize}} - -% define a more compact enumerate list environment -\newenvironment{myenumerate} { -\begin{enumerate} - \setlength{\itemsep}{1pt} - \setlength{\parskip}{0pt} - \setlength{\parsep}{0pt} - \setlength{\topsep}{0pt} - }{\end{enumerate}} - -\usepackage{color} -\newcommand{\hilight}[1]{\colorbox{yellow}{#1}} - -\begin{document} - -\Configure{section}{}{}{ - \HCode{ -

- \thesection - } -}{ - \HCode{

} -} - -\Configure{subsection}{}{}{ - \HCode{ -

- - - \thesection.\arabic{subsection} - } -}{ - \HCode{

} -} - -\Configure{subsubsection}{}{}{ - \HCode{ -

- - \thesection.\arabic{subsection}.\arabic{subsubsection} - } -}{ - \HCode{

} -} - - % cylc-version.txt is generated each time by doc/process -\title{The Cylc Suite Engine\\ -User Guide \\ -\protect \input{cylc-version.txt} \\ -GNU GPL v3.0 Software License \\ -Copyright (C) 2008-2018 NIWA \& British Crown (Met Office) \& Contributors.} - -\author{Hilary Oliver} - -\titlepic{\includegraphics{graphics/png/orig/logo.png} \\ -\includegraphics{graphics/png/orig/niwa-colour-small.png}} - -\maketitle - -%\pagebreak -%\input{abstract.tex} -%\pagebreak - -\tableofcontents -%\listoffigures -%\listoftables - -%\pagebreak - -\input{cug.tex} - -\end{document} diff --git a/doc/src/cylc-user-guide/cug-pdf.tex b/doc/src/cylc-user-guide/cug-pdf.tex deleted file mode 100644 index e533a2443dc..00000000000 --- a/doc/src/cylc-user-guide/cug-pdf.tex +++ /dev/null @@ -1,214 +0,0 @@ -%\documentclass[11pt,a4paper]{report} -\documentclass[11pt,a4paper,titlepage]{article} -%\documentclass[11pt,a4paper]{amsart} - -% 1 inch margins -\usepackage{parskip} % don't indent paragraphs -\usepackage{textcomp} -\usepackage{fullpage} -\usepackage{textcomp} % \textgreater, \textless -\usepackage{framed} -\usepackage{listings} -\usepackage{courier} -\usepackage{dirtree} -\usepackage{amsmath} -\usepackage{verbatim} -%\usepackage{graphicx} % latex, eps -\usepackage[pdftex]{graphicx} % pdflatex, png, jpg, pdf -%\usepackage[dvips,usenames,dvipsnames]{color} % dvips here screws up graphicx png version, above -\usepackage[usenames]{color} % dvips here screws up graphicx png version, above -\usepackage{hyperref} -\usepackage{caption} -\usepackage{float} -%\usepackage{titletoc} - -\usepackage{fancyhdr} -\pagestyle{fancy} -\fancyhf{} -\renewcommand{\headheight}{14pt} -\renewcommand{\headsep}{10pt} -\lhead{\leftmark} -\rhead{\rightmark} -\lfoot{Copyright (C) 2008-2018 NIWA \& British Crown (Met Office) \& Contributors.} -\rfoot{\thepage} - -\usepackage{tgbonum} -%\renewcommand{\familydefault}{\tgbonum} - - -\usepackage{titlepic} % off CTAN, held locally in cylc doc dir. 
- -\usepackage{tocloft} -% prevent double digit sub-sections crowding the toc line -\addtolength\cftsubsecnumwidth{0.5em} % see tocloft manual - -%\definecolor{codeblock}{rgb}{0.98,0.93,0.85} -%\definecolor{codeblock}{rgb}{0.96,0.96,0.96} -%\definecolor{keywords}{rgb}{1.0,0.3,0.0} -\definecolor{keywords}{rgb}{0.8,0.4,0.0} -%\definecolor{comments}{rgb}{0.0,0.7,0.8} -\definecolor{comments}{rgb}{1.0,0.2,0.3} -\definecolor{identifiers}{rgb}{0.3,0.4,0.5} -\definecolor{strings}{rgb}{0.2,0.5,0.3} -\definecolor{basic}{rgb}{0.3,0.4,0.5} -\definecolor{command}{rgb}{0.0,0.2,0.1} -\definecolor{transcr}{rgb}{0.0,0.2,0.4} -\definecolor{level1}{rgb}{0.0,0.2,0.6} -\definecolor{level2}{rgb}{0.0,0.3,0.7} -\definecolor{level3}{rgb}{0.0,0.4,0.8} -\definecolor{jinja2}{rgb}{0.1,0.7,0.4} - -% hyperlink color: -%\definecolor{linkc}{rgb}{0,0.2,0.68} -% colored hyperlink instead of boxed -%\hypersetup{colorlinks=true, linkcolor=linkc} -\hypersetup{colorlinks=true, linkcolor=blue} - -%\definecolor{shadecolor}{rgb}{0.9,0.9,0.1} -\usepackage{lmodern} -\usepackage[T1]{fontenc} - -\newcommand\mysmall{\fontsize{9}{9.2}\selectfont} - -\lstset{ -language=, -%xleftmargin=2em, -%frame=single, -%backgroundcolor=\color{codeblock}, -%basicstyle=\color{basic}\footnotesize\ttfamily\bfseries, -%basicstyle=\color{basic}\scriptsize\ttfamily\bfseries, -%basicstyle=\color{basic}\small\ttfamily\bfseries, -%basicstyle=\color{basic}\normalsize\ttfamily\bfseries, -basicstyle=\color{basic}\mysmall\ttfamily, -identifierstyle=\color{identifiers}, -keywordstyle=\color{keywords}, -commentstyle=\color{comments}, -stringstyle=\color{strings}, -showstringspaces=false, -%numbers=left, -%numberstyle=\color{Gray} -} - -\lstdefinelanguage{jobhosts} -{ -string=[b]{'}, -sensitive=true, -comment=[l]{\#}, -keywords={ssh, rm, mkdir, bash, scp, rsync, hobo@otherhost, vagrant@localhost}, -} - -\lstdefinelanguage{transcript} -{ -string=[b]{"}, -comment=[l][\color{comments}]{\#}, -alsoletter={$}, -morekeywords={shell$}, -} - 
-\lstdefinelanguage{suiterc} -{ -string=[b]{"}, -sensitive=true, -comment=[l]{\#}, -morecomment=[s][\color{level1}]{[}{]}, -morecomment=[s][\color{level2}]{[[}{]]}, -morecomment=[s][\color{level3}]{[[[}{]]]}, -morecomment=[s][\color{jinja2}]{\{\%}{\%\}}, -morecomment=[s][\color{jinja2}]{\{\{}{\}\}}, -} - -\lstdefinelanguage{usage} -{ -string=[b]{"}, -sensitive=false, -morecomment=[l]{Usage:}, -morecomment=[l]{USAGE:}, -morecomment=[l]{usage:}, -morecomment=[l]{HELP:}, -morecomment=[l]{CATEGORY:}, -morecomment=[l]{COMMANDs:}, -morecomment=[l]{Arguments:}, -morecomment=[l]{Options:}, -morecomment=[l]{arguments:}, -morecomment=[l]{command-options:}, -morecomment=[l]{COMMANDS:}, -morecomment=[l]{options:}, -%morecomment=[l]{\#}, -numbers=none, -morekeywords={shell$}, -} - -% allow \paragraph as subsubsubsection -% and \subparagraph as subsubsubsubsection -\setcounter{secnumdepth}{5} -\setcounter{tocdepth}{5} - -% the follow makes \paragraph{} be followed -% by a newline, as for section headings. 
-\makeatletter -\renewcommand\paragraph{% - \@startsection{paragraph}{4}{0mm}% - {-\baselineskip}% - {.5\baselineskip}% - {\normalfont\normalsize\bfseries}} -\makeatother -% and similarly for \subparagraph{} -\makeatletter -\renewcommand\subparagraph{% - \@startsection{subparagraph}{4}{0mm}% - {-\baselineskip}% - {.5\baselineskip}% - {\normalfont\normalsize\bfseries}} -\makeatother - -\title{The Cylc Suite Engine\linebreak -User Guide \linebreak -{\em \small -\input{cylc-version.txt} % generated each time by doc/process -} \linebreak -{\em \small Released Under the GNU GPL v3.0 Software License} \linebreak -{\small Copyright (C) 2008-2018 NIWA \& British Crown (Met Office) \& Contributors.}} - -\author{Hilary Oliver} - -\titlepic{\includegraphics[width=0.5\textwidth]{graphics/png/orig/niwa-colour.png}} - -% define a more compact itemized list environment -\newenvironment{myitemize} { -\begin{itemize} - \setlength{\itemsep}{1pt} - \setlength{\parskip}{0pt} - \setlength{\parsep}{0pt} - \setlength{\topsep}{0pt} - }{\end{itemize}} - -% define a more compact enumerate list environment -\newenvironment{myenumerate} { -\begin{enumerate} - \setlength{\itemsep}{1pt} - \setlength{\parskip}{0pt} - \setlength{\parsep}{0pt} - \setlength{\topsep}{0pt} - }{\end{enumerate}} - -\usepackage{color} -\definecolor{hilight}{rgb}{0.5,1.0,0.8} -\newcommand{\hilight}[1]{\colorbox{hilight}{#1}} - -\begin{document} - -\maketitle - -\pagebreak - -\input{abstract.tex} - -\pagebreak -\tableofcontents -\listoffigures -%\listoftables - -\pagebreak -\input{cug.tex} - -\end{document} diff --git a/doc/src/cylc-user-guide/cug.tex b/doc/src/cylc-user-guide/cug.tex deleted file mode 100644 index 7eacf09b410..00000000000 --- a/doc/src/cylc-user-guide/cug.tex +++ /dev/null @@ -1,8700 +0,0 @@ -\lstset{language=transcript} - -\section{Introduction: How Cylc Works} -\label{HowCylcWorks} - -\subsection{Scheduling Forecast Suites} -\label{SchedulingForecastSuites} - -Environmental forecasting suites generate 
forecast products from a -potentially large group of interdependent scientific models and -associated data processing tasks. They are constrained by availability -of external driving data: typically one or more tasks will wait on real -time observations and/or model data from an external system, and these -will drive other downstream tasks, and so on. The dependency diagram for -a single forecast cycle point in such a system is a {\em Directed Acyclic -Graph} as shown in Figure~\ref{fig-dep-one} (in our terminology, a {\em -forecast cycle point} is comprised of all tasks with a common {\em cycle -point}, which is the nominal analysis time or start time of the forecast -models in the group). In real time operation processing will consist of -a series of distinct forecast cycle points that are each initiated, after a -gap, by arrival of the new cycle point's external driving data. - -From a job scheduling perspective task execution order in such a system -must be carefully controlled in order to avoid dependency violations. -Ideally, each task should be queued for execution at the instant its -last prerequisite is satisfied; this is the best that can be done even -if queued tasks are not able to execute immediately because of resource -contention. - -\subsection{EcoConnect} -\label{EcoConnect} - -Cylc was developed for the EcoConnect Forecasting System at NIWA -(National Institute of Water and Atmospheric Research, New Zealand). 
-EcoConnect takes real time atmospheric and stream flow observations, and -operational global weather forecasts from the Met Office (UK), and uses -these to drive global sea state and regional data assimilating weather -models, which in turn drive regional sea state, storm surge, and -catchment river models, plus tide prediction, and a large number of -associated data collection, quality control, preprocessing, -post-processing, product generation, and archiving tasks.\footnote{Future -plans for EcoConnect include additional deterministic regional weather -forecasts and a statistical ensemble.} The global sea state forecast -runs once daily. The regional weather forecast runs four times daily but -it supplies surface winds and pressure to several downstream models that -run only twice daily, and precipitation accumulations to catchment river -models that run on an hourly cycle assimilating real time stream flow -observations and using the most recently available regional weather -forecast. EcoConnect runs on heterogeneous distributed hardware, -including a massively parallel supercomputer and several Linux servers. - - -\subsection{Dependence Between Tasks} - -\subsubsection{Intra-cycle Dependence} -\label{IntracycleDependence} - -Most dependence between tasks applies within a single forecast cycle -point. Figure~\ref{fig-dep-one} shows the dependency diagram for a single -forecast cycle point of a simple example suite of three forecast models -({\em a, b,} and {\em c}) and three post processing or product generation -tasks ({\em d, e} and {\em f}). A scheduler capable of handling this -must manage, within a single forecast cycle point, multiple parallel -streams of execution that branch when one task generates output for -several downstream tasks, and merge when one task takes input from several -upstream tasks. 
- -\begin{figure} - \begin{center} - \includegraphics[width=6cm]{graphics/png/orig/dep-one-cycle.png} - \end{center} - \caption[A single cycle point dependency graph for a simple suite] - {\scriptsize - The dependency graph for a single forecast cycle point of a simple - example suite. Tasks {\em a, b,} and {\em c} represent forecast models, - {\em d, e} and {\em f} are post processing or product generation - tasks, and {\em x} represents external data that the upstream - forecast model depends on.} - \label{fig-dep-one} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=8cm]{graphics/png/orig/timeline-one.png} - \end{center} - \caption[A single cycle point job schedule for real time operation] - {\scriptsize - The optimal job schedule for two consecutive cycle points of our - example suite during real time operation, assuming that all tasks - trigger off upstream tasks finishing completely. The horizontal - extent of a task bar represents its execution time, and the vertical - blue lines show when the external driving data becomes available.} - \label{fig-time-one} -\end{figure} - -Figure~\ref{fig-time-one} shows the optimal job schedule for two -consecutive cycle points of the example suite in real time operation, given -execution times represented by the horizontal extent of the task bars. -There is a time gap between cycle points as the suite waits on new external -driving data. Each task in the example suite happens to trigger off -upstream tasks {\em finishing}, rather than off any intermediate output -or event; this is merely a simplification that makes for clearer -diagrams. 
- -\begin{figure} - \begin{center} - \includegraphics[width=10cm]{graphics/png/orig/dep-two-cycles-linked.png} - \end{center} - \caption[What if the external driving data is available early?]{\scriptsize If - the external driving data is available in advance, can we start - running the next cycle point early?} - \label{fig-dep-two-linked} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=6cm]{graphics/png/orig/timeline-one-c.png} - \end{center} - \caption[Attempted overlap of consecutive single-cycle-point job - schedules]{\scriptsize A naive attempt to overlap two consecutive cycle - points using the single-cycle-point dependency graph. The red shaded - tasks will fail because of dependency violations (or will not be able to - run because of upstream dependency violations).} - \label{fig-overlap} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=8cm]{graphics/png/orig/timeline-one-a.png} - \end{center} - \caption[The only safe multi-cycle-point job schedule?] - {\scriptsize The best that can be done {\em in general} when - inter-cycle dependence is ignored.} - \label{fig-job-no-overlap} -\end{figure} - -Now the question arises, what happens if the external driving data for -upcoming cycle points is available in advance, as it would be after a -significant delay in operations, or when running a historical case -study? While the forecast model {\em a} appears to depend only on the -external data {\em x} at this stage of the discussion, in fact it would -typically also depend on its own previous instance for the model {\em -background state} used in initializing the new forecast. Thus, as -alluded to in Figure~\ref{fig-dep-two-linked}, task {\em a} could in -principle start -as soon as its predecessor has finished. Figure~\ref{fig-overlap} -shows, however, that starting a whole new cycle point at this point is -dangerous - it results in dependency violations in half of the tasks in -the example suite. 
In fact the situation could be even worse than this -- imagine that task {\em b} in the first cycle point is delayed for some -reason {\em after} the second cycle point has been launched. Clearly we must -consider handling inter-cycle dependence explicitly or else agree not to -start the next cycle point early, as is illustrated in -Figure~\ref{fig-job-no-overlap}. - -\subsubsection{Inter-Cycle Dependence} -\label{InterCyclePointDependence} - -Forecast models typically depend on their own most recent previous -forecast for background state or restart files of some kind (this is -called {\em warm cycling}) but there can also be inter-cycle dependence -between different tasks. In an atmospheric forecast analysis suite, for -instance, the weather model may generate background states for observation -processing and data-assimilation tasks in the next cycle point as well as for -the next forecast model run. In real time operation inter-cycle -dependence can be ignored because it is automatically satisfied when one cycle -point finishes before the next begins. If it is not ignored it drastically -complicates the dependency graph by blurring the clean boundary between -cycle points. Figure~\ref{fig-dep-multi} illustrates the problem for our -simple example suite assuming minimal inter-cycle dependence: the warm -cycled models ($a$, $b$, and $c$) each depend on their own previous instances. - -For this reason, and because we tend to see forecasting suites in terms of -their real time characteristics, other metaschedulers have ignored -inter-cycle dependence and are thus restricted to running entire cycle -points in sequence at all times. This does not affect normal real time -operation but it can be a serious impediment when advance availability of -external driving data makes it possible, in principle, to run some tasks from -upcoming cycle points before the current cycle point is finished - as was -suggested at the end of the previous section. 
This can occur, for instance, -after operational delays (late arrival of external data, system maintenance, -etc.) and to an even greater extent in historical case studies and parallel -test suites started behind a real time operation. It can be a serious problem -for suites that have little downtime between forecast cycle points and -therefore take many cycle points to catch up after a delay. Without taking -account of inter-cycle dependence, the best that can be done, in -general, is to reduce the gap between cycle points to zero as shown in -Figure~\ref{fig-job-no-overlap}. A limited crude overlap of the single cycle -point job schedule may be possible for specific task sets but the allowable -overlap may change if new tasks are added, and it is still dangerous: it -amounts to running different parts of a dependent system as if they were not -dependent and as such it cannot be guaranteed that some unforeseen delay in -one cycle point, after the next cycle point has begun, (e.g.\ due to resource -contention or task failures) won't result in dependency violations. - -\begin{figure} - \begin{center} - \includegraphics[width=8cm]{graphics/png/orig/dep-multi-cycle.png} - \end{center} - \caption[The complete multi-cycle-point dependency graph] - {\scriptsize The complete dependency graph for the example suite, assuming - the least possible inter-cycle dependence: the forecast models ($a$, - $b$, and $c$) depend on their own previous instances. 
The dashed arrows - show connections to previous and subsequent forecast cycle points.} - \label{fig-dep-multi} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=6cm]{graphics/png/orig/timeline-two-cycles-optimal.png} - \end{center} - \caption[The optimal two-cycle-point job schedule] - {\scriptsize The optimal two cycle job schedule when the next cycle's driving data is available in - advance, possible in principle when inter-cycle dependence is - handled explicitly.} - \label{fig-optimal-two} -\end{figure} - -Figure~\ref{fig-optimal-two} shows, in contrast to -Figure~\ref{fig-overlap}, the optimal two cycle point job schedule obtained by -respecting all inter-cycle dependence. This assumes no delays due to -resource contention or otherwise - i.e.\ every task runs -as soon as it is ready to run. The scheduler running -this suite must be able to adapt dynamically to external conditions -that impact on multi-cycle-point scheduling in the presence of -inter-cycle dependence or else, again, risk bringing the system down -with dependency violations. - -\begin{figure} - \begin{center} - \includegraphics[width=12cm]{graphics/png/orig/timeline-three.png} - \end{center} - \caption[Comparison of job schedules after a delay]{\scriptsize Job - schedules for the example suite after a delay of almost one whole - forecast cycle point, when inter-cycle dependence is - taken into account (above the time axis), and when it is not - (below the time axis). 
The colored lines indicate the time that - each cycle point is delayed, and normal ``caught up'' cycle points - are shaded gray.} - \label{fig-time-three} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=8cm]{graphics/png/orig/timeline-two.png} - \end{center} - \caption[Optimal job schedule when all external data is - available]{\scriptsize Job schedules for the example suite in case study - mode, or after a long delay, when the external driving data are - available many cycle points in advance. Above the time axis is the optimal - schedule obtained when the suite is constrained only by its true - dependencies, as in Figure \ref{fig-dep-two-linked}, and underneath - is the best that can be done, in general, when inter-cycle - dependence is ignored.} - \label{fig-time-two} -\end{figure} - -To further illustrate the potential benefits of proper inter-cycle -dependency handling, Figure~\ref{fig-time-three} shows an operational -delay of almost one whole cycle point in a suite with little downtime between -cycle points. Above the time axis is the optimal schedule that is possible in -principle when inter-cycle dependence is taken into account, and below -it is the only safe schedule possible {\em in general} when it is ignored. -In the former case, even the cycle point immediately after the delay is hardly -affected, and subsequent cycle points are all on time, whilst in the latter -case it takes five full cycle points to catch up to normal real time -operation. - -%Note that simply overlapping the single cycle point schedules of -%Figure~\ref{fig-time-one} from the same start point would have resulted -%in dependency violation by task {\em c}. - -Similarly, Figure~\ref{fig-time-two} shows example suite job schedules -for an historical case study, or when catching up after a very long -delay; i.e.\ when the external driving data are available many cycle -points in advance. 
Task {\em a}, which as the most upstream forecast -model is likely to be a resource intensive atmosphere or ocean model, -has no upstream dependence on co-temporal tasks and can therefore run -continuously, regardless of how much downstream processing is yet to be -completed in its own, or any previous, forecast cycle point (actually, -task {\em a} does depend on co-temporal task {\em x} which waits on the -external driving data, but that returns immediately when the data is -available in advance, so the result stands). The other forecast models -can also cycle continuously or with a short gap between, and some -post processing tasks, which have no previous-instance dependence, can -run continuously or even overlap (e.g.\ {\em e} in this case). Thus, -even for this very simple example suite, tasks from three or four -different cycle points can in principle run simultaneously at any given -time. - -In fact, if our tasks are able to trigger off internal outputs of -upstream tasks (message triggers) rather than waiting on full completion, -then successive instances of the forecast models could overlap as well (because -model restart outputs are generally completed early in the forecast) for an -even more efficient job schedule. - -%Finally, we note again that a good job scheduler should be able to -%dynamically adapt to delays in any part of the suite due to resource -%contention, varying run times, or anything else that will inevitably -%modify the depicted job schedules. - -\subsection{The Cylc Scheduling Algorithm} -\label{TheCylcSchedulingAlgorithm} - -\begin{figure} - \begin{center} - \includegraphics[width=8cm]{graphics/png/orig/task-pool.png} - \end{center} - \caption[The cylc task pool]{\scriptsize How cylc sees a suite, in - contrast to the multi-cycle-point dependency graph of - Figure~\ref{fig-dep-multi}. - Task colors represent different cycle points, and the small squares - and circles represent different prerequisites and outputs. 
A task - can run when its prerequisites are satisfied by the outputs - of other tasks in the pool.} - \label{fig-task-pool} -\end{figure} - -Cylc manages a pool of proxy objects that represent the real tasks in a -suite. Task proxies know how to run the real tasks that they represent, -and they receive progress messages from the tasks as they run (usually -reports of completed outputs). There is no global cycling mechanism to -advance the suite; instead individual task proxies have their own -private cycle point and spawn their own successors when the time is -right. Task proxies are self-contained - they know their own -prerequisites and outputs but are not aware of the wider suite. -Inter-cycle dependence is not treated as special, and the task pool can -be populated with tasks with many different cycle points. The task pool -is illustrated in Figure~\ref{fig-task-pool}. {\em Whenever any task -changes state due to completion of an output, every task checks to see -if its own prerequisites have been satisfied.} -%\footnote{In fact this dependency negotiation goes through a broker -%object (rather than every task literally checking every other task) -%which scales as $n$ (rather than $n^2$) where $n$ is the number of task -%proxies in the pool.} -In effect, cylc gets a pool of tasks to self-organize by negotiating -their own dependencies so that optimal scheduling, as described in the -previous section, emerges naturally at run time. 
- -%\pagebreak -\section{Cylc Screenshots} - -\begin{figure} - \begin{center} - \includegraphics[width=0.8\textwidth]{graphics/png/orig/gcylc-graph-and-dot-views.png} - \end{center} -\caption[gcylc graph and dot views]{\scriptsize gcylc graph and dot views.} -\label{fig-gcylc-1} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=0.8\textwidth]{graphics/png/orig/gcylc-text-view.png} - \end{center} -\caption[gcylc text view]{\scriptsize gcylc text view.} -\label{fig-gcylc-2} -\end{figure} - -\begin{figure} - \begin{center} - \includegraphics[width=0.5\textwidth]{graphics/png/orig/gscan.png} - \end{center} -\caption[gscan multi-suite state summary GUI]{\scriptsize gscan multi-suite state summary GUI.} -\label{fig-gscan} -\end{figure} - - -\begin{figure} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/ecox-1.png} - \end{center} -\caption[A large-ish suite graphed by cylc]{\scriptsize A large-ish suite graphed by cylc.} -\label{fig-ecox-1} -\end{figure} - -% dump floats -\clearpage - -%\pagebreak - -\section{Installation} -\label{Requirements} - -Cylc runs on Linux. It is tested quite thoroughly on modern RHEL and Ubuntu -distros. Some users have also managed to make it work on other Unix variants -including Apple OS X, but they are not officially tested and supported. - -\subsection{Third-Party Software Packages} - -{\bf Python 2 \lstinline@>=@ 2.6} is required. -{\bf Python 2 \lstinline@>=@ 2.7.9} is recommended for the best security. -Python 2 should already be installed in your Linux system. -\url{https://python.org/}. 
- -For Cylc's HTTPS communications layer: -\begin{myitemize} - \item {\bf OpenSSL} - \url{https://www.openssl.org/} - \item {\bf pyOpenSSL} - \url{http://www.pyopenssl.org/} - \item {\bf python-requests} - \url{http://docs.python-requests.org/} - \item ({\bf python-urllib3} - should be bundled with python-requests) -\end{myitemize} - -The following packages are highly recommended, but are technically optional as -you can construct and run suites without dependency graph visualisation or -the Cylc GUIs: - -\begin{myitemize} - \item {\bf PyGTK} - GUI toolkit \url{http://www.pygtk.org}. {\em Note PyGTK - typically comes with your system Python. It is allegedly quite - difficult to install if you need to do so for another Python version.} - \item {\bf Graphviz} - graph layout engine (tested 2.36.0): - \url{http://www.graphviz.org}. - \item {\bf Pygraphviz} - Python Graphviz interface (tested 1.2): - \url{http://pygraphviz.github.io/}. To build this you may need some {\em - devel} packages too: - \begin{myitemize} - \item python-devel - \item graphviz-devel - \end{myitemize} -\end{myitemize} - -The Cylc Review service does not need any additional packages. - -The following packages are necessary for running all the tests in Cylc: - -\begin{myitemize} - \item {\bf mock} - \url{https://mock.readthedocs.io} -\end{myitemize} - -The User Guide is generated from \LaTeX source files by running -\lstinline=make= in the top level Cylc directory. The specific packages -required may vary by distribution, e.g.: - -\begin{myitemize} - \item texlive - \item texlive-tocloft - \item texlive-framed - \item texlive-preprint (for \lstinline=fullpage.sty=) - \item texlive-tex4ht - \item texlive-generic-extra (for \lstinline=dirtree.sty=) -\end{myitemize} - -To generate the HTML User Guide {\bf ImageMagick} is also needed. - -In most modern Linux distributions all of the software above can be installed -via the system package manager. 
Otherwise download packages manually and follow -their native installation instructions. To check that all (non \LaTeX packages) -are installed properly: - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc check-software -Checking your software... - -Individual results: -============================================================================= -Package (version requirements) Outcome (version found) -============================================================================= - *REQUIRED SOFTWARE* -Python (2.6+, <3)...................FOUND & min. version MET (2.7.12.final.0) - - *OPTIONAL SOFTWARE for the GUI & dependency graph visualisation* -Python:pygtk (2.0+).........................FOUND & min. version MET (2.24.0) -graphviz (any).................................................FOUND (2.38.0) -Python:pygraphviz (any).........................................FOUND (1.3.1) - - *OPTIONAL SOFTWARE for the HTML User Guide* -ImageMagick (any).............................................FOUND (6.8.9-9) - - *OPTIONAL SOFTWARE for the HTTPS communications layer* -Python:urllib3 (any)...........................................FOUND (1.13.1) -Python:OpenSSL (any)...........................................FOUND (17.2.0) -Python:requests (2.4.2+).....................FOUND & min. version MET (2.9.1) - - *OPTIONAL SOFTWARE for the LaTeX User Guide* -TeX:framed (any)..................................................FOUND (n/a) -TeX (3.0+)..............................FOUND & min. 
version MET (3.14159265) -TeX:preprint (any)................................................FOUND (n/a) -TeX:tex4ht (any)..................................................FOUND (n/a) -TeX:tocloft (any).................................................FOUND (n/a) -TeX:texlive (any).................................................FOUND (n/a) -============================================================================= - -Summary: - **************************** - Core requirements: ok - Full-functionality: ok - **************************** -\end{lstlisting} - -If errors are reported then the packages concerned are either not installed or -not in your Python search path. (Note that \lstinline=cylc check-software= has -become quite trivial as we've removed or bundled some former dependencies, but -in future we intend to make it print a comprehensive list of library versions -etc.\ to include with bug reports.) - -To check for specific packages only, supply these as arguments to the -\lstinline=check-software= command, either in the form used in the output of -the bare command, without any parent package prefix and colon, or -alternatively all in lower-case, should the given form contain capitals. For -example: - -\begin{lstlisting} -$ cylc check-software Python graphviz imagemagick -\end{lstlisting} - -With arguments, check-software provides an exit status indicating a -collective pass (zero) or a failure of that number of packages to satisfy -the requirements (non-zero integer). - -\subsection{Software Bundled With Cylc} - -Cylc bundles several third party packages which do not need to be installed -separately. - -\begin{myitemize} - \item {\bf cherrypy 6.0.2} (slightly modified): a pure Python HTTP framework - that we use as a web server for communication between server processes - (suite server programs) and client programs (running tasks, GUIs, CLI commands).
- Client communication is via the Python {\bf requests} library if available - (recommended) or else pure Python via {\bf urllib2}. -\newline \url{http://www.cherrypy.org/} -\newline \url{http://docs.python-requests.org/} - \item {\bf Jinja2 2.10}: a full featured template engine for Python, and its - dependency {\bf MarkupSafe 0.23}; both BSD licensed. -\newline \url{http://jinja.pocoo.org/} -\newline \url{http://www.pocoo.org/projects/markupsafe/} - \item the {\bf xdot} graph viewer (modified), LGPL licensed: - \newline \url{https://github.com/jrfonseca/xdot.py} -\end{myitemize} - -\subsection{Installing Cylc} -\label{InstallCylc} - -Cylc releases can be downloaded from \url{https://cylc.github.io/cylc}. - -The wrapper script \lstinline=usr/bin/cylc= should be installed to -the system executable search path (e.g.\ \lstinline=/usr/local/bin/=) and -modified slightly to point to a location such as \lstinline=/opt= where -successive Cylc releases will be unpacked side by side. - -To install Cylc, unpack the release tarball in the right location, e.g.\ -\lstinline=/opt/cylc-7.7.0=, type \lstinline=make= inside the release -directory, and set site defaults - if necessary - in a site global config file -(below). - -Make a symbolic link from \lstinline=cylc= to the latest installed version: -\lstinline=ln -s /opt/cylc-7.7.0 /opt/cylc=. This will be invoked by the -central wrapper if a specific version is not requested. Otherwise, the -wrapper will attempt to invoke the Cylc version specified in -\lstinline@$CYLC_VERSION@, e.g.\ \lstinline@CYLC_VERSION=7.7.0@. This variable -is automatically set in task job scripts to ensure that jobs use the same Cylc -version as their parent suite server program. It can also be set by users, -manually or in login scripts, to fix the Cylc version in their environment. 
- -Installing subsequent releases is just a matter of unpacking the new tarballs -next to the previous releases, running \lstinline=make= in them, and copying -in (possibly with modifications) the previous site global config file. - -\subsubsection{Local User Installation} -\label{LocalInstall} - -It is easy to install Cylc under your own user account if you don't have -root or sudo access to the system: just put the central Cylc wrapper in -\lstinline=$HOME/bin/= (making sure that is in your \lstinline=$PATH=) and -modify it to point to a directory such as \lstinline=$HOME/cylc/= where you -will unpack and install release tarballs. Local installation of third party -dependencies like Graphviz is also possible, but that depends on the particular -installation methods used and is outside of the scope of this document. - -\subsubsection{Create A Site Config File} - -Site and user global config files define some important parameters that affect -all suites, some of which may need to be customized for your site. -See~\ref{SiteAndUserConfiguration} for how to generate an initial site file and -where to install it. All legal site and user global config items are defined -in~\ref{SiteRCReference}. - -\subsubsection{Configure Site Environment on Job Hosts} -\label{Configure Site Environment on Job Hosts} - -If your users submit task jobs to hosts other than the hosts they use to run -their suites, you should ensure that the job hosts have the correct environment -for running cylc. A cylc suite generates task job scripts that normally invoke -\lstinline=bash -l=, i.e. it will invoke bash as a login shell to run the job -script. Users and sites should ensure that their bash login profiles are able -to set up the correct environment for running cylc and their task jobs. - -Your site administrator may customise the environment for all task jobs by adding -a \lstinline=/etc/job-init-env.sh= file and populating it with the -appropriate contents.
If customisation is still required, you can add your own -\lstinline=${HOME}/.cylc/job-init-env.sh= file and populate it with the -appropriate contents. - -\begin{myitemize} -\item \lstinline=${HOME}/.cylc/job-init-env.sh= -\item \lstinline=/etc/job-init-env.sh= -\end{myitemize} - -The job will attempt to source the first of these files it finds to set up its -environment. - -\subsubsection{Configuring Cylc Review Under Apache} -\label{ConfiguringCylcReviewApache} - -The Cylc Review web service displays suite job logs and other information in -web pages - see~\ref{ViewingSuiteLogsCylcReview} and -Figure~\ref{fig-review-screenshot}. It can run under a WSGI server (e.g.\ -Apache with \lstinline=mod_wsgi=) as a service for all users, or as an ad hoc -service under your own user account. - -To run Cylc Review under Apache, install \lstinline=mod_wsgi= and configure it -as follows, with paths modified appropriately: - -\lstset{language=bash} -\begin{lstlisting} -# Apache mod_wsgi config file, e.g.: -# Red Hat Linux: /etc/httpd/conf.d/cylc-wsgi.conf -# Ubuntu Linux: /etc/apache2/mods-available/wsgi.conf -# E.g. for /opt/cylc-7.8.1/ -WSGIPythonPath /opt/cylc-7.8.1/lib -WSGIScriptAlias /cylc-review /opt/cylc-7.8.1/bin/cylc-review -\end{lstlisting} -(Note the \lstinline=WSGIScriptAlias= determines the service URL under the -server root). - -And allow Apache access to the Cylc library: - -\begin{lstlisting} -# Directory access, in main Apache config file, e.g.: -# Red Hat Linux: /etc/httpd/conf/httpd.conf -# Ubuntu Linux: /etc/apache2/apache2.conf -# E.g. for /opt/cylc-7.8.1/ -<Directory /opt/cylc-7.8.1/> - AllowOverride None - Require all granted -</Directory> -\end{lstlisting} - -The host running the Cylc Review web service, and the service itself (or the -user that it runs as) must be able to view the \lstinline=~/cylc-run= directory -of all Cylc users. - -Use the web server log, e.g.\ \lstinline=/var/log/httpd/= or -\lstinline=/var/log/apache2/=, to debug problems.
- - - -\subsection{Automated Tests} -\label{RTAST} - -The cylc test battery is primarily intended for developers to check that -changes to the source code don't break existing functionality. Note that -some test failures can be expected to result from suites timing out, -even if nothing is wrong, if you run too many tests in parallel. See -\lstinline=cylc test-battery --help=. - -\section{Cylc Terminology} - -\subsection{Jobs and Tasks} - -A {\em job} is a program or script that runs on a computer, and a {\em task} is -a workflow abstraction - a node in the suite dependency graph - that represents -a job. - -\subsection{Cycle Points} - -A {\em cycle point} is a particular date-time (or integer) point in a sequence -of date-time (or integer) points. Each cylc task has a private cycle point and -can advance independently to subsequent cycle points. It may sometimes be -convenient, however, to refer to the ``current cycle point'' of a suite (or the -previous or next one, etc.) with reference to a particular task, or in the -sense of all task instances that ``belong to'' a particular cycle point. But -keep in mind that different tasks may pass through the ``current cycle point'' -(etc.) at different times as the suite evolves. - -\section{Workflows For Cycling Systems} -\label{Workflows For Cycling Systems} - -A model run and associated processing may need to be cycled for the following -reasons: - -\begin{myitemize} - \item In real time forecasting systems, a new forecast may be initiated - at regular intervals when new real time data comes in. - \item It may be convenient (or necessary, e.g.\ due to batch scheduler - queue limits) to split single long model runs into many smaller chunks, - each with associated pre- and post-processing workflows. -\end{myitemize} - -Cylc provides two ways of constructing workflows for cycling systems: {\em cycling workflows} and {\em parameterized tasks}.
- -\subsection{Cycling Workflows} -\label{Cycling Workflows} - -This is cylc's classic cycling mode as described in the Introduction. Each -instance of a cycling job is represented by a new instance of {\em the same -task}, with a new cycle point. The suite configuration defines patterns for -extending the workflow on the fly, so it can keep running indefinitely if -necessary. For example, to cycle \lstinline=model.exe= on a monthly sequence we -could define a single task \lstinline=model=, an initial cycle point, and a -monthly sequence. Cylc then generates the date-time sequence and creates a new -task instance for each cycle point as it comes up. Workflow dependencies are -defined generically with respect to the ``current cycle point'' of the tasks -involved. - -This is the only sensible way to run very large suites or operational suites -that need to continue cycling indefinitely. The cycling is configured with -standards-based ISO 8601 date-time {\em recurrence expressions}. Multiple -cycling sequences can be used at once in the same suite. See -Section~\ref{ConfiguringScheduling}. - -\subsection{Parameterized Tasks as a Proxy for Cycling} -\label{Parameterized Tasks as a Proxy for Cycling} - -It is also possible to run cycling jobs with a pre-defined static workflow in -which each instance of a cycling job is represented by {\em a different task}: -as far as the abstract workflow is concerned there is no cycling. The sequence -of tasks can be constructed efficiently, however, using cylc's built-in suite -parameters (\ref{Parameterized Cycling}) or explicit Jinja2 loops -(\ref{Jinja2}). - -For example, to run \lstinline=model.exe= 12 times on a monthly cycle we could -loop over an integer parameter \lstinline@R = 0, 1, 2, ..., 11@ to define tasks -\lstinline=model-R0, model-R1, model-R2, ...model-R11=, and the parameter -values could be multiplied by the interval \lstinline=P1M= (one month) to get -the start point for the corresponding model run. 
- -This method is only good for smaller workflows of finite duration because every -single task has to be mapped out in advance, and cylc has to be aware of all of -them throughout the entire run. Additionally Cylc's {\em cycling workflow} -capabilities (above) are more powerful, more flexible, and generally easier to -use (Cylc will generate the cycle point date-times for you, for instance), so -that is the recommended way to drive most cycling systems. - -The primary use for parameterized tasks in cylc is to generate ensembles and -other groups of related tasks at the same cycle point, not as a proxy for -cycling. - -\subsection{Mixed Cycling Workflows} - -For completeness we note that parameterized cycling can be used within a -cycling workflow. For example, in a daily cycling workflow long (daily) -model runs could be split into four shorter runs by parameterized cycling. -A simpler six-hourly cycling workflow should be considered first, however. - -\section{Global (Site, User) Configuration Files} -\label{SiteAndUserConfiguration} - -Cylc site and user global configuration files contain settings that affect all -suites. Some of these, such as the range of network ports used by cylc, -should be set at site level. Legal items, values, and system defaults are -documented in (\ref{SiteRCReference}). - -\lstset{language=transcript} -\begin{lstlisting} -# cylc site global config file -/etc/global.rc -\end{lstlisting} -Others, such as the preferred text editor for suite configurations, -can be overridden by users, -\lstset{language=transcript} -\begin{lstlisting} -# cylc user global config file -~/.cylc/$(cylc --version)/global.rc # e.g. 
~/.cylc/7.7.0/global.rc -\end{lstlisting} - -The file \lstinline=/etc/global.rc.eg= contains instructions on how -to generate and install site and user global config files: -\lstinputlisting{../../../etc/global.rc.eg} - -%\pagebreak -\section{Tutorial} -\label{Tutorial} - -This section provides a hands-on tutorial introduction to basic cylc -functionality. - -\subsection{User Config File} - -Some settings affecting cylc's behaviour can be defined in site and user -{\em global config files}. For example, to choose the text editor invoked by -cylc on suite configurations: - -\lstset{language=suiterc} -\begin{lstlisting} -# $HOME/.cylc/$(cylc --version)/global.rc -[editors] - terminal = vim - gui = gvim -f -\end{lstlisting} - -\begin{myitemize} -\item For more on site and user global config files - see~\ref{SiteAndUserConfiguration} and~\ref{SiteRCReference}. -\end{myitemize} - -\subsubsection{Configure Environment on Job Hosts} -\label{Configure Environment on Job Hosts} - -See~\ref{Configure Site Environment on Job Hosts} for information. - -\subsection{User Interfaces} -\label{CUI} - -You should have access to the cylc command line (CLI) and graphical (GUI) user -interfaces once cylc has been installed as described in -Section~\ref{InstallCylc}. - -\subsubsection{Command Line Interface (CLI)} - -The command line interface is unified under a single top level -\lstinline=cylc= command that provides access to many sub-commands -and their help documentation. - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc help # Top level command help. -$ cylc run --help # Example command-specific help. -\end{lstlisting} - -Command help transcripts are printed in~\ref{CommandReference} and are -available from the GUI Help menu. - -Cylc is {\em scriptable} - the error status returned by commands can be -relied on. 
- -\subsubsection{Graphical User Interface (GUI)} - -The cylc GUI covers the same functionality as the CLI, but it has more -sophisticated suite monitoring capability. It can start and stop suites, or -connect to suites that are already running; in either case, shutting down the -GUI does not affect the suite itself. - -\lstset{language=transcript} -\begin{lstlisting} -$ gcylc & # or: -$ cylc gui & # Single suite control GUI. -$ cylc gscan & # Multi-suite monitor GUI. -\end{lstlisting} - -Clicking on a suite in gscan, shown in Figure~\ref{fig-gscan}, opens a gcylc -instance for it. - -\subsection{Suite Configuration} - -Cylc suites are defined by extended-INI format \lstinline=suite.rc= -files (the main file format extension is section nesting). These reside -in {\em suite configuration directories} that may also contain a -\lstinline=bin= directory and any other suite-related files. - -\begin{myitemize} -\item For more on the suite configuration file format, see~\ref{SuiteDefinition} - and~\ref{SuiteRCReference}. -\end{myitemize} - -\subsection{Suite Registration} - -Suite registration creates a run directory (under \lstinline=~/cylc-run/= by -default) and populates it with authentication files and a symbolic link to a -suite configuration directory. Cylc commands that parse suites can take -the file path or the suite name as input. Commands that interact with running -suites have to target the suite by name. - -\lstset{language=transcript} -\begin{lstlisting} -# Target a suite by file path: -$ cylc validate /path/to/my/suite/suite.rc -$ cylc graph /path/to/my/suite/suite.rc - -# Register a suite: -$ cylc register my.suite /path/to/my/suite/ - -# Target a suite by name: -$ cylc graph my.suite -$ cylc validate my.suite -$ cylc run my.suite -$ cylc stop my.suite -# etc. 
-\end{lstlisting} - -\subsection{Suite Passphrases} -\label{tutPassphrases} - -Registration (above) also generates a suite-specific passphrase file under -\lstinline=.service/= in the suite run directory. It is loaded by the suite -server program at start-up and used to authenticate connections from client -programs. - -Possession of a suite's passphrase file gives full control over it. -Without it, the information available to a client is determined by the suite's -public access privilege level. - -For more on connection authentication, suite passphrases, and public access, -see~\ref{ConnectionAuthentication}. - - -\subsection{Import The Example Suites} -\label{ImportTheExampleSuites} - -Run the following command to copy cylc's example suites and register them for -your own use: - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc import-examples /tmp -\end{lstlisting} - -\subsection{Rename The Imported Tutorial Suites} - -Suites can be renamed by simply renaming (i.e.\ moving) their run directories. -Make the tutorial suite names shorter, and print their locations with -\lstinline=cylc print=: - -\begin{lstlisting} -$ mv ~/cylc-run/examples/$(cylc --version)/tutorial ~/cylc-run/tut -$ cylc print -ya tut -tut/oneoff/jinja2 | /tmp/cylc-examples/7.0.0/tutorial/oneoff/jinja2 -tut/cycling/two | /tmp/cylc-examples/7.0.0/tutorial/cycling/two -tut/cycling/three | /tmp/cylc-examples/7.0.0/tutorial/cycling/three -# ... -\end{lstlisting} - -See \lstinline=cylc print --help= for other display options. - -\subsection{Suite Validation} - -Suite configurations can be validated to detect syntax (and other) errors: - -\lstset{language=transcript} -\begin{lstlisting} -# pass: -$ cylc validate tut/oneoff/basic -Valid for cylc-6.0.0 -$ echo $? -0 -# fail: -$ cylc validate my/bad/suite -Illegal item: [scheduling]special tusks -$ echo $? 
-1 -\end{lstlisting} - -\subsection{Hello World in Cylc} - -\hilight{ suite: \lstinline=tut/oneoff/basic= } -\vspace{3mm} - -Here's the traditional {\em Hello World} program rendered as a cylc -suite: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/basic/suite.rc} -\lstset{language=transcript} - -Cylc suites feature a clean separation of scheduling configuration, -which determines {\em when} tasks are ready to run; and runtime -configuration, which determines {\em what} to run (and {\em where} and -{\em how} to run it) when a task is ready. In this example the -\lstinline=[scheduling]= section defines a single task called -\lstinline=hello= that triggers immediately when the suite starts -up. When the task finishes the suite shuts down. That this is a -{\em dependency graph} will be more obvious when more tasks are added. -Under the \lstinline=[runtime]= section the -\lstinline=script= item defines a simple inlined -implementation for \lstinline=hello=: it sleeps for ten seconds, -then prints \lstinline=Hello World!=, and exits. This ends up in a {\em -job script} generated by cylc to encapsulate the task (below) and, -thanks to some defaults designed to allow quick -prototyping of new suites, it is submitted to run as a background job on -the suite host. In fact cylc even provides a default task implementation -that makes the entire \lstinline=[runtime]= section technically optional: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/minimal/suite.rc} -\lstset{language=transcript} -(the resulting {\em dummy task} just prints out some identifying -information and exits). - -\subsection{Editing Suites} - -The text editor invoked by Cylc on suite configurations is determined -by cylc site and user global config files, as shown above in~\ref{CUI}. 
-Check that you have renamed the tutorial examples suites as described -just above and open the {\em Hello World} suite in your text editor: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc edit tut/oneoff/basic # in-terminal -$ cylc edit -g tut/oneoff/basic & # or GUI -\end{lstlisting} -Alternatively, start gcylc on the suite: -\lstset{language=transcript} -\begin{lstlisting} -$ gcylc tut/oneoff/basic & -\end{lstlisting} -and choose {\em Suite } \textrightarrow {\em Edit} from the menu. - -The editor will be invoked from within the suite configuration directory for easy -access to other suite files (in this case there are none). There are syntax -highlighting control files for several text editors under -\lstinline=/etc/syntax/=; see in-file comments for installation -instructions. - -\subsection{Running Suites} -\label{RunningSuitesCLI} - -\subsubsection{CLI} -Run \lstinline=tut/oneoff/basic= using the \lstinline=cylc run= command. -As a suite runs detailed timestamped information is written to a {\em suite -log} and progress can be followed with cylc's suite monitoring tools (below). -By default a suite server program daemonizes after printing a short message so -that you can exit the terminal or even log out without killing the suite: - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc run tut/oneoff/basic - ._. - | | The Cylc Suite Engine [7.0.0] -._____._. ._| |_____. Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. -| .___| | | | | .___| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -| !___| !_! | | !___. This program comes with ABSOLUTELY NO WARRANTY; -!_____!___. |_!_____! see `cylc warranty`. It is free software, you - .___! | are welcome to redistribute it under certain - !_____! conditions; see `cylc conditions`. 
- -*** listening on https://nwp-1:43027/ *** - -To view suite server program contact information: - $ cylc get-suite-contact tut/oneoff/basic - -Other ways to see if the suite is still running: - $ cylc scan -n '\btut/oneoff/basic\b' nwp-1 - $ cylc ping -v --host=nwp-1 tut/oneoff/basic - $ ps h -opid,args 123456 # on nwp-1 - -\end{lstlisting} - -If you're quick enough (this example only takes 10-15 seconds to run) the -\lstinline=cylc scan= command will detect the running suite: -\begin{lstlisting} -$ cylc scan -tut/oneoff/basic oliverh@nwp-1:43027 -\end{lstlisting} - -Note you can use the \lstinline=--no-detach= and \lstinline=--debug= options -to \lstinline=cylc-run= to prevent the suite from daemonizing (i.e.\ to make -it stay attached to your terminal until it exits). - -When a task is ready cylc generates a {\em job script} to run it, by -default as a background job on the suite host. The job process ID is -captured, and job output is directed to log files in standard -locations under the suite run directory. - -Log file locations relative to the suite run directory look like -\lstinline=job/1/hello/01/= where the first digit is the {\em cycle point} of -the task \lstinline=hello= (for non-cycling tasks this is just `1'); and the -final \lstinline=01= is the {\em submit number} (so that job logs do not get -overwritten if a job is resubmitted for any reason). - -The suite shuts down automatically once all tasks have succeeded. - -\subsubsection{GUI} - -The cylc GUI can start and stop suites, or (re)connect to suites that -are already running: -\begin{lstlisting} -$ cylc gui tut/oneoff/basic & -\end{lstlisting} -Use the tool bar {\em Play} button, or the {\em Control} -\textrightarrow {\em Run} menu item, to run the suite again. -You may want to alter the suite configuration slightly to make the task -take longer to run. Try right-clicking on the \lstinline=hello= task -to view its output logs. 
The relative merits of the three {\em suite -views} - dot, text, and graph - will be more apparent later when we -have more tasks. Closing the GUI does not affect the suite itself. - -\subsection{Remote Suites} -\label{RemoteSuites} - -Suites can run on {\em localhost} or on a {\em remote} host. - -To start up a suite on a given host, specify it explicitly via the -\lstinline@--host=@ option to a \lstinline=run= or \lstinline=restart= -command. - -Otherwise, Cylc selects the best host to start up on from allowed -\lstinline=run hosts= as specified in the global config under -\lstinline=[suite servers]=, which defaults to localhost. Should there be -more than one allowed host set, the {\em most suitable} is determined -according to the settings specified under \lstinline=[[run host select]]=, -namely exclusion of hosts not meeting suitability {\em thresholds}, if -provided, then ranking according to the given {\em rank} method. - -\subsection{Discovering Running Suites} - -Suites that are currently running can be detected with command line or -GUI tools: -\begin{lstlisting} -# list currently running suites and their port numbers: -$ cylc scan -tut/oneoff/basic oliverh@nwp-1:43001 - -# GUI summary view of running suites: -$ cylc gscan & -\end{lstlisting} - -The scan GUI is shown in Figure~\ref{fig-gscan}; clicking on a suite in it -opens gcylc. - -\subsection{Task Identifiers} - -At run time, task instances are identified by {\em name}, which is -determined entirely by the suite configuration, and a {\em cycle point} which is -usually a date-time or an integer: -\lstset{language=transcript} -\begin{lstlisting} -foo.20100808T00Z # a task with a date-time cycle point -bar.1 # a task with an integer cycle point (could be non-cycling) -\end{lstlisting} -Non-cycling tasks usually just have the cycle point \lstinline=1=, but this -still has to be used to target the task instance with cylc commands. 
- -\subsection{Job Submission: How Tasks Are Executed} - -\hilight{ suite: \lstinline=tut/oneoff/jobsub= } -\vspace{3mm} - -Task {\em job scripts} are generated by cylc to wrap the task implementation -specified in the suite configuration (environment, script, etc.) in -error trapping code, messaging calls to report task progress back to the suite -server program, and so forth. Job scripts are written to the {\em suite job log -directory} where they can be viewed alongside the job output logs. They -can be accessed at run time by right-clicking on the task in the cylc GUI, or -printed to the terminal: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc cat-log tut/oneoff/basic hello.1 -\end{lstlisting} -This command can also print the suite log (and stdout and stderr for suites -in daemon mode) and task stdout and stderr logs (see -\lstinline=cylc cat-log --help=). -A new job script can also be generated on the fly for inspection: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc jobscript tut/oneoff/basic hello.1 -\end{lstlisting} - -Take a look at the job script generated for \lstinline=hello.1= during -the suite run above. The custom scripting should be clearly visible -toward the bottom of the file. - -The \lstinline=hello= task in the first tutorial suite defaults to -running as a background job on the suite host. To submit it to the Unix -\lstinline=at= scheduler instead, configure its job submission settings -as in \lstinline=tut/oneoff/jobsub=: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" - [[[job]]] - batch system = at -\end{lstlisting} - -Run the suite again after checking that \lstinline=atd= is running on your -system. - -Cylc supports a number of different batch systems. 
Tasks -submitted to external batch queuing systems like \lstinline=at=, -\lstinline=PBS=, \lstinline=SLURM=, \lstinline=Moab=, or -\lstinline=LoadLeveler=, are displayed as {\em submitted} in the cylc GUI until -they start executing. - -\begin{myitemize} -\item For more on task job scripts, see~\ref{JobScripts}. -\item For more on batch systems, see~\ref{AvailableMethods}. -\end{myitemize} - -\subsection{Locating Suite And Task Output} - -If the \lstinline=--no-detach= option is not used, suite stdout and -stderr will be directed to the suite run directory along with the -time-stamped suite log file, and task job scripts and job logs -(task stdout and stderr). The default suite run directory location is -\lstinline=$HOME/cylc-run=: - -\lstset{language=transcript} -\begin{lstlisting} -$ tree $HOME/cylc-run/tut/oneoff/basic/ -|-- .service # location of run time service files -| |-- contact # detail on how to contact the running suite -| |-- db # private suite run database -| |-- passphrase # passphrase for client authentication -| |-- source # symbolic link to source directory -| |-- ssl.cert # SSL certificate for the suite server -| `-- ssl.pem # SSL private key -|-- cylc-suite.db # back compat symlink to public suite run database -|-- share # suite share directory (not used in this example) -|-- work # task work space (sub-dirs are deleted if not used) -| `-- 1 # task cycle point directory (or 1) -| `-- hello # task work directory (deleted if not used) -|-- log # suite log directory -| |-- db # public suite run database -| |-- job # task job log directory -| | `-- 1 # task cycle point directory (or 1) -| | `-- hello # task name -| | |-- 01 # task submission number -| | | |-- job # task job script -| | | `-- job-activity.log # task job activity log -| | | |-- job.err # task stderr log -| | | |-- job.out # task stdout log -| | | `-- job.status # task status file -| | `-- NN -> 01 # symlink to latest submission number -| `-- suite # suite server log directory -| |-- 
err # suite server stderr log (daemon mode only) -| |-- out # suite server stdout log (daemon mode only) -| `-- log # suite server event log (timestamped info) -\end{lstlisting} -The suite run database files, suite environment file, -and task status files are used internally by cylc. Tasks execute in -private \lstinline=work/= directories that are deleted automatically -if empty when the task finishes. The suite -\lstinline=share/= directory is made available to all tasks (by -\lstinline=$CYLC_SUITE_SHARE_DIR=) as a common share space. The task submission -number increments from 1 if a task retries; this is used as a sub-directory of -the log tree to avoid overwriting log files from earlier job submissions. - -The top level run directory location can be changed in site and user -config files if necessary, and the suite share and work locations can be -configured separately because of the potentially larger disk space -requirement. - -Task job logs can be viewed by right-clicking on tasks in the gcylc -GUI (so long as the task proxy is live in the suite), manually -accessed from the log directory (of course), or printed to the terminal -with the \lstinline=cylc cat-log= command: -\lstset{language=transcript} -\begin{lstlisting} -# suite logs: -$ cylc cat-log tut/oneoff/basic # suite event log -$ cylc cat-log -o tut/oneoff/basic # suite stdout log -$ cylc cat-log -e tut/oneoff/basic # suite stderr log -# task logs: -$ cylc cat-log tut/oneoff/basic hello.1 # task job script -$ cylc cat-log -o tut/oneoff/basic hello.1 # task stdout log -$ cylc cat-log -e tut/oneoff/basic hello.1 # task stderr log -\end{lstlisting} -\begin{myitemize} - \item For a web-based interface to suite and task logs (and much more), - see {\em Rose} in~\ref{SuiteStorageEtc}. - \item For more on environment variables supplied to tasks, - such as \lstinline=$CYLC_SUITE_SHARE_DIR=, see~\ref{TaskExecutionEnvironment}. 
-\end{myitemize} - -\subsection{Viewing Suite Logs in a Web Browser: Cylc Review} -\label{ViewingSuiteLogsCylcReview} - -The Cylc Review web service displays suite job logs and other information in -web pages, as shown in Figure~\ref{fig-review-screenshot}. It can run under a -WSGI server (e.g.\ Apache with \lstinline=mod_wsgi=) as a service for all -users, or as an ad hoc service under your own user account. - -If a central Cylc Review service has been set up at your site (e.g.\ as -described in~\ref{ConfiguringCylcReviewApache}) the URL will typically be -something like \lstinline=http://<server>/cylc-review/=. - - -\begin{figure} - \begin{center} - \includegraphics[width=0.5\textwidth]{graphics/png/orig/cylc-review-screenshot.png} - \end{center} - \caption{Screenshot of a Cylc Review web page} -\label{fig-review-screenshot} -\end{figure} - -Otherwise, to start an ad hoc Cylc Review service to view your own suite logs -(or those of others, if you have read access to them), run: - -\lstinline=setsid cylc review start 0</dev/null 1>/dev/null 2>&1 &= - -The service should start at \lstinline=http://<hostname>:8080= (the port number -can optionally be set on the command line). Service logs are written to -\lstinline=~/.cylc/cylc-review*=. Run \lstinline=cylc review status= to view -status information, and \lstinline=cylc review stop= to stop the service. - -\subsection{Remote Tasks} -\label{RemoteTasks} - -\hilight{ suite: \lstinline=tut/oneoff/remote= } -\vspace{3mm} - -The \lstinline=hello= task in the first two tutorial suites defaults to -running on the suite host (see~\ref{RemoteSuites}). To make it run on a different host instead -change its runtime configuration as in \lstinline=tut/oneoff/remote=: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" 
- [[[remote]]] - host = server1.niwa.co.nz -\end{lstlisting} - -In general, a {\em task remote} is a user account, other than the account -running the suite server program, where a task job is submitted to run. It can -be on the same machine running the suite or on another machine. - -A task remote account must satisfy several requirements: -\begin{myitemize} - -\item Non-interactive ssh must be enabled from the account running the suite -server program to the account for submitting (and managing) the remote task job. - -\item Network settings must allow communication {\em back} from the remote task -job to the suite, either by network ports or ssh, unless the last-resort one -way {\em task polling} communication method is used. - -\item Cylc must be installed and runnable on the task remote account. Other -software dependencies like graphviz are not required there. - -\item Any files needed by a remote task must be installed on the task -host. In this example there is nothing to install because the -implementation of \lstinline=hello= is inlined in the suite configuration -and thus ends up entirely contained within the task job script. - -\end{myitemize} - -If your username is different on the task host, you can add a \lstinline=User= -setting for the relevant host in your \lstinline=~/.ssh/config=. -If you are unable to do so, the \lstinline=[[[remote]]]= section also supports an -\lstinline@owner=username@ item. - -If you configure a task account according to the requirements cylc will invoke -itself on the remote account (with a login shell by default) to create log -directories, transfer any essential service files, send the task job script -over, and submit it to run there by the configured batch system. - -Remote task job logs are saved to the suite run directory on the task remote, -not on the account running the suite. 
They can be retrieved by right-clicking -on the task in the GUI, or to have cylc pull them back to the suite account -automatically do this: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" - [[[remote]]] - host = server1.niwa.co.nz - retrieve job logs = True -\end{lstlisting} - -This suite will attempt to \lstinline=rsync= job logs from the remote -host each time a task job completes. - -Some batch systems have considerable delays between the time when the job -completes and when it writes the job logs in its normal location. If this is -the case, you can configure an initial delay and retry delays for job log -retrieval by setting some delays. E.g.: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" - [[[remote]]] - host = server1.niwa.co.nz - retrieve job logs = True - # Retry after 10 seconds, 1 minute and 3 minutes - retrieve job logs retry delays = PT10S, PT1M, PT3M -\end{lstlisting} - -Finally, if the disk space of the suite host is limited, you may want to set -\lstinline@[[[remote]]]retrieve job logs max size=SIZE@. The value of SIZE can -be anything that is accepted by the \lstinline@--max-size=SIZE@ option of the -\lstinline=rsync= command. E.g.: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" - [[[remote]]] - host = server1.niwa.co.nz - retrieve job logs = True - # Don't get anything bigger than 10MB - retrieve job logs max size = 10M -\end{lstlisting} - -It is worth noting that cylc uses the existence of a job's \lstinline=job.out= -or \lstinline=job.err= in the local file system to indicate a successful job -log retrieval. If \lstinline@retrieve job logs max size=SIZE@ is set and both -\lstinline=job.out= and \lstinline=job.err= are bigger than \lstinline=SIZE= -then cylc will consider the retrieval as failed. 
If retry delays are specified, -this will trigger some useless (but harmless) retries. If this occurs -regularly, you should try the following: - -\begin{myitemize} -\item Reduce the verbosity of STDOUT or STDERR from the task. -\item Redirect the verbosity from STDOUT or STDERR to an alternate log file. -\item Adjust the size limit with tolerance to the expected size of STDOUT or STDERR. -\end{myitemize} - -\begin{myitemize} -\item For more on remote tasks see~\ref{RunningTasksOnARemoteHost} - -\item For more on task communications, see~\ref{TaskComms}. - -\item For more on suite passphrases and authentication, - see~\ref{tutPassphrases} and~\ref{ConnectionAuthentication}. -\end{myitemize} - - -\subsection{Task Triggering} - -\hilight{ suite: \lstinline=tut/oneoff/goodbye= } -\vspace{3mm} - -To make a second task called \lstinline=goodbye= trigger after -\lstinline=hello= finishes successfully, return to the original -example, \lstinline=tut/oneoff/basic=, and change the suite graph -as in \lstinline=tut/oneoff/goodbye=: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "hello => goodbye" -\end{lstlisting} -or to trigger it at the same time as \lstinline=hello=, -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "hello & goodbye" -\end{lstlisting} -and configure the new task's behaviour under \lstinline=[runtime]=: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[goodbye]] - script = "sleep 10; echo Goodbye World!" 
-\end{lstlisting} - -Run \lstinline=tut/oneoff/goodbye= and check the output from the new -task: -\lstset{language=transcript} -\begin{lstlisting} -$ cat ~/cylc-run/tut/oneoff/goodbye/log/job/1/goodbye/01/job.out - # or -$ cylc cat-log -o tut/oneoff/goodbye goodbye.1 -JOB SCRIPT STARTING -cylc (scheduler - 2014-08-14T15:09:30+12): goodbye.1 started at 2014-08-14T15:09:30+12 -cylc Suite and Task Identity: - Suite Name : tut/oneoff/goodbye - Suite Host : oliverh-34403dl.niwa.local - Suite Port : 43001 - Suite Owner : oliverh - Task ID : goodbye.1 - Task Host : nwp-1 - Task Owner : oliverh - Task Try No.: 1 - -Goodbye World! -cylc (scheduler - 2014-08-14T15:09:40+12): goodbye.1 succeeded at 2014-08-14T15:09:40+12 -JOB SCRIPT EXITING (TASK SUCCEEDED) -\end{lstlisting} - -\subsubsection{Task Failure And Suicide Triggering} - -\hilight{ suite: \lstinline=tut/oneoff/suicide= } -\vspace{3mm} - -Task names in the graph string can be qualified with a state indicator -to trigger off task states other than success: -\lstset{language=suiterc} -\lstset{language=suiterc} -\begin{lstlisting} - graph = """ - a => b # trigger b if a succeeds - c:submit => d # trigger d if c submits - e:finish => f # trigger f if e succeeds or fails - g:start => h # trigger h if g starts executing - i:fail => j # trigger j if i fails - """ -\end{lstlisting} - -A common use of this is to automate recovery from known modes of failure: -\lstset{language=suiterc} -\begin{lstlisting} - graph = "goodbye:fail => really_goodbye" -\end{lstlisting} -i.e.\ if task \lstinline=goodbye= fails, trigger another task that -(presumably) really says goodbye. 
- -Failure triggering generally requires use of {\em suicide triggers} as -well, to remove the recovery task if it isn't required (otherwise it -would hang about indefinitely in the waiting state): -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """hello => goodbye - goodbye:fail => really_goodbye - goodbye => !really_goodbye # suicide""" -\end{lstlisting} -This means if \lstinline=goodbye= fails, trigger -\lstinline=really_goodbye=; and otherwise, if \lstinline=goodbye= -succeeds, remove \lstinline=really_goodbye= from the suite. - -Try running \lstinline=tut/oneoff/suicide=, which also configures -the \lstinline=hello= task's runtime to make it fail, to see how this -works. -\begin{myitemize} - \item For more on suite dependency graphs see~\ref{ConfiguringScheduling}. - \item For more on task triggering see~\ref{TriggerTypes}. -\end{myitemize} - -\subsection{Runtime Inheritance} - -\hilight{ suite: \lstinline=tut/oneoff/inherit= } -\vspace{3mm} - -The \lstinline=[runtime]= section is actually a {\em multiple -inheritance} hierarchy. Each subsection is a {\em namespace} that -represents a task, or if it is inherited by other namespaces, a {\em -family}. This allows common configuration to be factored out of related -tasks very efficiently. -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/inherit/suite.rc} -The \lstinline=[root]= namespace provides defaults for all tasks in the suite. -Here both tasks inherit \lstinline=script= from \lstinline=root=, which they -customize with different values of the environment variable -\lstinline=$GREETING=. Note that inheritance from \lstinline=root= is -implicit; from other parents an explicit \lstinline@inherit = PARENT@ -is required, as shown below. - -\begin{myitemize} -\item For more on runtime inheritance, see~\ref{NIORP}. 
-\end{myitemize} - -\subsection{Triggering Families} - -\hilight{ suite: \lstinline=tut/oneoff/ftrigger1= } -\vspace{3mm} - -Task families defined by runtime inheritance can also be used as -shorthand in graph trigger expressions. To see this, consider two -``greeter'' tasks that trigger off another task \lstinline=foo=: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "foo => greeter_1 & greeter_2" -\end{lstlisting} -If we put the common greeting functionality of \lstinline=greeter_1= -and \lstinline=greeter_2= into a special \lstinline=GREETERS= family, -the graph can be expressed more efficiently like this: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "foo => GREETERS" -\end{lstlisting} -i.e.\ if \lstinline=foo= succeeds, trigger all members of -\lstinline=GREETERS= at once. Here's the full suite with runtime -hierarchy shown: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/ftrigger1/suite.rc} - -(Note that we recommend giving ALL-CAPS names to task families to help -distinguish them from task names. However, this is just a convention). - -Experiment with the \lstinline=tut/oneoff/ftrigger1= suite to see -how this works. - -\subsection{Triggering Off Of Families} - -\hilight{ suite: \lstinline=tut/oneoff/ftrigger2= } -\vspace{3mm} - -Tasks (or families) can also trigger {\em off} other families, but -in this case we need to specify what the trigger means in terms of -the upstream family members. Here's how to trigger another task -\lstinline=bar= if all members of \lstinline=GREETERS= succeed: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """foo => GREETERS - GREETERS:succeed-all => bar""" -\end{lstlisting} -Verbose validation in this case reports: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc val -v tut/oneoff/ftrigger2 -... 
-Graph line substitutions occurred: - IN: GREETERS:succeed-all => bar - OUT: greeter_1:succeed & greeter_2:succeed => bar -... -\end{lstlisting} -Cylc ignores family member qualifiers like \lstinline=succeed-all= on -the right side of a trigger arrow, where they don't make sense, to -allow the two graph lines above to be combined in simple cases: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "foo => GREETERS:succeed-all => bar" -\end{lstlisting} - -Any task triggering status qualified by \lstinline=-all= or -\lstinline=-any=, for the members, can be used with a family trigger. -For example, here's how to trigger \lstinline=bar= if all members -of \lstinline=GREETERS= finish (succeed or fail) and any of them -succeed: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """foo => GREETERS - GREETERS:finish-all & GREETERS:succeed-any => bar""" -\end{lstlisting} -(use of \lstinline@GREETERS:succeed-any@ by itself here would trigger -\lstinline=bar= as soon as any one member of \lstinline=GREETERS= -completed successfully). Verbose validation now begins to show how -family triggers can simplify complex graphs, even for this tiny -two-member family: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc val -v tut/oneoff/ftrigger2 -... -Graph line substitutions occurred: - IN: GREETERS:finish-all & GREETERS:succeed-any => bar - OUT: ( greeter_1:succeed | greeter_1:fail ) & \ - ( greeter_2:succeed | greeter_2:fail ) & \ - ( greeter_1:succeed | greeter_2:succeed ) => bar -... -\end{lstlisting} - -Experiment with \lstinline=tut/oneoff/ftrigger2= to see how this -works. - -\begin{myitemize} -\item For more on family triggering, see~\ref{FamilyTriggers}. 
-\end{myitemize} - -\subsection{Suite Visualization} - -\lstset{language=suiterc} -You can style dependency graphs with an optional -\lstinline=[visualization]= section, as shown in -\lstinline=tut/oneoff/ftrigger2=: -\lstset{language=suiterc} -\begin{lstlisting} -[visualization] - default node attributes = "style=filled" - [[node attributes]] - foo = "fillcolor=#6789ab", "color=magenta" - GREETERS = "fillcolor=#ba9876" - bar = "fillcolor=#89ab67" -\end{lstlisting} - -To display the graph in an interactive viewer: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc graph tut/oneoff/ftrigger2 & # dependency graph -$ cylc graph -n tut/oneoff/ftrigger2 & # runtime inheritance graph -\end{lstlisting} -It should look like Figure~\ref{fig-tut-hello-multi} (with the -GREETERS family node expanded on the right). -\begin{figure} - \begin{center} - \includegraphics[height=0.3\textheight]{graphics/png/orig/tut-hello-multi-1.png} - \hspace{20mm} - \includegraphics[height=0.3\textheight]{graphics/png/orig/tut-hello-multi-2.png} - \hspace{20mm} - \includegraphics[height=0.3\textheight]{graphics/png/orig/tut-hello-multi-3.png} - \end{center} - \caption{The {\em tut/oneoff/ftrigger2} dependency and runtime inheritance graphs} -\label{fig-tut-hello-multi} -\end{figure} - -Graph styling can be applied to entire families at once, and custom -``node groups'' can also be defined for non-family groups. - - -\subsection{External Task Scripts} - -\hilight{ suite: \lstinline=tut/oneoff/external= } -\vspace{3mm} - -The tasks in our examples so far have all had inlined implementation, in -the suite configuration, but real tasks often need to call external -commands, scripts, or executables. 
To try this, let's return to the -basic Hello World suite and cut the implementation of the task -\lstinline=hello= out to a file \lstinline=hello.sh= in the suite -bin directory: -\lstset{language=bash} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/external/bin/hello.sh} -Make the task script executable, and change the \lstinline=hello= task -runtime section to invoke it: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/external/suite.rc} - -If you run the suite now the new greeting from the external task script -should appear in the \lstinline=hello= task stdout log. This works -because cylc automatically adds the suite bin directory to -\lstinline=$PATH= in the environment passed to tasks via their job -scripts. To execute scripts (etc.) located elsewhere you can -refer to the file by its full file path, or set \lstinline=$PATH= -appropriately yourself (this could be done via -\lstinline=$HOME/.profile=, which is sourced at the top of the task job -script, or in the suite configuration itself). - -Note the use of \lstinline=set -e= above to make the script abort on -error. This allows the error trapping code in the task job script to -automatically detect unforeseen errors. - -\subsection{Cycling Tasks} - -\hilight{ suite: \lstinline=tut/cycling/one= } -\vspace{3mm} - -So far we've considered non-cycling tasks, which finish without spawning -a successor. - -Cycling is based around iterating through date-time or integer sequences. A -cycling task may run at each cycle point in a given sequence (cycle). For -example, a sequence might be a set of date-times every 6 hours starting from a -particular date-time. A cycling task may run for each date-time item (cycle -point) in that sequence. - -There may be multiple instances of this type of task running in parallel, if -the opportunity arises and their dependencies allow it. 
Alternatively, a -sequence can be defined with only one valid cycle point - in that case, a task -belonging to that sequence may only run once. - -Open the \lstinline=tut/cycling/one= suite: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/cycling/one/suite.rc} -The difference between cycling and non-cycling suites is all in the -\lstinline=[scheduling]= section, so we will leave the -\lstinline=[runtime]= section alone for now (this will result in -cycling dummy tasks). Note that the graph is now defined under a new -section heading that makes each task under it have a succession of cycle points -ending in $00$ or $12$ hours, between specified initial and final cycle -points (or indefinitely if no final cycle point is given), as shown in -Figure~\ref{fig-tut-one}. - -\begin{figure} - \begin{center} - %Q Image out of date now - \includegraphics[width=0.5\textwidth]{graphics/png/orig/tut-one.png} - \end{center} - \caption{The \lstinline=tut/cycling/one= suite} -\label{fig-tut-one} -\end{figure} - -\lstset{language=transcript} - -If you run this suite instances of \lstinline=foo= will spawn in parallel out -to the {\em runahead limit}, and each \lstinline=bar= will trigger off the -corresponding instance of \lstinline=foo= at the same cycle point. The -runahead limit, which defaults to a few cycles but is configurable, prevents -uncontrolled spawning of cycling tasks in suites that are not constrained by -clock triggers in real time operation. - -Experiment with \lstinline=tut/cycling/one= to see how cycling tasks work. - -\subsubsection{ISO 8601 Date-Time Syntax} - -The suite above is a very simple example of a cycling date-time workflow. More -generally, cylc comprehensively supports the ISO 8601 standard for date-time -instants, intervals, and sequences. 
Cycling graph sections can be specified -using full ISO 8601 recurrence expressions, but these may be simplified -by assuming context information from the suite - namely initial and final cycle -points. One form of the recurrence syntax looks like -\lstinline=Rn/start-date-time/period= (\lstinline=Rn= means run -\lstinline=n= times). In the example above, if the initial cycle point -is always at 00 or 12 hours then \lstinline=[[[T00,T12]]]= could be -written as \lstinline=[[[PT12H]]]=, which is short for -\lstinline=[[[R/initial-cycle-point/PT12H/]]]= - i.e.\ run every 12 hours -indefinitely starting at the initial cycle point. It is possible to add -constraints to the suite to only allow initial cycle points at 00 or 12 hours -e.g. - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 20130808T00 - initial cycle point constraints = T00, T12 -\end{lstlisting} -\lstset{language=transcript} - -\begin{myitemize} - %Q Runahead factor now - \item For a comprehensive description of ISO 8601 based date-time cycling, - see~\ref{AdvancedCycling} - \item For more on runahead limiting in cycling suites, - see~\ref{RunaheadLimit}. -\end{myitemize} - -\subsubsection{Inter-Cycle Triggers} -\label{TutInterCyclePointTriggers} - -\hilight{ suite: \lstinline=tut/cycling/two= } -\vspace{3mm} - -The \lstinline=tut/cycling/two= suite adds inter-cycle dependence -to the previous example: -\begin{lstlisting} -[scheduling] - [[dependencies]] - # Repeat with cycle points of 00 and 12 hours every day: - [[[T00,T12]]] - graph = "foo[-PT12H] => foo => bar" -\end{lstlisting} -For any given cycle point in the sequence defined by the -cycling graph section heading, \lstinline=bar= triggers off -\lstinline=foo= as before, but now \lstinline=foo= triggers off its own -previous instance \lstinline=foo[-PT12H]=. Date-time offsets in -inter-cycle triggers are expressed as ISO 8601 intervals (12 hours -in this case). 
Figure~\ref{fig-tut-two} shows how this connects the cycling -graph sections together. -\begin{figure} - \begin{center} - \includegraphics[width=0.5\textwidth]{graphics/png/orig/tut-two.png} - \end{center} - \caption{The \lstinline=tut/cycling/two= suite} -\label{fig-tut-two} -\end{figure} - -Experiment with this suite to see how inter-cycle triggers work. -Note that the first instance of \lstinline=foo=, at suite start-up, will -trigger immediately in spite of its inter-cycle trigger, because cylc -ignores dependence on points earlier than the initial cycle point. -However, the presence of an inter-cycle trigger usually implies something -special has to happen at start-up. If a model depends on its own previous -instance for restart files, for example, then some special process has to -generate the initial set of restart files when there is no previous cycle point -to do it. The following section shows one way to handle this in cylc suites. - -\subsubsection{Initial Non-Repeating (R1) Tasks} -\label{initial-non-repeating-r1-tasks} -\hilight{ suite: \lstinline=tut/cycling/three= } -\vspace{3mm} - -Sometimes we want to be able to run a task at the initial cycle point, but -refrain from running it in subsequent cycles. We can do this by writing an -extra set of dependencies that are only valid at a single date-time cycle -point. If we choose this to be the initial cycle point, these will only apply -at the very start of the suite. - -The cylc syntax for writing this single date-time cycle point occurrence is -\lstinline=R1=, which stands for -\lstinline=R1/no-specified-date-time/no-specified-period=. -This is an adaptation of part of the ISO 8601 date-time standard's recurrence -syntax (\lstinline=Rn/date-time/period=) with some special context information -supplied by cylc for the \lstinline=no-specified-*= data. - -The \lstinline=1= in the \lstinline=R1= means run once. 
As we've specified -no date-time, Cylc will use the initial cycle point date-time by default, -which is what we want. We've also missed out specifying the period - this is -set by cylc to a zero amount of time in this case (as it never -repeats, this is not significant). - -For example, in \lstinline=tut/cycling/three=: -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - cycle point time zone = +13 -[scheduling] - initial cycle point = 20130808T00 - final cycle point = 20130812T00 - [[dependencies]] - [[[R1]]] - graph = "prep => foo" - [[[T00,T12]]] - graph = "foo[-PT12H] => foo => bar" -\end{lstlisting} -\lstset{language=transcript} -This is shown in Figure~\ref{fig-tut-three}. - -Note that the time zone has been set to \lstinline=+1300= in this case, -instead of UTC (\lstinline=Z=) as before. If no time zone or UTC mode was -set, the local time zone of your machine will be used in the cycle points. - - -At the initial cycle point, \lstinline=foo= will depend on -\lstinline=foo[-PT12H]= and also on \lstinline=prep=: -\lstset{language=suiterc} -\begin{lstlisting} -prep.20130808T0000+13 & foo.20130807T1200+13 => foo.20130808T0000+13 -\end{lstlisting} -\lstset{language=transcript} - -Thereafter, it will just look like e.g.: -\lstset{language=suiterc} -\begin{lstlisting} -foo.20130808T0000+13 => foo.20130808T1200+13 -\end{lstlisting} -\lstset{language=transcript} - -However, in our initial cycle point example, the dependence on -\lstinline=foo.20130807T1200+13= will be ignored, because that task's cycle -point is earlier than the suite's initial cycle point and so it cannot run. 
-This means that the initial cycle point dependencies for \lstinline=foo= -actually look like: -\lstset{language=suiterc} -\begin{lstlisting} -prep.20130808T0000+13 => foo.20130808T0000+13 -\end{lstlisting} -\lstset{language=transcript} - -\begin{figure} - \begin{center} - \includegraphics[width=0.5\textwidth]{graphics/png/orig/tut-three.png} - \end{center} - \caption{The \lstinline=tut/cycling/three= suite} -\label{fig-tut-three} -\end{figure} - -\begin{myitemize} - \item \lstinline=R1= tasks can also be used to make something special - happen at suite shutdown, or at any single cycle point throughout the - suite run. For a full primer on cycling syntax, - see~\ref{AdvancedCycling}. -\end{myitemize} - - -\subsubsection{Integer Cycling} -\label{TutInteger} -\hilight{ suite: \lstinline=tut/cycling/integer= } -\vspace{3mm} - -Cylc can also do integer cycling for repeating workflows that are not -date-time based. - -Open the \lstinline=tut/cycling/integer= suite, which is plotted in -Figure~\ref{fig-tut-int}. -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/cycling/integer/suite.rc} - -\begin{figure} - \begin{center} - \includegraphics[width=0.65\textwidth]{graphics/png/orig/tut-cyc-int.png} - \end{center} - \caption{The \lstinline=tut/cycling/integer= suite} -\label{fig-tut-int} -\end{figure} - -The integer cycling notation is intended to look similar to the ISO 8601 -date-time notation, but it is simpler for obvious reasons. The example suite -illustrates two recurrence forms, -\lstinline=Rn/start-point/period= and -\lstinline=Rn/period/stop-point=, simplified somewhat using suite context -information (namely the initial and final cycle points). The first form is -used to run one special task called \lstinline=start= at start-up, and for the -main cycling body of the suite; and the second form to run another special task -called \lstinline=stop= in the final two cycles.
The \lstinline=P= character -denotes period (interval) just like in the date-time notation. -\lstinline=R/1/P2= would generate the sequence of points \lstinline=1,3,5,...=. - -\begin{myitemize} - \item For more on integer cycling, including a more realistic usage example - see ~\ref{IntegerCycling}. -\end{myitemize} - -\subsection{Jinja2} -\hilight{ suite: \lstinline=tut/oneoff/jinja2= } -\vspace{3mm} - -Cylc has built in support for the Jinja2 template processor, which -allows us to embed code in suite configurations to generate the -final result seen by cylc. - -The \lstinline=tut/oneoff/jinja2= suite illustrates two common -uses of Jinja2: changing suite content or structure based on the value -of a logical switch; and iteratively generating dependencies and runtime -configuration for groups of related tasks: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/jinja2/suite.rc} - -To view the result of Jinja2 processing with the Jinja2 flag -\lstinline@MULTI@ set to \lstinline=False=: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc view --jinja2 --stdout tut/oneoff/jinja2 -\end{lstlisting} -\lstset{language=suiterc} -\begin{lstlisting} -[meta] - title = "A Jinja2 Hello World! suite" -[scheduling] - [[dependencies]] - graph = "hello" -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" -\end{lstlisting} - -And with \lstinline=MULTI= set to \lstinline=True=: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc view --jinja2 --stdout tut/oneoff/jinja2 -\end{lstlisting} -\lstset{language=suiterc} -\begin{lstlisting} -[meta] - title = "A Jinja2 Hello World! suite" -[scheduling] - [[dependencies]] - graph = "hello => BYE" -[runtime] - [[hello]] - script = "sleep 10; echo Hello World!" - [[BYE]] - script = "sleep 10; echo Goodbye World!" 
- [[ goodbye_0 ]] - inherit = BYE - [[ goodbye_1 ]] - inherit = BYE - [[ goodbye_2 ]] - inherit = BYE -\end{lstlisting} - -\subsection{Task Retry On Failure} - -\hilight{ suite: \lstinline=tut/oneoff/retry= } -\vspace{3mm} - -Tasks can be configured to retry a number of times if they fail. -An environment variable \lstinline=$CYLC_TASK_TRY_NUMBER= increments -from $1$ on each successive try, and is passed to the task to allow -different behaviour on the retry: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/tutorial/oneoff/retry/suite.rc} - -If a task with configured retries fails, it goes into the {\em retrying} state -until the next retry delay is up, then it resubmits. It only enters the {\em -failed} state on a final definitive failure. - -If a task with configured retries is {\em killed} (by \lstinline=cylc kill= or -via the GUI) it goes to the {\em held} state so that the operator can decide -whether to release it and continue the retry sequence or to abort the retry -sequence by manually resetting it to the {\em failed} state. - -Experiment with \lstinline=tut/oneoff/retry= to see how this works. - -\subsection{Other Users' Suites} - -If you have read access to another user's account (even on another host) -it is possible to use \lstinline=cylc monitor= to look at their suite's -progress without full shell access to their account. To do this, you -will need to copy their suite passphrase to -\lstset{language=transcript} -\begin{lstlisting} - $HOME/.cylc/SUITE_OWNER@SUITE_HOST/SUITE_NAME/passphrase -\end{lstlisting} -(use of the host and owner names is optional here - see~\ref{passphrases}) -{\em and} also retrieve the port number of the running suite from: -\begin{lstlisting} - ~SUITE_OWNER/cylc-run/SUITE_NAME/.service/contact -\end{lstlisting} -Once you have this information, you can run -\begin{lstlisting} -$ cylc monitor --user=SUITE_OWNER --port=SUITE_PORT SUITE_NAME -\end{lstlisting} -to view the progress of their suite. 
- -Other suite-connecting commands work in the same way; see~\ref{RemoteControl}. - -\subsection{Other Things To Try} - -Almost every feature of cylc can be tested quickly and easily with a -simple dummy suite. You can write your own, or start from one of the -example suites in \lstinline=/path/to/cylc/examples= (see use of -\lstinline=cylc import-examples= above) - they all run ``out of the box'' -and can be copied and modified at will. - -\begin{myitemize} - -\item Change the suite runahead limit in a cycling suite. - -\item Stop a suite mid-run with \lstinline=cylc stop=, and restart -it again with \lstinline=cylc restart=. - -\item Hold (pause) a suite mid-run with \lstinline=cylc hold=, - then modify the suite configuration and \lstinline=cylc reload= it - before using \lstinline=cylc release= to continue (you can also - reload without holding). - -\item Use the gcylc View menu to show the task state color key and -watch tasks in the \lstinline=task-states= example evolve -as the suite runs. - -\item Manually re-run a task that has already completed or failed, - with \lstinline=cylc trigger=. - -\item Use an {\em internal queue} to prevent more than an allotted number - of tasks from running at once even though they are ready - - see~\ref{InternalQueues}. - -\item Configure task event hooks to send an email, or shut the suite down, - on task failure. - -\end{myitemize} - - -\section{Suite Name Registration} -\label{SuiteRegistration} - -Cylc commands target suites via their names, which are relative path names -under the suite run directory (\lstinline=~/cylc-run/= by default). Suites can -be grouped together under sub-directories.
E.g.: -\begin{lstlisting} -$ cylc print -t nwp -nwp - |-oper - | |-region1 Local Model Region1 /home/oliverh/cylc-run/nwp/oper/region1 - | `-region2 Local Model Region2 /home/oliverh/cylc-run/nwp/oper/region2 - `-test - `-region1 Local Model TEST Region1 /home/oliverh/cylc-run/nwp/test/region1 -\end{lstlisting} - -Suite names can be pre-registered with the \lstinline=cylc register= command, -which creates the suite run directory structure and some service files -underneath it. Otherwise, \lstinline=cylc run= will do this at suite start up. - -%\pagebreak -\section{Suite Configuration} -\label{SuiteDefinition} - -Cylc suites are defined in structured, validated, {\em suite.rc} files -that concisely specify the properties of, and the relationships -between, the various tasks managed by the suite. This section of the -User Guide deals with the format and content of the suite.rc file, -including task definition. Task implementation - what's required of the -real commands, scripts, or programs that do the processing that the -tasks represent - is covered in~\ref{TaskImplementation}; and -task job submission - how tasks are submitted to run - is -in~\ref{TaskJobSubmission}. - -\subsection{Suite Configuration Directories} -\label{SuiteDefinitionDirectories} - -A cylc {\em suite configuration directory} contains: -\begin{myitemize} - \item {\bf A suite.rc file}: this is the suite configuration. - \begin{myitemize} - \item And any include-files used in it (see below; may be - kept in sub-directories). - \end{myitemize} - \item {\bf A \lstinline=bin/= sub-directory} (optional) - \begin{myitemize} - \item For scripts and executables that implement, or are - used by, suite tasks. - \item Automatically added to \lstinline=$PATH= in task - execution environments. - \item Alternatively, tasks can call external - commands, scripts, or programs; or they can be scripted - entirely within the suite.rc file. 
- \end{myitemize} - \item {\bf A \lstinline=lib/python/= sub-directory} (optional) - \begin{myitemize} - \item For custom job submission modules - (see~\ref{CustomJobSubmissionMethods}) - and local Python modules imported by custom Jinja2 filters, - tests and globals (see~\ref{CustomJinja2Filters}). - \end{myitemize} - \item {\bf Any other sub-directories and files} - documentation, - control files, etc. (optional) - \begin{myitemize} - \item Holding everything in one place makes proper suite - revision control possible. - \item Portable access to files here, for running tasks, is - provided through - \lstinline=$CYLC_SUITE_DEF_PATH= - (see~\ref{TaskExecutionEnvironment}). - \item Ignored by cylc, but the entire suite configuration - directory tree is copied when you copy a - suite using cylc commands. - - \end{myitemize} -\end{myitemize} -A typical example: -\lstset{language=transcript} -\begin{lstlisting} -/path/to/my/suite # suite configuration directory - suite.rc # THE SUITE CONFIGURATION FILE - bin/ # scripts and executables used by tasks - foo.sh - bar.sh - ... - # (OPTIONAL) any other suite-related files, for example: - inc/ # suite.rc include-files - nwp-tasks.rc - globals.rc - ... - doc/ # documentation - control/ # control files - ancil/ # ancillary files - ... -\end{lstlisting} - -\subsection{Suite.rc File Overview} -\label{SuiteRCFile} - -Suite.rc files are an extended-INI format with section nesting. - -Embedded template processor expressions may also be used in the file, to -programmatically generate the final suite configuration seen by -cylc. Currently the Jinja2 (\url{http://jinja.pocoo.org/docs}) and EmPy -(\url{http://www.alcyone.com/software/empy}) template processors are supported; -see~\ref{Jinja2} and~\ref{EmPy} for examples. In the future cylc may provide -a plug-in interface to allow use of other template engines too.
- -\subsubsection{Syntax} -\label{Syntax} - -The following defines legal suite.rc syntax: -\begin{myitemize} - \item {\bf Items} are of the form \lstinline@item = value@. - \item {\bf [Section]} headings are enclosed in square brackets. - \item {\bf Sub-section [[nesting]]} is defined by repeated square brackets. - \item Sections are {\bf closed} by the next section heading. - \item {\bf Comments} (line and trailing) follow a hash character: \# - \item {\bf List values} are comma-separated. - \item {\bf Single-line string values} can be single-, double-, or un-quoted. - \item {\bf Multi-line string values} are triple-quoted (using - single or double quote characters). - \item {\bf Boolean values} are capitalized: True, False. - \item {\bf Leading and trailing whitespace} is ignored. - \item {\bf Indentation} is optional but should be used for clarity. - \item {\bf Continuation lines} follow a trailing backslash: \textbackslash - \item {\bf Duplicate sections} add their items to those previously - defined under the same section. - \item {\bf Duplicate items} override, {\em except for dependency - \lstinline=graph= strings, which are additive}. - \item {\bf Include-files} \lstinline=%include inc/foo.rc= can be - used as a verbatim inlining mechanism. -\end{myitemize} -Suites that embed templating code (see~\ref{Jinja2} and~\ref{EmPy}) must -process to raw suite.rc syntax. - -\subsubsection{Include-Files} - -Cylc has native support for suite.rc include-files, which may help to -organize large suites. Inclusion boundaries are completely arbitrary - -you can think of include-files as chunks of the suite.rc file simply -cut-and-pasted into another file. Include-files may be included -multiple times in the same file, and even nested. 
Include-file paths -can be specified portably relative to the suite configuration directory, -e.g.: -\lstset{language=suiterc} -\begin{lstlisting} -# include the file $CYLC_SUITE_DEF_PATH/inc/foo.rc: -%include inc/foo.rc -\end{lstlisting} - -\paragraph{Editing Temporarily Inlined Suites} - -Cylc's native file inclusion mechanism supports optional inlined -editing: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc edit --inline SUITE -\end{lstlisting} -The suite will be split back into its constituent include-files when you -exit the edit session. While editing, the inlined file becomes the -official suite configuration so that changes take effect whenever you save -the file. See \lstinline=cylc prep edit --help= for more information. - -\paragraph{Include-Files via Jinja2} - -Jinja2 (\ref{Jinja2}) also has template inclusion functionality. - -\subsubsection{Syntax Highlighting For Suite Configuration} -\label{SyntaxHighlighting} - -\lstset{language=transcript} -Cylc comes with syntax files for a number of text editors: -\lstset{language=transcript} -\begin{lstlisting} -/etc/syntax/cylc.vim # vim -/etc/syntax/cylc-mode.el # emacs -/etc/syntax/cylc.lang # gedit (and other gtksourceview programs) -/etc/syntax/cylc.xml # kate -\end{lstlisting} -Refer to comments at the top of each file to see how to use them. 
- -\subsubsection{Gross File Structure} - -Cylc suite.rc files consist of a suite title and description followed by -configuration items grouped under several top level section headings: - -\begin{myitemize} - \item {\bf [cylc] } - {\em non task-specific suite configuration} - \item {\bf [scheduling] } - {\em determines when tasks are ready to run} - \begin{myitemize} - \item tasks with special behaviour, e.g.\ clock-trigger tasks - \item the dependency graph, which defines the relationships - between tasks - \end{myitemize} - \item {\bf [runtime] } - {\em determines how, where, and what to - execute when tasks are ready} - \begin{myitemize} - \item script, environment, job submission, remote - hosting, etc. - \item suite-wide defaults in the {\em root} namespace - \item a nested family hierarchy with common properties - inherited by related tasks - \end{myitemize} - \item {\bf [visualization] } - suite graph styling -\end{myitemize} - - -\subsubsection{Validation} -\label{Validation} - -Cylc suite.rc files are automatically validated against a specification -that defines all legal entries, values, options, and defaults. This -detects formatting errors, typographic errors, illegal items and illegal -values prior to run time. Some values are complex strings that require -further parsing by cylc to determine their correctness (this is also -done during validation). All legal entries are documented in the {\em -Suite.rc Reference} (\ref{SuiteRCReference}). - -The validator reports the line numbers of detected errors. Here's an -example showing a section heading with a missing right bracket: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc validate my.suite - [[special tasks] -'Section bracket mismatch, line 19' -\end{lstlisting} - -If the suite.rc file uses include-files \lstinline=cylc view= will -show an inlined copy of the suite with correct line numbers -(you can also edit suites in a temporarily inlined state with -\lstinline=cylc edit --inline=). 
- -Validation does not check the validity of chosen batch systems. -%this is to allow users to extend cylc with their own job submission -%methods, which are by definition unknown to the suite.rc spec. - -\subsection{Scheduling - Dependency Graphs} -\label{ConfiguringScheduling} - -\lstset{language=suiterc} -The \lstinline=[scheduling]= section of a suite.rc file defines the -relationships between tasks in a suite - the information that allows -cylc to determine when tasks are ready to run. The most important -component of this is the suite dependency graph. Cylc graph notation -makes clear textual graph representations that are very concise because -sections of the graph that repeat at different hours of the day, say, -only have to be defined once. Here's an example with dependencies that -vary depending on the particular cycle point: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 20200401 - final cycle point = 20200405 - [[dependencies]] - [[[T00,T06,T12,T18]]] # validity (hours) - graph = """ -A => B & C # B and C trigger off A -A[-PT6H] => A # Model A restart trigger - """ - [[[T06,T18]]] # hours - graph = "C => X" -\end{lstlisting} -\lstset{language=transcript} -Figure~\ref{fig-dep-eg-1} shows the complete suite.rc listing alongside -the suite graph. -This is a complete, valid, runnable suite (it will use default -task runtime properties such as \lstinline=script=). 
- -\begin{figure} -\begin{minipage}[b]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} -[meta] - title = "Dependency Example 1" -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 20200401 - final cycle point = 20200405 - [[dependencies]] - [[[T00,T06,T12,T18]]] # validity (hours) - graph = """ -A => B & C # B and C trigger off A -A[-PT6H] => A # Model A restart trigger - """ - [[[T06,T18]]] # hours - graph = "C => X" -[visualization] - initial cycle point = 20200401 - final cycle point = 20200401T06 - [[node attributes]] - X = "color=red" -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[b]{0.5\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/dep-eg-1.png} - \end{center} -\end{minipage} -\caption[Example Suite]{\scriptsize Example Suite} -\label{fig-dep-eg-1} -\end{figure} - -\subsubsection{Graph String Syntax} - -Multiline graph strings may contain: -\begin{myitemize} - \item {\bf blank lines} - \item {\bf arbitrary white space} - \item {\bf internal comments:} following the \lstinline=#= character - \item {\bf conditional task trigger expressions} - see below. -\end{myitemize} - -\subsubsection{Interpreting Graph Strings} - -Suite dependency graphs can be broken down into pairs in which the left -side (which may be a single task or family, or several that are -conditionally related) defines a trigger for the task or family on the -right. For instance the ``word graph'' {\em C triggers off B which -triggers off A} can be deconstructed into pairs {\em C triggers off B} -and {\em B triggers off A}. In this section we use only the default -trigger type, which is to trigger off the upstream task succeeding; -see~\ref{TriggerTypes} for other available triggers. - -In the case of cycling tasks, the triggers defined by a graph string are -valid for cycle points matching the list of hours specified for the -graph section. 
For example this graph: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T12]]] - graph = "A => B" -\end{lstlisting} -\lstset{language=transcript} -implies that B triggers off A for cycle points in which the hour matches $00$ -or $12$. - -To define inter-cycle dependencies, attach an offset indicator to the -left side of a pair: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T12]]] - graph = "A[-PT12H] => B" -\end{lstlisting} -\lstset{language=transcript} -This means B[time] triggers off A[time-PT12H] (12 hours before) for cycle -points with hours matching $00$ or $12$. $time$ is implicit because this keeps -graphs clean and concise, given that the majority of tasks will typically -depend only on others with the same cycle point. Cycle point offsets can only -appear on the left of a pair, because a pair defines triggers for the right -task at cycle point $time$. However, \lstinline@A => B[-PT6H]@, which is -illegal, can be reformulated as a {\em future trigger} -\lstinline@A[+PT6H] => B@ (see~\ref{InterCyclePointTriggers}). It is also -possible to combine multiple offsets within a cycle point offset e.g. -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T12]]] - graph = "A[-P1D-PT12H] => B" -\end{lstlisting} -\lstset{language=transcript} -This means that B[time] triggers off A[time-P1D-PT12H] (1 day and 12 hours -before). - -Triggers can be chained together. This graph: -\lstset{language=suiterc} -\begin{lstlisting} - graph = """A => B # B triggers off A - B => C # C triggers off B""" -\end{lstlisting} -is equivalent to this: -\begin{lstlisting} - graph = "A => B => C" -\end{lstlisting} -\lstset{language=transcript} - -{\em Each trigger in the graph must be unique} but {\em the same task -can appear in multiple pairs or chains}. Separately defined triggers -for the same task have an AND relationship.
So this: -\lstset{language=suiterc} -\begin{lstlisting} - graph = """A => X # X triggers off A - B => X # X also triggers off B""" -\end{lstlisting} - -is equivalent to this: -\lstset{language=suiterc} -\begin{lstlisting} - graph = "A & B => X" # X triggers off A AND B -\end{lstlisting} -\lstset{language=transcript} - -In summary, the branching tree structure of a dependency graph can -be partitioned into lines (in the suite.rc graph string) of pairs -or chains, in any way you like, with liberal use of internal white space -and comments to make the graph structure as clear as possible. - -\begin{lstlisting} -# B triggers if A succeeds, then C and D trigger if B succeeds: - graph = "A => B => C & D" -# which is equivalent to this: - graph = """A => B => C - B => D""" -# and to this: - graph = """A => B => D - B => C""" -# and to this: - graph = """A => B - B => C - B => D""" -# and it can even be written like this: - graph = """A => B # blank line follows: - - B => C # comment ... - B => D""" -\end{lstlisting} - -\paragraph{Splitting Up Long Graph Lines} - -\lstset{language=suiterc} - -It is not necessary to use the general line continuation marker -\lstinline=\= to split long graph lines. Just break at dependency arrows, -or split long chains into smaller ones. This graph: -\begin{lstlisting} - graph = "A => B => C" -\end{lstlisting} - -is equivalent to this: -\begin{lstlisting} - graph = """A => B => - C""" -\end{lstlisting} - -and also to this: -\begin{lstlisting} - graph = """A => B - B => C""" -\end{lstlisting} - -\subsubsection{Graph Types} -\label{GraphTypes} - -A suite configuration can contain multiple graph strings that are combined -to generate the final graph. - -\paragraph{One-off (Non-Cycling)} - -Figure~\ref{fig-test1} shows a small suite of one-off non-cycling -tasks; these all share a single cycle point (\lstinline=1=) and don't spawn -successors (once they're all finished the suite just exits). 
The integer -\lstinline=1= attached to each graph node is just an arbitrary label here. -\begin{figure} -\begin{minipage}[b]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} -[meta] - title = some one-off tasks -[scheduling] - [[dependencies]] - graph = "foo => bar & baz => qux" -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[b]{0.5\textwidth} - \begin{center} - \includegraphics[width=0.25\textwidth]{graphics/png/orig/test1.png} - \end{center} -\end{minipage} -\caption[One-off (Non-Cycling) Tasks]{\scriptsize One-off (Non-Cycling) Tasks.} -\label{fig-test1} -\end{figure} - -\paragraph{Cycling Graphs} - -For cycling tasks the graph section heading defines a sequence of cycle points -for which the subsequent graph section is valid. Figure~\ref{fig-test2} shows -a small suite of cycling tasks. -\begin{figure} -\begin{minipage}[b]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} -[meta] - title = some cycling tasks -# (no dependence between cycle points) -[scheduling] - [[dependencies]] - [[[T00,T12]]] - graph = "foo => bar & baz => qux" -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[b]{0.5\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/test2.png} - \end{center} -\end{minipage} -\caption[Cycling Tasks]{\scriptsize Cycling Tasks.} -\label{fig-test2} -\end{figure} - -\subsubsection{Graph Section Headings} - -Graph section headings define recurrence expressions, the graph within a graph -section heading defines a workflow at each point of the recurrence. For -example in the following scenario: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[ T06 ]]] # A graph section heading - graph = foo => bar -\end{lstlisting} -\lstset{language=transcript} - -\lstinline=T06= means "Run every day starting at 06:00 after the -initial cycle point". 
Cylc allows you to start (or end) at any particular -time, repeat at whatever frequency you like, and even optionally limit the -number of repetitions. - -Graph section headings can also be used with integer cycling; see -\ref{IntegerCycling}. - -\paragraph{Syntax Rules} - -Date-time cycling information is made up of a starting {\em date-time}, an -{\em interval}, and an optional {\em limit}. - -The time is assumed to be in the local time zone unless you set -\lstinline=[cylc]cycle point time zone= or \lstinline=[cylc]UTC mode=. The -calendar is assumed to be the proleptic Gregorian calendar unless you set -\lstinline=[scheduling]cycling mode=. - -The syntax for representations is based on the ISO 8601 date-time standard. -This includes the representation of {\em date-time} and {\em interval}. What we -define for cylc's cycling syntax is our own optionally-heavily-condensed form -of ISO 8601 recurrence syntax. The most common full form is: -\lstinline=R[limit?]/[date-time]/[interval]=. However, we allow omitting -information that can be guessed from the context (rules below). This means -that it can be written as: -\begin{lstlisting} -R[limit?]/[date-time] -R[limit?]//[interval] -[date-time]/[interval] -R[limit?] # Special limit of 1 case -[date-time] -[interval] -\end{lstlisting} - -with example graph headings for each form being: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R5/T00 ]]] # Run 5 times at 00:00 every day -[[[ R//PT1H ]]] # Run every hour (Note the R// is redundant) -[[[ 20000101T00Z/P1D ]]] # Run every day starting at 00:00 1st Jan 2000 -[[[ R1 ]]] # Run once at the initial cycle point -[[[ R1/20000101T00Z ]]] # Run once at 00:00 1st Jan 2000 -[[[ P1Y ]]] # Run every year -\end{lstlisting} - -Note that \lstinline=T00= is an example of \lstinline=[date-time]=, with an -inferred 1 day period and no limit. - -Where some or all {\em date-time} information is omitted, it is inferred to -be relative to the initial date-time cycle point.
For example, \lstinline=T00= -by itself would mean the next occurrence of midnight that follows, or is, the -initial cycle point. Entering \lstinline=+PT6H= would mean 6 hours after the -initial cycle point. Entering \lstinline=-P1D= would mean 1 day before the -initial cycle point. Entering no information for the {\em date-time} implies -the initial cycle point date-time itself. - -Where the {\em interval} is omitted and some (but not all) {\em date-time} -information is omitted, it is inferred to be a single unit above -the largest given specific {\em date-time } unit. For example, the largest -given specific unit in \lstinline=T00= is hours, so the inferred interval is -1 day (daily), \lstinline=P1D=. - -Where the {\em limit} is omitted, unlimited cycling is assumed. This will be -bounded by the final cycle point's date-time if given. - -Another supported form of ISO 8601 recurrence is: -\lstinline=R[limit?]/[interval]/[date-time]=. This form uses the -{\em date-time } as the end of the cycling sequence rather than the start. -For example, \lstinline=R3/P5D/20140430T06= means: -\begin{lstlisting} -20140420T06 -20140425T06 -20140430T06 -\end{lstlisting} - -This kind of form can be used for specifying special behaviour near the end of -the suite, at the final cycle point's date-time. 
We can also represent this in -cylc with a collapsed form: -\begin{lstlisting} -R[limit?]/[interval] -R[limit?]//[date-time] -[interval]/[date-time] -\end{lstlisting} - -So, for example, you can write: -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R1//+P0D ]]] # Run once at the final cycle point -[[[ R5/P1D ]]] # Run 5 times, every 1 day, ending at the final - # cycle point -[[[ P2W/T00 ]]] # Run every 2 weeks ending at 00:00 following - # the final cycle point -[[[ R//T00 ]]] # Run every 1 day ending at 00:00 following the - # final cycle point -\end{lstlisting} -\lstset{language=transcript} - -\paragraph{Referencing The Initial And Final Cycle Points} -\label{referencing-the-initial-and-final-cycle-points} - -For convenience the caret and dollar symbols may be used as shorthand for the -initial and final cycle points. Using this shorthand you can write: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R1/^+PT12H ]]] # Repeat once 12 hours after the initial cycle point - # R[limit]/[date-time] - # Equivalent to [[[ R1/+PT12H ]]] -[[[ R1/$ ]]] # Repeat once at the final cycle point - # R[limit]/[date-time] - # Equivalent to [[[ R1//+P0D ]]] -[[[ $-P2D/PT3H ]]] # Repeat 3 hourly starting two days before the - # final cycle point - # [date-time]/[interval] -\end{lstlisting} -\lstset{language=transcript} - -Note that there can be multiple ways to write the same headings, for instance -the following all run once at the final cycle point: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R1/P0Y ]]] # R[limit]/[interval] -[[[ R1/P0Y/$ ]]] # R[limit]/[interval]/[date-time] -[[[ R1/$ ]]] # R[limit]/[date-time] -\end{lstlisting} -\lstset{language=transcript} - -\paragraph{Excluding Dates} -\label{excluding-dates} -\lstset{language=suiterc} - -Date-times can be excluded from a recurrence using an exclamation mark. For example, -\lstinline=[[[ PT1D!20000101 ]]]= means run daily except on the -first of January 2000.
- -This syntax can be used to exclude one or multiple date-times from a recurrence. -Multiple date-times are excluded using the syntax -\lstinline=[[[ PT1D!(20000101,20000102,...) ]]]=. All date-times listed within -the parentheses after the exclamation mark will be excluded. Note that the -\lstinline=^= and \lstinline=$= symbols (shorthand for the initial -and final cycle points) are both date-times so \lstinline=[[[ T12!$-PT1D ]]]= -is valid. - -If using a run limit in combination with an exclusion, the heading might not -run the number of times specified in the limit. For example in the following -suite \lstinline=foo= will only run once as its second run has been excluded. - -\begin{lstlisting} -[scheduling] - initial cycle point = 20000101T00Z - final cycle point = 20000105T00Z - [[dependencies]] - [[[ R2/P1D!20000102 ]]] - graph = foo -\end{lstlisting} -\lstset{language=transcript} - -\paragraph{Advanced exclusion syntax} - -In addition to excluding isolated date-time points or lists of date-time points -from recurrences, exclusions themselves may be date-time recurrence sequences. -Any partial date-time or sequence given after the exclamation mark will be -excluded from the main sequence. - -For example, partial date-times can be excluded using the syntax: -\lstset{language=suiterc} -\begin{lstlisting} -[[[ PT1H ! T12 ]]] # Run hourly but not at 12:00 from the initial - # cycle point. -[[[ T-00 ! (T00, T06, T12, T18) ]]] # Run hourly but not at 00:00, 06:00, - # 12:00, 18:00. -[[[ PT5M ! T-15 ]]] # Run 5-minutely but not at 15 minutes past the - # hour from the initial cycle point. -[[[ T00 ! W-1T00 ]]] # Run daily at 00:00 except on Mondays. -\end{lstlisting} -\lstset{language=transcript} - -It is also valid to use sequences for exclusions. For example: -\lstset{language=suiterc} -\begin{lstlisting} -[[[ PT1H ! PT6H ]]] # Run hourly from the initial cycle point but - # not 6-hourly from the initial cycle point. -[[[ T-00 ! 
PT6H ]]] # Run hourly on the hour but not 6-hourly - # on the hour. - # Same as [[[ T-00 ! T-00/PT6H ]]] (T-00 context is implied) - # Same as [[[ T-00 ! (T00, T06, T12, T18) ]]] - # Same as [[[ PT1H ! (T00, T06, T12, T18) ]]] Initial cycle point dependent - -[[[ T12 ! T12/P15D ]]] # Run daily at 12:00 except every 15th day. - -[[[ R/^/P1H ! R5/20000101T00/P1D ]]] # Any valid recurrence may be used to - # determine exclusions. This example - # translates to: Repeat every hour from - # the initial cycle point, but exclude - # 00:00 for 5 days from the 1st January - # 2000. - -\end{lstlisting} -\lstset{language=transcript} - -You can combine exclusion sequences and single point exclusions within a -comma separated list enclosed in parentheses: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ T-00 ! (20000101T07, PT2H) ]]] # Run hourly on the hour but not at 07:00 - # on the 1st Jan, 2000 and not 2-hourly - # on the hour. -\end{lstlisting} -\lstset{language=transcript} - - -\paragraph{How Multiple Graph Strings Combine} -\label{HowMultipleGraphStringsCombine} - -For a cycling graph with multiple validity sections for different -hours of the day, the different sections {\em add} to generate the -complete graph. Different graph sections can overlap (i.e.\ the same -hours may appear in multiple section headings) and the same tasks may -appear in multiple sections, but individual dependencies should be -unique across the entire graph. 
For example, the following graph defines -a duplicate prerequisite for task C: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T06,T12,T18]]] - graph = "A => B => C" - [[[T06,T18]]] - graph = "B => C => X" - # duplicate prerequisite: B => C already defined at T06, T18 -\end{lstlisting} -\lstset{language=transcript} -This does not affect scheduling, but for the sake of clarity and brevity -the graph should be written like this: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T06,T12,T18]]] - graph = "A => B => C" - [[[T06,T18]]] - # X triggers off C only at 6 and 18 hours - graph = "C => X" -\end{lstlisting} -\lstset{language=transcript} - -\paragraph{Advanced Examples} -\label{AdvancedCycling} - -The following examples show the various ways of writing graph headings in cylc. -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R1 ]]] # Run once at the initial cycle point -[[[ P1D ]]] # Run every day starting at the initial cycle point -[[[ PT5M ]]] # Run every 5 minutes starting at the initial cycle - # point -[[[ T00/P2W ]]] # Run every 2 weeks starting at 00:00 after the - # initial cycle point -[[[ +P5D/P1M ]]] # Run every month, starting 5 days after the initial - # cycle point -[[[ R1/T06 ]]] # Run once at 06:00 after the initial cycle point -[[[ R1/P0Y ]]] # Run once at the final cycle point -[[[ R1/$ ]]] # Run once at the final cycle point (alternative - # form) -[[[ R1/$-P3D ]]] # Run once three days before the final cycle point -[[[ R3/T0830 ]]] # Run 3 times, every day at 08:30 after the initial - # cycle point -[[[ R3/01T00 ]]] # Run 3 times, every month at 00:00 on the first - # of the month after the initial cycle point -[[[ R5/W-1/P1M ]]] # Run 5 times, every month starting on Monday - # following the initial cycle point -[[[ T00!^ ]]] # Run at the first occurrence of T00 that isn't the - # initial cycle point -[[[ PT1D!20000101 ]]] # Run every day excluding 1st Jan
2000 -[[[ 20140201T06/P1D ]]] # Run every day starting at 20140201T06 -[[[ R1/min(T00,T06,T12,T18) ]]] # Run once at the first instance - # of either T00, T06, T12 or T18 - # starting at the initial cycle - # point -\end{lstlisting} -\lstset{language=transcript} - -\paragraph{Advanced Starting Up} -\label{AdvancedStartingUp} - -Dependencies that are only valid at the initial cycle point can be written -using the \lstinline=R1= notation (e.g.\ as -in~\ref{initial-non-repeating-r1-tasks}). For example: -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 20130808T00 - final cycle point = 20130812T00 - [[dependencies]] - [[[R1]]] - graph = "prep => foo" - [[[T00]]] - graph = "foo[-P1D] => foo => bar" -\end{lstlisting} -\lstset{language=transcript} - -In the example above, \lstinline=R1= implies \lstinline=R1/20130808T00=, so -\lstinline=prep= only runs once at that cycle point (the initial cycle point). -At that cycle point, \lstinline=foo= will have a dependence on -\lstinline=prep= - but not at subsequent cycle points. - -However, it is possible to have a suite that has multiple effective initial -cycles - for example, one starting at \lstinline=T00= and another starting -at \lstinline=T12=. What if they need to share an initial task? - -Let's suppose that we add the following section to the suite example above: -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 20130808T00 - final cycle point = 20130812T00 - [[dependencies]] - [[[R1]]] - graph = "prep => foo" - [[[T00]]] - graph = "foo[-P1D] => foo => bar" - [[[T12]]] - graph = "baz[-P1D] => baz => qux" -\end{lstlisting} -\lstset{language=transcript} - -We'll also say that there should be a starting dependence between -\lstinline=prep= and our new task \lstinline=baz= - but we still want to have -a single \lstinline=prep= task, at a single cycle.
- -We can write this using a special case of the \lstinline=task[-interval]= syntax - -if the interval is null, this implies the task at the initial cycle point. - -For example, we can write our suite like~\ref{fig-test4}. - -\begin{figure} -\begin{minipage}[b]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 20130808T00 - final cycle point = 20130812T00 - [[dependencies]] - [[[R1]]] - graph = "prep" - [[[R1/T00]]] -# ^ implies the initial cycle point: - graph = "prep[^] => foo" - [[[R1/T12]]] -# ^ is initial cycle point, as above: - graph = "prep[^] => baz" - [[[T00]]] - graph = "foo[-P1D] => foo => bar" - [[[T12]]] - graph = "baz[-P1D] => baz => qux" -[visualization] - initial cycle point = 20130808T00 - final cycle point = 20130810T00 - [[node attributes]] - foo = "color=red" - bar = "color=orange" - baz = "color=green" - qux = "color=blue" -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[b]{0.5\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/test4.png} - \end{center} -\end{minipage} -\caption[Staggered Start Suite]{\scriptsize Staggered Start Suite} -\label{fig-test4} -\end{figure} - -This neatly expresses what we want - a task running at the initial cycle point -that has one-off dependencies with other task sets at different cycles. 
- -\begin{figure} -\begin{minipage}[h]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 20130808T00 - final cycle point = 20130808T18 - [[dependencies]] - [[[R1]]] - graph = "setup_foo => foo" - [[[+PT6H/PT6H]]] - graph = """ - foo[-PT6H] => foo - foo => bar - """ -[visualization] - initial cycle point = 20130808T00 - final cycle point = 20130808T18 - [[node attributes]] - foo = "color=red" - bar = "color=orange" -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[h]{0.5\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/test5.png} - \end{center} -\end{minipage} -\caption[Restricted First Cycle Point Suite]{ - \scriptsize Restricted First Cycle Point Suite} -\label{fig-test5} -\end{figure} - - -A different kind of requirement is displayed in Figure \ref{fig-test5}. -Usually, we want to specify additional tasks and dependencies at the initial -cycle point. What if we want our first cycle point to be entirely special, with -some tasks missing compared to subsequent cycle points? - -In Figure \ref{fig-test5}, \lstinline=bar= will not be run at the initial -cycle point, but will still run at subsequent cycle points. -\lstinline=[[[+PT6H/PT6H]]]= means start at \lstinline=+PT6H= (6 hours after -the initial cycle point) and then repeat every \lstinline=PT6H= (6 hours). - -Some suites may have staggered start-up sequences where different tasks need -running once but only at specific cycle points, potentially due to differing -data sources at different cycle points with different possible initial cycle -points. 
To allow this cylc provides a \lstinline=min( )= function that can be -used as follows: - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 20100101T03 - [[dependencies]] - [[[R1/min(T00,T12)]]] - graph = "prep1 => foo" - [[[R1/min(T06,T18)]]] - graph = "prep2 => foo" - [[[T00,T06,T12,T18]]] - graph = "foo => bar" - -\end{lstlisting} -\lstset{language=transcript} - - -In this example the initial cycle point is \lstinline=20100101T03=, so the -\lstinline=prep1= task will run once at \lstinline=20100101T12= and the -\lstinline=prep2= task will run once at \lstinline=20100101T06= as these are -the first cycle points after the initial cycle point in the respective -\lstinline=min( )= entries. - - -\paragraph{Integer Cycling} -\label{IntegerCycling} - -In addition to non-repeating and date-time cycling workflows, cylc can do -integer cycling for repeating workflows that are not date-time based. - -To construct an integer cycling suite, set -\lstinline@[scheduling]cycling mode = integer@, and specify integer values for -the initial and (optional) final cycle points. The notation for intervals, -offsets, and recurrences (sequences) is similar to the date-time cycling -notation, except for the simple integer values. - -The full integer recurrence expressions supported are: -\begin{myitemize} - \item \lstinline@Rn/start-point/interval # e.g. R3/1/P2@ - \item \lstinline@Rn/interval/end-point # e.g. R3/P2/9@ -\end{myitemize} -But, as for date-time cycling, sequence start and end points can be omitted -where suite initial and final cycle points can be assumed. Some examples: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R1 ]]] # Run once at the initial cycle point - # (short for R1/initial-point/?) 
-[[[ P1 ]]] # Repeat with step 1 from the initial cycle point - # (short for R/initial-point/P1) -[[[ P5 ]]] # Repeat with step 5 from the initial cycle point - # (short for R/initial-point/P5) -[[[ R2//P2 ]]] # Run twice with step 2 from the initial cycle point - # (short for R2/initial-point/P2) -[[[ R/+P1/P2 ]]] # Repeat with step 2, from 1 after the initial cycle point -[[[ R2/P2 ]]] # Run twice with step 2, to the final cycle point - # (short for R2/P2/final-point) -[[[ R1/P0 ]]] # Run once at the final cycle point - # (short for R1/P0/final-point) -\end{lstlisting} - -\subparagraph{Example} - -The tutorial illustrates integer cycling in~\ref{TutInteger}, and -\lstinline=/etc/examples/satellite/= is a -self-contained example of a realistic use for integer cycling. It simulates -the processing of incoming satellite data: each new dataset arrives after a -random (as far as the suite is concerned) interval, and is labeled by an -arbitrary (as far as the suite is concerned) ID in the filename. A task called -\lstinline=get_data= at the top of the repeating workflow waits on the next -dataset and, when it finds one, moves it to a cycle-point-specific shared -workspace for processing by the downstream tasks. When \lstinline=get_data.1= -finishes, \lstinline=get_data.2= triggers and begins waiting for the next -dataset at the same time as the downstream tasks in cycle point 1 are -processing the first one, and so on. In this way multiple datasets can be -processed at once if they happen to come in quickly. A single shutdown task -runs at the end of the final cycle to collate results. The suite graph is -shown in Figure~\ref{fig-satellite}.
- -\begin{figure} - \begin{center} - \includegraphics[width=0.4\textwidth]{graphics/png/orig/satellite.png} - \end{center} - \caption{The \lstinline=etc/examples/satellite= integer suite} -\label{fig-satellite} -\end{figure} - -\subparagraph{Advanced Integer Cycling Syntax} - -The same syntax used to reference the initial and final cycle points -(introduced in~\ref{referencing-the-initial-and-final-cycle-points}) for -use with date-time cycling can also be used for integer cycling. For -example you can write: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R1/^ ]]] # Run once at the initial cycle point -[[[ R1/$ ]]] # Run once at the final cycle point -[[[ R3/^/P2 ]]] # Run three times with step two starting at the - # initial cycle point -\end{lstlisting} -\lstset{language=transcript} - -Likewise the syntax introduced in~\ref{excluding-dates} for excluding -a particular point from a recurrence also works for integer cycling. For -example: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R/P4!8 ]]] # Run with step 4, to the final cycle point - # but not at point 8 -[[[ R3/3/P2!5 ]]] # Run with step 2 from point 3 but not at - # point 5 -[[[ R/+P1/P6!14 ]]] # Run with step 6 from 1 step after the - # initial cycle point but not at point 14 -\end{lstlisting} -\lstset{language=transcript} - -Multiple integer exclusions are also valid in the same way as the syntax -in~\ref{excluding-dates}. Integer exclusions may be a list of single -integer points, an integer sequence, or a combination of both: - -\lstset{language=suiterc} -\begin{lstlisting} -[[[ R/P1!(2,3,7) ]]] # Run with step 1 to the final cycle point, - # but not at points 2, 3, or 7. -[[[ P1 ! P2 ]]] # Run with step 1 from the initial to final - # cycle point, skipping every other step from - # the initial cycle point. -[[[ P1 ! +P1/P2 ]]] # Run with step 1 from the initial cycle point, - # excluding every other step beginning one step - # after the initial cycle point. 
-[[[ P1 !(P2,6,8) ]]] # Run with step 1 from the initial cycle point, - # excluding every other step, and also excluding - # steps 6 and 8. -\end{lstlisting} -\lstset{language=transcript} - - -\subsubsection{Task Triggering} -\label{TriggerTypes} - -\lstset{language=suiterc} - -A task is said to ``trigger'' when it submits its job to run, as soon as all of -its dependencies (also known as its separate ``triggers'') are met. Tasks can -be made to trigger off of the state of other tasks (indicated by a -\lstinline=:state= qualifier on the upstream task (or family) -name in the graph), and also off the clock and arbitrary external events. - -External triggering is relatively more complicated, and is documented -separately in Section~\ref{External Triggers}. - -\paragraph{Success Triggers} - -The default, with no trigger type specified, is to trigger off the -upstream task succeeding: -\begin{lstlisting} -# B triggers if A SUCCEEDS: - graph = "A => B" -\end{lstlisting} -For consistency and completeness, however, the success trigger can be explicit: -\begin{lstlisting} -# B triggers if A SUCCEEDS: - graph = "A => B" -# or: - graph = "A:succeed => B" -\end{lstlisting} - -\paragraph{Failure Triggers} - -To trigger off the upstream task reporting failure: -\begin{lstlisting} -# B triggers if A FAILS: - graph = "A:fail => B" -\end{lstlisting} -{\em Suicide triggers} can be used to remove task \lstinline=B= here if -\lstinline=A= does not fail, see~\ref{SuicideTriggers}. - -\paragraph{Start Triggers} - -To trigger off the upstream task starting to execute: -\begin{lstlisting} -# B triggers if A STARTS EXECUTING: - graph = "A:start => B" -\end{lstlisting} -This can be used to trigger tasks that monitor other tasks once they -(the target tasks) start executing. Consider a long-running forecast model, -for instance, that generates a sequence of output files as it runs.
A -postprocessing task could be launched with a start trigger on the model -(\lstinline@model:start => post@) to process the model output as it -becomes available. Note, however, that there are several alternative -ways of handling this scenario: both tasks could be triggered at the -same time (\lstinline@foo => model & post@), but depending on -external queue delays this could result in the monitoring task starting -to execute first; or a different postprocessing task could be -triggered off a message output for each data file -(\lstinline@model:out1 => post1@ etc.; see~\ref{MessageTriggers}), but this -may not be practical if the -number of output files is large or if it is difficult to add cylc -messaging calls to the model. - -\paragraph{Finish Triggers} - -To trigger off the upstream task succeeding or failing, i.e.\ finishing -one way or the other: -\begin{lstlisting} -# B triggers if A either SUCCEEDS or FAILS: - graph = "A | A:fail => B" -# or - graph = "A:finish => B" -\end{lstlisting} - -\paragraph{Message Triggers} -\label{MessageTriggers} - -Tasks can also trigger off custom output messages. These must be registered in -the \lstinline=[runtime]= section of the emitting task, and reported using the -\lstinline=cylc message= command in task scripting. The graph trigger notation -refers to the item name of the registered output message. -The example suite \lstinline=/etc/examples/message-triggers= illustrates -message triggering. 
- -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/message-triggers/suite.rc} - -\paragraph{Job Submission Triggers} - -It is also possible to trigger off a task submitting, or failing to submit: -\begin{lstlisting} -# B triggers if A submits successfully: - graph = "A:submit => B" -# D triggers if C fails to submit successfully: - graph = "C:submit-fail => D" -\end{lstlisting} - -A possible use case for submit-fail triggers: if a task goes into the -submit-failed state, possibly after several job submission retries, -another task that inherits the same runtime but sets a different job -submission method and/or host could be triggered to, in effect, run the -same job on a different platform. - - -\paragraph{Conditional Triggers} - -AND operators (\lstinline=&=) can appear on both sides of an arrow. They -provide a concise alternative to defining multiple triggers separately: -\begin{lstlisting} -# 1/ this: - graph = "A & B => C" -# is equivalent to: - graph = """A => C - B => C""" -# 2/ this: - graph = "A => B & C" -# is equivalent to: - graph = """A => B - A => C""" -# 3/ and this: - graph = "A & B => C & D" -# is equivalent to this: - graph = """A => C - B => C - A => D - B => D""" -\end{lstlisting} - -OR operators (\lstinline=|=) which result in true conditional triggers, -can only appear on the left,\footnote{An OR -operator on the right doesn't make much sense: if ``B or C'' triggers -off A, what exactly should cylc do when A finishes?} -\begin{lstlisting} -# C triggers when either A or B finishes: - graph = "A | B => C" -\end{lstlisting} - -Forecasting suites typically have simple conditional -triggering requirements, but any valid conditional expression can be -used, as shown in Figure~\ref{fig-conditional} -(conditional triggers are plotted with open arrow heads). 
-\begin{figure} -\begin{minipage}[b]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} - graph = """ -# D triggers if A or (B and C) succeed -A | B & C => D -# just to align the two graph sections -D => W -# Z triggers if (W or X) and Y succeed -(W|X) & Y => Z - """ -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[b]{0.5\textwidth} - \begin{center} - \includegraphics[width=0.5\textwidth]{graphics/png/orig/conditional-triggers.png} - \end{center} -\end{minipage} -\caption[Conditional Triggers] {\scriptsize -Conditional triggers are plotted with open arrow heads.} -\label{fig-conditional} -\end{figure} - -\paragraph{Suicide Triggers} -\label{SuicideTriggers} - -Suicide triggers take tasks out of the suite. This can be used for -automated failure recovery. The suite.rc listing and accompanying -graph in Figure~\ref{fig-suicide} show how to define a chain of failure -recovery tasks -that trigger if they're needed but otherwise remove themselves from the -suite (you can run the {\em AutoRecover.async} example suite to see how -this works). The dashed graph edges ending in solid dots indicate -suicide triggers, and the open arrowheads indicate conditional triggers -as usual. Suicide triggers are ignored by default in the graph view, unless you toggle them on with {\em View} -> {\em Options} -> {\em Ignore Suicide Triggers}. - -\begin{figure} -\begin{minipage}[b]{0.5\textwidth} -\lstset{language=suiterc} -\begin{lstlisting} -[meta] - title = automated failure recovery - description = """ -Model task failure triggers diagnosis -and recovery tasks, which take themselves -out of the suite if model succeeds. Model -post processing triggers off model OR -recovery tasks. 
- """ -[scheduling] - [[dependencies]] - graph = """ -pre => model -model:fail => diagnose => recover -model => !diagnose & !recover -model | recover => post - """ -[runtime] - [[model]] - # UNCOMMENT TO TEST FAILURE: - # script = /bin/false -\end{lstlisting} -\lstset{language=transcript} -\end{minipage} -\hfill -\begin{minipage}[b]{0.5\textwidth} - \begin{center} - \includegraphics[width=0.5\textwidth]{graphics/png/orig/suicide.png} - \end{center} -\end{minipage} -\caption[Automated failure recovery via suicide triggers] {\scriptsize -Automated failure recovery via suicide triggers.} -\label{fig-suicide} -\end{figure} - -Note that multiple suicide triggers combine in the same way as other triggers, so this: -\begin{lstlisting} -foo => !baz -bar => !baz -\end{lstlisting} -is equivalent to this: -\begin{lstlisting} -foo & bar => !baz -\end{lstlisting} -i.e.\ both \lstinline=foo= and \lstinline=bar= must succeed for -\lstinline=baz= to be taken out of the suite. If you really want a task -to be taken out if any one of several events occurs then be careful to -write it that way: -\begin{lstlisting} -foo | bar => !baz -\end{lstlisting} - -A word of warning on the meaning of ``bare suicide triggers''. Consider -the following suite: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "foo => !bar" -\end{lstlisting} -Task \lstinline=bar= has a suicide trigger but no normal prerequisites -(a suicide trigger is not a task triggering prerequisite, it is a task -removal prerequisite) so this is entirely equivalent to: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - foo & bar - foo => !bar - """ -\end{lstlisting} -In other words both tasks will trigger immediately, at the same time, -and then \lstinline=bar= will be removed if \lstinline=foo= succeeds. 
- -If an active task proxy (currently in the submitted or running states) -is removed from the suite by a suicide trigger, a warning will be logged. - -\paragraph{Family Triggers} -\label{FamilyTriggers} - -Families defined by the namespace inheritance hierarchy -(\ref{NIORP}) can be used in the graph to trigger whole groups of -tasks at the same time (e.g.\ forecast model ensembles and groups of -tasks for processing different observation types at the same time) and -for triggering downstream tasks off families as a whole. Higher level -families, i.e.\ families of families, can also be used, and are reduced -to the lowest level member tasks. Note that tasks can also trigger off -individual family members if necessary. - -To trigger an entire task family at once: -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "foo => FAM" -[runtime] - [[FAM]] # a family (because others inherit from it) - [[m1,m2]] # family members (inherit from namespace FAM) - inherit = FAM -\end{lstlisting} -This is equivalent to: -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "foo => m1 & m2" -[runtime] - [[FAM]] - [[m1,m2]] - inherit = FAM -\end{lstlisting} - -To trigger other tasks off families we have to specify whether -to trigger off {\em all members} starting, succeeding, failing, -or finishing, or off {\em any} members (doing the same).
Legal family -triggers are thus: - -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - # all-member triggers: - FAM:start-all => one - FAM:succeed-all => one - FAM:fail-all => one - FAM:finish-all => one - # any-member triggers: - FAM:start-any => one - FAM:succeed-any => one - FAM:fail-any => one - FAM:finish-any => one - """ -\end{lstlisting} - -Here's how to trigger downstream processing if one or more family -members succeed, but only after all members have finished (succeeded or -failed): - -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - FAM:finish-all & FAM:succeed-any => foo - """ -\end{lstlisting} - -\paragraph{Efficient Inter-Family Triggering} -\label{EfficientInterFamilyTriggering} - -While cylc allows writing dependencies between two families it is important to -consider the number of dependencies this will generate. In the following -example, each member of \lstinline=FAM2= has dependencies pointing at all the -members of \lstinline=FAM1=. - -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - FAM1:succeed-any => FAM2 - """ -\end{lstlisting} - -Expanding this out, you generate \lstinline=N * M= dependencies, where -\lstinline=N= is the number of members of \lstinline=FAM1= and \lstinline=M= is -the number of members of \lstinline=FAM2=. This can result in high memory use -as the number of members of these families grows, potentially rendering the -suite impractical for running on some systems. - -You can greatly reduce the number of dependencies generated in these situations -by putting dummy tasks in the graph to represent the state of the family you -want to trigger off.
For example, if \lstinline=FAM2= should trigger off any -member of \lstinline=FAM1= succeeding you can create a dummy task -\lstinline=FAM1_succeed_any_marker= and place a dependency on it as follows: - -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - FAM1:succeed-any => FAM1_succeed_any_marker => FAM2 - """ -[runtime] -... - [[FAM1_succeed_any_marker]] - script = true -... -\end{lstlisting} - -This graph generates only \lstinline=N + M= dependencies, which take -significantly less memory and CPU to store and evaluate. - -\paragraph{Inter-Cycle Triggers} -\label{InterCyclePointTriggers} - -Typically most tasks in a suite will trigger off others in the same -cycle point, but some may depend on others in other cycle points. -This notably applies to warm-cycled forecast models, which depend on -their own previous instances (see below); but other kinds of inter-cycle -dependence are possible too.\footnote{In NWP forecast analysis -suites parts of the observation processing and data assimilation -subsystem will typically also depend on model background fields -generated by the previous forecast.} Here's how to express this -kind of relationship in cylc: -\begin{lstlisting} -[dependencies] - [[PT6H]] - # B triggers off A in the previous cycle point - graph = "A[-PT6H] => B" -\end{lstlisting} -Inter-cycle and trigger type (or message trigger) notation can be -combined: -\begin{lstlisting} - # B triggers if A in the previous cycle point fails: - graph = "A[-PT6H]:fail => B" -\end{lstlisting} - -At suite start-up inter-cycle triggers refer to a previous cycle point -that does not exist. This does not cause the dependent task to wait -indefinitely, however, because cylc ignores triggers that reach back -beyond the initial cycle point. That said, the presence of an -inter-cycle trigger does normally imply that something special has to -happen at start-up.
If a model depends on its own previous instance for -restart files, for instance, then an initial set of restart files has to be -generated somehow or the first model task will presumably fail with -missing input files. There are several ways to handle this in cylc -using different kinds of one-off (non-cycling) tasks that run at suite -start-up. They are illustrated in the Tutorial -(\ref{TutInterCyclePointTriggers}); to summarize here briefly: - -\begin{myitemize} - \item \lstinline=R1= tasks (recommended): -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[R1]]] - graph = "prep" - [[[R1/T00,R1/T12]]] - graph = "prep[^] => foo" - [[[T00,T12]]] - graph = "foo[-PT12H] => foo => bar" -\end{lstlisting} - -\end{myitemize} -\lstset{language=transcript} - -\lstinline=R1=, or \lstinline=R1/date-time= tasks are the recommended way to -specify unusual start up conditions. They allow you to specify a clean -distinction between the dependencies of initial cycles and the dependencies -of the subsequent cycles. - -Initial tasks can be used for real model cold-start processes, whereby a -warm-cycled model at any given cycle point can in principle have its inputs -satisfied by a previous instance of itself, {\em or} by an initial task with -(nominally) the same cycle point. - -In effect, the \lstinline=R1= task masquerades as the previous-cycle-point trigger -of its associated cycling task. At suite start-up initial tasks will -trigger the first cycling tasks, and thereafter the inter-cycle trigger -will take effect. - -If a task has a dependency on another task in a different cycle point, the -dependency can be written using the \lstinline=[offset]= syntax such as -\lstinline=[-PT12H]= in \lstinline@foo[-PT12H] => foo@. This means that -\lstinline=foo= at the current cycle point depends on a previous instance of - \lstinline=foo= at 12 hours before the current cycle point. 
Unlike the - cycling section headings (e.g.\ \lstinline=[[[T00,T12]]]=), dependencies - assume that relative times are relative to the current cycle point, not the - initial cycle point. - -However, it can be useful to have specific dependencies on tasks at or near -the initial cycle point. You can switch the context of the offset to be -the initial cycle point by using the caret symbol: \lstinline=^=. - -For example, you can write \lstinline=foo[^]= to mean foo at the initial -cycle point, and \lstinline=foo[^+PT6H]= to mean foo 6 hours after the initial -cycle point. Usually, this kind of dependency will only apply in a limited -number of cycle points near the start of the suite, so you may want to write -it in \lstinline=R1=-based cycling sections. Here's the example inter-cycle -\lstinline=R1= suite from above again. - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[R1]]] - graph = "prep" - [[[R1/T00,R1/T12]]] - graph = "prep[^] => foo" - [[[T00,T12]]] - graph = "foo[-PT12H] => foo => bar" -\end{lstlisting} -\lstset{language=transcript} - -You can see there is a dependence on the initial \lstinline=R1= task -\lstinline=prep= for \lstinline=foo= at the first \lstinline=T00= cycle point, -and at the first \lstinline=T12= cycle point. Thereafter, \lstinline=foo= just -depends on its previous (12 hours ago) instance. - -Finally, it is also possible to have a dependency on a task at a specific cycle -point. - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[R1/20200202]]] - graph = "baz[20200101] => qux" -\end{lstlisting} -\lstset{language=transcript} - -However, in a long running suite, a repeating cycle should avoid having a -dependency on a task with a specific cycle point (including the initial cycle -point) - as it can currently cause performance issue. 
In the following example, -all instances of \lstinline=qux= will depend on \lstinline=baz.20200101=, which -will never be removed from the task pool.: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2010 - [[dependencies]] - # Can cause performance issue! - [[[P1D]]] - graph = "baz[20200101] => qux" -\end{lstlisting} -\lstset{language=transcript} - -\paragraph{Special Sequential Tasks} -\label{SequentialTasks} - -Tasks that depend on their own previous-cycle instance can be declared as {\em -sequential}: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[special tasks]] - # foo depends on its previous instance: - sequential = foo # deprecated - see below! - [[dependencies]] - [[[T00,T12]]] - graph = "foo => bar" -\end{lstlisting} - -{\em The sequential declaration is deprecated} however, in favor of explicit -inter-cycle triggers which clearly expose the same scheduling behaviour in the -graph: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T12]]] - # foo depends on its previous instance: - graph = "foo[-PT12H] => foo => bar" -\end{lstlisting} - -The sequential declaration is arguably convenient in one unusual situation -though: if a task has a non-uniform cycling sequence then multiple explicit -triggers, -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T03,T11]]] - graph = "foo => bar" - [[[T00]]] - graph = "foo[-PT13H] => foo" - [[[T03]]] - graph = "foo[-PT3H] => foo" - [[[T11]]] - graph = "foo[-PT8H] => foo" -\end{lstlisting} -can be replaced by a single sequential declaration, -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[special tasks]] - sequential = foo - [[dependencies]] - [[[T00,T03,T11]]] - graph = "foo => bar" -\end{lstlisting} - -\paragraph{Future Triggers} - -Cylc also supports inter-cycle triggering off tasks ``in the future'' (with -respect to cycle point - which has no bearing on wall-clock job 
submission time -unless the task has a clock trigger): -\begin{lstlisting} -[[dependencies]] - [[[T00,T06,T12,T18]]] - graph = """ - # A runs in this cycle: - A - # B in this cycle triggers off A in the next cycle. - A[PT6H] => B - """ -\end{lstlisting} -Future triggers present a problem at suite shutdown rather than at start-up. -Here, \lstinline=B= at the final cycle point wants to trigger off an instance -of \lstinline=A= that will never exist because it is beyond the suite stop -point. Consequently Cylc prevents tasks from spawning successors that depend on -other tasks beyond the final point. - -\paragraph{Clock Triggers} -\label{ClockTriggerTasks} - -{\em NOTE: please read {\em External Triggers} (\ref{External Triggers}) before -using the older clock triggers described in this section.} - -By default, date-time cycle points are not connected to the real time ``wall -clock''. They are just labels that are passed to task jobs (e.g.\ to -initialize an atmospheric model run with a particular date-time value). In real -time cycling systems, however, some tasks - typically those near the top of the -graph in each cycle - need to trigger at or near the time when their cycle point -is equal to the real clock date-time. - -So {\em clock triggers} allow tasks to trigger at (or after, depending on other -triggers) a wall clock time expressed as an offset from cycle point: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[special tasks]] - clock-trigger = foo(PT2H) - [[dependencies]] - [[[T00]]] - graph = foo -\end{lstlisting} -Here, \lstinline=foo[2015-08-23T00]= would trigger (other dependencies allowing) -when the wall clock time reaches \lstinline=2015-08-23T02=. Clock-trigger -offsets are normally positive, to trigger some time {\em after} the wall-clock -time is equal to task cycle point. 
- -Clock-triggers have no effect on scheduling if a suite is running sufficiently -far behind the clock (e.g.\ after a delay, or because it is processing archived -historical data) that the trigger times, which are relative to task cycle -point, have already passed. - -\paragraph{Clock-Expire Triggers} -\label{ClockExpireTasks} - -Tasks can be configured to {\em expire} - i.e.\ to skip job submission and -enter the {\em expired} state - if they are too far behind the wall clock when -they become ready to run, and other tasks can trigger off this. As a possible -use case, consider a cycling task that copies the latest of a set of files to -overwrite the previous set: if the task is delayed by more than one cycle there -may be no point in running it because the freshly copied files will just be -overwritten immediately by the next task instance as the suite catches back up -to real time operation. Clock-expire tasks are configured like clock-trigger -tasks, with a date-time offset relative to cycle point (\ref{ClockExpireRef}). -The offset should be positive to make the task expire if the wall-clock time -has gone beyond the cycle point. Triggering off an expired task typically -requires suicide triggers to remove the workflow that runs if the task has not -expired. Here a task called \lstinline=copy= expires, and its downstream -workflow is skipped, if it is more than one day behind the wall-clock (see also -\lstinline=etc/examples/clock-expire=): -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - cycle point format = %Y-%m-%dT%H -[scheduling] - initial cycle point = 2015-08-15T00 - [[special tasks]] - clock-expire = copy(-P1D) - [[dependencies]] - [[[P1D]]] - graph = """ - model[-P1D] => model => copy => proc - copy:expired => !proc""" -\end{lstlisting} - -\paragraph{External Triggers} - -This is a substantial topic, documented in Section~\ref{External Triggers}. 
- -\subsubsection{Model Restart Dependencies} -\label{ModelRestartDependencies} - -Warm-cycled forecast models generate {\em restart files}, e.g.\ model -background fields, to initialize the next forecast. This kind of -dependence requires an inter-cycle trigger: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T06,T12,T18]]] - graph = "A[-PT6H] => A" -\end{lstlisting} - -If your model is configured to write out additional restart files -to allow one or more cycle points to be skipped in an emergency {\em do not -represent these potential dependencies in the suite graph} as they -should not be used under normal circumstances. For example, the -following graph would result in task \lstinline=A= erroneously -triggering off \lstinline=A[T-24]= as a matter of course, instead of -off \lstinline=A[T-6]=, because \lstinline=A[T-24]= will always -be finished first: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00,T06,T12,T18]]] - # DO NOT DO THIS (SEE ACCOMPANYING TEXT): - graph = "A[-PT24H] | A[-PT18H] | A[-PT12H] | A[-PT6H] => A" -\end{lstlisting} - -\subsubsection{How The Graph Determines Task Instantiation} - -A graph trigger pair like \lstinline@foo => bar@ determines the existence and -prerequisites (dependencies) of the downstream task \lstinline=bar=, for -the cycle points defined by the associated graph section heading. In general it -does not say anything about the dependencies or existence of the upstream task -\lstinline=foo=. However {\em if the trigger has no cycle point offset} Cylc -will infer that \lstinline=bar= must exist at the same cycle points as -\lstinline=foo=. 
This is a convenience to allow this: - -\lstset{language=suiterc} -\begin{lstlisting} -graph = "foo => bar" -\end{lstlisting} - -to be written as shorthand for this: - -\lstset{language=suiterc} -\begin{lstlisting} -graph = """foo - foo => bar""" -\end{lstlisting} - -(where \lstinline=foo= by itself means \lstinline@ => foo@, i.e.\ the -task exists at these cycle points but has no prerequisites - although other -prerequisites may be defined for it in other parts of the graph). - -{\em Cylc does not infer the existence of the upstream task in offset -triggers} like \lstinline@foo[-P1D] => bar@ because, as explained in -Section~\ref{cylc-6-migration-implicit-cycling}, a typo in the offset interval -should generate an error rather than silently creating tasks on an erroneous -cycling sequence. - -As a result you need to be careful not to define inter-cycle dependencies that -cannot be satisfied at run time. Suite validation catches this kind of error if -the existence of the cycle offset task is not defined anywhere at all: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020 - [[dependencies]] - [[[P1Y]]] - # ERROR - graph = "foo[-P1Y] => bar" -\end{lstlisting} - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc validate SUITE -'ERROR: No cycling sequences defined for foo' -\end{lstlisting} - -To fix this, use another line in the graph to tell Cylc to define -\lstinline=foo= at each cycle point: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020 - [[dependencies]] - [[[P1Y]]] - graph = """ - foo - foo[-P1Y] => bar""" -\end{lstlisting} - -But validation does not catch this kind of error if the offset task -is defined only on a different cycling sequence: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020 - [[dependencies]] - [[[P2Y]]] - graph = """foo - # ERROR - foo[-P1Y] => bar""" -\end{lstlisting} - -This suite will validate OK, but it 
will stall at runtime with \lstinline=bar= -waiting on \lstinline=foo[-P1Y]= at the intermediate years where it does not -exist. The offset \lstinline=[-P1Y]= is presumably an error (it should be -\lstinline=[-P2Y]=), or else another graph line is needed to generate -\lstinline=foo= instances on the yearly sequence: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020 - [[dependencies]] - [[[P1Y]]] - graph = "foo" - [[[P2Y]]] - graph = "foo[-P1Y] => bar" -\end{lstlisting} - -Similarly the following suite will validate OK, but it will stall at -runtime with \lstinline=bar= waiting on \lstinline=foo[-P1Y]= in -every cycle point, when only a single instance of it exists, at the initial -cycle point: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020 - [[dependencies]] - [[[R1]]] - graph = foo - [[[P1Y]]] - # ERROR - graph = foo[-P1Y] => bar -\end{lstlisting} - -Note that \lstinline=cylc graph= will display un-satisfiable inter-cycle -dependencies as ``ghost nodes''. Figure \ref{ghost-node-screenshot} is a -screenshot of cylc graph displaying the above example with the un-satisfiable -task (foo) displayed as a ``ghost node''. - -\begin{figure} - \begin{center} - \includegraphics[width=0.6\textwidth]{graphics/png/orig/ghost-node-example.png} - \end{center} - \caption{Screenshot of \lstinline=cylc graph= showing one task as a - ``ghost node''} - \label{ghost-node-screenshot} -\end{figure} - -\subsection{Runtime - Task Configuration} -\label{NIORP} - -The \lstinline=[runtime]= section of a suite configuration configures what -to execute (and where and how to execute it) when each task is ready to -run, in a {\em multiple inheritance hierarchy} of {\em -namespaces} culminating in individual tasks. This allows all common -configuration detail to be factored out and defined in one place. 
- -Any namespace can configure any or all of the items defined in the -{\em Suite.rc Reference} (\ref{SuiteRCReference}). - -Namespaces that do not explicitly inherit from others automatically -inherit from the {\em root} namespace (below). - -Nested namespaces define {\em task families} that can be used in the -graph as convenient shorthand for triggering all member tasks at once, -or for triggering other tasks off all members at once - -see~\ref{FamilyTriggers}. Nested namespaces can be -progressively expanded and collapsed in the dependency graph viewer, and -in the gcylc graph and text views. Only the first parent of each -namespace (as for single-inheritance) is used for suite visualization -purposes. - -\subsubsection{Namespace Names} - -Namespace names may contain letters, digits, underscores, and hyphens. - -Note that {\em task names need not be hardwired into task implementations} -because task and suite identity can be extracted portably from the task -execution environment supplied by the suite server program -(\ref{TaskExecutionEnvironment}) - then to rename a task you can just change -its name in the suite configuration. - -\subsubsection{Root - Runtime Defaults} - -The root namespace, at the base of the inheritance hierarchy, -provides default configuration for all tasks in the suite. -Most root items are unset by default, but some have default values -sufficient to allow test suites to be defined by dependency graph alone. -The {\em script} item, for example, defaults to code that -prints a message then sleeps for between 1 and 15 seconds and -exits. Default values are documented with each item in~\ref{SuiteRCReference}. -You can override the defaults or -provide your own defaults by explicitly configuring the root namespace. - -\subsubsection{Defining Multiple Namespaces At Once} -\label{MultiTaskDef} - -If a namespace section heading is a comma-separated list of names -then the subsequent configuration applies to each list member. 
-Particular tasks can be singled out at run time using the -\lstinline=$CYLC_TASK_NAME= variable. - -As an example, consider a suite containing an ensemble of closely -related tasks that each invokes the same script but with a unique -argument that identifies the calling task name: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[ENSEMBLE]] - script = "run-model.sh $CYLC_TASK_NAME" - [[m1, m2, m3]] - inherit = ENSEMBLE -\end{lstlisting} - -For large ensembles template processing can be used to -automatically generate the member names and associated dependencies -(see~\ref{Jinja2} and~\ref{EmPy}). - -\subsubsection{Runtime Inheritance - Single} - -The following listing of the {\em inherit.single.one} example suite -illustrates basic runtime inheritance with single parents. - -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/inherit/single/one/suite.rc} -\lstset{language=transcript} - -\subsubsection{Runtime Inheritance - Multiple} - -If a namespace inherits from multiple parents the linear order of -precedence (which namespace overrides which) is determined by the -so-called {\em C3 algorithm} used to find the linear {\em method -resolution order} for class hierarchies in Python and several other -object oriented programming languages. The result of this should be -fairly obvious for typical use of multiple inheritance in cylc suites, -but for detailed documentation of how the algorithm works refer to the -official Python documentation here: -\url{http://www.python.org/download/releases/2.3/mro/}. - -The {\em inherit.multi.one} example suite, listed here, makes use of -multiple inheritance: - -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/inherit/multi/one/suite.rc} -\lstset{language=transcript} - -\lstinline=cylc get-suite-config= provides an easy way to check the result of -inheritance in a suite. 
You can extract specific items, e.g.: -\begin{lstlisting} -$ cylc get-suite-config --item '[runtime][var_p2]script' \ - inherit.multi.one -echo ``RUN: run-var.sh'' -\end{lstlisting} -or use the \lstinline=--sparse= option to print entire namespaces -without obscuring the result with the dense runtime structure obtained -from the root namespace: -\begin{lstlisting} -$ cylc get-suite-config --sparse --item '[runtime]ops_s1' inherit.multi.one -script = echo ``RUN: run-ops.sh'' -inherit = ['OPS', 'SERIAL'] -[directives] - job_type = serial -\end{lstlisting} - -\paragraph{Suite Visualization And Multiple Inheritance} - -The first parent inherited by a namespace is also used as the -collapsible family group when visualizing the suite. If this is not what -you want, you can demote the first parent for visualization purposes, -without affecting the order of inheritance of runtime properties: -\begin{lstlisting} -[runtime] - [[BAR]] - # ... - [[foo]] - # inherit properties from BAR, but stay under root for visualization: - inherit = None, BAR -\end{lstlisting} - - -\subsubsection{How Runtime Inheritance Works} - -The linear precedence order of ancestors is computed for each namespace -using the C3 algorithm. Then any runtime items that are explicitly -configured in the suite configuration are ``inherited'' up the linearized -hierarchy for each task, starting at the root namespace: if a particular -item is defined at multiple levels in the hierarchy, the level nearest -the final task namespace takes precedence. Finally, root namespace -defaults are applied for every item that has not been configured in the -inheritance process (this is more efficient than carrying the full dense -namespace structure through from root from the beginning). - -\subsubsection{Task Execution Environment} -\label{TaskExecutionEnvironment} - -The task execution environment contains suite and task identity variables -provided by the suite server program, and user-defined environment variables. 
-The environment is explicitly exported (by the task job script) prior to -executing the task \lstinline=script= (see~\ref{TaskJobSubmission}). - -Suite and task identity are exported first, so that user-defined -variables can refer to them. Order of definition is preserved throughout -so that variable assignment expressions can safely refer to previously -defined variables. - -Additionally, access to cylc itself is configured prior to the user-defined -environment, so that variable assignment expressions can make use of -cylc utility commands: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - [[[environment]]] - REFERENCE_TIME = $( cylc util cycletime --offset-hours=6 ) -\end{lstlisting} - -\paragraph{User Environment Variables} - -A task's user-defined environment results from its inherited -\lstinline=[[[environment]]]= sections: -\begin{lstlisting} -[runtime] - [[root]] - [[[environment]]] - COLOR = red - SHAPE = circle - [[foo]] - [[[environment]]] - COLOR = blue # root override - TEXTURE = rough # new variable -\end{lstlisting} -This results in a task {\em foo} with -\lstinline@SHAPE=circle@, -\lstinline@COLOR=blue@, and -\lstinline@TEXTURE=rough@ in its environment. - -\paragraph{Overriding Environment Variables} - -When you override inherited namespace items the original parent -item definition is {\em replaced} by the new definition. This applies to -all items including those in the environment sub-sections which, -strictly speaking, are not ``environment variables'' until they are -written, post inheritance processing, to the task job script that -executes the associated task. Consequently, if you override an -environment variable you cannot also access the original parent value: -\begin{lstlisting} -[runtime] - [[FOO]] - [[[environment]]] - COLOR = red - [[bar]] - inherit = FOO - [[[environment]]] - tmp = $COLOR # !! ERROR: $COLOR is undefined here - COLOR = dark-$tmp # !! as this overrides COLOR in FOO. 
-\end{lstlisting} -The compressed variant of this, \lstinline@COLOR = dark-$COLOR@, is -also in error for the same reason. To achieve the desired result you -must use a different name for the parent variable: -\begin{lstlisting} -[runtime] - [[FOO]] - [[[environment]]] - FOO_COLOR = red - [[bar]] - inherit = FOO - [[[environment]]] - COLOR = dark-$FOO_COLOR # OK -\end{lstlisting} - -\paragraph{Task Job Script Variables} -\label{Task Job Script Variables} - -These are variables that can be referenced (but should not be modified) in a -task job script. - -The task job script may export the following environment variables: - -\lstset{language=bash} -\begin{lstlisting} -CYLC_DEBUG # Debug mode, true or not defined -CYLC_DIR # Location of cylc installation used -CYLC_VERSION # Version of cylc installation used - -CYLC_CYCLING_MODE # Cycling mode, e.g. gregorian -CYLC_SUITE_FINAL_CYCLE_POINT # Final cycle point -CYLC_SUITE_INITIAL_CYCLE_POINT # Initial cycle point -CYLC_SUITE_NAME # Suite name -CYLC_UTC # UTC mode, True or False -CYLC_VERBOSE # Verbose mode, True or False -TZ # Set to "UTC" in UTC mode or not defined - -CYLC_SUITE_RUN_DIR # Location of the suite run directory in - # job host, e.g. ~/cylc-run/foo -CYLC_SUITE_DEF_PATH # Location of the suite configuration directory in - # job host, e.g. ~/cylc-run/foo -CYLC_SUITE_HOST # Host running the suite process -CYLC_SUITE_OWNER # User ID running the suite process -CYLC_SUITE_DEF_PATH_ON_SUITE_HOST - # Location of the suite configuration directory in - # suite host, e.g. ~/cylc-run/foo -CYLC_SUITE_SHARE_DIR # Suite (or task!) shared directory (see below) -CYLC_SUITE_UUID # Suite UUID string -CYLC_SUITE_WORK_DIR # Suite work directory (see below) - -CYLC_TASK_JOB # Task job identifier expressed as - # CYCLE-POINT/TASK-NAME/SUBMIT-NUM - # e.g. 20110511T1800Z/t1/01 -CYLC_TASK_CYCLE_POINT # Cycle point, e.g. 20110511T1800Z -CYLC_TASK_NAME # Job's task name, e.g. t1 -CYLC_TASK_SUBMIT_NUMBER # Job's submit number, e.g. 
1, - # increments with every submit -CYLC_TASK_TRY_NUMBER # Number of execution tries, e.g. 1 - # increments with automatic retry-on-fail -CYLC_TASK_ID # Task instance identifier expressed as - # TASK-NAME.CYCLE-POINT - # e.g. t1.20110511T1800Z -CYLC_TASK_LOG_DIR # Location of the job log directory - # e.g. ~/cylc-run/foo/log/job/20110511T1800Z/t1/01/ -CYLC_TASK_LOG_ROOT # The task job file path - # e.g. ~/cylc-run/foo/log/job/20110511T1800Z/t1/01/job -CYLC_TASK_WORK_DIR # Location of task work directory (see below) - # e.g. ~/cylc-run/foo/work/20110511T1800Z/t1 -CYLC_TASK_NAMESPACE_HIERARCHY # Linearised family namespace of the task, - # e.g. root postproc t1 -CYLC_TASK_DEPENDENCIES # List of met dependencies that triggered the task - # e.g. foo.1 bar.1 - -CYLC_TASK_COMMS_METHOD # Set to "ssh" if communication method is "ssh" -CYLC_TASK_SSH_LOGIN_SHELL # With "ssh" communication, if set to "True", - # use login shell on suite host -\end{lstlisting} - -There are also some global shell variables that may be defined in the task job -script (but not exported to the environment). These include: -\lstset{language=bash} -\begin{lstlisting} -CYLC_FAIL_SIGNALS # List of signals trapped by the error trap -CYLC_VACATION_SIGNALS # List of signals trapped by the vacation trap -CYLC_SUITE_WORK_DIR_ROOT # Root directory above the suite work directory - # in the job host -CYLC_TASK_MESSAGE_STARTED_PID # PID of "cylc message" job started" command -CYLC_TASK_WORK_DIR_BASE # Alternate task work directory, - # relative to the suite work directory -\end{lstlisting} - -\paragraph{Suite Share Directories} - -A {\em suite share directory} is created automatically under the suite run -directory as a share space for tasks. The location is available to tasks as -\lstinline=$CYLC_SUITE_SHARE_DIR=. In a cycling suite, output files are -typically held in cycle point sub-directories of the suite share directory. 
- -The top level share and work directory (below) location can be changed -(e.g.\ to a large data area) by a global config setting -(see~\ref{workdirectory}). - -\paragraph{Task Work Directories} - -Task job scripts are executed from within {\em work directories} created -automatically under the suite run directory. A task can get its own work -directory from \lstinline=$CYLC_TASK_WORK_DIR= (or simply \lstinline=$PWD= if -it does not \lstinline=cd= elsewhere at runtime). By default the location -contains task name and cycle point, to provide a unique workspace for every -instance of every task. This can be overridden in the suite configuration, -however, to get several tasks to share the same work directory -(see~\ref{worksubdirectory}). - -The top level work and share directory (above) location can be changed -(e.g.\ to a large data area) by a global config setting -(see~\ref{workdirectory}). - -\lstset{language=transcript} - -\paragraph{Environment Variable Evaluation} - -Variables in the task execution environment are not evaluated in the -shell in which the suite is running prior to submitting the task. They -are written in unevaluated form to the job script that is submitted by -cylc to run the task (\ref{JobScripts}) and are therefore -evaluated when the task begins executing under the task owner account -on the task host. Thus \lstinline=$HOME=, for instance, evaluates at -run time to the home directory of the task owner on the task host. - -\subsubsection{How Tasks Get Access To The Suite Directory} - -Tasks can use \lstinline=$CYLC_SUITE_DEF_PATH= to access suite files on -the task host, and the suite bin directory is automatically added to -\lstinline=$PATH=. If a remote suite configuration directory is not -specified the local (suite host) path will be assumed with the local -home directory, if present, swapped for literal \lstinline=$HOME= for -evaluation on the task host.
- -\subsubsection{Remote Task Hosting} -\label{RunningTasksOnARemoteHost} - -If a task declares an owner other than the suite owner and/or -a host other than the suite host, cylc will use non-interactive ssh to -execute the task on the \lstinline=owner@host= account by the configured -batch system: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - [[[remote]]] - host = orca.niwa.co.nz - owner = bob - [[[job]]] - batch system = pbs -\end{lstlisting} -\lstset{language=transcript} -For this to work: -\begin{myitemize} - \item non-interactive ssh is required from the suite host to the remote - task accounts. - - \item cylc must be installed on task hosts. - \begin{myitemize} - \item Optional software dependencies such as graphviz and - Jinja2 are not needed on task hosts. - \item If polling task communication is used, there is no other - requirement. - \item If SSH task communication is configured, non-interactive ssh is - required from the task host to the suite host. - \item If (default) task communication is configured, the task host - should have access to the port on the suite host. - \end{myitemize} - \item the suite configuration directory, or some fraction of its - content, can be installed on the task host, if needed. -\end{myitemize} - -To learn how to give remote tasks access to cylc, -see~\ref{HowTasksGetAccessToCylc}. - -Tasks running on the suite host under another user account are treated as -remote tasks. - -Remote hosting, like all namespace settings, can be declared globally in -the root namespace, or per family, or for individual tasks. - -\paragraph{Dynamic Host Selection} - -Instead of hardwiring host names into the suite configuration you can -specify a shell command that prints a hostname, or an environment -variable that holds a hostname, as the value of the host config item. -See~\ref{DynamicHostSelection}. 
- -\paragraph{Remote Task Log Directories} - -Task stdout and stderr streams are written to log files in a -suite-specific sub-directory of the {\em suite run directory}, as -explained in~\ref{WhitherStdoutAndStderr}. For remote tasks -the same directory is used, but {\em on the task host}. -Remote task log directories, like local ones, are created on the fly, if -necessary, during job submission. - -\subsection{Visualization} -\label{viso} - -The visualization section of a suite configuration is used to configure -suite graphing, principally graph node (task) and edge (dependency -arrow) style attributes. Tasks can be grouped for the purpose of -applying common style attributes. See~\ref{SuiteRCReference} for details. - -\subsubsection{Collapsible Families In Suite Graphs} - -\lstset{language=suiterc} -\begin{lstlisting} -[visualization] - collapsed families = family1, family2 -\end{lstlisting} -\lstset{language=transcript} - -Nested families from the runtime inheritance hierarchy can be expanded -and collapsed in suite graphs and the gcylc graph view. All families -are displayed in the collapsed state at first, unless -\lstinline=[visualization]collapsed families= is used to single out -specific families for initial collapsing. - -In the gcylc graph view, nodes outside of the main graph (such as the -members of collapsed families) are plotted as rectangular nodes to -the right if they are doing anything interesting (submitted, running, -failed). - -Figure~\ref{fig-namespaces} illustrates successive expansion of nested task -families in the {\em namespaces} example suite. 
- -\begin{figure} -\begin{minipage}[t]{0.3\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/inherit-2.png} - \end{center} -\end{minipage} -\hfill -\begin{minipage}[t]{0.3\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/inherit-3.png} - \end{center} -\end{minipage} -\hfill -\begin{minipage}[t]{0.3\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/inherit-4.png} - \end{center} -\end{minipage} - -\begin{minipage}[t]{0.3\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/inherit-5.png} - \end{center} -\end{minipage} -\hfill -\begin{minipage}[t]{0.3\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/inherit-6.png} - \end{center} -\end{minipage} -\hfill -\begin{minipage}[t]{0.3\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{graphics/png/orig/inherit-7.png} - \end{center} -\end{minipage} -\caption[{\em namespaces} example suite graphs]{\scriptsize Graphs of the {\em -namespaces} example suite showing various states of expansion of the -nested namespace family hierarchy, from all families collapsed (top -left) through to all expanded (bottom right). This can also be done by -right-clicking on tasks in the gcylc graph view.} -\label{fig-namespaces} -\end{figure} - -\subsection{Parameterized Tasks} -\label{Parameterized Tasks} - -Cylc can automatically generate tasks and dependencies by expanding -parameterized task names over lists of parameter values. 
Uses for this -include: -\begin{myitemize} - \item generating an ensemble of similar model runs - \item generating chains of tasks to process similar datasets - \item replicating an entire workflow, or part thereof, over several runs - \item splitting a long model run into smaller steps or ``chunks'' - (parameterized cycling) -\end{myitemize} - -{\em Note that this can be done with Jinja2 loops too (Section~\ref{Jinja2}) - but parameterization is much cleaner (nested loops can seriously reduce -the clarity of a suite configuration).} - -\subsubsection{Parameter Expansion} - -Parameter values can be lists of strings, or lists of integers and -integer ranges (with inclusive bounds). Numeric values in a list of strings are -considered strings. It is not possible to mix strings with integer ranges. - -For example: - -\begin{lstlisting} -[cylc] - [[parameters]] - # parameters: "ship", "buoy", "plane" - # default task suffixes: _ship, _buoy, _plane - obs = ship, buoy, plane - - # parameters: 1, 2, 3, 4, 5 - # default task suffixes: _run1, _run2, _run3, _run4, _run5 - run = 1..5 - - # parameters: 1, 3, 5, 7, 9 - # default task suffixes: _idx1, _idx3, _idx5, _idx7, _idx9 - idx = 1..9..2 - - # parameters: -11, -1, 9 - # default task suffixes: _idx-11, _idx-01, _idx+09 - idx = -11..9..10 - - # parameters: 1, 3, 5, 10, 11, 12, 13 - # default task suffixes: _i01, _i03, _i05, _i10, _i11, _i12, _i13 - i = 1..5..2, 10, 11..13 - - # parameters: "0", "1", "e", "pi", "i" - # default task suffixes: _0, _1, _e, _pi, _i - item = 0, 1, e, pi, i - - # ERROR: mix strings with int range - p = one, two, 3..5 -\end{lstlisting} -Then angle brackets denote use of these parameters throughout the suite -configuration.
For the values above, this parameterized name: -\begin{lstlisting} - model<run> # for run = 1..2 -\end{lstlisting} -expands to these concrete task names: -\begin{lstlisting} - model_run1, model_run2 -\end{lstlisting} -and this parameterized name: -\begin{lstlisting} - proc<obs> # for obs = ship, buoy, plane -\end{lstlisting} -expands to these concrete task names: -\begin{lstlisting} - proc_ship, proc_buoy, proc_plane -\end{lstlisting} -By default, to avoid any ambiguity, the parameter name appears in the expanded -task names for integer values, but not for string values. For example, -\lstinline=model_run1= for \lstinline@run = 1@, but \lstinline=proc_ship= for -\lstinline@obs = ship@. However, the default expansion templates can be -overridden if need be: -\begin{lstlisting} -[cylc] - [[parameters]] - obs = ship, buoy, plane - run = 1..5 - [[parameter templates]] - run = -R%(run)s # Make foo<run> expand to foo-R1 etc. -\end{lstlisting} -(See~\ref{RefParameterTemplates} for more on the string template syntax.) - -Any number of parameters can be used at once. This parameterization: -\begin{lstlisting} - model<run,obs> # for run = 1..2 and obs = ship, buoy, plane -\end{lstlisting} -expands to these task names: -\begin{lstlisting} - model_run1_ship, model_run1_buoy, model_run1_plane, - model_run2_ship, model_run2_buoy, model_run2_plane -\end{lstlisting} - -Here's a simple but complete example suite: -\begin{lstlisting} -[cylc] - [[parameters]] - run = 1..2 -[scheduling] - [[dependencies]] - graph = "prep => model<run>" -[runtime] - [[model<run>]] - # ... -\end{lstlisting} -The result, post parameter expansion, is this: -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "prep => model_run1 & model_run2" -[runtime] - [[model_run1]] - # ... - [[model_run2]] - # ...
-\end{lstlisting} - -Here's a more complex graph using two parameters (\lstinline@[runtime]@ omitted): -\begin{lstlisting} -[cylc] - [[parameters]] - run = 1..2 - mem = cat, dog -[scheduling] - [[dependencies]] - graph = """prep => init<run> => model<run,mem> => - post<run,mem> => wrap<run> => done""" -\end{lstlisting} -%which expands to: -%\begin{lstlisting} -%[scheduling] -% [[dependencies]] -% graph = """ -% prep => init_run1 => model_run1_cat => post_run1_cat => wrap_run1 => done -% init_run1 => model_run1_dog => post_run2_dog => wrap_run1 -% prep => init_run2 => model_run2_cat => post_run2_cat => wrap_run2 => done -% init_run2 => model_run2_dog => post_run2_dog => wrap_run2""" -%\end{lstlisting} -Figure~\ref{fig-params-1} shows the result as visualized by \lstinline=cylc graph=. -\begin{figure} - \begin{center} - \includegraphics[width=10cm]{graphics/png/orig/params1.png} - \end{center} - \caption[Parameter expansion example.]{\scriptsize - Parameter expansion example.} - \label{fig-params-1} -\end{figure} - -\paragraph{Zero-Padded Integer Values} - -Integer parameter values are given a default template for generating task -suffixes that are zero-padded according to the longest size of their values. -For example, the default template for \lstinline@p = 9..10@ would be -\lstinline@_p%(p)02d@, so that \lstinline@foo<p>@ would become -\lstinline@foo_p09, foo_p10@. -If negative values are present in the parameter list, the -default template will include the sign. -For example, the default template for \lstinline@p = -1..1@ would be -\lstinline@_p%(p)+02d@, so that \lstinline@foo<p>@ would become -\lstinline@foo_p-1, foo_p+0, foo_p+1@. - -To get thicker padding and/or alternate suffixes, use a template. E.g.: - -\begin{lstlisting} -[cylc] - [[parameters]] - i = 1..9 - p = 3..14 - [[parameter templates]] - i = _i%(i)02d # suffixes = _i01, _i02, ..., _i09 - # A double-percent gives a literal percent character - p = %%p%(p)03d # suffixes = %p003, %p004, ..., %p013, %p014 -\end{lstlisting} - -\paragraph{Parameters as Full Task Names} - -Parameter values can be used as full task names, but the default template -should be overridden to remove the initial underscore. -For example: - -\begin{lstlisting} -[cylc] - [[parameters]] - i = 1..4 - obs = ship, buoy, plane - [[parameter templates]] - i = i%(i)d # task name must begin with an alphabetic character - obs = %(obs)s -[scheduling] - [[dependencies]] - graph = """ -foo => <i> # foo => i1 & i2 & i3 & i4 -<obs> => bar # ship & buoy & plane => bar -""" -\end{lstlisting} - -\subsubsection{Passing Parameter Values To Tasks} - -Parameter values are passed as environment variables to tasks generated by -parameter expansion. For example, if we have: - -\begin{lstlisting} -[cylc] - [[parameters]] - obs = ship, buoy, plane - run = 1..5 -[scheduling] - [[dependencies]] - graph = model<run,obs> -\end{lstlisting} - -Then task \lstinline=model_run2_ship= would get the following standard -environment variables: - -\begin{lstlisting} -# In a job script of an instance of the "model_run2_ship" task: -export CYLC_TASK_PARAM_run="2" -export CYLC_TASK_PARAM_obs="ship" -\end{lstlisting} - -These variables allow tasks to determine which member of a parameterized -group they are, and so to vary their behaviour accordingly. - -You can also define custom variables and string templates for parameter value -substitution.
For example, if we add this to the above configuration: - -\begin{lstlisting} -[runtime] - [[model]] - [[[parameter environment templates]]] - MYNAME = %(obs)sy-mc%(obs)sface - MYFILE = /path/to/run%(run)03d/%(obs)s -\end{lstlisting} - -Then task \lstinline=model_run2_ship= would get the following custom -environment variables: - -\begin{lstlisting} -# In a job script of an instance of the "model_run2_ship" task: -export MYNAME=shipy-mcshipface -export MYFILE=/path/to/run002/ship -\end{lstlisting} - -\subsubsection{Selecting Specific Parameter Values} - -Specific parameter values can be singled out in the graph and under -\lstinline=[runtime]= with the notation \lstinline@@ (for example). -Here's how to make a special task trigger off just the first of a -set of model runs: -\begin{lstlisting} -[cylc] - [[parameters]] - run = 1..5 -[scheduling] - [[dependencies]] - graph = """model => post_proc # general case - model => check_first_run # special case""" -[runtime] - [[model]] - # config for all "model" runs... - [[model - # special config (if any) for the first model run... - #... -\end{lstlisting} - -\subsubsection{Selecting Partial Parameter Ranges} - -The parameter notation does not currently support partial range selection such -as \lstinline@foo@, but you can achieve the same result by defining a -second parameter that covers the partial range and giving it the same expansion -template as the full-range parameter. For example: - -\begin{lstlisting} -[cylc] - [[parameters]] - run = 1..10 # 1, 2, ..., 10 - runx = 1..3 # 1, 2, 3 - [[parameter templates]] - run = _R%(run)02d # _R01, _R02, ..., _R10 - runx = _R%(runx)02d # _R01, _R02, _R03 -[scheduling] - [[dependencies]] - graph = """model => post - model => checkx""" -[runtime] - [[model]] - # ... - #... 
-\end{lstlisting} - - -\subsubsection{Parameter Offsets In The Graph} - -A negative offset notation \lstinline@@ is interpreted as the previous -value in the ordered list of parameter values, while a positive offset is -interpreted as the next value. For example, to split a model run into multiple -steps with each step depending on the previous one, either of these graphs: -\begin{lstlisting} - graph = "model => model" # for run = 1, 2, 3 - graph = "model => model" # for run = 1, 2, 3 -\end{lstlisting} -expands to: -\begin{lstlisting} - graph = """model_run1 => model_run2 - model_run2 => model_run3""" -# or equivalently: - graph = "model_run1 => model_run2 => model_run3" -\end{lstlisting} -And this graph: -\begin{lstlisting} - graph = "proc => proc" # for size = small, big, huge -\end{lstlisting} -expands to: -\begin{lstlisting} - graph = """proc_small => proc_big - proc_big => proc_huge""" -# or equivalently: - graph = "proc_small => proc_big => proc_huge" -\end{lstlisting} - -However, a quirk in the current system means that you should avoid mixing -conditional logic in these statements. For example, the following will do the -unexpected: - -\begin{lstlisting} - graph = foo & baz => foo # for m = cat, dog -\end{lstlisting} -currently expands to: -\begin{lstlisting} - graph = foo_cat & baz => foo_dog -# when users may expect it to be: -# graph = foo_cat => foo_dog -# graph = baz => foo_cat & foo_dog -\end{lstlisting} - -For the time being, writing out the logic explicitly will give you the correct -graph. - -\begin{lstlisting} - graph = """foo => foo # for m = cat, dog - baz => foo""" -\end{lstlisting} - -\subsubsection{Task Families And Parameterization} - -Task family members can be generated by parameter expansion: -\begin{lstlisting} -[runtime] - [[FAM]] - [[member]] - inherit = FAM -# Result: family FAM contains member_r1, member_r2, etc. 
-\end{lstlisting} - -Family names can be parameterized too, just like task names: -\begin{lstlisting} -[runtime] - [[RUN<r>]] - [[model<r>]] - inherit = RUN<r> - [[post_proc<r>]] - inherit = RUN<r> -# Result: family RUN_r1 contains model_r1 and post_proc_r1, -# family RUN_r2 contains model_r2 and post_proc_r2, etc. -\end{lstlisting} - -As described in Section~\ref{FamilyTriggers} family names can be used to -trigger all members at once: -\begin{lstlisting} - graph = "foo => FAMILY" -\end{lstlisting} -or to trigger off all members: -\begin{lstlisting} - graph = "FAMILY:succeed-all => bar" -\end{lstlisting} -or to trigger off any members: -\begin{lstlisting} - graph = "FAMILY:succeed-any => bar" -\end{lstlisting} - -If the members of \lstinline=FAMILY= were generated with parameters, you can -also trigger them all at once with parameter notation: -\begin{lstlisting} - graph = "foo => member<r>" -\end{lstlisting} -Similarly, to trigger off all members: -\begin{lstlisting} - graph = "member<r> => bar" - # (member<r>:fail etc., for other trigger types) -\end{lstlisting} - -Family names are still needed in the graph, however, to succinctly express -``succeed-any'' triggering semantics, and all-to-all or any-to-all triggering: -\begin{lstlisting} - graph = "FAM1:succeed-any => FAM2" -\end{lstlisting} -(Direct all-to-all and any-to-all family triggering is not recommended for -efficiency reasons though - see Section~\ref{EfficientInterFamilyTriggering}). - -For family {\em member-to-member} triggering use parameterized members.
-For example, if family \lstinline=OBS_GET= has members \lstinline=get<obs>= and -family \lstinline=OBS_PROC= has members \lstinline=proc<obs>= then this graph: -\begin{lstlisting} - graph = "get<obs> => proc<obs>" # for obs = ship, buoy, plane -\end{lstlisting} -expands to: -\begin{lstlisting} - get_ship => proc_ship - get_buoy => proc_buoy - get_plane => proc_plane -\end{lstlisting} - -\subsubsection{Parameterized Cycling} -\label{Parameterized Cycling} - -Two ways of constructing cycling systems are described and contrasted in -Section~\ref{Workflows For Cycling Systems}. For most purposes use of a proper -{\em cycling workflow} is recommended, wherein cylc incrementally generates the -date-time sequence and extends the workflow, potentially indefinitely, at run -time. For smaller systems of finite duration, however, parameter expansion -can be used to generate a sequence of pre-defined tasks as a proxy for cycling. - -Here's a cycling workflow of two-monthly model runs for one year, -with previous-instance model dependence (e.g.\ for model restart files): -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020-01 - final cycle point = 2020-12 - [[dependencies]] - [[[R1]]] # Run once, at the initial point. - graph = "prep => model" - [[[P2M]]] # Run at 2-month intervals between the initial and final points. - graph = "model[-P2M] => model => post_proc & archive" -[runtime] - [[model]] - script = "run-model $CYLC_TASK_CYCLE_POINT" -\end{lstlisting} - -And here's how to do the same thing with parameterized tasks: -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - [[parameters]] - chunk = 1..6 -[scheduling] - [[dependencies]] - graph = """prep => model<chunk=1> - model<chunk-1> => model<chunk> => - post_proc<chunk> & archive<chunk>""" -[runtime] - [[model<chunk>]] - script = """ -# Compute start date from chunk index and interval, then run the model. -INITIAL_POINT=2020-01 -INTERVAL_MONTHS=2 -OFFSET_MONTHS=$(( (CYLC_TASK_PARAM_chunk - 1)*INTERVAL_MONTHS )) -OFFSET=P${OFFSET_MONTHS}M # e.g.
P4M for chunk=3 -run-model $(cylc cyclepoint --offset=$OFFSET $INITIAL_POINT)""" -\end{lstlisting} - -The two workflows are shown together in Figure~\ref{fig-eg2}. They both achieve -the same result, and both can include special tasks at the start, end, or -anywhere in between. But as noted earlier the parameterized version has -several disadvantages: it must be finite in extent and not too large; the -date-time arithmetic has to be done by the user; and the full extent of the -workflow will be visible at all times as the suite runs. - -\begin{figure} - \begin{center} - \includegraphics[width=16cm]{graphics/png/orig/eg2-static.png} - \end{center} - \begin{center} - \includegraphics[width=10cm]{graphics/png/orig/eg2-dynamic.png} - \end{center} - \caption[Parameterized (top) and cycling (bottom) versions of the same - workflow.]{\scriptsize parameterized and cycling versions of the same - workflow. The first three cycle points are shown in the - cycling case. The parameterized case does not have ``cycle - points''.} - \label{fig-eg2} -\end{figure} - -Here's a yearly-cycling suite with four parameterized chunks in each cycle -point: -\begin{lstlisting} -[cylc] - [[parameters]] - chunk = 1..4 -[scheduling] - initial cycle point = 2020-01 - [[dependencies]] - [[[P1Y]]] - graph = """model => model - model[-P1Y] => model""" -\end{lstlisting} -Note the inter-cycle trigger that connects the first chunk in each cycle point -to the last chunk in the previous cycle point. Of course it would be simpler -to just use 3-monthly cycling: -\begin{lstlisting} -[scheduling] - initial cycle point = 2020-01 - [[dependencies]] - [[[P3M]]] - graph = "model[-P3M] => model" -\end{lstlisting} - -Here's a possible valid use-case for mixed cycling: consider a portable -date-time cycling workflow of model jobs that can each take too long to run on -some supported platforms. 
This could be handled without changing the cycling -structure of the suite by splitting the run (at each cycle point) into a -variable number of shorter steps, using more steps on less powerful hosts. - -\paragraph{Cycle Point And Parameter Offsets At Start-Up} - -In cycling workflows cylc ignores anything earlier than the suite initial -cycle point. So this graph: -\begin{lstlisting} - graph = "model[-P1D] => model" -\end{lstlisting} -simplifies at the initial cycle point to this: -\begin{lstlisting} - graph = "model" -\end{lstlisting} - -Similarly, parameter offsets are ignored if they extend beyond the start of the -parameter value list. So this graph: -\begin{lstlisting} - graph = "model => model" -\end{lstlisting} -simplifies for \lstinline@chunk=1@ to this: -\begin{lstlisting} - graph = "model_chunk1" -\end{lstlisting} - -Note however that the initial cut-off applies to every parameter list, but only -to cycle point sequences that start at the suite initial cycle point. Therefore -it may be somewhat easier to use parameterized cycling if you need multiple -date-time sequences {\em with different start points} in the same suite. We -plan to allow this sequence-start simplification for any date-time sequence in -the future, not just at the suite initial point, but it needs to be optional -because delayed-start cycling tasks sometimes need to trigger off earlier -cycling tasks. - -\subsection{Jinja2} -\label{Jinja2} - -{\em This section needs to be revised - the Parameterized Task feature - introduced in cylc-6.11.0 (see Section~\ref{Parameterized Tasks}) provides - a cleaner way to auto-generate tasks without coding messy Jinja2 loops.} - -Cylc has built in support for the Jinja2 template processor in suite -configurations. Jinja2 variables, mathematical expressions, loop control -structures, conditional logic, etc., are automatically processed to -generate the final suite configuration seen by cylc. 
- -The need for Jinja2 processing must be declared with a hash-bang -comment as the first line of the suite.rc file: -\begin{lstlisting} -#!jinja2 -# ... -\end{lstlisting} - -Potential uses for this include automatic generation of repeated groups -of similar tasks and dependencies, and inclusion or exclusion of entire -suite sections according to the value of a single flag. Consider a -large complicated operational suite and several related parallel test -suites with slightly different task content and structure (the parallel -suites, for instance, might take certain large input files from the -operation or the archive rather than downloading them again) - these can -now be maintained as a single master suite configuration that reconfigures -itself according to the value of a flag variable indicating the intended use. - -Template processing is the first thing done on parsing a suite -configuration so Jinja2 expressions can appear anywhere in the file (inside -strings and namespace headings, for example). - -Jinja2 is well documented at \url{http://jinja.pocoo.org/docs}, so here -we just provide an example suite that uses it. The meaning of the -embedded Jinja2 code should be reasonably self-evident to anyone familiar -with standard programming techniques. 
- -\begin{figure} - \begin{center} - \includegraphics[width=10cm]{graphics/png/orig/jinja2-ensemble-graph.png} - \end{center} - \caption[The Jinja2 ensemble example suite graph.]{\scriptsize - The Jinja2 ensemble example suite graph.} - \label{fig-jinja2-ensemble} -\end{figure} - -The \lstinline=jinja2.ensemble= example, graphed in -Figure~\ref{fig-jinja2-ensemble}, shows an ensemble of similar tasks -generated using Jinja2: -\lstset{language=suiterc} -\begin{lstlisting} -#!jinja2 -{% set N_MEMBERS = 5 %} -[scheduling] - [[dependencies]] - graph = """{# generate ensemble dependencies #} - {% for I in range( 0, N_MEMBERS ) %} - foo => mem_{{ I }} => post_{{ I }} => bar - {% endfor %}""" -\end{lstlisting} -Here is the generated suite configuration, after Jinja2 processing: -\lstset{language=suiterc} -\begin{lstlisting} -#!jinja2 -[scheduling] - [[dependencies]] - graph = """ - foo => mem_0 => post_0 => bar - foo => mem_1 => post_1 => bar - foo => mem_2 => post_2 => bar - foo => mem_3 => post_3 => bar - foo => mem_4 => post_4 => bar - """ -\end{lstlisting} - -And finally, the \lstinline=jinja2.cities= example uses variables, -includes or excludes special cleanup tasks according to the value of a -logical flag, and it automatically generates all dependencies and family -relationships for a group of tasks that is repeated for each city in the -suite. To add a new city and associated tasks and dependencies simply -add the city name to list at the top of the file. The suite is graphed, -with the New York City task family expanded, in -Figure~\ref{fig-jinja2-cities}. 
-\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/jinja2/cities/suite.rc} -\lstset{language=transcript} - -\begin{figure} - \begin{center} - \includegraphics[width=16cm]{graphics/png/orig/jinja2-suite-graph.png} - \end{center} - \caption[Jinja2 cities example suite graph.]{\scriptsize - The Jinja2 cities example suite graph, with the - New York City task family expanded.} - \label{fig-jinja2-cities} -\end{figure} - -\subsubsection{Accessing Environment Variables With Jinja2} - -This functionality is not provided by Jinja2 by default, but cylc -automatically imports the user environment into the template's global namespace -(see~\ref{CustomJinja2Filters}) in a dictionary structure called -{\em environ}. A usage example: -\begin{lstlisting} -#!Jinja2 -#... -[runtime] - [[root]] - [[[environment]]] - SUITE_OWNER_HOME_DIR_ON_SUITE_HOST = {{environ['HOME']}} -\end{lstlisting} -This example emphasizes that {\em the environment is read on the suite -host at the time the suite configuration is parsed} - it is not, for -instance, read at task run time on the task host. - -\subsubsection{Custom Jinja2 Filters, Tests and Globals} -\label{CustomJinja2Filters} - -Jinja2 has three different namespaces used to separate ``globals'', -``filters'' and ``tests''. Globals are template-wide accessible variables -and functions. Cylc extends this namespace with the ``environ'' dictionary and -the ``raise'' and ``assert'' functions for raising exceptions -(see~\ref{Jinja2RaisingExceptions}). - -Filters can be used to modify variable values and are applied using pipe -notation. For example, the built-in \lstinline=trim= filter strips leading -and trailing white space from a string: -\lstset{language=suiterc} -\begin{lstlisting} -{% set MyString = " dog " %} -{{ MyString | trim() }} # "dog" -\end{lstlisting} - -Additionally, variable values can be tested using the ``is'' keyword followed by -the name of the test, e.g.\ \lstinline=VARIABLE is defined=.
-See the official Jinja2 documentation for available built-in globals, filters -and tests. - -Cylc also supports custom Jinja2 globals, filters and tests. A custom global, -filter or test is a single Python function in a source file with the same name -as the function (plus ``.py'' extension) and stored in one of the following -locations: -\begin{myitemize} - \item \lstinline=<cylc-dir>/lib/Jinja2[namespace]/= - \item \lstinline=[suite configuration directory]/Jinja2[namespace]/= - \item \lstinline=$HOME/.cylc/Jinja2[namespace]/= -\end{myitemize} -where \lstinline=[namespace]/= is one of \lstinline=Globals/=, -\lstinline=Filters/= or \lstinline=Tests/=. - -In the argument list of a filter or test function, the first argument is -the variable value to be ``filtered'' or ``tested'', respectively, and -subsequent arguments can be whatever else is needed. Currently there are three -custom filters: - - -\paragraph{pad} - -The ``pad'' filter is for padding string values to some -constant length with a fill character - useful for generating task names -and related values in ensemble suites: - -\lstset{language=suiterc} -\begin{lstlisting} -{% for i in range(0,100) %} # 0, 1, ..., 99 - {% set j = i | pad(2,'0') %} - A_{{j}} # A_00, A_01, ..., A_99 -{% endfor %} -\end{lstlisting} - -\paragraph{strftime} - -The ``strftime'' filter can be used to format ISO8601 date-time strings using -an strftime string. - -\lstset{language=suiterc} -\begin{lstlisting} -{% set START_CYCLE = '10661014T08+01' %} -{{ START_CYCLE | strftime('%H') }} # 08 -\end{lstlisting} - -Examples: - -\begin{myitemize} - \item \lstinline={{START_CYCLE | strftime('%Y')}}= - 1066 - \item \lstinline={{START_CYCLE | strftime('%m')}}= - 10 - \item \lstinline={{START_CYCLE | strftime('%d')}}= - 14 - \item \lstinline={{START_CYCLE | strftime('%H:%M:%S %z')}}= - 08:00:00 +01 -\end{myitemize} - -It is also possible to parse non-standard date-time strings by passing a -strptime string as the second argument.
- -Examples: - -\begin{myitemize} - \item \lstinline={{'12,30,2000' | strftime('%m', '%m,%d,%Y')}}= - 12 - \item \lstinline={{'1066/10/14 08:00:00' | strftime('%Y%m%dT%H', '%Y/%m/%d %H:%M:%S')}}= - 10661014T08 -\end{myitemize} - -\paragraph{duration\_as} - -The ``duration\_as'' filter can be used to format ISO8601 duration -strings as a floating-point number in one of several different units. Units -for the conversion can be specified in a case-insensitive short or long -form: -\begin{myitemize} - \item Seconds - ``s'' or ``seconds'' - \item Minutes - ``m'' or ``minutes'' - \item Hours - ``h'' or ``hours'' - \item Days - ``d'' or ``days'' - \item Weeks - ``w'' or ``weeks'' -\end{myitemize} - -Within the suite, this becomes - -\lstset{language=suiterc} -\begin{lstlisting} -{% set CYCLE_INTERVAL = 'PT1D' %} -{{ CYCLE_INTERVAL | duration_as('h') }} # 24.0 -{% set CYCLE_SUBINTERVAL = 'PT30M' %} -{{ CYCLE_SUBINTERVAL | duration_as('hours') }} # 0.5 -{% set CYCLE_INTERVAL = 'PT1D' %} -{{ CYCLE_INTERVAL | duration_as('s') }} # 86400.0 -{% set CYCLE_SUBINTERVAL = 'PT30M' %} -{{ CYCLE_SUBINTERVAL | duration_as('seconds') }} # 1800.0 -\end{lstlisting} - -While the filtered value is a floating-point number, it is often required to -supply an integer to suite entities (e.g.\ environment variables) that require -it. This is accomplished by chaining filters: - -\begin{myitemize} - \item \lstinline={{CYCLE_INTERVAL | duration_as('h') | int}}= - 24 - \item \lstinline={{CYCLE_SUBINTERVAL | duration_as('h') | int}}= - 0 - \item \lstinline={{CYCLE_INTERVAL | duration_as('s') | int}}= - 86400 - \item \lstinline={{CYCLE_SUBINTERVAL | duration_as('s') | int}}= - 1800 -\end{myitemize} - - -\subsubsection{Associative Arrays In Jinja2} - -Associative arrays ({\em dicts} in Python) can be very useful.
-Here's an example, from \\* -\lstinline=/etc/examples/jinja2/dict=: - -\lstset{language=suiterc} -\begin{lstlisting} -#!Jinja2 -{% set obs_types = ['airs', 'iasi'] %} -{% set resource = { 'airs':'ncpus=9', 'iasi':'ncpus=20' } %} - -[scheduling] - [[dependencies]] - graph = OBS -[runtime] - [[OBS]] - [[[job]]] - batch system = pbs - {% for i in obs_types %} - [[ {{i}} ]] - inherit = OBS - [[[directives]]] - -I = {{ resource[i] }} - {% endfor %} - \end{lstlisting} - -Here's the result: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc get-suite-config -i [runtime][airs]directives SUITE --I = ncpus=9 -\end{lstlisting} - -\subsubsection{Jinja2 Default Values And Template Inputs} - -The values of Jinja2 variables can be passed in from the cylc command -line rather than hardwired in the suite configuration. -Here's an example, from \\* -\lstinline=/etc/examples/jinja2/defaults=: - -\lstset{language=suiterc} -\begin{lstlisting} -#!Jinja2 - -[meta] - - title = "Jinja2 example: use of defaults and external input" - - description = """ -The template variable FIRST_TASK must be given on the cylc command line -using --set or --set-file=FILE; two other variables, LAST_TASK and -N_MEMBERS can be set similarly, but if not they have default values.""" - -{% set LAST_TASK = LAST_TASK | default( 'baz' ) %} -{% set N_MEMBERS = N_MEMBERS | default( 3 ) | int %} - -{# input of FIRST_TASK is required - no default #} - -[scheduling] - initial cycle point = 20100808T00 - final cycle point = 20100816T00 - [[dependencies]] - [[[0]]] - graph = """{{ FIRST_TASK }} => ENS - ENS:succeed-all => {{ LAST_TASK }}""" -[runtime] - [[ENS]] -{% for I in range( 0, N_MEMBERS ) %} - [[ mem_{{ I }} ]] - inherit = ENS -{% endfor %} -\end{lstlisting} - -Here's the result: - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc list SUITE -Jinja2 Template Error -'FIRST_TASK' is undefined -cylc-list foo failed: 1 - -$ cylc list --set FIRST_TASK=bob foo -bob -baz -mem_2 -mem_1 -mem_0 - -$ cylc list 
--set FIRST_TASK=bob --set LAST_TASK=alice foo -bob -alice -mem_2 -mem_1 -mem_0 - -$ cylc list --set FIRST_TASK=bob --set N_MEMBERS=10 foo -mem_9 -mem_8 -mem_7 -mem_6 -mem_5 -mem_4 -mem_3 -mem_2 -mem_1 -mem_0 -baz -bob -\end{lstlisting} - -\lstset{language=suiterc} -Note also that -\lstinline@cylc view --set FIRST_TASK=bob --jinja2 SUITE@ will show the -suite with the Jinja2 variables as set. - -{\em Note:} suites started with template variables set on the command -line will {\em restart} with the same settings. However, you can set -them again on the \lstinline=cylc restart= command line if they need to -be overridden. - -\subsubsection{Jinja2 Variable Scope} - -Jinja2 variable scoping rules may be surprising. Variables set inside a -{\em for loop} block, for instance, are not accessible outside of the block, -so the following will print \lstinline=# FOO is False=, not \lstinline=# FOO is True=: - -\lstset{language=suiterc} -\begin{lstlisting} -{% set FOO = false %} -{% for item in items %} - {% if item.check_something() %} - {% set FOO = true %} - {% endif %} -{% endfor %} -# FOO is {{FOO}} -\end{lstlisting} - -Jinja2 documentation suggests using alternative constructs like the loop else -block or the special \lstinline=loop= variable. More complex use cases can be -handled using \lstinline=namespace= objects which allow changes to be propagated -across scopes: - -\lstset{language=suiterc} -\begin{lstlisting} -{% set ns = namespace(foo=false) %} -{% for item in items %} - {% if item.check_something() %} - {% set ns.foo = true %} - {% endif %} -{% endfor %} -# FOO is {{ns.foo}} -\end{lstlisting} - -For detail, see: -\href{http://jinja.pocoo.org/docs/2.10/templates/#assignments}{Jinja2 Template Designer Documentation > Assignments} - -\subsubsection{Raising Exceptions} -\label{Jinja2RaisingExceptions} - -Cylc provides two functions for raising exceptions using Jinja2. These -exceptions are raised when the suite.rc file is loaded and will prevent a suite -from running.
- -Note: These functions must be contained within \lstinline={{= Jinja2 -blocks as opposed to \lstinline={%= blocks. - -\paragraph{Raise} - -The ``raise'' function will result in an error containing the provided text. - -\lstset{language=suiterc} -\begin{lstlisting} -{% if not VARIABLE is defined %} - {{ raise('VARIABLE must be defined for this suite.') }} -{% endif %} -\end{lstlisting} - -\paragraph{Assert} - -The ``assert'' function will raise an exception containing the text provided in -the second argument providing that the first argument evaluates as False. The -following example is equivalent to the ``raise'' example above. - -\lstset{language=suiterc} -\begin{lstlisting} -{{ assert(VARIABLE is defined, 'VARIABLE must be defined for this suite.') }} -\end{lstlisting} - -\subsubsection{Importing additional Python modules} - -Jinja2 allows variable and macro definitions to be gathered in a separate template -that can be imported into (and thus shared among) other templates. - -\lstset{language=suiterc} -\begin{lstlisting} -{% import "suite-utils.rc" as utils %} -{% from "suite-utils.rc" import VARIABLE as ALIAS %} -{{ utils.VARIABLE is equalto(ALIAS) }} -\end{lstlisting} - -Cylc extends this functionality to allow import of arbitrary Python modules. - -\lstset{language=suiterc} -\begin{lstlisting} -{% from "itertools" import product %} -[runtime] -{% for group, member in product(['a', 'b'], [0, 1, 2]) %} - [[{{group}}_{{member}}]] -{% endfor %} -\end{lstlisting} - -For better clarity and disambiguation Python modules can be prefixed with -\lstinline=__python__=: - -\lstset{language=suiterc} -\begin{lstlisting} -{% from "__python__.itertools" import product %} -\end{lstlisting} - -\subsection{EmPy} -\label{EmPy} - -In addition to Jinja2, Cylc supports the EmPy template processor in suite -configurations.
Similarly to Jinja2, EmPy provides variables, mathematical -expressions, loop control structures, conditional logic, etc., that are expanded -to generate the final suite configuration seen by Cylc. See -\href{http://www.alcyone.com/software/empy}{EmPy documentation} for more details -on its templating features and how to use them. Please note that EmPy is not -bundled with Cylc and must be installed separately. It should be available to -Python through the standard \lstinline[language=Python]=import em=. Please also -note that there is another Python package called ``em'' that provides -a conflicting module of the same name. You can run the -\lstinline=cylc check-software= command to check your installation. - -The need for EmPy processing must be declared with a hash-bang comment as -the first line of the suite.rc file: -\begin{lstlisting} -#!empy -# ... -\end{lstlisting} - -An example suite \lstinline=empy.cities= demonstrating its use is shown below. -It is a translation of the \lstinline=jinja2.cities= example from -Section~\ref{Jinja2} and can be directly compared against it. - -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/empy/cities/suite.rc} -\lstset{language=transcript} - -For basic usage the difference between Jinja2 and EmPy amounts to a different -markup syntax with little else to distinguish them. EmPy might be preferable, -however, in cases where more complicated processing logic has to be -implemented. - -EmPy is a system for embedding Python expressions and statements in template -text. It makes the full power of the Python language and its ecosystem easily -accessible from within the template. 
This might be desirable for several -reasons: - -\begin{myitemize} - \item no need to learn different language and its idiosyncrasies just for - writing template logic - \item availability of lambda functions, list and dictionary comprehensions - can make template code smaller and more readable compared to Jinja2 - \item natural and straightforward integration with Python package ecosystem - \item no two-language barrier between writing template logic and processing - extensions makes it easier to refactor and maintain the template code - as its complexity grows -- inline pieces of Python code can be - gathered into subroutines and eventually into separate modules and - packages in a seamless manner -\end{myitemize} - -\subsection{Omitting Tasks At Runtime} - -It is sometimes convenient to omit certain tasks from the suite at -runtime without actually deleting their definitions from the suite. - -Defining [runtime] properties for tasks that do not appear in the suite -graph results in verbose-mode validation warnings that the tasks are -disabled. They cannot be used because the suite graph is what defines -their dependencies and valid cycle points. Nevertheless, it is legal to -leave these orphaned runtime sections in the suite configuration because it -allows you to temporarily remove tasks from the suite by simply -commenting them out of the graph. - -To omit a task from the suite at runtime but still leave it fully -defined and available for use (by insertion or \lstinline=cylc submit=) -use one or both of [scheduling][[special task]] lists, {\em include at -start-up} or {\em exclude at start-up} (documented in~\ref{IASU} -and~\ref{EASU}). Then the graph still defines the -validity of the tasks and their dependencies, but they are not actually -loaded into the suite at start-up. Other tasks that depend on the -omitted ones, if any, will have to wait on their insertion at a later -time or otherwise be triggered manually. 
- -Finally, with Jinja2 (\ref{Jinja2}) you can radically alter -suite structure by including or excluding tasks from the [scheduling] -and [runtime] sections according to the value of a single logical flag -defined at the top of the suite. -\subsection{Naked Dummy Tasks And Strict Validation} - -A {\em naked dummy task} appears in the suite graph but has no -explicit runtime configuration section. Such tasks automatically -inherit the default ``dummy task'' configuration from the root -namespace. This is very useful because it allows functional suites to -be mocked up quickly for test and demonstration purposes by simply -defining the graph. It is somewhat dangerous, however, because there -is no way to distinguish an intentional naked dummy task from one -generated by typographic error: misspelling a task name in the graph -results in a new naked dummy task replacing the intended task in the -affected trigger expression; and misspelling a task name in a runtime -section heading results in the intended task becoming a dummy task -itself (by divorcing it from its intended runtime config section). - -To avoid this problem any dummy task used in a real suite should not be -naked - i.e.\ it should have an explicit entry under the runtime -section of the suite configuration, even if the section is empty. This -results in exactly the same dummy task behaviour, via implicit -inheritance from root, but it allows use of -\lstinline=cylc validate --strict= -to catch errors in task names by failing the suite if any naked dummy -tasks are detected. - -\section{Task Implementation} -\label{TaskImplementation} - -Existing scripts and executables can be used as cylc tasks without modification -so long as they return standard exit status - zero on success, non-zero -for failure - and do not spawn detaching processes internally -(see~\ref{DetachingJobs}). 
- -\subsection{Task Job Scripts} -\label{JobScripts} - -When the suite daemon determines that a task is ready to run it generates a -{\em job script} that embodies the task runtime configuration in the suite.rc -file, and submits it to the configured job host and batch system -(see~\ref{TaskJobSubmission}). - -Task job scripts are written to the suite's job log directory. They can be -printed with \lstinline=cylc cat-log= or generated and printed with -\lstinline=cylc jobscript=. - -\subsection{Inlined Tasks} - -Task {\em script} items can be multi-line strings of \lstinline=bash= code, so -many tasks can be entirely inlined in the suite.rc file. For anything more than -a few lines of code, however, we recommend using external shell scripts to allow -independent testing, re-use, and shell mode editing. - -\subsection{Task Messages} - -Task messages can be sent back to the suite server program to report completed -outputs and arbitrary messages of different severity levels. - -Some types of message - in addition to events like task failure - can -optionally trigger execution of event handlers in the suite server program -(see~\ref{EventHandling}). - -Normal severity messages are printed to \lstinline=job.out= and logged by the -suite server program: -\lstset{language=bash} -\begin{lstlisting} -cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ - "Hello from ${CYLC_TASK_ID}" -\end{lstlisting} - -CUSTOM severity messages are printed to \lstinline=job.out=, logged by the -suite server program, and can be used to trigger {\em custom} event handlers: -\lstset{language=bash} -\begin{lstlisting} -cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ - "CUSTOM:data available for ${CYLC_TASK_CYCLE_POINT}" -\end{lstlisting} -Custom severity messages and event handlers can be used to signal special -events that are neither routine information nor an error condition, such as -production of a particular data file. 
Task output messages, used for triggering -other tasks, can also be sent with custom severity if need be. - -WARNING severity messages are printed to \lstinline=job.err=, logged by the -suite server program, and can be passed to {\em warning} event handlers: -\begin{lstlisting} -cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ - "WARNING:Uh-oh, something's not right here." -\end{lstlisting} - -CRITICAL severity messages are printed to \lstinline=job.err=, logged by the -suite server program, and can be passed to {\em critical} event handlers: -\begin{lstlisting} -cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ - "CRITICAL:ERROR occurred in process X!" -\end{lstlisting} - -\subsection{Aborting Job Scripts on Error} - -Task job scripts use \lstinline=set -e= to abort on any error, and -trap ERR, EXIT, and SIGTERM to send task failed messages back to the -suite server program before aborting. Other scripts called from job scripts -should therefore abort with standard non-zero exit status on error, to trigger -the job script error trap. - -To prevent a command that is expected to generate a non-zero exit status from -triggering the exit trap, protect it with a control statement such as: -\lstset{language=bash} -\begin{lstlisting} -if cmp FILE1 FILE2; then - : # success: do stuff -else - : # failure: do other stuff -fi -\end{lstlisting} - -Task job scripts also use \lstinline=set -u= to abort on referencing any -undefined variable (useful for picking up typos); and \lstinline=set -o pipefail= -to abort if any part of a pipe fails (by default the shell only returns the -exit status of the final command in a pipeline). - -\subsubsection{Custom Failure Messages} - -Critical events normally warrant aborting a job script rather than just sending -a message. 
As described just above, \lstinline=exit 1= or any failing command -not protected by the surrounding scripting will cause a job script to abort and -report failure to the suite server program, potentially triggering a -{\em failed} task event handler. - -For failures detected by the scripting you could send a critical message back -before aborting, potentially triggering a {\em critical} task event handler: -\begin{lstlisting} -if ! /bin/false; then - cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ - "CRITICAL:ERROR: /bin/false failed!" - exit 1 -fi -\end{lstlisting} - -To abort a job script with a custom message that can be passed to a {\em -failed} task event handler, use the built-in \lstinline=cylc__job_abort= shell -function: -\begin{lstlisting} -if ! /bin/false; then - cylc__job_abort "ERROR: /bin/false failed!" -fi -\end{lstlisting} - -\subsection{Avoid Detaching Processes} -\label{DetachingJobs} - -\lstset{language=transcript} -If a task script starts background sub-processes and does not wait on them, or -internally submits jobs to a batch scheduler and then exits immediately, the -detached processes will not be visible to cylc and the task will appear to -finish when the top-level script finishes. You will need to modify scripts -like this to make them execute all sub-processes in the foreground (or use the -shell \lstinline=wait= command to wait on them before exiting) and to prevent -job submission commands from returning before the job completes (e.g.\ -\lstinline=llsubmit -s= for Loadleveler, -\lstinline=qsub -sync yes= for Sun Grid Engine, and -\lstinline@qsub -W block=true@ for PBS). 
- -If this is not possible - perhaps you don't have control over the script -or can't work out how to fix it - one alternative approach is to use another -task to repeatedly poll for the results of the detached processes: -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "model => checker => post-proc" -[runtime] - [[model]] - # Uh-oh, this script does an internal job submission to run model.exe: - script = "run-model.sh" - [[checker]] - # Fail and retry every minute (for 10 tries at the most) if model's - # job.done indicator file does not exist yet. - script = "[[ ! -f $RUN_DIR/job.done ]] && exit 1" - [[[job]]] - execution retry delays = 10 * PT1M -\end{lstlisting} - -\section{Task Job Submission and Management} -\label{TaskJobSubmission} - -For the requirements a command, script, or program, must fulfill in order -to function as a cylc task, see~\ref{TaskImplementation}. -This section explains how tasks are submitted by the suite server program when -they are ready to run, and how to define new batch system handlers. - -When a task is ready cylc generates a job script (see~\ref{JobScripts}). The -job script is submitted to run by the {\em batch system} chosen for -the task. Different tasks can use different batch systems. Like -other runtime properties, you can set a suite default batch system and -override it for specific tasks or families: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[root]] # suite defaults - [[[job]]] - batch system = loadleveler - [[foo]] # just task foo - [[[job]]] - batch system = at -\end{lstlisting} - -\subsection{Supported Job Submission Methods} -\label{AvailableMethods} - -Cylc supports a number of commonly used batch systems. -See~\ref{CustomJobSubmissionMethods} for how to add new job -submission methods. - -\subsubsection{background} - -Runs task job scripts as Unix background processes. - -If an execution time limit is specified for a task, its job will be wrapped -by the \lstinline=timeout= command. 
- -\subsubsection{at} - -Submits task job scripts to the rudimentary Unix \lstinline=at= scheduler. The -\lstinline=atd= daemon must be running. - -If an execution time limit is specified for a task, its job will be wrapped -by the \lstinline=timeout= command. - -\subsubsection{loadleveler} - -Submits task job scripts to loadleveler by the \lstinline=llsubmit= command. -Loadleveler directives can be provided in the suite.rc file: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my_task]] - [[[job]]] - batch system = loadleveler - execution time limit = PT10M - [[[directives]]] - foo = bar - baz = qux -\end{lstlisting} -These are written to the top of the task job script like this: -\lstset{language=bash} -\begin{lstlisting} -#!/bin/bash -# DIRECTIVES -# @ foo = bar -# @ baz = qux -# @ wall_clock_limit = 660,600 -# @ queue -\end{lstlisting} - -If restart=yes is specified as a directive for loadleveler, the job will -automatically trap SIGUSR1, which loadleveler may use to preempt the job. On -trapping SIGUSR1, the job will inform the suite that it has been vacated by -loadleveler. This will put it back to the submitted state, until it starts -running again. - -If \lstinline=execution time limit= is specified, it is used to generate the -\lstinline=wall_clock_limit= directive. The setting is assumed to be the soft -limit. The hard limit will be set by adding an extra minute to the soft limit. -Do not specify the \lstinline=wall_clock_limit= directive explicitly if -\lstinline=execution time limit= is specified. Otherwise, the execution time -limit known by the suite may be out of sync with what is submitted to the batch -system. - -\subsubsection{lsf} - -Submits task job scripts to IBM Platform LSF by the \lstinline=bsub= command. 
-LSF directives can be provided in the suite.rc file: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my_task]] - [[[job]]] - batch system = lsf - execution time limit = PT10M - [[[directives]]] - -q = foo -\end{lstlisting} -These are written to the top of the task job script like this: -\lstset{language=bash} -\begin{lstlisting} -#!/bin/bash -# DIRECTIVES -#BSUB -q = foo -#BSUB -W = 10 -\end{lstlisting} - -If \lstinline=execution time limit= is specified, it is used to generate the -\lstinline=-W= directive. Do not specify the \lstinline=-W= directive -explicitly if \lstinline=execution time limit= is specified. Otherwise, the -execution time limit known by the suite may be out of sync with what is -submitted to the batch system. - -\subsubsection{pbs} - -Submits task job scripts to PBS (or Torque) by the \lstinline=qsub= command. -PBS directives can be provided in the suite.rc file: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my_task]] - [[[job]]] - batch system = pbs - execution time limit = PT1M - [[[directives]]] - -V = - -q = foo - -l nodes = 1 -\end{lstlisting} -These are written to the top of the task job script like this: -\lstset{language=bash} -\begin{lstlisting} -#!/bin/bash -# DIRECTIVES -#PBS -V -#PBS -q foo -#PBS -l nodes=1 -#PBS -l walltime=60 -\end{lstlisting} - -If \lstinline=execution time limit= is specified, it is used to generate the -\lstinline=-l walltime= directive. Do not specify the \lstinline=-l walltime= -directive explicitly if \lstinline=execution time limit= is specified. -Otherwise, the execution time limit known by the suite may be out of sync with -what is submitted to the batch system. - -\subsubsection{moab} - -Submits task job scripts to the Moab workload manager by the \lstinline=msub= -command. 
Moab directives can be provided in the suite.rc file; the syntax is -very similar to PBS: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my_task]] - [[[job]]] - batch system = moab - execution time limit = PT1M - [[[directives]]] - -V = - -q = foo - -l nodes = 1 -\end{lstlisting} -These are written to the top of the task job script like this: -\lstset{language=bash} -\begin{lstlisting} -#!/bin/bash -# DIRECTIVES -#PBS -V -#PBS -q foo -#PBS -l nodes=1 -#PBS -l walltime=60 -\end{lstlisting} -(Moab understands \lstinline=#PBS= directives). - -If \lstinline=execution time limit= is specified, it is used to generate the -\lstinline=-l walltime= directive. Do not specify the \lstinline=-l walltime= -directive explicitly if \lstinline=execution time limit= is specified. -Otherwise, the execution time limit known by the suite may be out of sync with -what is submitted to the batch system. - -\subsubsection{sge} - -Submits task job scripts to Sun/Oracle Grid Engine by the \lstinline=qsub= -command. SGE directives can be provided in the suite.rc file: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my_task]] - [[[job]]] - batch system = sge - execution time limit = P1D - [[[directives]]] - -cwd = - -q = foo - -l h_data = 1024M - -l h_rt = 24:00:00 -\end{lstlisting} -These are written to the top of the task job script like this: -\lstset{language=bash} -\begin{lstlisting} -#!/bin/bash -# DIRECTIVES -#$ -cwd -#$ -q foo -#$ -l h_data=1024M -#$ -l h_rt=24:00:00 -\end{lstlisting} - -If \lstinline=execution time limit= is specified, it is used to generate the -\lstinline=-l h_rt= directive. Do not specify the \lstinline=-l h_rt= -directive explicitly if \lstinline=execution time limit= is specified. -Otherwise, the execution time limit known by the suite may be out of sync with -what is submitted to the batch system. - -\subsubsection{slurm} - -Submits task job scripts to Simple Linux Utility for Resource Management by the -\lstinline=sbatch= command. 
SLURM directives can be provided in the suite.rc -file (note that since not all SLURM commands have a short form, cylc requires -the long form directives): -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my_task]] - [[[job]]] - batch system = slurm - execution time limit = PT1H - [[[directives]]] - --nodes = 5 - --account = QXZ5W2 -\end{lstlisting} -These are written to the top of the task job script like this: -\lstset{language=bash} -\begin{lstlisting} -#!/bin/bash -#SBATCH --nodes=5 -#SBATCH --time=60:00 -#SBATCH --account=QXZ5W2 -\end{lstlisting} - -If \lstinline=execution time limit= is specified, it is used to generate the -\lstinline=--time= directive. Do not specify the \lstinline=--time= -directive explicitly if \lstinline=execution time limit= is specified. -Otherwise, the execution time limit known by the suite may be out of sync with -what is submitted to the batch system. - -\subsubsection{Default Directives Provided} - -For batch systems that use job file directives (PBS, Loadleveler, -etc.) default directives are provided to set the job name, stdout and stderr -file paths, and the execution time limit (if specified). - -Cylc constructs the job name string using a combination of the task ID and the -suite name. PBS fails a job submit if the job name in \lstinline=-N name= is -too long. For version 12 or below, this is 15 characters. For version 13, this -is 236 characters. The default setting will truncate the job name string to 15 -characters. If you have PBS 13 at your site, you should modify your site's -global configuration file to allow the job name to be longer. (See also -Section~\ref{JobNameLengthMaximum}.) For example: - -\begin{lstlisting} -[hosts] - [[myhpc*]] - [[[batch systems]]] - [[[[pbs]]]] - # PBS 13 - job name length maximum = 236 -\end{lstlisting} - -\subsubsection{Directives Section Quirks (PBS, SGE, ...) 
} - -To specify an option with no argument, such as \lstinline=-V= in PBS or -\lstinline=-cwd= in SGE you must give a null string as the directive value in -the suite.rc file. - -The left hand side of a setting (i.e.\ the string before the first equal sign) -must be unique. To specify multiple values using an option such as -\lstinline=-l= in PBS, SGE, etc., either specify all items in a single -line: - -\begin{lstlisting} --l=select=28:ncpus=36:mpiprocs=18:ompthreads=2:walltime=12:00:00 -\end{lstlisting} - -(Left hand side is \lstinline=-l=. A second \lstinline@-l=...@ line will -override the first.) - -Or separate the items (note: no equal sign after \lstinline=-l=): - -\begin{lstlisting} --l select=28 --l ncpus=36 --l mpiprocs=18 --l ompthreads=2 --l walltime=12:00:00 -\end{lstlisting} - -(Left hand sides are now \lstinline=-l select=, \lstinline=-l ncpus=, etc.) - -\subsection{Task stdout And stderr Logs} -\label{WhitherStdoutAndStderr} - -When a task is ready to run cylc generates a filename root to be used -for the task job script and log files. The filename contains the task -name, cycle point, and a submit number that increments if the same task is -re-triggered multiple times: - -\lstset{language=bash} -\begin{lstlisting} -# task job script: -~/cylc-run/tut/oneoff/basic/log/job/1/hello/01/job -# task stdout: -~/cylc-run/tut/oneoff/basic/log/job/1/hello/01/job.out -# task stderr: -~/cylc-run/tut/oneoff/basic/log/job/1/hello/01/job.err -\end{lstlisting} - -How the stdout and stderr streams are directed into these files depends -on the batch system. The \lstinline=background= method just uses -appropriate output redirection on the command line, as shown above. The -\lstinline=loadleveler= method writes appropriate directives to the job -script that is submitted to loadleveler. 
- -Cylc obviously has no control over the stdout and stderr output from -tasks that do their own internal output management (e.g.\ tasks -that submit internal jobs and direct the associated output to other -files). For less internally complex tasks, however, the files referred -to here will be complete task job logs. - -Some batch systems, such as \lstinline=pbs=, redirect a job's stdout -and stderr streams to a separate cache area while the job is running. The -contents are only copied to the normal locations when the job completes. This -means that \lstinline=cylc cat-log= or the gcylc GUI will be unable to find the -job's stdout and stderr streams while the job is running. Some sites with these -batch systems are known to provide commands for viewing and/or -tail-follow a job's stdout and stderr streams that are redirected to these -cache areas. If this is the case at your site, you can configure cylc to make -use of the provided commands by adding some settings to the global site/user -config. E.g.: - -\begin{lstlisting} -[hosts] - [[HOST]] # <= replace this with a real host name - [[[batch systems]]] - [[[[pbs]]]] - err tailer = qcat -f -e \%(job_id)s - out tailer = qcat -f -o \%(job_id)s - err viewer = qcat -e \%(job_id)s - out viewer = qcat -o \%(job_id)s -\end{lstlisting} - -\subsection{Overriding The Job Submission Command} -\label{CommandTemplate} - -\lstset{language=suiterc} -To change the form of the actual command used to submit a job you do not -need to define a new batch system handler; just override the -\lstinline=command template= in the relevant job submission sections of -your suite.rc file: -\begin{lstlisting} -[runtime] - [[root]] - [[[job]]] - batch system = loadleveler - # Use '-s' to stop llsubmit returning - # until all job steps have completed: - batch submit command template = llsubmit -s %(job)s -\end{lstlisting} -As explained in~\ref{SuiteRCReference} -the template's \%(job)s will be substituted by the job file path. 
- -\subsection{Job Polling} - -For supported batch systems, one-way polling can be used to determine actual -job status: the suite server program executes a process on the task host, by -non-interactive ssh, to interrogate the batch queueing system there, and to -read a {\em status file} that is automatically generated by the task job script -as it runs. - -Polling may be required to update the suite state correctly after unusual -events such as a machine being rebooted with tasks running on it, or network -problems that prevent task messages from getting back to the suite host. - -Tasks can be polled on demand by right-clicking on them in gcylc or using the -\lstinline=cylc poll= command. - -Tasks are polled automatically, once, if they timeout while queueing in a -batch scheduler and submission timeout is set. (See~\ref{TaskEventHandling} for -how to configure timeouts). - -Tasks are polled multiple times, where necessary, when they exceed their -execution time limits. These are normally set with some initial delays to allow -the batch systems to kill the jobs. -(See~\ref{ExecutionTimeLimitPollingIntervals} for how to configure the polling -intervals). - -Any tasks recorded in the {\em submitted} or {\em running} states at suite -restart are automatically polled to determine what happened to them while the -suite was down. - -Regular polling can also be configured as a health check on tasks submitted to -hosts that are known to be flaky, or as the sole method of determining task -status on hosts that do not allow task messages to be routed back to the suite -host. - -To use polling instead of task-to-suite messaging set -\lstinline@task communication method = poll@ -in cylc site and user global config (see~\ref{task_comms_method}). 
-The default polling intervals can be overridden for all suites there too -(see~\ref{submission_polling} and~\ref{execution_polling}), or in specific -suite configurations (in which case polling will be done regardless of the -task communication method configured for the host; -see~\ref{SubmissionPollingIntervals} and~\ref{ExecutionPollingIntervals}). - -Note that regular polling is not as efficient as task messaging in updating -task status, and it should be used sparingly in large suites. - -Note that for polling to work correctly, the batch queueing system must have a -job listing command for listing your jobs, and that the job listing must -display job IDs as they are returned by the batch queueing system submit -command. For example, for pbs, moab and sge, the \lstinline=qstat= command -should list jobs with their IDs displayed in exactly the same format as they -are returned by the \lstinline=qsub= command. - -\subsection{Job Killing} - -For supported batch systems, the suite server program can execute a process on -the task host, by non-interactive ssh, to kill a submitted or running job -according to its batch system. - -Tasks can be killed on demand by right-clicking on them in gcylc or using the -\lstinline=cylc kill= command. - -\subsection{Execution Time Limit} - -You can specify an \lstinline=execution time limit= for all supported job -submission methods. E.g.: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[task-x]] - [[[job]]] - execution time limit = PT1H -\end{lstlisting} - -For tasks running with \lstinline=background= or \lstinline=at=, their jobs -will be wrapped using the \lstinline=timeout= command. For all other methods, -the relevant time limit directive will be added to their job files. - -The \lstinline=execution time limit= setting will also inform the suite when a -task job should complete by. If a task job has not reported completing within -the specified time, the suite will poll the task job. 
(The default -setting is PT1M, PT2M, PT7M. The accumulated times for these intervals will be -roughly 1 minute, 1 + 2 = 3 minutes and 1 + 2 + 7 = 10 minutes after a task job -exceeds its execution time limit.) - -\subsubsection{Execution Time Limit and Execution Timeout} - -If you specify an \lstinline=execution time limit= the -\lstinline=execution timeout event handler= will only be called if the job has -not completed after the final poll (by default, 10 min after the time limit). -This should only happen if the submission method you are using is not enforcing -wallclock limits (unlikely) or you are unable to contact the machine to confirm -the job status. - -If you specify an \lstinline=execution timeout= and not an -\lstinline=execution time limit= then the -\lstinline=execution timeout event handler= will be called as soon as the -specified time is reached. The job will also be polled to check its latest -status (possibly resulting in an update in its status and the calling of the -relevant event handler). This behaviour is deprecated and users should avoid -relying on it. - -If you specify an \lstinline=execution timeout= and an -\lstinline=execution time limit= then the execution timeout setting will be -ignored. - -\subsection{Custom Job Submission Methods} -\label{CustomJobSubmissionMethods} - -Defining a new batch system handler requires a little Python programming. Use -the built-in handlers as examples, and read the documentation in -\lstinline=lib/cylc/batch_sys_manager.py=. 
- -\lstset{language=Python} - -\subsubsection{An Example} - -The following \lstinline=qsub.py= module overrides the built-in {\em pbs} -batch system handler to change the directive prefix from \lstinline=#PBS= to -\lstinline=#QSUB=: - -\begin{lstlisting} -#!/usr/bin/env python2 - -from cylc.batch_sys_handlers.pbs import PBSHandler - -class QSUBHandler(PBSHandler): - DIRECTIVE_PREFIX = "#QSUB " - -BATCH_SYSTEM_HANDLER = QSUBHandler() -\end{lstlisting} - -If this is in the Python search path (see~\ref{Where To Put Batch System -Handler Modules} below) you can use it by name in suite configurations: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "a" -[runtime] - [[root]] - [[[job]]] - batch system = qsub # <---! - execution time limit = PT1M - [[[directives]]] - -l nodes = 1 - -q = long - -V = -\end{lstlisting} - -Generate a job script to see the resulting directives: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc register test $HOME/test -$ cylc jobscript test a.1 | grep QSUB -#QSUB -e /home/oliverh/cylc-run/my.suite/log/job/1/a/01/job.err -#QSUB -l nodes=1 -#QSUB -l walltime=60 -#QSUB -o /home/oliverh/cylc-run/my.suite/log/job/1/a/01/job.out -#QSUB -N a.1 -#QSUB -q long -#QSUB -V -\end{lstlisting} - -(Of course this suite will fail at run time because we only changed the -directive format, and PBS does not accept \lstinline=#QSUB= directives in -reality). - -\subsubsection{Where To Put Batch System Handler Modules} -\label{Where To Put Batch System Handler Modules} - -{\em Custom batch system handlers must be installed on suite and job -hosts} in one of these locations: -\begin{myitemize} - \item under \lstinline=SUITE-DEF-PATH/lib/python/= - \item under \lstinline=CYLC-PATH/lib/cylc/batch_sys_handlers/= - \item or anywhere in \lstinline=$PYTHONPATH= -\end{myitemize} - -(A note for Rose users: \lstinline=rose suite-run= automatically installs -\lstinline=SUITE-DEF-PATH/lib/python/= to job hosts). 
- -\section{External Triggers} -\label{External Triggers} - -{\em WARNING: this is a new capability and its suite configuration -interface may change somewhat in future releases - see {\em Current -Limitations} below in~\ref{Current Trigger Function Limitations}.} - -External triggers allow tasks to trigger directly off of external events, which -is often preferable to implementing long-running polling tasks in the workflow. -The triggering mechanism described in this section replaces an older and less -powerful one documented in~\ref{Old-Style External Triggers}. - -If you can write a Python function to check the status of an external -condition or event, the suite server program can call it at configurable -intervals until it reports success, at which point dependent tasks can trigger -and data returned by the function will be passed to the job environments of -those tasks. Functions can be written for triggering off of almost anything, -such as delivery of a new dataset, creation of a new entry in a database -table, or appearance of new data availability notifications in a message -broker. - -External triggers are visible in suite visualizations as bare graph nodes (just -the trigger names). They are plotted against all dependent tasks, not in a -cycle point specific way like tasks. This is because external triggers may or -may not be cycle point (or even task name) specific - it depends on the -arguments passed to the corresponding trigger functions. For example, if an -external trigger does not depend on task name or cycle point it will only be -called once - albeit repeatedly until satisfied - for the entire suite run, -after which the function result will be remembered for all dependent tasks -throughout the suite run. 
- -Several built-in external trigger functions are located in -\lstinline=/lib/cylc/xtriggers/=: -\begin{myitemize} - \item clock triggers - see~\ref{Built-in Clock Triggers} - \item inter-suite triggers - see~\ref{Built-in Suite State Triggers} -\end{myitemize} - -Trigger functions are normal Python functions, with certain constraints as -described below in: -\begin{myitemize} - \item custom trigger functions - see~\ref{Custom Trigger Functions} -\end{myitemize} - -\subsection{Built-in Clock Triggers} -\label{Built-in Clock Triggers} - -These are more transparent (exposed in the graph) and efficient (shared among -dependent tasks) than the older clock triggers described -in~\ref{ClockTriggerTasks}. (However we don't recommend wholesale conversion -to the new method yet, until its interface has stabilized - -see~\ref{Current Trigger Function Limitations}.) - -Clock triggers, unlike other trigger functions, are executed synchronously in -the main process. The clock trigger function signature looks like this: - -\begin{lstlisting} -wall_clock(offset=None) -\end{lstlisting} - -The \lstinline=offset= argument is a date-time duration (\lstinline=PT1H= is 1 -hour) relative to the dependent task's cycle point (automatically passed to the -function via a second argument not shown above). - -In the following suite, task \lstinline=foo= has a daily cycle point sequence, -and each task instance can trigger once the wall clock time has passed its -cycle point value by one hour: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2018-01-01 - [[xtriggers]] - clock_1 = wall_clock(offset=PT1H):PT10S - [[dependencies]] - [[[P1D]]] - graph = "@clock_1 => foo" -[runtime] - [[foo]] - script = run-foo.sh -\end{lstlisting} - -Notice that the short label \lstinline=clock_1= is used to represent the -trigger function in the graph. The function call interval, which determines how -often the suite server program checks the clock, is optional. 
Here it is -\lstinline=PT10S= (i.e.\ 10 seconds, which is also the default value). - -Argument keywords can be omitted if called in the right order, so the -\lstinline=clock_1= trigger can also be declared like this: -\lstset{language=suiterc} -\begin{lstlisting} - [[xtriggers]] - clock_1 = wall_clock(PT1H) -\end{lstlisting} - -Finally, a zero-offset clock trigger does not need to be declared under -the \lstinline=[xtriggers]= section: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2018-01-01 - [[dependencies]] - [[[P1D]]] - # zero-offset clock trigger: - graph = "@wall_clock => foo" -[runtime] - [[foo]] - script = run-foo.sh -\end{lstlisting} - -\subsection{Built-in Suite State Triggers} -\label{Built-in Suite State Triggers} - -These can be used instead of the older suite state polling tasks described -in~\ref{SuiteStatePolling} for inter-suite triggering - i.e.\ to trigger local -tasks off of remote task statuses or messages in other suites. (However we -don't recommend wholesale conversion to the new method yet, until its -interface has stabilized - see~\ref{Current Trigger Function Limitations}.) - -The suite state trigger function signature looks like this: -\begin{lstlisting} -suite_state(suite, task, point, offset=None, status='succeeded', - message=None, cylc_run_dir=None, debug=False) -\end{lstlisting} - -The first three arguments are compulsory; they single out the target suite name -(\lstinline=suite=) task name (\lstinline=task=) and cycle point -(\lstinline=point=). The function arguments mirror the arguments and options of -the \lstinline=cylc suite-state= command - see -\lstinline=cylc suite-state --help= for documentation. - -As a simple example, consider the suites in -\lstinline=/etc/dev-suites/xtrigger/suite_state/=. 
The ``upstream'' -suite (which we want to trigger off of) looks like this: - -\lstinputlisting{../../../etc/dev-suites/xtrigger/suite_state/upstream/suite.rc} - -It must be registered and run under the name {\em up}, as referenced in the -``downstream'' suite that depends on it: - -\lstinputlisting{../../../etc/dev-suites/xtrigger/suite_state/downstream/suite.rc} - -Try starting the downstream suite first, then the upstream, and watch what happens. -In each cycle point the \lstinline=@upstream= trigger in the downstream suite -waits on the task \lstinline=foo= (with the same cycle point) in the upstream -suite to emit the {\em data ready} message. - -Some important points to note about this: -\begin{myitemize} - \item the function call interval, which determines how often the suite - server program calls the trigger function, is optional. Here it is - \lstinline=PT10S= (i.e.\ 10 seconds, which is also the default value). - \item the \lstinline=suite_state= trigger function, like the - \lstinline=cylc suite-state= command, must have read-access to the upstream - suite's public database. - \item the cycle point argument is supplied by a string template - \lstinline=%(point)s=. The string templates available to trigger function - arguments are described in {\em Custom Trigger Functions} (\ref{Custom - Trigger Functions}). -\end{myitemize} - -The return value of the \lstinline=suite_state= trigger function looks like -this: -\begin{lstlisting} - results = { - 'suite': suite, - 'task': task, - 'point': point, - 'offset': offset, - 'status': status, - 'message': message, - 'cylc_run_dir': cylc_run_dir - } - return (satisfied, results) -\end{lstlisting} -The \lstinline=satisfied= variable is boolean (value True or False, depending -on whether or not the trigger condition was found to be satisfied). The -\lstinline=results= dictionary contains the names and values of all of the -target suite state parameters.
Each item in it gets qualified with the -unique trigger label (``upstream'' here) and passed to the environment of -dependent task jobs (the members of the \lstinline=FAM= family in this case). -To see this, take a look at the job script for one of the downstream tasks: - -\begin{lstlisting} -% cylc cat-log -f j dn f2.2011 -... -cylc__job__inst__user_env() { - # TASK RUNTIME ENVIRONMENT: - export upstream_suite upstream_cylc_run_dir upstream_offset \ - upstream_message upstream_status upstream_point upstream_task - upstream_suite="up" - upstream_cylc_run_dir="/home/vagrant/cylc-run" - upstream_offset="None" - upstream_message="data ready" - upstream_status="succeeded" - upstream_point="2011" - upstream_task="foo" -} -... -\end{lstlisting} - -Note that the task has to know the name (label) of the external trigger that it -depends on - ``upstream'' in this case - in order to use this information. -However the name could be given to the task environment in the suite -configuration. - -\subsection{Custom Trigger Functions} -\label{Custom Trigger Functions} - -Trigger functions are just normal Python functions, with a few special -properties: -\begin{myitemize} - \item they must be defined in a module with the same name as the function - \item they can be located in: - \begin{myitemize} - \item \lstinline=/lib/cylc/xtriggers/= - \item \lstinline=/lib/python/= - \item (or anywhere in your Python library path) - \end{myitemize} - \item they can take arbitrary positional and keyword arguments - \item suite and task identity, and cycle point, can be passed to trigger - functions by using string templates in function arguments (see below) - \item integer, float, boolean, and string arguments will be recognized and - passed to the function as such - \item if a trigger function depends on files or directories (for example) - that might not exist when the function is first called, just return - unsatisfied until everything required does exist.
-\end{myitemize} - -Note that trigger functions cannot store data Pythonically between invocations -because each call is executed in an independent process in the process pool. If -necessary the filesystem can be used for this purpose. - -The following string templates are available for use, if the trigger function -needs any of this information, in function arguments in the suite configuration: -\begin{myitemize} - \item \lstinline=%(name)s= - name of the dependent task - \item \lstinline=%(id)s= - identity of the dependent task (name.cycle-point) - \item \lstinline=%(point)s= - cycle point of the dependent task - \item \lstinline=%(debug)s= - suite debug mode -\end{myitemize} -and less commonly needed: -\begin{myitemize} - \item \lstinline=%(user_name)s= - suite owner's user name - \item \lstinline=%(suite_name)s= - registered suite name - \item \lstinline=%(suite_run_dir)s= - suite run directory - \item \lstinline=%(suite_share_dir)s= - suite share directory -\end{myitemize} - -Function return values should be as follows: -\begin{myitemize} - \item if the trigger condition is {\em not satisfied}: - \begin{myitemize} - \item return \lstinline=(False, {})= - \end{myitemize} - \item if the trigger condition is {\em satisfied}: - \begin{myitemize} - \item return \lstinline=(True, results)= - \end{myitemize} -\end{myitemize} -where \lstinline=results= is an arbitrary dictionary of information to be -passed to dependent tasks. How this looks to these tasks is described above -in {\em Built-in Suite State Triggers} (\ref{Built-in Suite State Triggers}). 
- -The suite server program manages trigger functions as follows: -\begin{myitemize} - \item they are called asynchronously in the process pool - \begin{myitemize} - \item (except for clock triggers, which are called from the main process) - \end{myitemize} - \item they are called repeatedly on a configurable interval, until satisfied - \begin{myitemize} - \item the call interval defaults to \lstinline=PT10S= (10 seconds) - \item repeat calls are not made until the previous call has returned - \end{myitemize} - \item they are subject to the normal process pool command time out - if they - take too long to return, the process will be killed - \item they are shared for efficiency: a single call will be made for all - triggers that share the same function signature - i.e.\ the same function - name and arguments - \item their return status and results are stored in the suite DB and persist across - suite restarts - \item their stdout, if any, is redirected to stderr and will be visible in - the suite log in debug mode (stdout is needed to communicate return values - from the sub-process in which the function executes) -\end{myitemize} - -\subsubsection{Toy Examples} - -A couple of toy examples in \lstinline=/lib/cylc/xtriggers/= may -be a useful aid to understanding trigger functions and how they work. - -\paragraph{echo} - -The \lstinline=echo= function is a trivial one that takes any number of -positional and keyword arguments (from the suite configuration) and simply -prints them to stdout, and then returns False (i.e.\ trigger condition not -satisfied). Here it is in its entirety.
- -\begin{lstlisting} -def echo(*args, **kwargs): - print "echo: ARGS:", args - print "echo: KWARGS:", kwargs - return (False, {}) -\end{lstlisting} - -Here's an example echo trigger suite: -\begin{lstlisting} -[scheduling] - initial cycle point = now - [[xtriggers]] - echo_1 = echo(hello, 99, qux=True, point=%(point)s, foo=10) - [[dependencies]] - [[[PT1H]]] - graph = "@echo_1 => foo" -[runtime] - [[foo]] - script = exit 1 -\end{lstlisting} - -To see the result, run this suite in debug mode and take a look at the -suite log (or run \lstinline=cylc run --debug --no-detach = and watch -your terminal). - -\paragraph{xrandom} - -The \lstinline=xrandom= function sleeps for a configurable amount of time -(useful for testing the effect of a long-running trigger function - which -should be avoided) and has a configurable random chance of success. The -function signature is: - -\begin{lstlisting} -xrandom(percent, secs=0, _=None, debug=False) -\end{lstlisting} - -The \lstinline=percent= argument sets the odds of success in any given call; -\lstinline=secs= is the number of seconds to sleep before returning; and the -\lstinline=_= argument (underscore is a conventional name for a variable -that is not used, in Python) is provided to allow specialization of the trigger -to (for example) task name, task ID, or cycle point (just use the appropriate -string templates in the suite configuration for this). - -An example xrandom trigger suite is -\lstinline=/etc/dev-suites/xtriggers/xrandom/= - -\subsection{Current Limitations} -\label{Current Trigger Function Limitations} - -The following issues may be addressed in future Cylc releases: -\begin{myitemize} - \item trigger labels cannot currently be used in conditional (OR) expressions - in the graph; attempts to do so will fail validation. 
- \item aside from the predefined zero-offset \lstinline=wall_clock= trigger, all - unique trigger function calls must be declared {\em with all of - their arguments} under the \lstinline=[scheduling][xtriggers]= section, and - referred to by label alone in the graph. It would be convenient (and less - verbose, although no more functional) if we could just declare a label - against the {\em common} arguments, and give remaining arguments (such as - different wall clock offsets in clock triggers) as needed in the graph. - \item we may move away from the string templating method for providing suite - and task attributes to trigger function arguments. -\end{myitemize} - -\subsection{Filesystem Events?} - -Cylc does not have built-in support for triggering off of filesystem events -such as \lstinline=inotify= on Linux. There is no cross-platform standard for -this, and in any case filesystem events are not very useful in HPC cluster -environments where events can only be detected at the specific node on which -they were generated. - -\subsection{Continuous Event Watchers?} - -For some applications a persistent process that continually monitors the -external world is better than discrete periodic checking. This would be more -difficult to support as a plugin mechanism in Cylc, but we may decide to do it -in the future. In the meantime, consider implementing a small daemon process as -the watcher (e.g.\ to watch continuously for filesystem events) and have your -Cylc trigger functions interact with it. - -\subsection{Old-Style External Triggers (Deprecated)} -\label{Old-Style External Triggers} - -{\em NOTE: This mechanism is now technically deprecated by the newer external -trigger functions (\ref{External Triggers}). 
(However we don't recommend -wholesale conversion to the new method yet, until its interface has -stabilized - see~\ref{Current Trigger Function Limitations}.)} - -These old-style external triggers are hidden task prerequisites that must be -satisfied by using the \lstinline=cylc ext-trigger= client command to send an -associated pre-defined event message to the suite along with an ID string that -distinguishes one instance of the event from another (the name of the target -task and its current cycle point are not required). The event ID is just an -arbitrary string to Cylc, but it can be used to identify something associated -with the event to the suite - such as the filename of a new -externally-generated dataset. When the suite server program receives the event -notification it will trigger the next instance of any task waiting on that -trigger (whatever its cycle point) and then broadcast -(see~\ref{cylc-broadcast}) the event ID to the cycle point of the triggered -task as \lstinline=$CYLC_EXT_TRIGGER_ID=. Downstream tasks with the same cycle -point therefore know the new event ID too and can use it, if they need to, to -identify the same new dataset. In this way a whole workflow can be associated -with each new dataset, and multiple datasets can be processed in parallel if -they happen to arrive in quick succession. 
- -An externally-triggered task must register the event it waits on in the suite -scheduling section: -\lstset{language=suiterc} -\begin{lstlisting} -# suite "sat-proc" -[scheduling] - cycling mode = integer - initial cycle point = 1 - [[special tasks]] - external-trigger = get-data("new sat X data avail") - [[dependencies]] - [[[P1]]] - graph = get-data => conv-data => products -\end{lstlisting} - -Then, each time a new dataset arrives the external detection system should -notify the suite like this: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc ext-trigger sat-proc "new sat X data avail" passX12334a -\end{lstlisting} -where ``sat-proc'' is the suite name and ``passX12334a'' is the ID string for -the new event. The suite passphrase must be installed on the triggering account. - -Note that only one task in a suite can trigger off a particular external -message. Other tasks can trigger off the externally triggered task as required, -of course. - -\lstinline=/etc/examples/satellite/ext-triggers/suite.rc= is a working -example of a simulated satellite processing suite. - -External triggers are not normally needed in date-time cycling suites driven -by real time data that comes in at regular intervals. In these cases a data -retrieval task can be clock-triggered (and have appropriate retry intervals) to -submit at the expected data arrival time, so little time is wasted in polling.
-However, if the arrival time of the cycle-point-specific data is highly -variable, external triggering may be used with the cycle point embedded in the -message: -\lstset{language=suiterc} -\begin{lstlisting} -# suite "data-proc" -[scheduling] - initial cycle point = 20150125T00 - final cycle point = 20150126T00 - [[special tasks]] - external-trigger = get-data("data arrived for $CYLC_TASK_CYCLE_POINT") - [[dependencies]] - [[[T00]]] - graph = init-process => get-data => post-process -\end{lstlisting} - -Once the variable-length waiting is finished, an external detection system -should notify the suite like this: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc ext-trigger data-proc "data arrived for 20150126T00" passX12334a -\end{lstlisting} -where ``data-proc'' is the suite name, the cycle point has replaced the -variable in the trigger string, and ``passX12334a'' is the ID string for -the new event. The suite passphrase must be installed on the triggering -account. In this case, the event will trigger for the second cycle point but -not the first because of the cycle-point matching. - -\section{Running Suites} -\label{RunningSuites} - -This chapter currently features a diverse collection of topics related -to running suites. Please also see the Tutorial (\ref{Tutorial}) and -command documentation (\ref{CommandReference}), and experiment with -plenty of examples. - -\subsection{Suite Start-Up} -\label{SuiteStartUp} - -There are three ways to start a suite running: {\em cold start} and {\em warm -start}, which start from scratch; and {\em restart}, which starts from a prior -suite state checkpoint. The only difference between cold starts and warm starts -is that warm starts start from a point beyond the suite initial cycle point. - -Once a suite is up and running it is typically a restart that is needed most -often (but see also \lstinline=cylc reload=). 
{\em Be aware that cold and warm -starts wipe out prior suite state, so you can't go back to a restart if you -decide you made a mistake.} - -\subsubsection{Cold Start} -\label{Cold Start} - -A cold start is the primary way to start a suite run from scratch: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc run SUITE [INITIAL_CYCLE_POINT] -\end{lstlisting} -The initial cycle point may be specified on the command line or in the suite.rc -file. The scheduler starts by loading the first instance of each task at the -suite initial cycle point, or at the next valid point for the task. - -\subsubsection{Warm Start} -\label{Warm Start} - -A warm start runs a suite from scratch like a cold start, but from the -beginning of a given cycle point that is beyond the suite initial cycle point. -This is generally inferior to a {\em restart} (which loads a previously -recorded suite state - see~\ref{RestartingSuites}) because it may result in -some tasks rerunning. However, a warm start may be required if a restart is not -possible, e.g.\ because the suite run database was accidentally deleted. The -warm start cycle point must be given on the command line: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc run --warm SUITE [START_CYCLE_POINT] -\end{lstlisting} -The original suite initial cycle point is preserved, but all tasks and -dependencies before the given warm start cycle point are ignored. - -The scheduler starts by loading a first instance of each task at the warm -start cycle point, or at the next valid point for the task. -\lstinline=R1=-type tasks behave exactly the same as other tasks - if their -cycle point is at or later than the given start cycle point, they will run; if -not, they will be ignored. - -\subsubsection{Restart and Suite State Checkpoints} -\label{RestartingSuites} - -At restart (see \lstinline=cylc restart --help=) a suite server program -initializes its task pool from a previously recorded checkpoint state. 
By -default the latest automatic checkpoint - which is updated with every task -state change - is loaded so that the suite can carry on exactly as it was just -before being shut down or killed. - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc restart SUITE -\end{lstlisting} - -Tasks recorded in the `submitted' or `running' states are automatically polled -(see Section~\ref{Task Job Polling}) at start-up to determine what happened to -them while the suite was down. - -\paragraph{Restart From Latest Checkpoint} - -To restart from the latest checkpoint simply invoke the \lstinline=cylc restart= -command with the suite name (or select `restart' in the GUI suite start dialog -window): - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc restart SUITE -\end{lstlisting} - -\paragraph{Restart From Another Checkpoint} - -Suite server programs automatically update the ``latest'' checkpoint every time -a task changes state, and at every suite restart, but you can also take -checkpoints at other times. To tell a suite server program to checkpoint its -current state: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc checkpoint SUITE-NAME CHECKPOINT-NAME -\end{lstlisting} - -The 2nd argument is a name to identify the checkpoint later with: -\begin{lstlisting} -$ cylc ls-checkpoints SUITE-NAME -\end{lstlisting} - -For example, with checkpoints named `bob', `alice', and `breakfast': -\begin{lstlisting} -$ cylc ls-checkpoints SUITE-NAME -####################################################################### -# CHECKPOINT ID (ID|TIME|EVENT) -1|2017-11-01T15:48:34+13|bob -2|2017-11-01T15:48:47+13|alice -3|2017-11-01T15:49:00+13|breakfast -... 
-0|2017-11-01T17:29:19+13|latest -\end{lstlisting} - -To see the actual task state content of a given checkpoint ID (if you need to), -for the moment you have to interrogate the suite DB, e.g.: - -\begin{lstlisting} -$ sqlite3 ~/cylc-run/SUITE-NAME/log/db \ - 'select * from task_pool_checkpoints where id == 3;' -3|2012|model|1|running| -3|2013|pre|0|waiting| -3|2013|post|0|waiting| -3|2013|model|0|waiting| -3|2013|upload|0|waiting| -\end{lstlisting} - -Note that a checkpoint captures the instantaneous state of every task in the -suite, including any tasks that are currently active, so you may want to be -careful where you do it. Tasks recorded as active are polled automatically on -restart to determine what happened to them. - -The checkpoint ID 0 (zero) is always used for latest state of the suite, which -is updated continuously as the suite progresses. The checkpoint IDs of earlier -states are positive integers starting from 1, incremented each time a new -checkpoint is stored. Currently suites automatically store checkpoints before -and after reloads, and on restarts (using the latest checkpoints before the -restarts). - -Once you have identified the right checkpoint, restart the suite like this: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc restart --checkpoint=CHECKPOINT-ID SUITE -\end{lstlisting} -or enter the checkpoint ID in the space provided in the GUI restart window. - -\paragraph{Checkpointing With A Task} - -Checkpoints can be generated automatically at particular points in the -workflow by coding tasks that run the \lstinline=cylc checkpoint= command: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[PT6H]]] - graph = "pre => model => post => checkpointer" -[runtime] - # ... 
- [[checkpointer]] - script = """ -wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true -cylc checkpoint ${CYLC_SUITE_NAME} CP-${CYLC_TASK_CYCLE_POINT} - """ -\end{lstlisting} - -Note that we need to \lstinline=wait= on the ``task started'' message - which -is sent in the background to avoid holding tasks up in a network outage - to -ensure that the checkpointer task is correctly recorded as running in the -checkpoint (at restart the suite server program will poll to determine that -that task job finished successfully). Otherwise it may be recorded in the -waiting state and, if its upstream dependencies have already been cleaned up, -it will need to be manually reset from waiting to succeeded after the restart -to avoid stalling the suite. - -\paragraph{Behaviour of Tasks on Restart} - -All tasks are reloaded in exactly their checkpointed states. Failed tasks are -not automatically resubmitted at restart in case the underlying problem has not -been addressed yet. - -Tasks recorded in the submitted or running states are automatically polled on -restart, to see if they are still waiting in a batch queue, still running, or -if they succeeded or failed while the suite was down. The suite state will be -updated automatically according to the poll results. - -Existing instances of tasks removed from the suite configuration before restart -are not removed from the task pool automatically, but they will not spawn new -instances. They can be removed manually if necessary, -with~\lstinline=cylc remove=. - -Similarly, instances of new tasks added to the suite configuration before -restart are not inserted into the task pool automatically, because it is -very difficult in general to automatically determine the cycle point of -the first instance. Instead, the first instance of a new task should be -inserted manually at the right cycle point, with~\lstinline=cylc insert=. 
- -\subsection{Reloading The Suite Configuration At Runtime} - -The \lstinline=cylc reload= command tells a suite server program to reload its -suite configuration at run time. This is an alternative to shutting a suite down -and restarting it after making changes. - -As for a restart, existing instances of tasks removed from the suite configuration -before reload are not removed from the task pool automatically, but they -will not spawn new instances. They can be removed manually if necessary, -with~\lstinline=cylc remove=. - -Similarly, instances of new tasks added to the suite configuration before -reload are not inserted into the pool automatically. The first instance of each -must be inserted manually at the right cycle point, with~\lstinline=cylc insert=. - -\subsection{Task Job Access To Cylc} -\label{HowTasksGetAccessToCylc} - -Task jobs need access to Cylc on the job host, primarily for task messaging, -but also to allow user-defined task scripting to run other Cylc commands. - -Cylc should be installed on job hosts as on suite hosts, with different releases -installed side-by-side and invoked via the central Cylc wrapper according to -the value of \lstinline=$CYLC_VERSION= - see Section~\ref{InstallCylc}. Task -job scripts set \lstinline=$CYLC_VERSION= to the version of the parent suite -server program, so that the right Cylc will be invoked by jobs on the job host. - -Access to the Cylc executable (preferably the central wrapper as just -described) for different job hosts can be configured using site and user -global configuration files (on the suite host). If the environment for running -the Cylc executable is only set up correctly in a login shell for a given host, -you can set \lstinline@[hosts][HOST]use login shell = True@ for the relevant -host (this is the default, to cover more sites automatically). 
If the -environment is already correct without the login shell, but the Cylc executable -is not in \lstinline=$PATH=, then \lstinline=[hosts][HOST]cylc executable= can -be used to specify the direct path to the executable. - -To customize the environment more generally for Cylc on job hosts, -use of \lstinline=job-init-env.sh= is described in Section~\ref{Configure -Environment on Job Hosts}. - -\subsection{The Suite Contact File} -\label{The Suite Contact File} - -At start-up, suite server programs write a {\em suite contact file} -\lstinline=$HOME/cylc-run/SUITE/.service/contact= that records suite host, -user, port number, process ID, Cylc version, and other information. Client -commands can read this file, if they have access to it, to find the target -suite server program. - -\subsection{Task Job Polling} -\label{Task Job Polling} - -At any point after job submission task jobs can be {\em polled} to check that -their true state conforms to what is currently recorded by the suite server -program. See \lstinline=cylc poll --help= for how to poll one or more tasks -manually, or right-click poll a task or family in the GUI. - -Polling may be necessary if, for example, a task job gets killed by the -untrappable SIGKILL signal (e.g.\ \lstinline=kill -9 PID=), or if a network -outage prevents task success or failure messages getting through, or if the -suite server program itself is down when tasks finish execution. - -To poll a task job the suite server program interrogates the batch system, and the -\lstinline=job.status= file, on the job host. This information is enough to -determine the final task status even if the job finished while the suite -server program was down or unreachable on the network.
- -\subsubsection{Routine Polling} - -Task jobs are automatically polled at certain times: once on job submission -timeout; several times on exceeding the job execution time limit; and at suite -restart any tasks recorded as active in the suite state checkpoint are polled -to find out what happened to them while the suite was down. - -Finally, if necessary, routine polling can be configured as a way to track job -status on job hosts that do not allow network routing back to the suite host -for task messaging by HTTPS or ssh. See~\ref{Polling To Track Job Status}. - -\subsection{Tracking Task State} -\label{TaskComms} - -Cylc supports three ways of tracking task state on job hosts: -\begin{myitemize} - \item task-to-suite messaging via HTTPS - \item task-to-suite messaging via non-interactive ssh to the suite host, then local HTTPS - \item regular polling by the suite server program -\end{myitemize} - -These can be configured per job host in the Cylc global config file - see -\ref{SiteRCReference}. - -If your site prohibits HTTPS and ssh back from job hosts to suite hosts, before resorting -to the polling method you should consider installing dedicated Cylc servers or -VMs inside the HPC trust zone (where HTTPS and ssh should be allowed). - -It is also possible to run Cylc suite server programs on HPC login nodes, but this is -not recommended for load, run duration, and GUI reasons. - -Finally, it has been suggested that {\em port forwarding} may provide another -solution - but that is beyond the scope of this document. - -\subsubsection{HTTPS Task Messaging} - -Task job wrappers automatically invoke \lstinline=cylc message= to report -progress back to the suite server program when they begin executing, at normal exit -(success) and abnormal exit (failure). - -By default the messaging occurs via an authenticated, HTTPS connection to the -suite server program. This is the preferred task communications method - it is -efficient and direct.
- -Suite server programs automatically install suite contact information and credentials -on job hosts. Users only need to do this manually for remote access to -suites on other hosts, or suites owned by other users - see~\ref{RemoteControl}. - -\subsubsection{Ssh Task Messaging} - -Cylc can be configured to re-invoke task messaging commands on the suite host via -non-interactive ssh (from job host to suite host). Then a local HTTPS -connection is made to the suite server program. - -(User-invoked client commands (aside from the GUI, which requires HTTPS) can do -the same thing with the \lstinline=--use-ssh= command option). - -This is less efficient than direct HTTPS messaging, but it may be useful at -sites where the HTTPS ports are blocked but non-interactive ssh is allowed. - -\subsubsection{Polling to Track Job Status} -\label{Polling To Track Job Status} - -Finally, suite server programs can actively poll task jobs at configurable intervals, -via non-interactive ssh to the job host. - -Polling is the least efficient task communications method because task state is -updated only at intervals, not when task events actually occur. However, it -may be needed at sites that do not allow HTTPS or non-interactive ssh from job -host to suite host. - -Be careful to avoid spamming task hosts with polling commands. Each poll -opens (and then closes) a new ssh connection. - -Polling intervals are configurable under \lstinline=[runtime]= because -they may depend on the expected execution time. For instance, a -task that typically takes an hour to run might be polled every 10 -minutes initially, and then every minute toward the end of its run.
-Interval values are used in turn until the last value, which is used -repeatedly -until finished: -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - [[[job]]] - # poll every minute in the 'submitted' state: - submission polling intervals = PT1M - # poll one minute after foo starts running, then every 10 - # minutes for 50 minutes, then every minute until finished: - execution polling intervals = PT1M, 5*PT10M, PT1M -\end{lstlisting} -A list of intervals with optional multipliers can be used for both -submission and execution polling, although a single value is probably -sufficient for submission polling. If these items are not configured -default values from site and user global config will be used for the polling -task communication method; polling is not done by default under the -other task communications methods (but it can still be used if you -like). - -\subsubsection{Task Communications Configuration} - - -\subsection{The Suite Service Directory} -\label{The Suite Service Directory} - -At registration time a {\em suite service directory}, -\lstinline=$HOME/cylc-run//.service/=, is created and populated -with a private passphrase file (containing random text), a self-signed -SSL certificate (see~\ref{ConnectionAuthentication}), and a symlink to the -suite source directory. An existing passphrase file will not be overwritten -if a suite is re-registered. - -At run time, the private suite run database is also written to the service -directory, along with a {\em suite contact file} that records the host, -user, port number, process ID, Cylc version, and other information about the -suite server program. Client commands automatically read daemon targetting -information from the contact file, if they have access to it. - -\subsection{File-Reading Commands} - -Some Cylc commands and GUI actions parse suite configurations or read other files -from the suite host account, rather than communicate with a suite server -program over the network. 
In future we plan to have suite server program serve -up these files to clients, but for the moment this functionality requires -read-access to the relevant files on the suite host. - -If you are logged into the suite host account, file-reading commands will just -work. - -\subsubsection{Remote Host, Shared Home Directory} - -If you are logged into another host with shared home directories (shared -filesystems are common in HPC environments) file-reading commands will just -work because suite files will look ``local'' on both hosts. - -\subsubsection{Remote Host, Different Home Directory} - -If you are logged into another host with no shared home directory, file-reading -commands require non-interactive ssh to the suite host account, and use of the -\lstinline=--host= and \lstinline=--user= options to re-invoke the command -on the suite account. - -\subsubsection{Same Host, Different User Account} - -(This is essentially the same as {\em Remote Host, Different Home Directory}.) - - -\subsection{Client-Server Interaction} -\label{ConnectionAuthentication} - -Cylc server programs listen on dedicated network ports for -HTTPS communications from Cylc clients (task jobs, and user-invoked commands -and GUIs). - -Use \lstinline=cylc scan= to see which suites are listening on which ports on -scanned hosts (this lists your own suites by default, but it can show others -too - see \lstinline=cylc scan --help=). 
- -Cylc supports two kinds of access to suite server programs: -\begin{myitemize} - \item {\em public} (non-authenticated) - the amount of information - revealed is configurable, see~\ref{PublicAccess} - \item {\em control} (authenticated) - full control, suite passphrase - required, see~\ref{passphrases} -\end{myitemize} - -\subsubsection{Public Access - No Auth Files} -\label{PublicAccess} - -Without a suite passphrase the amount of information revealed by a suite -server program is determined by the public access privilege level set in global -site/user config (\ref{GlobalAuth}) and optionally overridden in suites -(\ref{SuiteAuth}): -\begin{myitemize} - \item {\em identity} - only suite and owner names revealed - \item {\em description} - identity plus suite title and description - \item {\em state-totals} - identity, description, and task state totals - \item {\em full-read} - full read-only access for monitor and GUI - \item {\em shutdown} - full read access plus shutdown, but no other - control. -\end{myitemize} -The default public access level is {\em state-totals}. - -The \lstinline=cylc scan= command and the \lstinline=cylc gscan= GUI can print -descriptions and task state totals in addition to basic suite identity, if -that information is revealed publicly. - -\subsubsection{Full Control - With Auth Files} -\label{passphrases} - -Suite auth files (passphrase and SSL certificate) give full control. They are -loaded from the suite service directory by the suite server program at -start-up, and used to authenticate subsequent client connections. Passphrases -are used in a secure encrypted challenge-response scheme, never sent in plain -text over the network. - -If two users need access to the same suite server program, they must both -possess the passphrase file for that suite. Fine-grained access to a single -suite server program via distinct user accounts is not currently supported.
- -Suite server programs automatically install their auth and contact files to job -hosts via ssh, to enable task jobs to connect back to the suite server program -for task messaging. - -Client programs invoked by the suite owner automatically load the passphrase, -SSL certificate, and contact file too, for automatic connection to suites. - -{\em Manual installation of suite auth files is only needed for remote control, -if you do not have a shared filesystem - see below.} - - -\subsection{GUI-to-Suite Interaction} -\label{GUI-to-Suite Interaction} - -The gcylc GUI is mainly a network client to retrieve and display suite status -information from the suite server program, but it can also invoke file-reading -commands to view and graph the suite configuration and so on. This is entirely -transparent if the GUI is running on the suite host account, but full -functionality for remote suites requires either a shared filesystem, or -(see~\ref{RemoteControl}) auth file installation {\em and} non-interactive ssh -access to the suite host. Without the auth files you will not be able to connect -to the suite, and without ssh you will see ``permission denied'' errors on -attempting file access. - -\subsection{Remote Control} -\label{RemoteControl} - -Cylc client programs - command line and GUI - can interact with suite server -programs running on other accounts or hosts. How this works depends on whether -or not you have: -\begin{myitemize} - \item a {\em shared filesystem} such that you see the same home directory on - both hosts. - \item {\em non-interactive ssh} from the client account to the server - account. -\end{myitemize} - -With a shared filesystem, a suite registered on the remote (server) host is -also - in effect - registered on the local (client) host. In this case you -can invoke client commands without the \lstinline=--host= option; the client -will automatically read the host and port from the contact file in the -suite service directory. 
- -To control suite server programs running under other user accounts or on other -hosts without a shared filesystem, the suite SSL certificate and passphrase -must be installed under your \lstinline=$HOME/.cylc/= directory: -\lstset{language=transcript} -\begin{lstlisting} -$HOME/.cylc/auth/OWNER@HOST/SUITE/ - ssl.cert - passphrase - contact # (optional - see below) -\end{lstlisting} -where \lstinline=OWNER@HOST= is the suite host account and \lstinline=SUITE= -is the suite name. Client commands should then be invoked with the -\lstinline=--user= and \lstinline=--host= options, e.g.: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc gui --user=OWNER --host=HOST SUITE -\end{lstlisting} - -Note remote suite auth files do not need to be installed for read-only access - -see~\ref{PublicAccess} - via the GUI or monitor. - -The suite contact file (see~\ref{The Suite Contact File}) is not needed if -you have read-access to the remote suite run directory via the local -filesystem or non-interactive ssh to the suite host account - client commands -will automatically read it. If you do install the contact file in your auth -directory note that the port number will need to be updated if the suite gets -restarted on a different port. Otherwise use \lstinline=cylc scan= to determine -the suite port number and use the \lstinline=--port= client command option. - -{\em WARNING: possession of a suite passphrase gives full control over the -target suite, including {\em edit run} functionality - which lets you run -arbitrary scripting on job hosts as the suite owner. Further, -non-interactive ssh gives full access to the target user account, so we -recommend that this is only used to interact with suites running on -accounts to which you already have full access.
} - -\subsection{Scan And Gscan} -\label{Scan And Gscan} - -Both \lstinline=cylc scan= and the \lstinline=cylc gscan= GUI can display -suites owned by other users on other hosts, including task state totals if the -public access level permits that (see~\ref{PublicAccess}). Clicking on a remote -suite in \lstinline=gscan= will open a \lstinline=cylc gui= to connect to that -suite. This will give you full control, if you have the suite auth files -installed; or it will display full read only information if the public access -level allows that. - -\subsection{Task States Explained} - -As a suite runs, its task proxies may pass through the following states: - -\begin{myitemize} - \item {\bf waiting} - still waiting for prerequisites (e.g.\ dependence on - other tasks, and clock triggers) to be satisfied. - - \item {\bf held} - will not be submitted to run even if all prerequisites - are satisfied, until released/un-held. - - \item {\bf queued} - ready to run (prerequisites satisfied) but - temporarily held back by an {\em internal cylc queue} - (see~\ref{InternalQueues}). - - \item {\bf ready} - ready to run (prerequisites satisfied) and - handed to cylc's job submission sub-system. - - \item {\bf submitted} - submitted to run, but not executing yet - (could be waiting in an external batch scheduler queue). - - \item {\bf submit-failed} - job submission failed {\em or} - submitted job killed (cancelled) before commencing execution. - - \item {\bf submit-retrying} - job submission failed, but a submission retry - was configured. Will only enter the {\em submit-failed} state if all - configured submission retries are exhausted. - - \item {\bf running} - currently executing (a {\em task started} - message was received, or the task polled as running). - - \item {\bf succeeded} - finished executing successfully (a {\em task - succeeded} message was received, or the task polled as succeeded). 
- - \item {\bf failed} - aborted execution due to some error condition (a - {\em task failed} message was received, or the task polled as failed). - - \item {\bf retrying} - job execution failed, but an execution retry - was configured. Will only enter the {\em failed} state if all configured - execution retries are exhausted. - - \item {\bf runahead} - will not have prerequisites checked (and so - automatically held, in effect) until the rest of the suite catches up - sufficiently. The amount of runahead allowed is configurable - see - ~\ref{RunaheadLimit}. - - \item {\bf expired} - will not be submitted to run, due to falling too far - behind the wall-clock relative to its cycle point - - see~\ref{ClockExpireTasks}. - -\end{myitemize} - -\subsection{What The Suite Control GUI Shows} - -The GUI Text-tree and Dot Views display the state of every task proxy present -in the task pool. Once a task has succeeded and Cylc has determined that it can -no longer be needed to satisfy the prerequisites of other tasks, its proxy will -be cleaned up (removed from the pool) and it will disappear from the GUI. To -rerun a task that has disappeared from the pool, you need to re-insert its task -proxy and then re-trigger it. - -The Graph View is slightly different: it displays the complete dependency graph -over the range of cycle points currently present in the task pool. This often -includes some greyed-out {\em base} or {\em ghost nodes} that are empty - i.e.\ -there are no corresponding task proxies currently present in the pool. Base -nodes just flesh out the graph structure. Groups of them may be cut out and -replaced by single {\em scissor nodes} in sections of the graph that are -currently inactive. 
- - -\subsection{Network Connection Timeouts} - -A connection timeout can be set in site and user global config files -(see~\ref{SiteAndUserConfiguration}) so that messaging commands -cannot hang indefinitely if the suite is not responding (this can be -caused by suspending a suite with Ctrl-Z) thereby preventing the task -from completing. The same can be done on the command line for other -suite-connecting user commands, with the \lstinline=--comms-timeout= option. - -\subsection{Runahead Limiting} -\label{RunaheadLimit} - -Runahead limiting prevents the fastest tasks in a suite from getting too far -ahead of the slowest ones. Newly spawned tasks are released to the task pool -only when they fall below the runahead limit. A low runahead limit can prevent -cylc from interleaving cycles, but it will not stall a suite unless it fails to -extend out past a future trigger (see~\ref{InterCyclePointTriggers}). -A high runahead limit may allow fast tasks that are not constrained by -dependencies or clock-triggers to spawn far ahead of the pack, which could have -performance implications for the suite server program when running very large -suites. Succeeded and failed tasks are ignored when computing the runahead -limit. - -The preferred runahead limiting mechanism restricts the number of consecutive -active cycle points. The default value is three active cycle points; -see~\ref{max active cycle points}. Alternatively the interval between the -slowest and fastest tasks can be specified as a hard limit; -see~\ref{runahead limit}. - -\subsection{Limiting Activity With Internal Queues} -\label{InternalQueues} - -Large suites can potentially overwhelm task hosts by submitting too many -tasks at once. You can prevent this with {\em internal queues}, which -limit the number of tasks that can be active (submitted or running) -at the same time.
- -Internal queues behave in the first-in-first-out (FIFO) manner, i.e.\ tasks are -released from a queue in the same order that they were queued. - -A queue is defined by a {\em name}; a {\em limit}, which is the maximum -number of active tasks allowed for the queue; and a list of {\em members}, -assigned by task or family name. - -Queue configuration is done under the [scheduling] section of the suite.rc file -(like dependencies, internal queues constrain {\em when} a task runs). - -By default every task is assigned to the {\em default} queue, which by default -has a zero limit (interpreted by cylc as no limit). To use a single queue for -the whole suite just set the default queue limit: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[ queues]] - # limit the entire suite to 5 active tasks at once - [[[default]]] - limit = 5 -\end{lstlisting} -To use additional queues just name each one, set their limits, and assign -members: -\begin{lstlisting} -[scheduling] - [[ queues]] - [[[q_foo]]] - limit = 5 - members = foo, bar, baz -\end{lstlisting} -Any tasks not assigned to a particular queue will remain in the default -queue. The {\em queues} example suite illustrates how queues work by -running two task trees side by side (as seen in the graph GUI) each -limited to 2 and 3 tasks respectively: -\lstset{language=suiterc} -\lstinputlisting{../../../etc/examples/queues/suite.rc} - -\subsection{Automatic Task Retry On Failure} -\label{TaskRetries} - -See also~\ref{RefRetries} in the {\em Suite.rc Reference}. - -Tasks can be configured with a list of ``retry delay'' intervals, as -ISO 8601 durations. If the task job fails it will go into the {\em retrying} -state and resubmit after the next configured delay interval. An example is -shown in the suite listed below under~\ref{EventHandling}. 
- -If a task with configured retries is {\em killed} (by \lstinline=cylc kill= or -via the GUI) it goes to the {\em held} state so that the operator can decide -whether to release it and continue the retry sequence or to abort the retry -sequence by manually resetting it to the {\em failed} state. - -\subsection{Task Event Handling} -\label{EventHandling} - -See also~\ref{SuiteEventHandling} and~\ref{TaskEventHandling} in the {\em -Suite.rc Reference}. - -Cylc can call nominated event handlers - to do whatever you like - when certain -suite or task events occur. This facilitates centralized alerting and automated -handling of critical events. Event handlers can be used to send a message, call -a pager, or whatever; they can even intervene in the operation of their own -suite using cylc commands. - -To send an email, use the built-in setting \lstinline=[[[events]]]mail events= -to specify a list of events for which notifications should be sent. (The name of -a registered task output can also be used as an event name in this case.) E.g.\ to -send an email on (submission) failed and retry: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - script = """ - test ${CYLC_TASK_TRY_NUMBER} -eq 3 - cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" 'oopsy daisy' - """ - [[[events]]] - mail events = submission failed, submission retry, failed, retry, oops - [[[job]]] - execution retry delays = PT0S, PT30S - [[[outputs]]] - oops = oopsy daisy -\end{lstlisting} - -By default, the emails will be sent to the current user with: - -\begin{myitemize} - \item \lstinline=to:= set as \lstinline=$USER= - \item \lstinline=from:= set as \lstinline=notifications@$(hostname)= - \item SMTP server at \lstinline=localhost:25= -\end{myitemize} - -These can be configured using the settings: -\begin{myitemize} - \item \lstinline=[[[events]]]mail to= (list of email addresses), - \item \lstinline=[[[events]]]mail from= - \item \lstinline=[[[events]]]mail smtp=. 
-\end{myitemize} - -By default, a cylc suite will send you no more than one task event email every -5 minutes - this is to prevent your inbox from being flooded by emails should a -large group of tasks all fail at similar time. -See ~\ref{task-event-mail-interval} for details. - -Event handlers can be located in the suite \lstinline=bin/= directory; -otherwise it is up to you to ensure their location is in \lstinline=$PATH= (in -the shell in which the suite server program runs). They should require little -resource and return quickly - see~\ref{Managing External Command Execution}. - -Task event handlers can be specified using the -\lstinline=[[[events]]] handler= settings, where -\lstinline== is one of: -\begin{myitemize} - \item `submitted' - the job submit command was successful - \item `submission failed' - the job submit command failed - \item `submission timeout' - task job submission timed out - \item `submission retry' - task job submission failed, but will retry after - a configured delay - \item `started' - the task reported commencement of execution - \item `succeeded' - the task reported successful completion - \item `warning' - the task reported a WARNING severity message - \item `critical' - the task reported a CRITICAL severity message - \item `custom' - the task reported a CUSTOM severity message - \item `late' - the task is never active and is late - \item `failed' - the task failed - \item `retry' - the task failed but will retry after a configured delay - \item `execution timeout' - task execution timed out -\end{myitemize} - -The value of each setting should be a list of command lines or command line -templates (see below). - -Alternatively you can use \lstinline=[[[events]]]handlers= and -\lstinline=[[[events]]]handler events=, where the former is a list of command -lines or command line templates (see below) and the latter is a list of events -for which these commands should be invoked. 
(The name of a registered task -output can also be used as an event name in this case.) - -Event handler arguments can be constructed from various templates -representing suite name; task ID, name, cycle point, message, and submit -number name; and any suite or task [meta] item. See~\ref{SuiteEventHandling} -and~\ref{TaskEventHandling} for options. - -If no template arguments are supplied the following default command line -will be used: -\begin{lstlisting} - %(event)s %(suite)s %(id)s %(message)s -\end{lstlisting} - -{\em Note: substitution patterns should not be quoted in the template strings. -This is done automatically where required.} - -For an explanation of the substitution syntax, see -\href{https://docs.python.org/2/library/stdtypes.html#string-formatting}{String Formatting Operations} -in the Python documentation. - -The retry event occurs if a task fails and has any remaining retries -configured (see~\ref{TaskRetries}). -The event handler will be called as soon as the task fails, not after -the retry delay period when it is resubmitted. - -{\em Note that event handlers are called by the suite server program, not by -task jobs.} If you wish to pass additional information to them use [cylc] -\textrightarrow [[environment]], not task runtime environment. - -The following 2 \lstinline=suite.rc= snippets are examples on how to specify -event handlers using the alternate methods: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - script = test ${CYLC_TASK_TRY_NUMBER} -eq 2 - [[[events]]] - retry handler = "echo '!!!!!EVENT!!!!!' " - failed handler = "echo '!!!!!EVENT!!!!!' " - [[[job]]] - execution retry delays = PT0S, PT30S -\end{lstlisting} - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - script = """ - test ${CYLC_TASK_TRY_NUMBER} -eq 2 - cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" 'oopsy daisy' - """ - [[[events]]] - handlers = "echo '!!!!!EVENT!!!!!' 
" - # Note: task output name can be used as an event in this method - handler events = retry, failed, oops - [[[job]]] - execution retry delays = PT0S, PT30S - [[[outputs]]] - oops = oopsy daisy -\end{lstlisting} -The handler command here - specified with no arguments - is called with the -default arguments, like this: -\begin{lstlisting} -echo '!!!!!EVENT!!!!!' %(event)s %(suite)s %(id)s %(message)s -\end{lstlisting} - -\subsubsection{Late Events} -\label{Late Events} - -You may want to be notified when certain tasks are running late in a real time -production system - i.e.\ when they have not triggered by {\em the usual time}. -Tasks of primary interest are not normally clock-triggered however, so their -trigger times are mostly a function of how the suite runs in its environment, -and even external factors such as contention with other suites.\footnote{Late -notification of clock-triggered tasks is not very useful in any case because -they typically do not depend on other tasks, and as such they can often trigger -on time even if the suite is delayed to the point that downstream tasks are -late due to their dependence on previous-cycle tasks that are delayed.} - -But if your system is reasonably stable from one cycle to the next such that a -given task has consistently triggered by some interval beyond its cycle point, -you can configure Cylc to emit a {\em late event} if it has not triggered by -that time. For example, if a task \lstinline=forecast= normally triggers by 30 -minutes after its cycle point, configure late notification for it like this: - -\begin{lstlisting} -[runtime] - [[forecast]] - script = run-model.sh - [[[events]]] - late offset = PT30M - late handler = my-handler %(message)s -\end{lstlisting} - -{\em Late offset intervals are not computed automatically so be careful -to update them after any change that affects triggering times.} - -Note that Cylc can only check for lateness in tasks that it is currently aware -of. 
If a suite gets delayed over many cycles the next tasks coming up can be -identified as late immediately, and subsequent tasks can be identified as late -as the suite progresses to subsequent cycle points, until it catches up to the -clock. - -\subsection{Managing External Command Execution} -\label{Managing External Command Execution} - -Job submission commands, event handlers, and job poll and kill commands, are -executed by the suite server program in a ``pool'' of asynchronous -subprocesses, in order to avoid holding the suite up. The process pool is -actively managed to limit it to a configurable size (\ref{process pool size}). -Custom event handlers should be light-weight and quick-running because they -will tie up a process pool member until they complete, and the suite will -appear to stall if the pool is saturated with long-running processes. Processes -are killed after a configurable timeout (\ref{process pool timeout}) however, -to guard against rogue commands that hang indefinitely. All process kills are -logged by the suite server program. For killed job submissions the associated -tasks also go to the {\em submit-failed} state. - -\subsection{Handling Job Preemption} -\label{PreemptionHPC} - -Some HPC facilities allow job preemption: the resource manager can kill -or suspend running low priority jobs in order to make way for high -priority jobs. The preempted jobs may then be automatically restarted -by the resource manager, from the same point (if suspended) or requeued -to run again from the start (if killed). - -Suspended jobs will poll as still running (their job status file says they -started running, and they still appear in the resource manager queue). -Loadleveler jobs that are preempted by kill-and-requeue ("job vacation") are -automatically returned to the submitted state by Cylc. This is possible -because Loadleveler sends the SIGUSR1 signal before SIGKILL for preemption. 
-Other batch schedulers just send SIGTERM before SIGKILL as normal, so Cylc -cannot distinguish a preemption job kill from a normal job kill. After this the -job will poll as failed (correctly, because it was killed, and the job status -file records that). To handle this kind of preemption automatically you could -use a task failed or retry event handler that queries the batch scheduler queue -(after an appropriate delay if necessary) and then, if the job has been -requeued, uses \lstinline=cylc reset= to reset the task to the submitted state. - -\subsection{Manual Task Triggering and Edit-Run} - -Any task proxy currently present in the suite can be manually triggered at any -time using the \lstinline=cylc trigger= command, or from the right-click task -menu in gcylc. If the task belongs to a limited internal queue -(see~\ref{InternalQueues}), this will queue it; if not, or if it is already -queued, it will submit immediately. - -With \lstinline=cylc trigger --edit= (also in the gcylc right-click task menu) -you can edit the generated task job script to make one-off changes before the -task submits. - -\subsection{Cylc Broadcast} -\label{cylc-broadcast} - -The \lstinline=cylc broadcast= command overrides \lstinline=[runtime]= -settings in a running suite. This can -be used to communicate information to downstream tasks by broadcasting -environment variables (communication of information from one task to -another normally takes place via the filesystem, i.e.\ the input/output -file relationships embodied in inter-task dependencies). Variables (and -any other runtime settings) may be broadcast to all subsequent tasks, -or targeted specifically at a specific task, all subsequent tasks with a -given name, or all tasks with a given cycle point; see broadcast command help -for details. - -Broadcast settings targeted at a specific task ID or cycle point expire and -are forgotten as the suite moves on. 
Un-targeted variables and those -targeted at a task name persist throughout the suite run, even across -restarts, unless manually cleared using the broadcast command - and so -should be used sparingly. - -\subsection{The Meaning And Use Of Initial Cycle Point} - -When a suite is started with the \lstinline=cylc run= command (cold or -warm start) the cycle point at which it starts can be given on the command -line or hardwired into the suite.rc file: -\begin{lstlisting} -cylc run foo 20120808T06Z -\end{lstlisting} -or: -\begin{lstlisting} -[scheduling] - initial cycle point = 20100808T06Z -\end{lstlisting} -An initial cycle given on the command line will override one in the -suite.rc file. - -\subsubsection[CYLC\_SUITE\_INITIAL\_CYCLE\_POINT]{The Environment Variable CYLC\_SUITE\_INITIAL\_CYCLE\_POINT} - -In the case of a {\em cold start only} the initial cycle point is passed -through to task execution environments as -\lstinline=$CYLC_SUITE_INITIAL_CYCLE_POINT=. The value is then stored in -suite database files and persists across restarts, but it does get wiped out -(set to \lstinline=None=) after a warm start, because a warm start is really an -implicit restart in which all state information is lost (except that the -previous cycle is assumed to have completed). - -The \lstinline=$CYLC_SUITE_INITIAL_CYCLE_POINT= variable allows tasks to -determine if they are running in the initial cold-start cycle point, when -different behaviour may be required, or in a normal mid-run cycle point. -Note however that an initial \lstinline=R1= graph section is now the preferred -way to get different behaviour at suite start-up. - -\subsection{Simulating Suite Behaviour} -\label{SimulationMode} - -Several suite run modes allow you to simulate suite behaviour quickly without -running the suite's real jobs - which may be long-running and resource-hungry: - -\begin{myitemize} - \item {\em dummy mode} - runs dummy tasks as background jobs on configured - job hosts. 
- \begin{myitemize} - \item simulates scheduling, job host connectivity, and - generates all job files on suite and job hosts. - \end{myitemize} - \item {\em dummy-local mode} - runs real dummy tasks as background jobs on - the suite host, which allows dummy-running suites from other sites. - \begin{myitemize} - \item simulates scheduling and generates all job files on the - suite host. - \end{myitemize} - \item {\em simulation mode} - does not run any real tasks. - \begin{myitemize} - \item simulates scheduling without generating any job files. - \end{myitemize} -\end{myitemize} - -Set the run mode (default {\em live}) in the GUI suite start dialog box, or on -the command line: -\lstset{language=transcript} -\begin{lstlisting} -$ cylc run --mode=dummy SUITE -$ cylc restart --mode=dummy SUITE -\end{lstlisting} - -You can get specified tasks to fail in these modes, for more flexible suite -testing. See Section~\ref{suiterc-sim-config} for simulation configuration. - -\subsubsection{Proportional Simulated Run Length} - -If task \lstinline=[job]execution time limit= is set, Cylc divides it by -\lstinline=[simulation]speedup factor= (default \lstinline=10.0=) to compute -simulated task run lengths (default 10 seconds). - -\subsubsection{Limitations Of Suite Simulation} - -Dummy mode ignores batch scheduler settings because Cylc does not know which -job resource directives (requested memory, number of compute nodes, etc.) would -need to be changed for the dummy jobs. If you need to dummy-run jobs on a -batch scheduler manually comment out \lstinline=script= items and modify -directives in your live suite, or else use a custom live mode test suite. - -Note that the dummy modes ignore all configured task \lstinline=script= items -including \lstinline=init-script=. If your \lstinline=init-script= is required -to run even dummy tasks on a job host, note that host environment setup should -be done elsewhere - see~\ref{Configure Site Environment on Job Hosts}. 
- -\subsubsection{Restarting Suites With A Different Run Mode?} - -The run mode is recorded in the suite run database files. Cylc will not let -you {\em restart} a non-live mode suite in live mode, or vice versa. To -test a live suite in simulation mode just take a quick copy of it and run the -copy in simulation mode. - -\subsection{Automated Reference Test Suites} -\label{AutoRefTests} - -Reference tests are finite-duration suite runs that abort with non-zero -exit status if any of the following conditions occur (by default): - -\begin{myitemize} - \item cylc fails - \item any task fails - \item the suite times out (e.g.\ a task dies without reporting failure) - \item a nominated shutdown event handler exits with error status -\end{myitemize} - -The default shutdown event handler for reference tests is -\lstinline=cylc hook check-triggering= which compares task triggering -information (what triggers off what at run time) in the test run suite -log to that from an earlier reference run, disregarding the timing and -order of events - which can vary according to the external queueing -conditions, runahead limit, and so on. - -To prepare a reference log for a suite, run it with the -\lstinline=--reference-log= option, and manually verify the -correctness of the reference run. - -To reference test a suite, just run it (in dummy mode for the most -comprehensive test without running real tasks) with the -\lstinline=--reference-test= option. - -A battery of automated reference tests is used to test cylc before -posting a new release version. Reference tests can also be used to check that -a cylc upgrade will not break your own complex -suites - the triggering check will catch any bug that causes a task to -run when it shouldn't, for instance; even in a dummy mode reference -test the full task job script (sans \lstinline=script= items) executes on the -proper task host by the proper batch system.
- -Reference tests can be configured with the following settings: -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - [[reference test]] - suite shutdown event handler = cylc check-triggering - required run mode = dummy - allow task failures = False - live mode suite timeout = PT5M - dummy mode suite timeout = PT2M - simulation mode suite timeout = PT2M -\end{lstlisting} - -\subsubsection{Roll-your-own Reference Tests} - -If the default reference test is not sufficient for your needs, firstly -note that you can override the default shutdown event handler, and -secondly that the \lstinline=--reference-test= option is merely a short -cut to the following suite.rc settings which can also be set manually if -you wish: - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - abort if any task fails = True - [[events]] - shutdown handler = cylc check-triggering - timeout = PT5M - abort if shutdown handler fails = True - abort on timeout = True -\end{lstlisting} - -\subsection{Triggering Off Of Tasks In Other Suites} -\label{SuiteStatePolling} - -{\em NOTE: please read {\em External Triggers} (\ref{External Triggers}) before -using the older inter-suite triggering mechanism described in this section.} - -The \lstinline=cylc suite-state= command interrogates suite run databases. It -has a polling mode that waits for a given task in the target suite to achieve a -given state, or receive a given message. This can be used to make task -scripting wait for a remote task to succeed (for example). - -Automatic suite-state polling tasks can be defined within the graph. They get -automatically-generated task scripting that uses \lstinline=cylc suite-state= -appropriately (it is an error to give your own \lstinline=script= item for these -tasks).
- -Here's how to trigger a task \lstinline=bar= off a task \lstinline=foo= in -a remote suite called \lstinline=other.suite=: -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T00, T12]]] - graph = "my-foo<other.suite::foo> => bar" -\end{lstlisting} -Local task \lstinline=my-foo= will poll for the success of \lstinline=foo= -in suite \lstinline=other.suite=, at the same cycle point, succeeding only when -or if it succeeds. Other task states can also be polled: -\begin{lstlisting} - graph = "my-foo<other.suite::foo:fail> => bar" -\end{lstlisting} - -The default polling parameters (e.g.\ maximum number of polls and the interval -between them) are printed by \lstinline=cylc suite-state --help= and can be -configured if necessary under the local polling task runtime section: -\begin{lstlisting} -[scheduling] - [[ dependencies]] - [[[T00,T12]]] - graph = "my-foo<other.suite::foo> => bar" -[runtime] - [[my-foo]] - [[[suite state polling]]] - max-polls = 100 - interval = PT10S -\end{lstlisting} - -To poll for the target task to receive a message rather than achieve a state, -give the message in the runtime configuration (in which case the task status -inferred from the graph syntax will be ignored): - -\begin{lstlisting} -[runtime] - [[my-foo]] - [[[suite state polling]]] - message = "the quick brown fox" -\end{lstlisting} - -For suites owned by others, or those with run databases in non-standard -locations, use the \lstinline=--run-dir= option, or in-suite: -\begin{lstlisting} -[runtime] - [[my-foo]] - [[[suite state polling]]] - run-dir = /path/to/top/level/cylc/run-directory -\end{lstlisting} - -If the remote task has a different cycling sequence, just arrange for the -local polling task to be on the same sequence as the remote task that it -represents.
For instance, if local task \lstinline=cat= cycles 6-hourly at -\lstinline=0,6,12,18= but needs to trigger off a remote task \lstinline=dog= -at \lstinline=3,9,15,21=: -\begin{lstlisting} -[scheduling] - [[dependencies]] - [[[T03,T09,T15,T21]]] - graph = "my-dog<other.suite::dog>" - [[[T00,T06,T12,T18]]] - graph = "my-dog[-PT3H] => cat" -\end{lstlisting} - -For suite-state polling, the cycle point is automatically converted to the -cycle point format of the target suite. - -The remote suite does not have to be running when polling commences because the -command interrogates the suite run database, not the suite server program. - -Note that the graph syntax for suite polling tasks cannot be combined with -cycle point offsets, family triggers, or parameterized task notation. This does -not present a problem because suite polling tasks can be put on the same -cycling sequence as the remote-suite target task (as recommended above), and -there is no point in having multiple tasks (family members or parameterized -tasks) performing the same polling operation.
Task state triggers can be used -with suite polling, e.g.\ to trigger another task if polling fails after 10 -tries at 10 second intervals: - -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = "poller:fail => another-task" -[runtime] - [[my-foo]] - [[[suite state polling]]] - max-polls = 10 - interval = PT10S -\end{lstlisting} - -\subsection{Suite Server Logs} -\label{Suite Server Logs} - -Each suite maintains its own log of time-stamped events under the {\em suite -server log directory}: - -\begin{lstlisting} -$HOME/cylc-run/SUITE-NAME/log/suite/ -\end{lstlisting} - -By way of example, we will show the complete server log generated (at -cylc-7.2.0) by a small suite that runs two 30-second dummy tasks -\lstinline=foo= and \lstinline=bar= for a single cycle point -\lstinline=2017-01-01T00Z= before shutting down: - -\lstset{language=suiterc,breaklines=true} -\begin{lstlisting} -[cylc] - cycle point format = %Y-%m-%dT%HZ -[scheduling] - initial cycle point = 2017-01-01T00Z - final cycle point = 2017-01-01T00Z - [[dependencies]] - graph = "foo => bar" -[runtime] - [[foo]] - script = sleep 30; /bin/false - [[bar]] - script = sleep 30; /bin/true -\end{lstlisting} - -By the task scripting defined above, this suite will stall when \lstinline=foo= -fails. Then, the suite owner {\em vagrant@cylon} manually resets the failed -task's state to {\em succeeded}, allowing \lstinline=bar= to trigger and the -suite to finish and shut down. 
Here's the complete suite log for this run: - -\lstset{language=transcript} -\begin{lstlisting} -$ cylc cat-log SUITE-NAME -2017-03-30T09:46:10Z INFO - Suite starting: server=localhost:43086 pid=3483 -2017-03-30T09:46:10Z INFO - Run mode: live -2017-03-30T09:46:10Z INFO - Initial point: 2017-01-01T00Z -2017-03-30T09:46:10Z INFO - Final point: 2017-01-01T00Z -2017-03-30T09:46:10Z INFO - Cold Start 2017-01-01T00Z -2017-03-30T09:46:11Z INFO - [foo.2017-01-01T00Z] -submit_method_id=3507 -2017-03-30T09:46:11Z INFO - [foo.2017-01-01T00Z] -submission succeeded -2017-03-30T09:46:11Z INFO - [foo.2017-01-01T00Z] -(current:submitted)> started at 2017-03-30T09:46:10Z -2017-03-30T09:46:41Z CRITICAL - [foo.2017-01-01T00Z] -(current:running)> failed/EXIT at 2017-03-30T09:46:40Z -2017-03-30T09:46:42Z WARNING - suite stalled -2017-03-30T09:46:42Z WARNING - Unmet prerequisites for bar.2017-01-01T00Z: -2017-03-30T09:46:42Z WARNING - * foo.2017-01-01T00Z succeeded -2017-03-30T09:47:58Z INFO - [client-command] reset_task_states vagrant@cylon:cylc-reset 1e0d8e9f-2833-4dc9-a0c8-9cf263c4c8c3 -2017-03-30T09:47:58Z INFO - [foo.2017-01-01T00Z] -resetting state to succeeded -2017-03-30T09:47:58Z INFO - Command succeeded: reset_task_states([u'foo.2017'], state=succeeded) -2017-03-30T09:47:59Z INFO - [bar.2017-01-01T00Z] -submit_method_id=3565 -2017-03-30T09:47:59Z INFO - [bar.2017-01-01T00Z] -submission succeeded -2017-03-30T09:47:59Z INFO - [bar.2017-01-01T00Z] -(current:submitted)> started at 2017-03-30T09:47:58Z -2017-03-30T09:48:29Z INFO - [bar.2017-01-01T00Z] -(current:running)> succeeded at 2017-03-30T09:48:28Z -2017-03-30T09:48:30Z INFO - Waiting for the command process pool to empty for shutdown -2017-03-30T09:48:30Z INFO - Suite shutting down - AUTOMATIC -\end{lstlisting} - -The information logged here includes: - -\begin{myitemize} - \item event timestamps, at the start of each line - \item suite server host, port and process ID - \item suite initial and final cycle points - \item 
suite start type (cold start in this case) - \item task events (task started, succeeded, failed, etc.) - \item suite stalled warning (in this suite nothing else can run when - \lstinline=foo= fails) - \item the client command issued by {\em vagrant@cylon} to reset - \lstinline=foo= to {\em succeeded} - \item job IDs - in this case process IDs for background jobs (or PBS job IDs - etc.) - \item state changes due to incoming task progress message ("started at ..." - etc.) - \item suite shutdown time and reasons (AUTOMATIC means "all tasks finished - and nothing else to do") -\end{myitemize} - -Note that suite log files are primarily intended for human eyes. If you need -to have an external system to monitor suite events automatically, interrogate -the sqlite {\em suite run database} (see~\ref{Suite Run -Databases}) rather than parse the log files. - -\subsection{Suite Run Databases} -\label{Suite Run Databases} - -Suite server programs maintain two \lstinline=sqlite= databases to record -restart checkpoints and various other aspects of run history: - -\lstset{language=transcript} -\begin{lstlisting} -$HOME/cylc-run/SUITE-NAME/log/db # public suite DB -$HOME/cylc-run/SUITE-NAME/.service/db # private suite DB -\end{lstlisting} - -The private DB is for use only by the suite server program. The identical -public DB is provided for use by external commands such as -\lstinline=cylc suite-state=, \lstinline=cylc ls-checkpoints=, and -\lstinline=cylc report-timings=. If the public DB gets locked for too long by -an external reader, the suite server program will eventually delete it and -replace it with a new copy of the private DB, to ensure that both correctly -reflect the suite state. - -You can interrogate the public DB with the \lstinline=sqlite3= command line tool, -the \lstinline=sqlite3= module in the Python standard library, or any other -sqlite interface.
- -\begin{lstlisting} -$ sqlite3 ~/cylc-run/foo/log/db << _END_ -> .headers on -> select * from task_events where name is "foo"; -> _END_ -name|cycle|time|submit_num|event|message -foo|1|2017-03-12T11:06:09Z|1|submitted| -foo|1|2017-03-12T11:06:09Z|1|output completed|started -foo|1|2017-03-12T11:06:09Z|1|started| -foo|1|2017-03-12T11:06:19Z|1|output completed|succeeded -foo|1|2017-03-12T11:06:19Z|1|succeeded| -\end{lstlisting} - -\subsection{Disaster Recovery} -\label{Disaster Recovery} - -If a suite run directory gets deleted or corrupted, the options for recovery -are: -\begin{myitemize} - \item restore the run directory from back-up, and restart the suite - \item re-install from source, and warm start from the beginning of the - current cycle point -\end{myitemize} - -A warm start (see~\ref{Warm Start}) does not need a suite state checkpoint, but -it wipes out prior run history, and it could re-run a significant number of -tasks that had already completed. - -To restart the suite, the critical Cylc files that must be restored are: - -\lstset{language=transcript} -\begin{lstlisting} -# On the suite host: -~/cylc-run/SUITE-NAME/ - suite.rc # live suite configuration (located here in Rose suites) - log/db # public suite DB (can just be a copy of the private DB) - log/rose-suite-run.conf # (needed to restart a Rose suite) - .service/db # private suite DB - .service/source -> PATH-TO-SUITE-DIR # symlink to live suite directory - -# On job hosts (if no shared filesystem): -~/cylc-run/SUITE-NAME/ - log/job/CYCLE-POINT/TASK-NAME/SUBMIT-NUM/job.status -\end{lstlisting} - -{\em Note this discussion does not address restoration of files generated and -consumed by task jobs at run time}. How suite data is stored and recovered in -your environment is a matter of suite and system design. - -In short, you can simply restore the suite service directory, the log -directory, and the suite.rc file that is the target of the symlink in the -service directory. 
The service and log directories will come with extra files -that aren't strictly needed for a restart, but that doesn't matter - although -depending on your log housekeeping the \lstinline=log/job= directory could be -huge, so you might want to be selective about that. (Also in a Rose suite, the -\lstinline=suite.rc= file does not need to be restored if you restart with -\lstinline=rose suite-run= - which re-installs suite source files to the run -directory). - -The public DB is not strictly required for a restart - the suite server program -will recreate it if need be - but it is required by -\lstinline=cylc ls-checkpoints= if you need to identify the right restart -checkpoint. - -The job status files are only needed if the restart suite state checkpoint -contains active tasks that need to be polled to determine what happened to them -while the suite was down. Without them, polling will fail and those tasks will -need to be manually set to the correct state. - -{\em WARNING: it is not safe to copy or rsync a potentially-active sqlite DB - -the copy might end up corrupted. It is best to stop the suite before copying -a DB, or else write a back-up utility using the official sqlite backup API: -\url{http://www.sqlite.org/backup.html}.} - -\subsection{Auto Stop-Restart} -\label{auto-stop-restart} - -Cylc has the ability to automatically stop suites running on a particular host -and optionally, restart them on a different host. -This is useful if a host needs to be taken off-line e.g.\ for -scheduled maintenance. 
- -This functionality is configured via the following site configuration settings: -\begin{myitemize} - \item \lstinline=[run hosts][suite servers]auto restart delay= - \item \lstinline=[run hosts][suite servers]condemned hosts= - \item \lstinline=[run hosts][suite servers]run hosts= -\end{myitemize} - -The auto stop-restart feature has two modes: - -\begin{description} - \item[Normal Mode] \hfill - - When a host is added to the \lstinline=condemned hosts= list, any suites - running on that host will automatically shutdown then restart selecting a - new host from \lstinline=run hosts=. - - For safety, before attempting to stop the suite cylc will first wait for any - jobs running locally (under background or at) to complete. - - {\em In order for Cylc to be able to successfully restart suites the - \lstinline=run hosts= must all be on a shared filesystem.} - - \item[Force Mode] \hfill - - If a host is suffixed with an exclamation mark then Cylc will not attempt - to automatically restart the suite and any local jobs (running under - background or at) will be left running. -\end{description} - -For example in the following configuration any suites running on -\lstinline=foo= will attempt to restart on \lstinline=pub= whereas any suites -running on \lstinline=bar= will stop immediately, making no attempt to restart. - -\begin{lstlisting} -[suite servers] - run hosts = pub - condemned hosts = foo, bar! -\end{lstlisting} - -To prevent large numbers of suites attempting to restart simultaneously the -\lstinline=auto restart delay= setting defines a period of time in seconds. -Suites will wait for a random period of time between zero and -\lstinline=auto restart delay= seconds before attempting to stop and restart. - -At present the auto shutdown-restart functionality can only operate provided -that the user hasn't specified any behaviour which is not preserved by -\lstinline=cylc restart= (e.g.\ user specified hold point or run mode). 
This -caveat will be removed in a future version, currently Cylc will not attempt to -auto shutdown-restart suites which meet this criterion but will log a critical -error message to alert the user. - -See the \lstinline=[suite servers]= configuration section -(\ref{global-suite-servers}) for more details. - -\section{Suite Storage, Discovery, Revision Control, and Deployment} -\label{SuiteStorageEtc} - -Small groups of cylc users can of course share suites by manual copying, -and generic revision control tools can be used on cylc suites as for any -collection of files. Beyond this cylc does not have a built-in solution -for suite storage and discovery, revision control, and deployment, on a -network. That is not cylc's core purpose, and large sites may have -preferred revision control systems and suite meta-data requirements that -are difficult to anticipate. We can, however, recommend the use of {\em -Rose} to do all of this very easily and elegantly with cylc suites. - -\subsection{Rose} -\label{Rose} - -{\bf Rose} is {\em a framework for managing and running suites of -scientific applications}, developed at the UK Met Office for use with -cylc. It is available under the open source GPL license. - -\begin{myitemize} - \item Rose documentation: \url{http://metomi.github.io/rose/doc/rose.html} - \item Rose source repository: \url{https://github.com/metomi/rose} -\end{myitemize} - -\pagebreak - -\appendix - -\input{suiterc.tex} -\pagebreak - -\input{siterc.tex} - -\pagebreak - -\input{gcylcrc.tex} - -\pagebreak - -\input{gscanrc.tex} - -\pagebreak - -\input{job-host-1.tex} -\input{job-host-2.tex} - -\pagebreak - - -\section{Command Reference} -\label{CommandReference} - -%This section is auto-generated from the self-documenting command set. 
- -\lstset{language=usage} -\input{commands.tex} -\lstset{language=transcript} - -\section{The gcylc Graph View} -\label{TheGraphBasedcontrolGUI} - -The graph view in the gcylc GUI shows the structure of the suite as it -evolves. It can work well even for large suites, but be aware that the -graphviz layout engine has to do a new global layout every time a task -proxy appears in or disappears from the task pool. The following may help -mitigate any jumping layout problems: - -\begin{myitemize} - \item The disconnect button can be used to temporarily prevent the - graph from changing as the suite evolves. - \item The greyed-out base nodes, which are only present to fill out - the graph structure, can be toggled off (but this will split the - graph into disconnected sub-trees). - \item Right-click on a task and choose the ``Focus'' option to restrict - the graph display to that task's cycle point. Anything interesting - happening in other cycle points will show up as disconnected - rectangular nodes to the right of the graph (and you can click on - those to instantly refocus to their cycle points). - \item Task filtering is the ultimate quick route to focusing on just - the tasks you're interested in, but this will destroy the graph - structure. -\end{myitemize} - -\section{Cylc README File} - -\lstinputlisting{../../../README.md} - -\section{Cylc INSTALL File} -\label{INSTALL} - -\lstinputlisting{../../../INSTALL.md} - -\section{Cylc Development History - Major Changes} - -\begin{myitemize} - - \item {\bf pre-cylc-3} - early versions focused on the new - scheduling algorithm. A suite was a collection of ``task definition - files'' that encoded the prerequisites and outputs of each task, - exposing cylc's self-organising nature. Tasks could be transferred - from one suite to another by simply copying their taskdef files over - and checking prerequisite and output consistency. 
Global suite - structure was not easy to discern until run time (although cylc-2 - could generate resolved run time dependency graphs). - - \item {\bf cylc-3} - a new suite design interface: dependency graph - and task runtime properties defined in a single structured, - validated, configuration file - the suite.rc file; graphical user - interface; suite graphing. - - \item {\bf cylc-4} - refined and organized the suite.rc file - structure; task runtime properties defined by an efficient - inheritance hierarchy; support for the Jinja2 template processor in - suite configurations. - - \item {\bf cylc-5} - multi-threading for continuous network request - handling and job submission; more task states to distinguish job - submission from execution; dependence between suites via new suite - run databases; polling and killing of real task jobs; polling as - task communications option. - - \item {\bf cylc-6} - specification of all date-times and cycling - workflows via the ISO8601 date-times, durations, and recurrence - expressions; integer cycling; a multi-process pool to execute job - submissions, event handlers, and poll and kill commands. - - \item {\bf cylc-7} - Replaced the Pyro communications layer with - RESTful HTTPS. Removed deprecated pre cylc-6 syntax and features. - -\end{myitemize} - -\section{Communication Method} -\label{Communication} - -Cylc suite server programs and clients (commands, cylc gui, task messaging) -communicate via particular ports using the HTTPS protocol, secured -by HTTP Digest Authentication using the suite's 20-random-character -private passphrase and private SSL certificate. - -This is enabled via the included-in-cylc cherrypy library (for the -server) and either the Python requests library (if available) or -the built-in Python libraries for the clients. - -All suites are entirely isolated from one another. 
- -\section{Cylc 6 Migration Reference} -\label{cylc-6-migration} - -Cylc 6 introduced new date-time-related syntax for the suite.rc file. In -some places, this is quite radically different from the earlier syntax. - -\subsection{Timeouts and Delays} -\label{cylc-6-migration-timeout-delays} - -Timeouts and delays such as \lstinline=[cylc][[events]]timeout= or -\lstinline=[runtime][[my_task]][[[job]]]execution retry delays= were written in -a purely numeric form before cylc 6, in seconds, minutes (most common), or -hours, depending on the setting. - -They are now written in an ISO 8601 duration form, which has the benefit -that the units are user-selectable (use 1 day instead of 1440 minutes) -and explicit. - -Nearly all timeouts and delays in cylc were in minutes, except for:\\* -\lstinline=[runtime][[my_task]][[[suite state polling]]]interval= \\* -\lstinline=[runtime][[my_task]][[[simulation mode]]]run time range= \\* -which were in seconds, and\\* -\lstinline=[scheduling]runahead limit=\\* -which was in hours (this is a special case discussed below -in~\ref{cylc-6-migration-runahead-limit}). - -See Table \ref{cylc-6-migration-timeout-delays-table}. - -\begin{table}[ht] -\caption{Timeout/Delay Syntax Change Examples} -\centering -\begin{tabular}{ l c c } -Setting & Pre-Cylc-6 & Cylc-6+ \\ -\hline -\lstinline=[cylc][[events]]timeout= & 180 & PT3H \\ -\lstinline=[runtime][[my_task]][[[job]]]execution retry delays= & 2*30, 360, & 2*PT30M, PT6H, \\ - & 1440 & P1D \\ -\lstinline=[runtime][[my_task]][[[suite state polling]]]interval= & 2 & PT2S \\ -\end{tabular} -\label{cylc-6-migration-timeout-delays-table} -\end{table} - -\subsection{Runahead Limit} -\label{cylc-6-migration-runahead-limit} - -See~\ref{runahead limit}. - -The \lstinline=[scheduling]runahead limit= setting was written as a number of -hours in pre-cylc-6 suites. 
This is now in ISO 8601 format for date-time -cycling suites, so \lstinline@[scheduling]runahead limit=36@ would be written -\lstinline@[scheduling]runahead limit=PT36H@. - -There is a new preferred alternative to \lstinline=runahead limit=, -\lstinline=[scheduling]max active cycle points=. This allows the user to -configure how many cycle points can run at once (default \lstinline=3=). See -\ref{max active cycle points}. - -\subsection{Cycle Time/Cycle Point} -\label{cylc-6-migration-cycle-point} - -See~\ref{initial cycle point}. - -The following suite.rc settings have changed name (Table -\ref{cylc-6-migration-cycle-point-time-table}): - -\begin{table}[ht] -\caption{Cycle Point Renaming} -\centering -\begin{tabular}{ l l } -Pre-Cylc-6 & Cylc-6+ \\ -\hline -\lstinline=[scheduling]initial cycle time= & \lstinline=[scheduling]initial cycle point= \\ -\lstinline=[scheduling]final cycle time= & \lstinline=[scheduling]final cycle point= \\ -\lstinline=[visualization]initial cycle time= & \lstinline=[visualization]initial cycle point= \\ -\lstinline=[visualization]final cycle time= & \lstinline=[visualization]final cycle point= \\ -\end{tabular} -\label{cylc-6-migration-cycle-point-time-table} -\end{table} - -This change is to reflect the fact that cycling in cylc 6+ can now be over -e.g.\ integers instead of being purely based on date-time. - -Date-times written in \lstinline=initial cycle time= and -\lstinline=final cycle time= were in a cylc-specific 10-digit (or less) -\lstinline=CCYYMMDDhh= format, such as \lstinline=2014021400= for 00:00 on -the 14th of February 2014. - -Date-times are now required to be ISO 8601 compatible. This can be achieved -easily enough by inserting a \lstinline=T= between the day and the hour -digits. 
- -\begin{table}[ht] -\caption{Cycle Point Syntax Example} -\centering -\begin{tabular}{ l c c } -Setting & Pre-Cylc-6 & Cylc-6+ \\ -\hline -\lstinline=[scheduling]initial cycle time= & 2014021400 & 20140214T00 \\ -\end{tabular} -\label{cylc-6-migration-cycle-point-syntax-table} -\end{table} - -\subsection{Cycling} -\label{cylc-6-migration-cycling} - -Special {\em start-up} and {\em cold-start} tasks have been removed from cylc -6. Instead, use the initial/run-once notation as detailed -in~\ref{initial-non-repeating-r1-tasks} and~\ref{AdvancedStartingUp}. - -{\em Repeating asynchronous tasks} have also been removed because non date-time -workflows can now be handled more easily with integer cycling. See for instance -the satellite data processing example documented in~\ref{IntegerCycling}. - -For repeating tasks with hour-based cycling the syntax has only minor changes: - -Pre-cylc-6: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - ... - [[dependencies]] - [[[0,12]]] - graph = foo[T-12] => foo & bar => baz -\end{lstlisting} -\lstset{language=transcript} - -Cylc-6+: -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - ... - [[dependencies]] - [[[T00,T12]]] - graph = foo[-PT12H] => foo & bar => baz -\end{lstlisting} -\lstset{language=transcript} - -Hour-based cycling section names are easy enough to convert, as seen in Table -\ref{cylc-6-migration-cycling-hours-table}.
- -\begin{table}[ht] -\caption{Hourly Cycling Sections} -\centering -\begin{tabular}{ l l } -Pre-Cylc-6 & Cylc-6+ \\ -\hline -\lstinline=[scheduling][[dependencies]][[[0]]]= & \lstinline=[scheduling][[dependencies]][[[T00]]]= \\ -\lstinline=[scheduling][[dependencies]][[[6]]]= & \lstinline=[scheduling][[dependencies]][[[T06]]]= \\ -\lstinline=[scheduling][[dependencies]][[[12]]]= & \lstinline=[scheduling][[dependencies]][[[T12]]]= \\ -\lstinline=[scheduling][[dependencies]][[[18]]]= & \lstinline=[scheduling][[dependencies]][[[T18]]]= \\ -\end{tabular} -\label{cylc-6-migration-cycling-hours-table} -\end{table} - -The graph text in hour-based cycling is also easy to convert, as seen in -Table \ref{cylc-6-migration-cycling-hours-offset-table}. - -\begin{table}[ht] -\caption{Hourly Cycling Offsets} -\centering -\begin{tabular}{ l l } -Pre-Cylc-6 & Cylc-6+ \\ -\hline -\lstinline=my_task[T-6]= & \lstinline=my_task[-PT6H]= \\ -\lstinline=my_task[T-12]= & \lstinline=my_task[-PT12H]= \\ -\lstinline=my_task[T-24]= & \lstinline=my_task[-PT24H]= or even \lstinline=my_task[-P1D]= \\ -\end{tabular} -\label{cylc-6-migration-cycling-hours-offset-table} -\end{table} - -\subsection{No Implicit Creation of Tasks by Offset Triggers} -\label{cylc-6-migration-implicit-cycling} - -Prior to cylc-6 intercycle offset triggers implicitly created task instances at -the offset cycle points. For example, this pre cylc-6 suite automatically -creates instances of task \lstinline=foo= at the offset hours -\lstinline=3,9,15,21= each day, for task \lstinline=bar= to trigger off at -\lstinline=0,6,12,18=: -\lstset{language=suiterc} -\begin{lstlisting} -# Pre cylc-6 implicit cycling. -[scheduling] - initial cycle time = 2014080800 - [[dependencies]] - [[[00,06,12,18]]] - # This creates foo instances at 03,09,15,21: - graph = foo[T-3] => bar -\end{lstlisting} - -Here's the direct translation to cylc-6+ format: -\lstset{language=suiterc} -\begin{lstlisting} -# In cylc-6+ this suite will stall. 
-[scheduling] - initial cycle point = 20140808T00 - [[dependencies]] - [[[T00,T06,T12,T18]]] - # This does NOT create foo instances at 03,09,15,21: - graph = foo[-PT3H] => bar -\end{lstlisting} - -This suite fails validation with -\lstinline=ERROR: No cycling sequences defined for foo=, -and at runtime it would stall with \lstinline=bar= instances waiting on -non-existent offset \lstinline=foo= instances (note that these -appear as ghost nodes in graph visualisations). - -To fix this, explicitly define the cycling of \lstinline=foo= with an offset -cycling sequence: -\lstset{language=suiterc} -\begin{lstlisting} -# Cylc-6+ requires explicit task instance creation. -[scheduling] - initial cycle point = 20140808T00 - [[dependencies]] - [[[T03,T09,T15,T21]]] - graph = foo - [[[T00,T06,T12,T18]]] - graph = foo[-PT3H] => bar -\end{lstlisting} - -Implicit task creation by offset triggers is no longer allowed because it is -error prone: a mistaken task cycle point offset should cause a failure -rather than automatically creating task instances on the wrong cycling -sequence. - -\section{Known Issues} -\label{KnownIssues} - -\subsection{Current Known Issues} -\label{CurrentKnownIssues} - -The best place to find current known issues is on Github: -\url{https://github.com/cylc/cylc/issues}. - -\subsection{Notable Known Issues} -\label{NotableKnownIssues} - -\subsubsection{Use of pipes in job scripts} -\label{PipeInJobScripts} - -In bash, the return status of a pipeline is normally the exit status of the -last command. This is unsafe, because if any command in the pipeline fails, the -script will continue nevertheless. - -For safety, a cylc task job script running in bash will have the -\lstinline=set -o pipefail= option turned on automatically. If a pipeline -exists in a task's \lstinline=script=, etc section, the failure of any part of -a pipeline will cause the command to return a non-zero code at the end, which -will be reported as a task job failure.
Due to the unique nature of a pipeline, -the job file will trap the failure of the individual commands, as well as the -whole pipeline, and will attempt to report a failure back to the suite twice. -The second message is ignored by the suite, and so the behaviour can be safely -ignored. (You should probably still investigate the failure, however!) - -\section{GNU GENERAL PUBLIC LICENSE v3.0} -\input{gpl-3.0} diff --git a/doc/src/cylc-user-guide/gcylcrc.tex b/doc/src/cylc-user-guide/gcylcrc.tex deleted file mode 100644 index 41af81684b9..00000000000 --- a/doc/src/cylc-user-guide/gcylcrc.tex +++ /dev/null @@ -1,273 +0,0 @@ -\section{Gcylc GUI (cylc gui) Config File Reference} -\label{GcylcRCReference} - -\lstset{language=bash} - -This section defines all legal items and values for the gcylc user config file, -which should be located in \lstinline=$HOME/.cylc/gcylc.rc=. Current settings -can be printed with the \lstinline=cylc get-gui-config= command. - -\subsection{Top Level Items} - -\subsubsection{dot icon size} - -Set the size of the task state dot icons displayed in the text and dot -views. - -\begin{myitemize} -\item {\em type:} string -\item {\em legal values:} ``small'' (10px), ``medium'' (14px), ``large'' (20px), - ``extra large'' (30px) -\item {\em default:} ``medium'' -\end{myitemize} - -\subsubsection{initial side-by-side views} - -Set the suite view panels' initial orientation when the GUI starts. -This can be changed later using the ``View'' menu ``Toggle views side-by-side'' - option. - -\begin{myitemize} -\item {\em type:} boolean (False or True) -\item {\em default:} ``False'' -\end{myitemize} - -\subsubsection{initial views} - -Set the suite view panel(s) displayed initially, when the GUI starts. -This can be changed later using the tool bar.
- -\begin{myitemize} -\item {\em type:} string (a list of one or two view names) -\item {\em legal values:} ``text'', ``dot'', ``graph'' -\item {\em default:} ``text'' -\item {\em example:} \lstinline@initial views = graph, dot@ -\end{myitemize} - -\subsubsection{maximum update interval} - -Set the maximum (longest) time interval between calls to the suite for data -update. - -The update frequency of the GUI is variable. It is determined by considering -the time of last update and the mean duration of the last 10 main loops of the -suite. - -In general, the GUI will use an update frequency that matches the mean duration -of the suite's main loop. In quiet time (or if the suite is not contactable), -it will gradually increase the update interval (i.e.\ reduce the update -frequency) to a maximum determined by this setting. - -Increasing this setting will reduce the network traffic and hits on the suite -process. However, if a quiet suite starts to pick up activity, the GUI may -initially appear out of sync with what is happening in the suite for the -duration of this interval. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default:} PT15S -\end{myitemize} - -\subsubsection{sort by definition order} - -If this is not turned off the default sort order for task names and -families in the dot and text views will be the order they appear in the -suite definition. Clicking on the task name column in the treeview will -toggle to alphanumeric sort, and a View menu item does the same for the -dot view. If turned off, the default sort order is alphanumeric and -definition order is not available at all. 
- -\begin{myitemize} -\item {\em type:} boolean -\item {\em default:} True -\end{myitemize} - - -\subsubsection{sort column} - -If ``text'' is in \lstinline@initial views@ then \lstinline@sort column@ sets -the column that will be sorted initially when the GUI launches. Sorting can be -changed later by clicking on the column headers. - -\begin{myitemize} - \item {\em type:} string - \item {\em legal values:} ``task'', ``state'', ``host'', ``job system'', - ``job ID'', ``T-submit'', ``T-start'', ``T-finish'', ``dT-mean'', - ``latest message'', ``none'' - \item {\em default:} ``none'' - \item {\em example:} \lstinline@sort column = T-start@ -\end{myitemize} - - -\subsubsection{sort column ascending} - -For use in combination with \lstinline@sort column@, sets whether the column will -be sorted using ascending or descending order. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} ``True'' - \item {\em example:} \lstinline@sort column ascending = False@ -\end{myitemize} - -\subsubsection{sub-graphs on} - -Set the sub-graphs view to be enabled by default. -This can be changed later using the toggle options for the graph view. - -\begin{myitemize} -\item {\em type:} boolean (False or True) -\item {\em default:} ``False'' -\end{myitemize} - - -\subsubsection{task filter highlight color} - -The color used to highlight active task filters in gcylc. It must be a name -from the X11 rgb.txt file, e.g.\ \lstinline=SteelBlue=; or a -{\em quoted} hexadecimal color code, e.g.\ \lstinline="#ff0000"= for red (quotes -are required to prevent the hex code being interpreted as a comment). - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} \lstinline=PowderBlue= -\end{myitemize} - - -\subsubsection{task states to filter out} - -Set the initial filtering options when the GUI starts. Later this can be -changed by using the ``View'' menu ``Task Filtering'' option. 
- -\begin{myitemize} -\item {\em type:} string list -\item {\em legal values:} waiting, held, queued, ready, expired, submitted, -submit-failed, submit-retrying, running, succeeded, failed, retrying, runahead -\item {\em default:} runahead -\end{myitemize} - - -\subsubsection{transpose dot} - -Transposes the content in dot view so that it displays from left to right rather -than from top to bottom. Can be changed later using the options submenu -available via the view menu. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} ``False'' - \item {\em example:} \lstinline@transpose dot = True@ -\end{myitemize} - - -\subsubsection{transpose graph} - -Transposes the content in graph view so that it displays from left to right -rather than from top to bottom. Can be changed later using the options submenu -via the view menu. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} ``False'' - \item {\em example:} \lstinline@transpose graph = True@ -\end{myitemize} - - -\subsubsection{ungrouped views} - -List suite views, if any, that should be displayed initially in an -ungrouped state. Namespace family grouping can be changed later -using the tool bar. - -\begin{myitemize} -\item {\em type:} string (a list of zero or more view names) -\item {\em legal values:} ``text'', ``dot'', ``graph'' -\item {\em default:} (none) -\item {\em example:} \lstinline@ungrouped views = text, dot@ -\end{myitemize} - - -\subsubsection{use theme} - -Set the task state color theme, common to all views, to use initially. The -color theme can be changed later using the tool bar. See -\lstinline@etc/gcylc.rc.eg@ and \lstinline@etc/gcylc-themes.rc@ in the Cylc -installation directory for how to modify existing themes or define your own. -Use \lstinline@cylc get-gui-config@ to list available themes. 
- -\begin{myitemize} -\item {\em type:} string (theme name) -\item {\em legal values:} ``default'', ``solid'', ``high-contrast'', - ``color-blind'', and any custom or user-modified themes. -\item {\em default:} ``default'' -\end{myitemize} - - -\subsubsection{window size} - -Sets the size (in pixels) of the cylc GUI at startup. - -\begin{myitemize} - \item {\em type:} integer list: x, y - \item {\em legal values:} positive integers - \item {\em default:} 800, 500 - \item {\em example:} \lstinline@window size = 1000, 700@ -\end{myitemize} - - -\subsection{[themes]} - -This section may contain task state color theme definitions. - -\subsubsection[{[}THEME{]}]{[themes] \textrightarrow [[THEME]]} - -The name of the task state color-theme to be defined in this section. - -\begin{myitemize} -\item {\em type:} string -\end{myitemize} - -\paragraph[inherit]{[themes] \textrightarrow [[THEME]] \textrightarrow inherit} - -You can inherit from another theme in order to avoid defining all states. - -\begin{myitemize} -\item {\em type:} string (parent theme name) -\item {\em default:} ``default'' -\end{myitemize} - -\paragraph[defaults]{[themes] \textrightarrow [[THEME]] \textrightarrow defaults} - -Set default icon attributes for all state icons in this theme. - -\begin{myitemize} -\item {\em type:} string list (icon attributes) -\item {\em legal values:} \lstinline@"color=COLOR"@, \lstinline@"style=STYLE"@, \lstinline@"fontcolor=FONTCOLOR"@ -\item {\em default:} (none) -\end{myitemize} - -For the attribute values, COLOR and FONTCOLOR can be color names from the X11 -rgb.txt file, e.g.\ \lstinline=SteelBlue=; or hexadecimal color codes, e.g.\ -\lstinline@#ff0000@ for red; and STYLE can be ``filled'' or ``unfilled''. -See \lstinline@etc/gcylc.rc.eg@ and \lstinline@etc/gcylc-themes.rc@ in -the Cylc installation directory for examples. 
- -\paragraph[STATE]{[themes] \textrightarrow [[THEME]] \textrightarrow STATE} - -Set icon attributes for all task states in THEME, or for a subset of them if -you have used theme inheritance and/or defaults. Legal values of STATE are -any of the cylc task proxy states: {\em waiting, runahead, held, queued, ready, -submitted, submit-failed, running, succeeded, failed, retrying, submit-retrying}. - -\begin{myitemize} -\item {\em type:} string list (icon attributes) -\item {\em legal values:} \lstinline@"color=COLOR"@, \lstinline@"style=STYLE"@, \lstinline@"fontcolor=FONTCOLOR"@ -\item {\em default:} (none) -\end{myitemize} - -For the attribute values, COLOR and FONTCOLOR can be color names from the X11 -rgb.txt file, e.g.\ \lstinline=SteelBlue=; or hexadecimal color codes, e.g.\ -\lstinline@#ff0000@ for red; and STYLE can be ``filled'' or ``unfilled''. -See \lstinline@etc/gcylc.rc.eg@ and \lstinline@etc/gcylc-themes.rc@ in -the Cylc installation directory for examples. diff --git a/doc/src/cylc-user-guide/gpl-3.0.tex b/doc/src/cylc-user-guide/gpl-3.0.tex deleted file mode 100644 index 020af23c415..00000000000 --- a/doc/src/cylc-user-guide/gpl-3.0.tex +++ /dev/null @@ -1,725 +0,0 @@ -%\documentclass[11pt]{article} - -%\title{GNU GENERAL PUBLIC LICENSE} - -%\date{Version 3, 29 June 2007} - -%\begin{document} -%\maketitle - -\begin{center} -{\parindent 0in - -Copyright \copyright\ 2007 Free Software Foundation, Inc. \texttt{http://fsf.org/} - -\bigskip -Everyone is permitted to copy and distribute verbatim copies of this - -license document, but changing it is not allowed.} - -\end{center} - -%\renewcommand{\abstractname}{Preamble} -%\begin{abstract} -\begin{center} -{\Large \sc Preamble} -\end{center} - - -The GNU General Public License is a free, copyleft license for -software and other kinds of works. - -The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. 
By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - -When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - -To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - -For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - -Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - -For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. 
- -Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - -Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - -The precise terms and conditions for copying, distribution and -modification follow. -%\end{abstract} - -\begin{center} -{\Large \sc Terms and Conditions} -\end{center} - - -\begin{enumerate} - -\addtocounter{enumi}{-1} - -\item Definitions. - -``This License'' refers to version 3 of the GNU General Public License. - -``Copyright'' also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - -``The Program'' refers to any copyrightable work licensed under this -License. Each licensee is addressed as ``you''. ``Licensees'' and -``recipients'' may be individuals or organizations. - -To ``modify'' a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. 
The resulting work is called a ``modified version'' of the -earlier work or a work ``based on'' the earlier work. - -A ``covered work'' means either the unmodified Program or a work based -on the Program. - -To ``propagate'' a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - -To ``convey'' a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - -An interactive user interface displays ``Appropriate Legal Notices'' -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - -\item Source Code. - -The ``source code'' for a work means the preferred form of the work -for making modifications to it. ``Object code'' means any non-source -form of a work. - -A ``Standard Interface'' means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- -The ``System Libraries'' of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -``Major Component'', in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - -The ``Corresponding Source'' for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - -The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - -The Corresponding Source for a work in source code form is that -same work. - -\item Basic Permissions. - -All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - -You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - -Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - -\item Protecting Users' Legal Rights From Anti-Circumvention Law. - -No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - -When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - -\item Conveying Verbatim Copies. 
- -You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - -You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - -\item Conveying Modified Source Versions. - -You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - \begin{enumerate} - \item The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - \item The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - ``keep intact all notices''. - - \item You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - \item If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
-\end{enumerate} -A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -``aggregate'' if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - -\item Conveying Non-Source Forms. - -You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - \begin{enumerate} - \item Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - \item Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - \item Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - \item Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - \item Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - \end{enumerate} - -A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - -A ``User Product'' is either (1) a ``consumer product'', which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, ``normally used'' refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - -``Installation Information'' for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - -If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - -The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - -Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - -\item Additional Terms. - -``Additional permissions'' are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - -When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- -Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - \begin{enumerate} - \item Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - \item Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - \item Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - \item Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - \item Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - \item Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - \end{enumerate} - -All other non-permissive additional terms are considered ``further -restrictions'' within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- -If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - -Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - -\item Termination. - -You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - -However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - -Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - -Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - -\item Acceptance Not Required for Having Copies. - -You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - -\item Automatic Licensing of Downstream Recipients. - -Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - -An ``entity transaction'' is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - -You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - -\item Patents. 
- -A ``contributor'' is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's ``contributor version''. - -A contributor's ``essential patent claims'' are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, ``control'' includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - -Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - -In the following three paragraphs, a ``patent license'' is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To ``grant'' such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- -If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. ``Knowingly relying'' means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - -If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - -A patent license is ``discriminatory'' if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - -Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - -\item No Surrender of Others' Freedom. - -If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - -\item Use with the GNU Affero General Public License. - -Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - -\item Revised Versions of this License. - -The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License ``or any later version'' applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - -If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - -Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - -\item Disclaimer of Warranty. - -\begin{sloppypar} - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY - APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE - COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS'' - WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE - RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. - SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL - NECESSARY SERVICING, REPAIR OR CORRECTION. -\end{sloppypar} - -\item Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES - AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR - DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL - DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM - (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED - INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE - OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH - HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH - DAMAGES. - -\item Interpretation of Sections 15 and 16. - -If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - -\begin{center} -{\Large\sc End of Terms and Conditions} - -\bigskip -How to Apply These Terms to Your New Programs -\end{center} - -If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. 
- -{\footnotesize -\begin{verbatim} - - -Copyright (C) - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -\end{verbatim} -} - -Also add information on how to contact you by electronic and paper mail. - -If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - -{\footnotesize -\begin{verbatim} - Copyright (C) - -This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. -This is free software, and you are welcome to redistribute it -under certain conditions; type `show c' for details. -\end{verbatim} -} - -The hypothetical commands {\tt show w} and {\tt show c} should show -the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an ``about box''. - -You should also get your employer (if you work as a programmer) or -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. For more information on this, and how to apply and follow -the GNU GPL, see \texttt{http://www.gnu.org/licenses/}. - -The GNU General Public License does not permit incorporating your -program into proprietary programs. If your program is a subroutine -library, you may consider it more useful to permit linking proprietary -applications with the library. If this is what you want to do, use -the GNU Lesser General Public License instead of this License. 
But -first, please read \texttt{http://www.gnu.org/philosophy/why-not-lgpl.html}. - -\end{enumerate} - -%\end{document} diff --git a/doc/src/cylc-user-guide/graphviz.txt b/doc/src/cylc-user-guide/graphviz.txt deleted file mode 100644 index 5d63a83e3db..00000000000 --- a/doc/src/cylc-user-guide/graphviz.txt +++ /dev/null @@ -1,30 +0,0 @@ - * http://www.graphviz.org - * http://www.graphviz.org/doc/ - * - * Minimal postprocessing example:" - * $ dot -Tps foo.dot -o foo.ps # ps output" - * $ dot -Tsvg foo.dot -o foo.svg # svg output" - * - * Note that nodes in a subgraph with no internal edges all have - * the same rank (rank determines horizontal placement). To split - * the subgraph into several rows you can manually add invisible - * invisible edges, for example: "node1 -> node2 [color=invis];" - * - * When the default node style is "filled" use "style=" to unset filled, - * or fill with background color (there is no "unfilled" style attribute). - * - * Processing a dot-file the graphviz 'unflatten' command may result - * in a more pleasing layout. - * - * You can use the 'dot -G|N|E' commandline options to experiment with - * different global settings without editing a dot file directly. - * - * Printing large graphs successfully can be problematic. One method - * that works on Linux is to generate an svg layout, load into inkscape - * and set the page size to A3 under "document properties", save a PDF - * copy, load that into evince, set A3 again, and 'landscape' if - * necessary, in "Print Setup", then print the frickin' thing. - * - * You can tell dot to split a large layout into a multi-page mosaic - * that can be pieced together after printing: use the 'page=x,y' and - * 'size' graph attributes (see dot documentation for details). 
diff --git a/doc/src/cylc-user-guide/gscanrc.tex b/doc/src/cylc-user-guide/gscanrc.tex deleted file mode 100644 index 577a2ce27e2..00000000000 --- a/doc/src/cylc-user-guide/gscanrc.tex +++ /dev/null @@ -1,104 +0,0 @@ - -\section{Gscan GUI (cylc gscan) Config File Reference} -\label{GscanRCReference} - -\lstset{language=bash} - -This section defines all legal items and values for the gscan config -file which should be located in \lstinline=$HOME/.cylc/gscan.rc=. Some items -also affect the gpanel panel app. - -The main menubar can be hidden to maximise the display area. Its visibility -can be toggled via the mouse right-click menu, or by typing Alt-m. When -visible, the main View menu allows you to change properties such as the columns -that are displayed, which hosts to scan for running suites, and the task state -icon theme. - -At startup, the task state icon theme and icon size are taken from the gcylc -config file \lstinline=$HOME/.cylc/gcylc.rc=. - -\subsection{Top Level Items} - -\subsubsection{activate on startup} - -Set whether \lstinline=cylc gpanel= will activate automatically when the gui is -loaded or not. - -\begin{myitemize} - \item {\em type:} boolean (True or False) -\item {\em legal values:} ``True'', ``False'' -\item {\em default:} ``False'' -\item {\em example:} \lstinline@activate on startup = True@ -\end{myitemize} - -\subsubsection{columns} - -Set the columns to display when the \lstinline=cylc gscan= GUI starts. This can -be changed later with the View menu. The order in which the columns are -specified here does not affect the display order. 
- -\begin{myitemize} -\item {\em type:} string (a list of one or more view names) -\item {\em legal values:} ``host'', ``owner'', ``status'', ``suite'', - ``title'', ``updated'' -\item {\em default:} ``status'', ``suite'' -\item {\em example:} \lstinline@columns = suite, title, status@ -\end{myitemize} - -\subsubsection{suite listing update interval} - -Set the time interval between refreshing the suite listing (by file system or -port range scan). - -Increasing this setting will reduce the frequency of gscan looking for running -suites. Scanning for suites by port range scan can be a hit on the network and -the running suite processes, while scanning for suites by walking the file -system can hit the file system (especially if the file system is a network file -system). Therefore, this is normally set with a lower frequency than the status -update interval. Increasing this setting will make gscan friendlier to the -network and/or the file system, but gscan may appear out of sync if there are -many start up or shut down of suites between the intervals. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default: PT1M} -\end{myitemize} - -\subsubsection{suite status update interval} - -Set the time interval between calls to known running suites (suites that are -known via the latest suite listing) for data updates. - -Increasing this setting will reduce the network traffic and hits on the suite -processes. However, gscan may appear out of sync with what may be happening -in very busy suites. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default: PT15S} -\end{myitemize} - -\subsubsection{window size} - -Sets the size in pixels of the \lstinline=cylc gscan= GUI window at startup. 
- -\begin{myitemize} - \item {\em type:} integer list: x, y - \item {\em legal values:} positive integers - \item {\em default:} 300, 200 - \item {\em example:} \lstinline@window size = 1000, 700@ -\end{myitemize} - -\subsubsection{hide main menubar} - -Hide the main menubar of the \lstinline=cylc gscan= GUI window at startup. By -default, the menubar is not hidden. Either way, you can toggle its -visibility with Alt-m or via the right-click menu. - -\begin{myitemize} - \item {\em type:} boolean (True or False) - \item {\em default:} False - \item {\em example:} \lstinline@hide main menubar = True@ -\end{myitemize} diff --git a/doc/src/cylc-user-guide/job-host-1.tex b/doc/src/cylc-user-guide/job-host-1.tex deleted file mode 100644 index 8b9bdcb2101..00000000000 --- a/doc/src/cylc-user-guide/job-host-1.tex +++ /dev/null @@ -1,102 +0,0 @@ -\section{Remote Job Management} - -Managing tasks in a workflow requires more than just job execution: Cylc -performs additional actions with \lstinline=rsync= for file transfer, and -direct execution of \lstinline=cylc= sub-commands over non-interactive -SSH.\footnote{Cylc used to run bare shell expressions over SSH, which required -a bash shell and made whitelisting difficult.} - -\subsection{SSH-free Job Management?} - -Some sites may want to restrict access to job hosts by whitelisting SSH -connections to allow only \lstinline=rsync= for file transfer, and allowing job -execution only via a local batch system that sees the job hosts.\footnote{A -malicious script could be \lstinline=rsync='d and run from a batch job, but -batch jobs are considered easier to audit.} We are investigating the -feasibility of SSH-free job management when a local batch system is available, -but this is not yet possible unless your suite and job hosts also share a -filesystem, which allows Cylc to treat jobs as entirely local.\footnote{The job ID -must also be valid to query and kill the job via the local batch system. 
This -is not the case for Slurm, unless the \lstinline=--cluster= option is -explicitly used in job query and kill commands, otherwise the job ID is not -recognized by the local Slurm instance.} - -\subsection{SSH-based Job Management} - -Cylc does not have persistent agent processes running on job hosts to act on -instructions received over the network\footnote{This would be a more complex -solution, in terms of implementation, administration, and security.} so -instead we execute job management commands directly on job hosts over SSH. -Reasons for this include: -\begin{itemize} - \item it works equally for batch system and background jobs - \item SSH is {\em required} for background jobs, and for batch jobs if the - batch system is not available on the suite host - \item {\em querying the batch system alone is not sufficient for full job - polling functionality} because jobs can complete (and then be forgotten by - the batch system) while the network, suite host, or suite server program is - down (e.g.\ between suite shutdown and restart) - \begin{itemize} - \item to handle this we get the automatic job wrapper code to write - job messages and exit status to {\em job status files} that are - interrogated by suite server programs during job polling operations - \item job status files reside on the job host, so the interrogation - is done over SSH - \end{itemize} - \item job status files also hold batch system name and job ID; this is - written by the job submit command, and read by job poll and kill commands - (all over SSH) -\end{itemize} - -\subsection{A Concrete Example} - -The following suite, registered as \lstinline=suitex=, is used to illustrate -our current SSH-based remote job management. It submits two jobs to a remote, -and a local task views a remote job log then polls and kills the remote jobs. 
- -\lstset{language=suiterc} -\begin{lstlisting} -# suite.rc -[scheduling] - [[dependencies]] - graph = "delayer => master & REMOTES" -[runtime] - [[REMOTES]] - script = "sleep 30" - [[[remote]]] - host = wizard - owner = hobo - [[remote-a, remote-b]] - inherit = REMOTES - [[delayer]] - script = "sleep 10" - [[master]] - script = """ - sleep 5 - cylc cat-log -m c -f o $CYLC_SUITE_NAME remote-a.1 - sleep 2 - cylc poll $CYLC_SUITE_NAME REMOTES.1 - sleep 2 - cylc kill $CYLC_SUITE_NAME REMOTES.1 - sleep 2 - cylc remove $CYLC_SUITE_NAME REMOTES.1""" -\end{lstlisting} - -The {\em delayer} task just separates suite start-up from remote job -submission, for clarity when watching the job host (e.g.\ with -\lstinline=watch -n 1 find ~/cylc-run/suitex=). - -Global config specifies the path to the remote Cylc executable, says -to retrieve job logs, and not to use a remote login shell: -\begin{lstlisting} -# global.rc -[hosts] - [[wizard]] - cylc executable = /opt/bin/cylc - retrieve job logs = True - use login shell = False -\end{lstlisting} - -On running the suite, remote job host actions were captured in the transcripts -below by wrapping the \lstinline=ssh=, \lstinline=scp=, and \lstinline=rsync= -executables in scripts that log their command lines before taking action. diff --git a/doc/src/cylc-user-guide/job-host-2-html.tex b/doc/src/cylc-user-guide/job-host-2-html.tex deleted file mode 100644 index 175da76fe1c..00000000000 --- a/doc/src/cylc-user-guide/job-host-2-html.tex +++ /dev/null @@ -1,4 +0,0 @@ - -{\bf NOTE the rest of this section is omitted in the HTML User -Guide because the complex formatting translates badly to HTML. -Please see the PDF User Guide for details.} diff --git a/doc/src/cylc-user-guide/job-host-2.tex b/doc/src/cylc-user-guide/job-host-2.tex deleted file mode 100644 index 8a886450b3a..00000000000 --- a/doc/src/cylc-user-guide/job-host-2.tex +++ /dev/null @@ -1,256 +0,0 @@ -% SECOND HALF OF THE SECTION, omitted from HTML copy (formatting errors). 
- -\renewcommand*\DTstylecomment{\normalfont\ttfamily\color{comments}} -\renewcommand*\DTstyle{\bf\ttfamily\textcolor{identifiers}} - -\subsubsection{create suite run directory and install source files} - -Done by \lstinline=rose suite-run= before suite start-up -(the command will be migrated to Cylc soon though). - -\begin{itemize} - \item with \lstinline=--new= it invokes bash over SSH and a raw shell - expression, to delete previous-run files - \item it invokes itself over SSH to create top level suite directories - and install source files - \begin{itemize} - \item skips installation if server UUID file is found on the job host - (indicates a shared filesystem) - \end{itemize} - \item uses \lstinline=rsync= for suite source file installation - \item (note the same directory structure is used on suite and job hosts, for - consistency and simplicity, and because the suite host can also be a job host) -\end{itemize} - -\lstset{breaklines=true} -\lstset{language=jobhosts} - -\vspace{5mm} -\begin{lstlisting} -# rose suite-run --new only: initial clean-out -ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard bash -l -O extglob -c 'cd; echo '"'"'673d7a0d-7816-42a4-8132-4b1ab394349c'"'"'; ls -d -r cylc-run/suitex/work cylc-run/suitex/share/cycle cylc-run/suitex/share cylc-run/suitex; rm -fr cylc-run/suitex/work cylc-run/suitex/share/cycle cylc-run/suitex/share cylc-run/suitex; (cd ; rmdir -p cylc-run/suitex/work cylc-run/suitex/share/cycle cylc-run/suitex/share cylc-run 2>/dev/null || true)' - -# rose suite-run: test for shared filesystem and create share/cycle directories -ssh -oBatchMode=yes -oConnectTimeout=10 -n hobo@wizard env ROSE_VERSION=2018.02.0 CYLC_VERSION=7.6.x bash -l -c '"$0" "$@"' rose suite-run -vv -n suitex --run=run --remote=uuid=231cd6a1-6d61-476d-96e1-4325ef9216fc,now-str=20180416T042319Z - -# rose suite-run: install suite source directory to job host -rsync -a --exclude=.* --timeout=1800 --rsh=ssh -oBatchMode=yes -oConnectTimeout=10 
--exclude=231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=log/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=share/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=share/cycle/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=work/231cd6a1-6d61-476d-96e1-4325ef9216fc --exclude=/.* --exclude=/cylc-suite.db --exclude=/log --exclude=/log.* --exclude=/state --exclude=/share --exclude=/work ./ hobo@wizard:cylc-run/suitex - # (internal rsync) - ssh -oBatchMode=yes -oConnectTimeout=10 -l hobo wizard rsync --server -logDtpre.iLsfx --timeout=1800 . cylc-run/suitex - # (internal rsync, back from hobo@wizard) - rsync --server -logDtpre.iLsfx --timeout=1800 . cylc-run/suitex -\end{lstlisting} - -\vspace{5mm} -Result: -\lstset{language=sh} -{\scriptsize -\dirtree{% -.1 \textasciitilde/cylc-run/suitex. -.2 log->log.20180418T025047Z\DTcomment{\textbf{LOG DIRECTORIES}}. -.2 log.20180418T025047Z\DTcomment{log directory for current suite run}. -.2 suiter.rc. -.2 xxx\DTcomment{(any suite source sub-dirs or file)}. -.2 work\DTcomment{\textbf{JOB WORK DIRECTORIES}}. -.2 share\DTcomment{\textbf{SUITE SHARE DIRECTORY}}. -.3 cycle. -} -} - -\subsubsection{server installs service directory} - -\begin{itemize} - \item server address and credentials, so that clients such as - \lstinline=cylc message= executed by jobs can connect - \item done just before the first job is submitted to a remote, and at - suite restart for the remotes of jobs running when the suite went - down (server host, port, etc.\ may change at restart) - \item uses SSH to invoke \lstinline=cylc remote-init= on - job hosts. If the remote command does not find a server-side UUID file - (which would indicate a shared filesystem) it reads a tar archive of - the service directory from stdin, and unpacks it to install. 
-\end{itemize} - -\lstset{language=jobhosts} - -\vspace{5mm} -\begin{lstlisting} -# cylc remote-init: install suite service directory -ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc remote-init '066592b1-4525-48b5-b86e-da06eb2380d9' '$HOME/cylc-run/suitex' -\end{lstlisting} - -Result: -{\scriptsize -\dirtree{% -.1 \textasciitilde/cylc-run/suitex. -.2 .service\DTcomment{\textbf{SUITE SERVICE DIRECTORY}}. -.3 contact\DTcomment{{\color{blue} server address information}}. -.3 passphrase\DTcomment{{\color{blue} suite passphrase}}. -.3 ssl.cert\DTcomment{{\color{blue} suite SSL certificate}}. -.2 log->log.20180418T025047Z\DTcomment{\textbf{LOG DIRECTORIES}}. -.2 log.20180418T025047Z\DTcomment{log directory for current suite run}. -.2 suiter.rc. -.2 xxx\DTcomment{(any suite source sub-dirs or file)}. -.2 work\DTcomment{\textbf{JOB WORK DIRECTORIES}}. -.2 share\DTcomment{\textbf{SUITE SHARE DIRECTORY}}. -.3 cycle. -} -} - -\subsubsection{server submits jobs} -\begin{itemize} - \item done when tasks are ready to run, for multiple jobs at once - \item uses SSH to invoke \lstinline=cylc jobs-submit= on the - remote - to read job scripts from stdin, write them to disk, and submit - them to run -\end{itemize} - -\lstset{language=jobhosts} - -\vspace{5mm} -\begin{lstlisting} -# cylc jobs-submit: submit two jobs -ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc jobs-submit '--remote-mode' '--' '$HOME/cylc-run/suitex/log/job' '1/remote-a/01' '1/remote-b/01' -\end{lstlisting} - -Result: -{\scriptsize -\dirtree{% -.1 \textasciitilde/cylc-run/suitex. -.2 .service\DTcomment{\textbf{SUITE SERVICE DIRECTORY}}. -.3 contact\DTcomment{{\color{blue} server address information}}. -.3 passphrase\DTcomment{{\color{blue} suite passphrase}}. -.3 ssl.cert\DTcomment{{\color{blue} suite SSL certificate}}. -.2 log->log.20180418T025047Z\DTcomment{\textbf{LOG DIRECTORIES}}. 
-.2 log.20180418T025047Z\DTcomment{log directory for current suite run}. -.3 job\DTcomment{job logs (to be distinguished from \lstinline=log/suite/= on the suite host)}. -.4 1\DTcomment{cycle point}. -.5 remote-a\DTcomment{task name}. -.6 01\DTcomment{job submit number}. -.7 job\DTcomment{{\color{blue}job script}}. -.7 job.out\DTcomment{{\color{blue} job stdout}}. -.7 job.err\DTcomment{{\color{blue} job stderr}}. -.7 job.status\DTcomment{{\color{blue} job status}}. -.6 NN->0l\DTcomment{symlink to latest submit number}. -.5 remote-b\DTcomment{task name}. -.6 01\DTcomment{job submit number}. -.7 job\DTcomment{{\color{blue}job script}}. -.7 job.out\DTcomment{{\color{blue} job stdout}}. -.7 job.err\DTcomment{{\color{blue} job stderr}}. -.7 job.status\DTcomment{{\color{blue} job status}}. -.6 NN->0l\DTcomment{symlink to latest submit number}. -.2 suiter.rc. -.2 xxx\DTcomment{(any suite source sub-dirs or file)}. -.2 work\DTcomment{\textbf{JOB WORK DIRECTORIES}}. -.3 1\DTcomment{cycle point}. -.4 remote-a\DTcomment{task name}. -.5 xxx\DTcomment{(any files written by job to PWD)}. -.4 remote-b\DTcomment{task name}. -.5 xxx\DTcomment{(any files written by job to PWD)}. -.2 share\DTcomment{\textbf{SUITE SHARE DIRECTORY}}. -.3 cycle. -.3 xxx\DTcomment{(any job-created sub-dirs and files)}. 
-} -} - -\subsubsection{server tracks job progress} - -\begin{itemize} - \item jobs send messages back to the server program on the suite host - \begin{itemize} - \item directly: client-server HTTPS over the network (requires service - files installed - see above) - \item indirectly: re-invoke clients on the suite host (requires reverse SSH) - \end{itemize} - \item OR server polls jobs at intervals (requires job polling - see below) -\end{itemize} - -\subsubsection{user views job logs} - -\begin{itemize} - \item command \lstinline=cylc cat-log= via CLI or GUI, invokes itself over - SSH to the remote - \item suites will serve job logs in future, but this will still be needed - (e.g.\ if the suite is down) -\end{itemize} - -\vspace{5mm} -\begin{lstlisting} -# cylc cat-log: view a job log -ssh -oBatchMode=yes -oConnectTimeout=10 -n hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc cat-log --remote-arg='$HOME/cylc-run/suitex/log/job/1/remote-a/NN/job.out' --remote-arg=cat --remote-arg='tail -n +1 -F %(filename)s' suitex -\end{lstlisting} - - -\subsubsection{server cancels or kills jobs} - -\begin{itemize} - \item done automatically or via user command \lstinline=cylc kill=, for - multiple jobs at once - \item uses SSH to invoke \lstinline=cylc jobs-kill= on the - remote, with job log paths on the command line. Reads job ID from the - job status file. -\end{itemize} - -\vspace{5mm} - \begin{lstlisting} -# cylc jobs-kill: kill two jobs -ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc jobs-kill '--' '$HOME/cylc-run/suitex/log/job' '1/remote-a/01' '1/remote-b/01' - \end{lstlisting} - -\subsubsection{server polls jobs} - -\begin{itemize} - \item done automatically or via user command \lstinline=cylc poll=, for - multiple jobs at once - \item uses SSH to invoke \lstinline=cylc jobs-poll= on the - remote, with job log paths on the command line. Reads job ID from the - job status file. 
-\end{itemize} - -\vspace{5mm} - \begin{lstlisting} -# cylc jobs-poll: poll two jobs -ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc jobs-poll '--' '$HOME/cylc-run/suitex/log/job' '1/remote-a/01' '1/remote-b/01' - \end{lstlisting} - - -\subsubsection{server retrieves jobs logs} - -\begin{itemize} - \item done at job completion, according to global config - \item uses \lstinline=rsync= -\end{itemize} - -\vspace{5mm} - \begin{lstlisting} -# rsync: retrieve two job logs -rsync -a --rsh=ssh -oBatchMode=yes -oConnectTimeout=10 --include=/1 --include=/1/remote-a --include=/1/remote-a/01 --include=/1/remote-a/01/** --include=/1/remote-b --include=/1/remote-b/01 --include=/1/remote-b/01/** --exclude=/** hobo@wizard:$HOME/cylc-run/suitex/log/job/ /home/vagrant/cylc-run/suitex/log/job/ - # (internal rsync) - ssh -oBatchMode=yes -oConnectTimeout=10 -l hobo wizard rsync --server --sender -logDtpre.iLsfx . $HOME/cylc-run/suitex/log/job/ - # (internal rsync, back from hobo@wizard) - rsync --server --sender -logDtpre.iLsfx . 
/home/hobo/cylc-run/suitex/log/job/ - \end{lstlisting} - -\subsubsection{server tidies job remote at shutdown} - -\begin{itemize} - \item removes \lstinline=.service/contact= so that clients won't repeatedly - try to connect -\end{itemize} - -\vspace{5mm} - \begin{lstlisting} -# cylc remote-tidy: remove the remote suite contact file -ssh -oBatchMode=yes -oConnectTimeout=10 hobo@wizard env CYLC_VERSION=7.6.x /opt/bin/cylc remote-tidy '$HOME/cylc-run/suitex' - \end{lstlisting} - -\subsection{Other Use of SSH in Cylc} - -\begin{itemize} - \item see if a suite is running on another host with a shared - filesystem - see \lstinline=detect_old_contact_file()= in - \lstinline=lib/cylc/suite_srv_files_mgr.py= - \item cat content of a remote service file over SSH, if possible, for - clients on that do not have suite credentials installed - see - \lstinline=_load_remote_item()= in \lstinline=suite_srv_files_mgr.py= -\end{itemize} diff --git a/doc/src/cylc-user-guide/scripts/get-deps.sh b/doc/src/cylc-user-guide/scripts/get-deps.sh deleted file mode 100755 index 1f9eec1480e..00000000000 --- a/doc/src/cylc-user-guide/scripts/get-deps.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -PDFLATEX=$( which pdflatex 2> /dev/null ) -HTLATEX=$( which htlatex 2> /dev/null ) -CONVERT=$( which convert 2> /dev/null ) - -WARNED=false - -if [[ -z $PDFLATEX ]]; then - echo "*** WARNING: to generate PDF Cylc documentation install LaTeX pdflatex ***" >&2 - WARNED=true -else - DEPS="pdf" -fi - -if [[ -z $HTLATEX ]]; then - echo - echo "*** WARNING: to generate HTML Cylc documentation install LaTeX tex4ht ***" >&2 - WARNED=true -fi - -if [[ -z $CONVERT ]]; then - echo "*** WARNING: to generate HTML Cylc documentation install ImageMagick convert ***" >&2 - WARNED=true -fi - -if [[ -n $CONVERT && -n $HTLATEX ]]; then - DEPS="$DEPS html" -fi - -if $WARNED; then - # pause to ensure warnings are noticed. 
- sleep 2 -fi - -echo $DEPS diff --git a/doc/src/cylc-user-guide/scripts/make-commands.sh b/doc/src/cylc-user-guide/scripts/make-commands.sh deleted file mode 100755 index f14bee3aa50..00000000000 --- a/doc/src/cylc-user-guide/scripts/make-commands.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# Create cylc-version.txt and commands.tex for inclusion in LaTeX doc. - -CYLC=$(dirname $0)/../../../../bin/cylc - -$CYLC --version > cylc-version.txt - -cat > commands.tex <> commands.tex <> commands.tex <> commands.tex <. - -# Make HTML Cylc User Guide (called from Makefile). 
- -set -e - -TYPE=$1 # "multi" or "single" - -DEST=html/$TYPE -rm -rf $DEST; mkdir -p $DEST - -cp -r *.tex cug-html.cfg cylc-version.txt titlepic.sty $DEST - -cd $DEST -ls *.tex | xargs -n 1 perl -pi -e 's@graphics/png/orig@../../graphics/png/scaled@g' -ls *.tex | xargs -n 1 perl -pi -e 's@\.\./etc/@../../../etc/@g' -perl -pi -e 's@categories/@../../categories/@g' commands.tex -perl -pi -e 's@commands/@../../commands/@g' commands.tex -perl -pi -e 's@cylc.txt@../../cylc.txt@g' commands.tex -perl -pi -e 's@\.\./README@../../../README@g' cug.tex -perl -pi -e 's@\.\./INSTALL@../../../INSTALL@g' cug.tex -perl -pi -e 's@job-host-2.tex@job-host-2-html.tex@g' cug.tex - -# NOTE the 5th argument '-halt-on-error' is passed to the latex -# compiler, but htlatex does not return error status if latex aborts. -# This is ok for cylc test purposes as we run pdflatex before htlatex. -if [[ $TYPE == multi ]]; then - htlatex cug-html.tex "cug-html.cfg,html,fn-in,2,next" "" "" "-halt-on-error" -else - htlatex cug-html.tex "cug-html.cfg,html,1,fn-in" "" "" "-halt-on-error" -fi diff --git a/doc/src/cylc-user-guide/scripts/make-pdf.sh b/doc/src/cylc-user-guide/scripts/make-pdf.sh deleted file mode 100755 index 4a8ad1d35f4..00000000000 --- a/doc/src/cylc-user-guide/scripts/make-pdf.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# Make PDF Cylc User Guide (called from Makefile). - -set -e - -DEST=pdf -rm -rf $DEST; mkdir -p $DEST - -cp -r *.tex cylc-version.txt titlepic.sty $DEST - -cd $DEST -ls *.tex | xargs -n 1 perl -pi -e 's@graphics/png/orig@../graphics/png/orig@g' -ls *.tex | xargs -n 1 perl -pi -e 's@\.\./etc/@../../etc/@g' -perl -pi -e 's@categories/@../categories/@g' commands.tex -perl -pi -e 's@commands/@../commands/@g' commands.tex -perl -pi -e 's@cylc.txt@../cylc.txt@g' commands.tex -perl -pi -e 's@\.\./README@../../README@g' cug.tex -perl -pi -e 's@\.\./INSTALL@../../INSTALL@g' cug.tex - -# run pdflatex three times to resolve all cross-references -pdflatex -halt-on-error cug-pdf.tex -pdflatex -halt-on-error cug-pdf.tex -pdflatex -halt-on-error cug-pdf.tex diff --git a/doc/src/cylc-user-guide/siterc.tex b/doc/src/cylc-user-guide/siterc.tex deleted file mode 100644 index d7c8f943f46..00000000000 --- a/doc/src/cylc-user-guide/siterc.tex +++ /dev/null @@ -1,1116 +0,0 @@ -\section{Global (Site, User) Config File Reference} -\label{SiteRCReference} - -\lstset{language=transcript} - -This section defines all legal items and values for cylc site and -user config files. See {\em Site And User Config Files} -(Section~\ref{SiteAndUserConfiguration}) for file locations, intended -usage, and how to generate the files using the -\lstinline=cylc get-site-config= command. - -{\em As for suite configurations, Jinja2 expressions can be embedded in -site and user config files to generate the final result parsed by cylc.} -Use of Jinja2 in suite configurations is documented in -Section~\ref{Jinja2}. - -\subsection{Top Level Items} - -\subsubsection{temporary directory} - -A temporary directory is needed by a few cylc commands, and is cleaned -automatically on exit. Leave unset for the default (usually -\lstinline=$TMPDIR=). 
- -\begin{myitemize} -\item {\em type:} string (directory path) -\item {\em default:} (none) -\item {\em example:} \lstinline@temporary directory = /tmp/$USER/cylc@ -\end{myitemize} - -\subsubsection{process pool size} -\label{process pool size} - -Maximum number of concurrent processes used to execute external job submission, -event handlers, and job poll and kill commands - see~\ref{Managing External -Command Execution}. - -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} 4 -\end{myitemize} - -\subsubsection{process pool timeout} -\label{process pool timeout} - -Interval after which long-running commands in the process pool will be killed - -see~\ref{Managing External Command Execution}. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default:} PT10M - note this is set quite high to avoid killing - important processes when the system is under load. -\end{myitemize} - -\subsubsection{disable interactive command prompts} - -Commands that intervene in running suites can be made to ask for -confirmation before acting. Some find this annoying and ineffective as a -safety measure, however, so command prompts are disabled by default. - -\begin{myitemize} -\item {\em type:} boolean -\item {\em default:} True -\end{myitemize} - -\subsubsection{enable run directory housekeeping} - -The suite run directory tree is created anew with every suite start -(not restart) but output from the most recent previous runs can be -retained in a rolling archive. Set length to 0 to keep no backups. -{\bf This is incompatible with current Rose suite housekeeping} (see -Section~\ref{SuiteStorageEtc} for more on Rose) so it is disabled by -default, in which case new suite run files will overwrite existing ones -in the same run directory tree. Rarely, this can result in incorrect -polling results due to the presence of old task status files. 
- -\begin{myitemize} -\item {\em type:} boolean -\item {\em default:} False -\end{myitemize} - -\subsubsection{run directory rolling archive length} - -The number of old run directory trees to retain if run directory -housekeeping is enabled. -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} 2 -\end{myitemize} - -\subsubsection{task host select command timeout} - -When a task host in a suite is a shell command string, cylc calls the shell to -determine the task host. This call is invoked by the main process, and may -cause the suite to hang while waiting for the command to finish. This setting -sets a timeout for such a command to ensure that the suite can continue. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default: PT10S} -\end{myitemize} - -\subsection{[task messaging]} - -This section contains configuration items that affect task-to-suite -communications. - -\subsubsection[retry interval]{[task messaging] \textrightarrow retry interval} - -If a send fails, the messaging code will retry after a configured -delay interval. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default:} PT5S -\end{myitemize} - -\subsubsection[maximum number of tries]{[task messaging] \textrightarrow maximum number of tries} - -If successive sends fail, the messaging code will give up after a -configured number of tries. - -\begin{myitemize} -\item {\em type:} integer -\item {\em minimum:} 1 -\item {\em default:} 7 -\end{myitemize} - -\subsubsection[connection timeout]{[task messaging] \textrightarrow connection timeout} - -This is the same as the \lstinline=--comms-timeout= option in cylc -commands. Without a timeout remote connections to unresponsive -suites can hang indefinitely (suites suspended with Ctrl-Z for instance). 
- -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default:} PT30S -\end{myitemize} - -\subsection{[suite logging]} - -The suite event log, held under the suite run directory, is maintained -as a rolling archive. Logs are rolled over (backed up and started anew) -when they reach a configurable limit size. - -\subsubsection[rolling archive length]{[suite logging] \textrightarrow rolling archive length} - -How many rolled logs to retain in the archive. - -\begin{myitemize} -\item {\em type:} integer -\item {\em minimum:} 1 -\item {\em default:} 5 -\end{myitemize} - -\subsubsection[maximum size in bytes]{[suite logging] \textrightarrow maximum size in bytes} - -Suite event logs are rolled over when they reach this file size. - -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} 1000000 -\end{myitemize} - -\subsection{[documentation]} - -Documentation locations for the \lstinline=cylc doc= command and gcylc -Help menus. - -\subsubsection[{[[}files{]]}]{[documentation] \textrightarrow [[files]]} - -File locations of documentation held locally on the cylc host server. - -\paragraph[html index]{[documentation] \textrightarrow [[files]] \textrightarrow html index } - -File location of the main cylc documentation index. -\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline=/doc/index.html= -\end{myitemize} - -\paragraph[pdf user guide]{[documentation] \textrightarrow [[files]] \textrightarrow pdf user guide } - -File location of the cylc User Guide, PDF version. -\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline=/doc/cug-pdf.pdf= -\end{myitemize} - -\paragraph[multi-page html user guide]{[documentation] \textrightarrow [[files]] \textrightarrow multi-page html user guide } - -File location of the cylc User Guide, multi-page HTML version. 
-\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline=/doc/html/multi/cug-html.html= -\end{myitemize} - -\paragraph[single-page html user guide]{[documentation] \textrightarrow [[files]] \textrightarrow single-page html user guide } - -File location of the cylc User Guide, single-page HTML version. -\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline=/doc/html/single/cug-html.html= -\end{myitemize} - -\subsubsection[{[[}urls{]]}]{[documentation] \textrightarrow [[urls]]} - -Online documentation URLs. - -\paragraph[internet homepage]{[documentation] \textrightarrow [[urls]] \textrightarrow internet homepage } - -URL of the cylc internet homepage, with links to documentation for the -latest official release. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} http://cylc.github.com/cylc/ -\end{myitemize} - -\paragraph[local index]{[documentation] \textrightarrow [[urls]] \textrightarrow local index} - -Local intranet URL of the main cylc documentation index. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\end{myitemize} - -\subsection{[document viewers]} - -PDF and HTML viewers can be launched by cylc to view the documentation. - -\subsubsection[pdf]{[document viewers] \textrightarrow pdf} - -Your preferred PDF viewer program. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} evince -\end{myitemize} - -\subsubsection[html]{[document viewers] \textrightarrow html} - -Your preferred web browser. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} firefox -\end{myitemize} - -\subsection{[editors]} - -Choose your favourite text editor for editing suite configurations. - -\subsubsection[terminal]{[editors] \textrightarrow terminal} - -The editor to be invoked by the cylc command line interface. 
- -\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline=vim= -\item {\em examples:} - \begin{myitemize} - \item \lstinline@terminal = emacs -nw@ (emacs non-GUI) - \item \lstinline@terminal = emacs@ (emacs GUI) - \item \lstinline@terminal = gvim -f@ (vim GUI) - \end{myitemize} -\end{myitemize} - -\subsubsection[gui]{[editors] \textrightarrow gui} - -The editor to be invoked by the cylc GUI. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline=gvim -f= -\item {\em examples:} - \begin{myitemize} - \item \lstinline@gui = emacs@ - \item \lstinline@gui = xterm -e vim@ - \end{myitemize} -\end{myitemize} - - -\subsection{[communication]} - -This section covers options for network communication between cylc -clients (suite-connecting commands and guis) servers (running suites). -Each suite listens on a dedicated network port, binding on the first -available starting at the configured base port. - -By default, the communication method is HTTPS secured with HTTP Digest -Authentication. If the system does not support SSL, you should configure -this section to use HTTP. Cylc will not automatically fall back to HTTP -if HTTPS is not available. - -\subsubsection[method]{[communication] \textrightarrow method } - -The choice of client-server communication method - currently only HTTPS -and HTTP are supported, although others could be developed and plugged in. -Cylc defaults to HTTPS if this setting is not explicitly configured. - -\begin{myitemize} -\item {\em type:} string -\item {\em options:} - \begin{myitemize} - \item {\bf https} - \item {\bf http} - \end{myitemize} -\item {\em default:} https -\end{myitemize} - -\subsubsection[base port]{[communication] \textrightarrow base port } - -The first port that Cylc is allowed to use. This item (and -\lstinline=maximum number of ports=) is deprecated; please use -\lstinline=run ports= under \lstinline=[suite servers]= instead. 
- -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} \lstinline=43001= -\end{myitemize} - -\subsubsection[maximum number of ports]{[communication] \textrightarrow maximum number of ports} - -This setting (and \lstinline=base port=) is deprecated; please use -\lstinline=run ports= under \lstinline=[suite servers]= instead. - -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} \lstinline=100= -\end{myitemize} - -\subsubsection[proxies on]{[communication] \textrightarrow proxies on} - -Enable or disable proxy servers for HTTPS - disabled by default. - -\begin{myitemize} -\item {\em type:} boolean -\item {\em localhost default:} False -\end{myitemize} - -\subsubsection[options]{[communication] \textrightarrow options} - -Option flags for the communication method. Currently only 'SHA1' is -supported for HTTPS, which alters HTTP Digest Auth to use the SHA1 hash -algorithm rather than the standard MD5. This is more secure but is also -less well supported by third party web clients including web browsers. -You may need to add the 'SHA1' option if you are running on platforms -where MD5 is discouraged (e.g.\ under FIPS). - -\begin{myitemize} -\item {\em type:} string\_list -\item {\em default:} \lstinline@[]@ -\item {\em options:} - \begin{myitemize} - \item {\bf SHA1} - \end{myitemize} -\end{myitemize} - -\subsection{[monitor]} - -Configurable settings for the command line \lstinline=cylc monitor= tool. - -\subsubsection[monitor]{[monitor] \textrightarrow sort order} - -The sort order for tasks in the monitor view. -\begin{myitemize} - \item {\em type:} string - \item {\em options:} - \begin{myitemize} - \item {\bf alphanumeric} - \item {\bf definition} - the order that tasks appear under - \lstinline=[runtime]= in the suite configuration. 
- \end{myitemize} - \item {\em default:} definition -\end{myitemize} - -\subsection{[hosts]} - -The [hosts] section configures some important host-specific settings for -the suite host (`localhost') and remote task hosts. Note that {\em -remote task behaviour is determined by the site/user config on the -suite host, not on the task host}. Suites can specify task hosts that -are not listed here, in which case local settings will be assumed, -with the local home directory path, if present, replaced by -\lstinline=$HOME= in items that configure directory locations. - -\subsubsection[{[[}HOST{]]}]{[hosts] \textrightarrow [[HOST]]} - -The default task host is the suite host, {\bf localhost}, with default -values as listed below. Use an explicit \lstinline=[hosts][[localhost]]= -section if you need to override the defaults. Localhost settings are -then also used as defaults for other hosts, with the local home -directory path replaced as described above. This applies to items -omitted from an explicit host section, and to hosts that are not listed -at all in the site and user config files. Explicit host sections are only -needed if the automatically modified local defaults are not sufficient. - -Host section headings can also be {\em regular expressions} to match -multiple hostnames. Note that the general regular expression wildcard -is `\lstinline=.*=' (zero or more of any character), not -`\lstinline=*='. -Hostname matching regular expressions are used as-is in the Python -\lstinline=re.match()= function. As such they match from the beginning -of the hostname string (as specified in the suite configuration) and they -do not have to match through to the end of the string (use the -string-end matching character `\lstinline=$=' in the expression to -force this). - -A hierarchy of host match expressions from specific to general can be -used because config items are processed in the order specified in the -file. 
- -\begin{myitemize} -\item {\em type:} string (hostname or regular expression) -\item {\em examples:} -\begin{myitemize} - \item \lstinline@server1.niwa.co.nz@ - explicit host name - \item \lstinline@server\d.niwa.co.nz@ - regular expression -\end{myitemize} -\end{myitemize} - -\paragraph[run directory]{[hosts] \textrightarrow [[HOST]] \textrightarrow run directory } - -The top level for suite logs and service files, etc. Can contain -\lstinline=$HOME= or \lstinline=$USER= but not other environment variables (the -item cannot actually be evaluated by the shell on HOST before use, but the -remote home directory is where \lstinline=rsync= and \lstinline=ssh= naturally -land, and the remote username is known by the suite server program). - -\begin{myitemize} -\item {\em type:} string (directory path) -\item {\em default:} \lstinline=$HOME/cylc-run= -\item {\em example:} \lstinline=/nfs/data/$USER/cylc-run= -\end{myitemize} - -\paragraph[work directory]{[hosts] \textrightarrow [[HOST]] \textrightarrow work directory } -\label{workdirectory} - -The top level for suite work and share directories. Can contain -\lstinline=$HOME= or \lstinline=$USER= but not other environment variables -(the item cannot actually be evaluated by the shell on HOST before use, but the -remote home directory is where \lstinline=rsync= and \lstinline=ssh= naturally -land, and the remote username is known by the suite server program). - -\begin{myitemize} -\item {\em type:} string (directory path) -\item {\em localhost default:} \lstinline=$HOME/cylc-run= -\item {\em example:} \lstinline=/nfs/data/$USER/cylc-run= -\end{myitemize} - - -\paragraph[task communication method]{[hosts] \textrightarrow [[HOST]] \textrightarrow task communication method } -\label{task_comms_method} - -The means by which task progress messages are reported back to the running suite. -See above for default polling intervals for the poll method. 
- -\begin{myitemize} -\item {\em type:} string (must be one of the following three options) -\item {\em options:} - \begin{myitemize} - \item {\bf default} - direct client-server communication via network ports - \item {\bf ssh} - use ssh to re-invoke the messaging commands on the suite server - \item {\bf poll} - the suite polls for the status of tasks (no task messaging) - \end{myitemize} -\item {\em localhost default:} default -\end{myitemize} - -\paragraph[execution polling intervals]{[hosts] \textrightarrow [[HOST]] \textrightarrow execution polling intervals} -\label{execution_polling} - -Cylc can poll running jobs to catch problems that prevent task messages -from being sent back to the suite, such as hard job kills, network -outages, or unplanned task host shutdown. Routine polling is done only -for the polling {\em task communication method} (below) unless -suite-specific polling is configured in the suite configuration. -A list of interval values can be specified, with the last value used -repeatedly until the task is finished - this allows more frequent -polling near the beginning and end of the anticipated task run time. -Multipliers can be used as shorthand as in the example below. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default:} -\item {\em example:} \lstinline@execution polling intervals = 5*PT1M, 10*PT5M, 5*PT1M@ -\end{myitemize} - - -\paragraph[submission polling intervals]{[hosts] \textrightarrow [[HOST]] \textrightarrow submission polling intervals} -\label{submission_polling} - -Cylc can also poll submitted jobs to catch problems that prevent the -submitted job from executing at all, such as deletion from an external -batch scheduler queue. Routine polling is done only for the polling {\em -task communication method} (above) unless suite-specific polling -is configured in the suite configuration. 
A list of interval -values can be specified as for execution polling (above) but a single -value is probably sufficient for job submission polling. - -\begin{myitemize} -\item {\em type:} ISO 8601 duration/interval representation (e.g.\ -\lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). -\item {\em default:} -\item {\em example:} (see the execution polling example above) -\end{myitemize} - -\paragraph[scp command]{[hosts] \textrightarrow [[HOST]] \textrightarrow scp command } - -A string for the command used to copy files to a remote host. This is not used -on the suite host unless you run local tasks under another user account. The -value is assumed to be \lstinline=scp= with some initial options or a command -that implements a similar interface to \lstinline=scp=. - - -\begin{myitemize} -\item {\em type:} string -\item {\em localhost default:} \lstinline@scp -oBatchMode=yes -oConnectTimeout=10@ -\end{myitemize} - -\paragraph[ssh command]{[hosts] \textrightarrow [[HOST]] \textrightarrow ssh command } - -A string for the command used to invoke commands on this host. This is not -used on the suite host unless you run local tasks under another user account. -The value is assumed to be \lstinline=ssh= with some initial options or a -command that implements a similar interface to \lstinline=ssh=. - -\begin{myitemize} -\item {\em type:} string -\item {\em localhost default:} \lstinline@ssh -oBatchMode=yes -oConnectTimeout=10@ -\end{myitemize} - -\paragraph[use login shell]{[hosts] \textrightarrow [[HOST]] \textrightarrow use login shell } - -Whether to use a login shell or not for remote command invocation. By -default cylc runs remote ssh commands using a login shell: -\begin{lstlisting} - ssh user@host 'bash --login cylc ...' -\end{lstlisting} -which will source \lstinline=/etc/profile= and -\lstinline=~/.profile= to set up the user environment. 
However, for -security reasons some institutions do not allow unattended commands to -start login shells, so you can turn off this behaviour to get: -\begin{lstlisting} - ssh user@host 'cylc ...' -\end{lstlisting} -which will use the default shell on the remote machine, -sourcing \lstinline=~/.bashrc= (or \lstinline=~/.cshrc=) to set up the -environment. - -\begin{myitemize} -\item {\em type:} boolean -\item {\em localhost default:} True -\end{myitemize} - -\paragraph[cylc executable]{[hosts] \textrightarrow [[HOST]] \textrightarrow cylc executable } - -The \lstinline=cylc= executable on a remote host. Note this should normally -point to the cylc multi-version wrapper (see~\ref{CUI}) on the host, not -\lstinline=bin/cylc= for a specific installed version. -Specify a full path if \lstinline=cylc= is not in \lstinline=\$PATH= when it is -invoked via \lstinline=ssh= on this host. - -\begin{myitemize} -\item {\em type:} string -\item {\em localhost default:} \lstinline@cylc@ -\end{myitemize} - -\paragraph[global init-script]{[hosts] \textrightarrow [[HOST]] \textrightarrow global init-script } -\label{GlobalInitScript} - -If specified, the value of this setting will be inserted to just before the -\lstinline=init-script= section of all job scripts that are to be -submitted to the specified remote host. - -\begin{myitemize} -\item {\em type:} string -\item {\em localhost default:} \lstinline@""@ -\end{myitemize} - -\paragraph[copyable environment variables]{[hosts] \textrightarrow [[HOST]] \textrightarrow copyable environment variables } - -A list containing the names of the environment variables that can and/or need -to be copied from the suite server program to a job. 
- -\begin{myitemize} -\item {\em type:} string\_list -\item {\em localhost default:} \lstinline@[]@ -\end{myitemize} - -\paragraph[retrieve job logs]{[hosts] \textrightarrow [[HOST]] \textrightarrow retrieve job logs} - -Global default for the~\ref{runtime-remote-retrieve-job-logs} setting for the -specified host. - -\paragraph[retrieve job logs command]{[hosts] \textrightarrow [[HOST]] \textrightarrow retrieve job logs command} - -If \lstinline@rsync -a@ is unavailable or insufficient to retrieve job logs -from a remote host, you can use this setting to specify a suitable command. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} rsync -a -\end{myitemize} - -\paragraph[retrieve job logs max size]{[hosts] \textrightarrow [[HOST]] \textrightarrow retrieve job logs max size} - -Global default for the~\ref{runtime-remote-retrieve-job-logs-max-size} setting for the -specified host. - -\paragraph[retrieve job logs retry delays]{[hosts] \textrightarrow [[HOST]] \textrightarrow retrieve job logs retry delays} - -Global default for the~\ref{runtime-remote-retrieve-job-logs-retry-delays} -setting for the specified host. - -\paragraph[task event handler retry delays]{[hosts] \textrightarrow [[HOST]] \textrightarrow task event handler retry delays} - -Host specific default for the~\ref{runtime-events-handler-retry-delays} -setting. - -\paragraph[tail command template]{[hosts] \textrightarrow [[HOST]] \textrightarrow tail command template} -\label{tail-command-template} - -A command template (with \lstinline=%(filename)s= substitution) to tail-follow -job logs on HOST, by the GUI log viewer and \lstinline=cylc cat-log=. You are -unlikely to need to override this. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} \lstinline@tail -n +1 -F %(filename)s@ -\end{myitemize} - -\paragraph[{[[[}batch systems{]]]}]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]]} - -Settings for particular batch systems on HOST. 
In the subsections below, SYSTEM -should be replaced with the cylc batch system handler name that represents the -batch system (see~\ref{RuntimeJobSubMethods}). - -\subparagraph[{[[[[}SYSTEM{]]]]}err tailer]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]] \textrightarrow [[[[SYSTEM]]]] \textrightarrow err tailer} -\label{err-tailer} - -A command template (with \lstinline=%(job_id)s= substitution) that can be used -to tail-follow the stderr stream of a running job if SYSTEM does -not use the normal log file location while the job is running. This setting -overrides~\ref{tail-command-template} above. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} For PBS: - \begin{lstlisting} -[hosts] - [[ myhpc*]] - [[[batch systems]]] - [[[[pbs]]]] - err tailer = qcat -f -e %(job_id)s - out tailer = qcat -f -o %(job_id)s - err viewer = qcat -e %(job_id)s - out viewer = qcat -o %(job_id)s - \end{lstlisting} -\end{myitemize} - -\subparagraph[{[[[[}SYSTEM{]]]]}out tailer]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]] \textrightarrow [[[[SYSTEM]]]] \textrightarrow out tailer} -\label{out-tailer} - -A command template (with \lstinline=%(job_id)s= substitution) that can be used -to tail-follow the stdout stream of a running job if SYSTEM does -not use the normal log file location while the job is running. This setting -overrides~\ref{tail-command-template} above. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} see~\ref{err-tailer} -\end{myitemize} - -\subparagraph[{[[[[}SYSTEM{]]]]}err viewer]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]] \textrightarrow [[[[SYSTEM]]]] \textrightarrow err viewer} - -A command template (with \lstinline=%(job_id)s= substitution) that can be used -to view the stderr stream of a running job if SYSTEM does -not use the normal log file location while the job is running. 
- -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} see~\ref{err-tailer} -\end{myitemize} - -\subparagraph[{[[[[}SYSTEM{]]]]}out viewer]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]] \textrightarrow [[[[SYSTEM]]]] \textrightarrow out viewer} - -A command template (with \lstinline=%(job_id)s= substitution) that can be used -to view the stdout stream of a running job if SYSTEM does -not use the normal log file location while the job is running. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} see~\ref{err-tailer} -\end{myitemize} - -\subparagraph[{[[[[}SYSTEM{]]]]}job name length maximum]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]] \textrightarrow [[[[SYSTEM]]]] \textrightarrow job name length maximum} -\label{JobNameLengthMaximum} - -The maximum length for job name acceptable by a batch system on a given host. -Currently, this setting is only meaningful for PBS jobs. For example, PBS 12 -or older will fail a job submit if the job name has more than 15 characters, -which is the default setting. If you have PBS 13 or above, you may want to -modify this setting to a larger value. - -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} (none) -\item {\em example:} For PBS: - \begin{lstlisting} -[hosts] - [[myhpc*]] - [[[batch systems]]] - [[[[pbs]]]] - # PBS 13 - job name length maximum = 236 - \end{lstlisting} -\end{myitemize} - -\subparagraph[{[[[[}SYSTEM{]]]]}execution time limit polling intervals]{[hosts] \textrightarrow [[HOST]] \textrightarrow [[[batch systems]]] \textrightarrow [[[[SYSTEM]]]] \textrightarrow execution time limit polling intervals} -\label{ExecutionTimeLimitPollingIntervals} - -The intervals between polling after a task job (submitted to the relevant batch -system on the relevant host) exceeds its execution time limit. The default -setting is PT1M, PT2M, PT7M. 
The accumulated times (in minutes) for these -intervals will be roughly 1, 1 + 2 = 3 and 1 + 2 + 7 = 10 after a task job -exceeds its execution time limit. - -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval - representations, optionally {\em preceded} by multipliers. - \item {\em default:} PT1M, PT2M, PT7M - \item {\em example:} - \begin{lstlisting} -[hosts] - [[myhpc*]] - [[[batch systems]]] - [[[[pbs]]]] - execution time limit polling intervals = 5*PT2M - \end{lstlisting} -\end{myitemize} - -\subsection{[suite servers] } - -\label{global-suite-servers} - -Configure allowed suite hosts and ports for starting up (running or -restarting) suites and enabling them to be detected whilst running via -utilities such as \lstinline=cylc gscan=. Additionally configure host -selection settings specifying how to determine the most suitable run host at -any given time from those configured. - -\subsubsection[auto restart delay]{[suite servers] \textrightarrow auto restart delay} - -Relates to Cylc's auto stop-restart mechanism (see~\ref{auto-stop-restart}). -When a host is set to automatically shut down/restart it will first wait a -random period of time between zero and \lstinline=auto restart delay= -seconds before beginning the process. This is to prevent large numbers -of suites from restarting simultaneously. - -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} \lstinline=0= -\end{myitemize} - -\subsubsection[condemned hosts]{[suite servers] \textrightarrow condemned hosts} - -Hosts specified in \lstinline=condemned hosts= will not be considered as suite -run hosts. If suites are already running on \lstinline=condemned hosts= they -will be automatically shut down and restarted (see~\ref{auto-stop-restart}). - -\begin{myitemize} -\item {\em type:} comma-separated list of host names and/or IP addresses.
-\item {\em default:} (none) -\end{myitemize} - -\subsubsection[run hosts]{[suite servers] \textrightarrow run hosts } - -A list of allowed suite run hosts. One of these hosts will be appointed for -a suite to start up on if an explicit host is not provided as an option to -a \lstinline=run= or \lstinline=restart= command. - -\begin{myitemize} -\item {\em type:} comma-separated list of host names and/or IP addresses. -\item {\em default:} \lstinline=localhost= -\end{myitemize} - -\subsubsection[scan hosts]{[suite servers] \textrightarrow scan hosts } - -A list of hosts to scan for running suites. - -\begin{myitemize} -\item {\em type:} comma-separated list of host names and/or IP addresses. -\item {\em default:} \lstinline=localhost= -\end{myitemize} - -\subsubsection[run ports]{[suite servers] \textrightarrow run ports } - -A list of allowed ports for Cylc to use to run suites. Note that only one -suite can run per port for a given host, so the length of this list -determines the maximum number of suites that can run at once per suite host. -This config item supersedes the deprecated settings \lstinline=base port= -and \lstinline=maximum number of ports=, where the base port is equivalent to -the first port, and the maximum number of ports to the length, of this list. - -\begin{myitemize} -\item {\em type:} string in the format \lstinline=X .. Y= for - \lstinline@X <= Y@ where \lstinline=X= and \lstinline=Y= are integers. -\item {\em default:} \lstinline=43001 .. 43100= (equivalent to the list -\lstinline=43001, 43002, ... , 43099, 43100=) -\end{myitemize} - -\subsubsection[scan ports]{[suite servers] \textrightarrow scan ports } - -A list of ports to scan for running suites on each host set in scan hosts. - -\begin{myitemize} -\item {\em type:} string in the format \lstinline=X .. Y= for - \lstinline@X <= Y@ where \lstinline=X= and \lstinline=Y= are integers. -\item {\em default:} \lstinline=43001 .. 43100= (equivalent to the list -\lstinline=43001, 43002, ... 
, 43099, 43100=) -\end{myitemize} - -\subsubsection[run host select]{[suite servers] \textrightarrow [[run host select]]} - -Configure thresholds for excluding insufficient hosts and a method for -ranking the remaining hosts to be applied in selection of the most suitable -\lstinline=run host=, from those configured, at start-up whenever a set host -is not specified on the command line via the \lstinline@--host=@ option. - -\paragraph[rank]{[suite servers] \textrightarrow [[run host select]] \textrightarrow rank} - -The method to use to rank the \lstinline=run host= list in order of -suitability. - -\begin{myitemize} -\item {\em type:} string (which must be one of the options outlined below) -\item {\em default:} \lstinline=random= -\item {\em options:} - \begin{myitemize} - \item {\bf random} - shuffle the hosts to select a host at random - \item {\bf load:1} - rank and select for the lowest load average over 1 minute (as given by the \lstinline=uptime= command) - \item {\bf load:5} - as for \lstinline=load:1= above, but over 5 minutes - \item {\bf load:15} - as for \lstinline=load:1= above, but over 15 minutes - \item {\bf memory} - rank and select for the highest usable memory i.e. - free memory plus memory in the buffer cache ('buffers') and in the - page cache ('cache'), as specified under \lstinline=/proc/meminfo= - \item {\bf disk-space:PATH} - rank and select for the highest free disk - space for a given mount directory path \lstinline=PATH= as given by - the \lstinline=df= command, where multiple paths may be specified - individually i.e. via \lstinline=disk-space:PATH_1= and - \lstinline=disk-space:PATH_2=, etc. - \end{myitemize} -\item {\em default:} (none) -\end{myitemize} - -\paragraph[thresholds]{[suite servers] \textrightarrow [[run host select]] \textrightarrow thresholds} - -A list of thresholds i.e. cutoff values which run hosts must meet in order -to be considered as a possible run host. 
Each threshold is a minimum or a -maximum requirement depending on the context of the measure; usable -memory (\lstinline=memory=) and free disk space -(\lstinline=disk-space:PATH=) threshold values set a {\em minimum} value, -which must be exceeded, whereas load average (\lstinline=load:1=, -\lstinline=load:5= and \lstinline=load:15=) threshold values set a -{\em maximum}, which must not be. Failure to meet a threshold results in -exclusion from the list of hosts that undergo ranking to -determine the best host which becomes the run host. - -\begin{myitemize} -\item {\em type:} string in format -\lstinline=MEASURE_1 CUTOFF_1; ... ;MEASURE_n CUTOFF_n= (etc), -where each \lstinline=MEASURE_N= is one of the options below (note -these correspond to all the rank methods accepted under the rank setting -except for \lstinline=random= which does not make sense as a threshold -measure). Spaces delimit corresponding measures and their values, while -semi-colons (optionally with subsequent spaces) delimit each measure-value -pair. -\item {\em options:} - \begin{myitemize} - \item {\bf load:1} - load average over 1 minute (as given by -the \lstinline=uptime= command) - \item {\bf load:5} - as for \lstinline=load:1= above, but over 5 minutes - \item {\bf load:15} - as for \lstinline=load:1= above, but over 15 minutes - \item {\bf memory} - usable memory i.e. free memory plus memory in the - buffer cache ('buffers') and in the page cache ('cache'), in KB, as - specified under \lstinline=/proc/meminfo= - \item {\bf disk-space:PATH} - free disk space for a given mount -directory path \lstinline=PATH=, in KB, as given by the \lstinline=df= -command, where multiple paths may be specified individually i.e. via -\lstinline=disk-space:PATH_1= and \lstinline=disk-space:PATH_2=, etc. 
- \end{myitemize} -\item {\em default:} (none) -\item {\em examples:} - \begin{myitemize} - \item \lstinline@thresholds = memory 2000@ (set a minimum of 2000 KB in usable memory for possible run hosts) - \item \lstinline@thresholds = load:5 0.5; load:15 1.0; disk-space:/ 5000@ (set a maximum of 0.5 and 1.0 for load averages over 5 -and 15 minutes respectively and a minimum of 5000 KB of free disk-space on -the \lstinline=/= mount directory. If any of these thresholds are not met -by a host, it will be excluded for running a suite on.) - \end{myitemize} -\end{myitemize} - -\subsection{[suite host self-identification] } - -The suite host's identity must be determined locally by cylc and passed -to running tasks (via \lstinline@$CYLC_SUITE_HOST@) so that task messages -can target the right suite on the right host. - -%(TO DO: is it conceivable that different remote task hosts at the same -%site might see the suite host differently? If so we would need to be -%able to override the target in suite configurations.) - -\subsubsection[method]{[suite host self-identification] \textrightarrow method } - -This item determines how cylc finds the identity of the suite host. For -the default {\em name} method cylc asks the suite host for its host -name. This should resolve on remote task hosts to the IP address of the -suite host; if it doesn't, adjust network settings or use one of the -other methods. For the {\em address} method, cylc attempts to use a -special external ``target address'' to determine the IP address of the -suite host as seen by remote task hosts (in-source documentation in -\lstinline=/lib/cylc/hostuserutil.py= explains how this works). -And finally, as a last resort, you can choose the {\em hardwired} method -and manually specify the host name or IP address of the suite host. 
- -\begin{myitemize} -\item {\em type:} string -\item {\em options:} -\begin{myitemize} - \item name - self-identified host name - \item address - automatically determined IP address (requires {\em target}, below) - \item hardwired - manually specified host name or IP address (requires {\em host}, below) -\end{myitemize} -\item {\em default:} name -\end{myitemize} - -\subsubsection[target]{[suite host self-identification] \textrightarrow target } - -This item is required for the {\em address} self-identification method. -If your suite host sees the internet, a common address such as -\lstinline@google.com@ will do; otherwise choose a host visible on your -intranet. -\begin{myitemize} -\item {\em type:} string (an inter- or intranet URL visible from the suite host) -\item {\em default:} \lstinline@google.com@ -\end{myitemize} - - -\subsubsection[host]{[suite host self-identification] \textrightarrow host } - -Use this item to explicitly set the name or IP address of the suite host -if you have to use the {\em hardwired} self-identification method. -\begin{myitemize} -\item {\em type:} string (host name or IP address) -\item {\em default:} (none) -\end{myitemize} - -\subsection{[task events]} - -Global site/user defaults for~\ref{TaskEventHandling}. - -\subsection{[test battery]} - -Settings for the automated development tests. Note the test battery reads -\lstinline=/etc/global-tests.rc= instead of the normal site/user -global config files. - -\subsubsection[remote host with shared fs]{[test battery] \textrightarrow remote host with shared fs} - -The name of a remote host that sees the same HOME file system as the host running the -test battery. - -\subsubsection[remote host]{[test battery] \textrightarrow remote host} - -Host name of a remote account that does not see the same home directory as -the account running the test battery - see also ``remote owner'' below). 
- -\subsubsection[remote owner]{[test battery] \textrightarrow remote owner} - -User name of a remote account that does not see the same home directory as the -account running the test battery (see also ``remote host'' above). - -\subsubsection[{[[}batch systems{]]}]{[test battery] \textrightarrow [[batch systems]]} - -Settings for testing supported batch systems (job submission methods). The -tests for a batch system are only performed if the batch system is available on -the test host or a remote host accessible via SSH from the test host. - -\paragraph[{[[[}SYSTEM{]]]}]{[test battery] \textrightarrow [[batch systems]] \textrightarrow [[[SYSTEM]]]} - -SYSTEM is the name of a supported batch system with automated tests. -This can currently be "loadleveler", "lsf", "pbs", "sge" and/or "slurm". - -\subparagraph[host]{[test battery] \textrightarrow [[batch systems]] \textrightarrow [[[SYSTEM]]] \textrightarrow host} - -The name of a host where commands for this batch system are available. Use -"localhost" if the batch system is available on the host running the test -battery. Any specified remote host should be accessible via SSH from the host -running the test battery. - -\subparagraph[err viewer]{[test battery] \textrightarrow [[batch systems]] \textrightarrow [[[SYSTEM]]] \textrightarrow err viewer} - -The command template (with \lstinline=\%(job_id)s= substitution) for testing -the run time stderr viewer functionality for this batch system. - -\subparagraph[out viewer]{[test battery] \textrightarrow [[batch systems]] \textrightarrow [[[SYSTEM]]] \textrightarrow out viewer} - -The command template (with \lstinline=\%(job_id)s= substitution) for testing -the run time stdout viewer functionality for this batch system.
- -\subparagraph[{[[[[}directives{]]]]}]{[test battery] \textrightarrow [[batch systems]] \textrightarrow [[[SYSTEM]]] \textrightarrow [[[[directives]]]]} - -The minimum set of directives that must be supplied to the batch system on the -site to initiate jobs for the tests. - -\subsection{[cylc]} - -Default values for entries in the suite.rc [cylc] section. - -\subsubsection[UTC mode]{[cylc] \textrightarrow UTC mode} -\label{SiteUTCMode} - -Allows you to set a default value for UTC mode in a suite at the site level. -See ~\ref{UTC-mode} for details. - -\subsubsection[health check interval]{[cylc] \textrightarrow health check interval} - -Site default suite health check interval. -See ~\ref{health-check-interval} for details. - -\subsubsection[task event mail interval]{[cylc] \textrightarrow task event mail interval} - -Site default task event mail interval. -See ~\ref{task-event-mail-interval} for details. - -\subsubsection[{[}events{]}]{[cylc] \textrightarrow [[events]]} -\label{SiteCylcHooks} - -You can define site defaults for each of the following options, details -of which can be found under ~\ref{SuiteEventHandling}: - -\paragraph[handlers]{[cylc] \textrightarrow [[events]] \textrightarrow handlers} - -\paragraph[handler events]{[cylc] \textrightarrow [[events]] \textrightarrow handler events} - -\paragraph[startup handler]{[cylc] \textrightarrow [[events]] \textrightarrow startup handler} - -\paragraph[shutdown handler]{[cylc] \textrightarrow [[events]] \textrightarrow shutdown handler} - -\paragraph[mail events]{[cylc] \textrightarrow [[events]] \textrightarrow mail events} - -\paragraph[mail footer]{[cylc] \textrightarrow [[events]] \textrightarrow mail footer} - -\paragraph[mail from]{[cylc] \textrightarrow [[events]] \textrightarrow mail from} - -\paragraph[mail smtp]{[cylc] \textrightarrow [[events]] \textrightarrow mail smtp} - -\paragraph[mail to]{[cylc] \textrightarrow [[events]] \textrightarrow mail to} - -\paragraph[timeout handler]{[cylc] 
\textrightarrow [[events]] \textrightarrow timeout handler} - -\paragraph[timeout]{[cylc] \textrightarrow [[events]] \textrightarrow timeout} - -\paragraph[abort on timeout]{[cylc] \textrightarrow [[events]] \textrightarrow abort on timeout} - -\paragraph[stalled handler]{[cylc] \textrightarrow [[events]] \textrightarrow stalled handler} - -\paragraph[abort on stalled]{[cylc] \textrightarrow [[events]] \textrightarrow abort on stalled} - -\paragraph[inactivity handler]{[cylc] \textrightarrow [[events]] \textrightarrow inactivity handler} - -\paragraph[inactivity]{[cylc] \textrightarrow [[events]] \textrightarrow inactivity} - -\paragraph[abort on inactivity]{[cylc] \textrightarrow [[events]] \textrightarrow abort on inactivity} - -\subsection{[authentication]} -\label{GlobalAuth} - -Authentication of client programs with suite server programs can be configured -here, and overridden in suites if necessary (see~\ref{SuiteAuth}). - -The suite-specific passphrase must be installed on a user's account to -authorize full control privileges (see~\ref{tutPassphrases} -and~\ref{ConnectionAuthentication}). In the future we plan to move to a more -traditional user account model so that each authorized user can have their own -password. - -\subsubsection[public]{[authentication] \textrightarrow public} - -This sets the client privilege level for public access - i.e.\ no suite passphrase -required. - -\begin{myitemize} -\item {\em type:} string (must be one of the following options) -\item {\em options:} - \begin{myitemize} - \item {\em identity} - only suite and owner names revealed - \item {\em description} - identity plus suite title and description - \item {\em state-totals} - identity, description, and task state totals - \item {\em full-read} - full read-only access for monitor and GUI - \item {\em shutdown} - full read access plus shutdown, but no other - control. 
- \end{myitemize} -\item {\em default:} state-totals -\end{myitemize} diff --git a/doc/src/cylc-user-guide/suiterc.tex b/doc/src/cylc-user-guide/suiterc.tex deleted file mode 100644 index 087c4ffea3f..00000000000 --- a/doc/src/cylc-user-guide/suiterc.tex +++ /dev/null @@ -1,2395 +0,0 @@ -\section{Suite.rc Reference} -\label{SuiteRCReference} - -\lstset{language=bash} - -This appendix defines all legal suite configuration items. -Embedded Jinja2 code (see~\ref{Jinja2}) must process to a valid -raw suite.rc file. See also~\ref{SuiteRCFile} for a descriptive -overview of suite.rc files, including syntax (\ref{Syntax}). - -\subsection{Top Level Items} - -The only top level configuration items at present are the suite title -and description. - - -\subsection{[meta]} - -Section containing metadata items for this suite. Several items -(title, description, URL) are pre-defined and are used by the GUI. Others can be -user-defined and passed to suite event handlers to be interpreted according to your -needs. For example, the value of a ``suite-priority'' item could determine how an event -handler responds to failure events. - - -\subsubsection[title]{ [meta] \textrightarrow title} - -A single line description of the suite. It is displayed in the GUI ``Open Another Suite'' -window and can be retrieved at run time with the -\lstinline=cylc show= command. - -\begin{myitemize} -\item {\em type:} single line string -\item {\em default:} (none) -\end{myitemize} - -\subsubsection[description]{ [meta] \textrightarrow description} - -A multi-line description of the suite. It can be retrieved at run time with the -\lstinline=cylc show= command. - -\begin{myitemize} -\item {\em type:} multi-line string -\item {\em default:} (none) -\end{myitemize} - -\subsubsection[URL]{ [meta] \textrightarrow URL} -\label{SuiteURL} - -A web URL to suite documentation. If present it can be browsed with the -\lstinline=cylc doc= command, or from the gcylc Suite menu. 
The string -template \lstinline=%(suite_name)s= will be replaced with the actual suite -name. See also task URLs (\ref{TaskURL}). - -\begin{myitemize} -\item {\em type:} string (URL) -\item {\em default:} (none) -\item {\em example:} \lstinline=http://my-site.com/suites/%(suite_name)s/index.html= -\end{myitemize} - -\subsubsection[group]{ [meta] \textrightarrow group} - -A group name for a suite. In the gscan GUI, suites with the same group name can -be collapsed into a single state summary when the ``group'' column is displayed. - -\begin{myitemize} -\item {\em type:} single line string -\item {\em default:} (none) -\end{myitemize} - -\subsubsection[\_\_MANY\_\_]{ [meta] \textrightarrow \_\_MANY\_\_} - -Replace \_\_MANY\_\_ with any user-defined metadata item. These, like title, URL, etc., can be passed -to suite event handlers to be interpreted according to your needs. For example, ``suite-priority''. - -\begin{myitemize} -\item {\em type:} String or integer -\item {\em default:} (none) -\item {\em example:} - \begin{lstlisting} -[meta] - suite-priority = high - \end{lstlisting} -\end{myitemize} - - -\subsection{[cylc]} - -This section is for configuration that is not specifically task-related. - -\subsubsection[required run mode]{ [cylc] \textrightarrow required run mode} - -If this item is set cylc will abort if the suite is not started in the -specified mode. This can be used for demo suites that have to be -run in simulation mode, for example, because they have been taken out of -their normal operational context; or to prevent accidental submission of -expensive real tasks during suite development. -\begin{myitemize} - \item {\em type:} string - \item {\em legal values:} live, dummy, dummy-local, simulation - \item {\em default:} None -\end{myitemize} - -\subsubsection[UTC mode]{ [cylc] \textrightarrow UTC mode} -\label{UTC-mode} - -Cylc runs off the suite host's system clock by default.
This item allows -you to run the suite in UTC even if the system clock is set to local time. -Clock-trigger tasks will trigger when the current UTC time is equal to -their cycle point date-time plus offset; other time values used, reported, or -logged by the suite server program will usually also be in UTC. The default for -this can be set at the site level (see ~\ref{SiteUTCMode}). - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False, unless overridden at site level. -\end{myitemize} - -\subsubsection[cycle point format]{ [cylc] \textrightarrow cycle point format} -\label{cycle-point-format} - -To just alter the timezone used in the date-time cycle point format, see -\ref{cycle-point-time-zone}. To just alter the number of expanded year digits -(for years below 0 or above 9999), see -\ref{cycle-point-num-expanded-year-digits}. - -Cylc usually uses a \lstinline=CCYYMMDDThhmmZ= (\lstinline=Z= in the special -case of UTC) or \lstinline=CCYYMMDDThhmm+hhmm= format (\lstinline=+= standing -for \lstinline=+= or \lstinline=-= here) for writing down date-time cycle -points, which follows one of the basic formats outlined in the ISO 8601 -standard. For example, a cycle point on the 3rd of February 2001 at 4:50 in -the morning, UTC (+0000 timezone), would be written -\lstinline=20010203T0450Z=. Similarly, for the 3rd of February 2001 at -4:50 in the morning, +1300 timezone, cylc would write -\lstinline=20010203T0450+1300=. - -You may use the isodatetime library's syntax to write dates and times in ISO -8601 formats - \lstinline=CC= for century, \lstinline=YY= for decade and -decadal year, \lstinline=+X= for expanded year digits and their positive or -negative sign, thereafter following the ISO 8601 standard example notation -except for fractional digits, which are represented as \lstinline=,ii= for -\lstinline=hh=, \lstinline=,nn= for \lstinline=mm=, etc. 
For example, to write -date-times as week dates with fractional hours, set cycle point format to -\lstinline=CCYYWwwDThh,iiZ= e.g.\ \lstinline=1987W041T08,5Z= for 08:30 UTC on -Monday on the fourth ISO week of 1987. - -You can also use a subset of the strptime/strftime POSIX standard - supported -tokens are \lstinline=%F=, \lstinline=%H=, \lstinline=%M=, \lstinline=%S=, -\lstinline=%Y=, \lstinline=%d=, \lstinline=%j=, \lstinline=%m=, -\lstinline=%s=, \lstinline=%z=. - -The ISO8601 extended date-time format can be used -(\lstinline=%Y-%m-%dT%H:%M=) but -note that the `-' and `:' characters end up in job log directory paths. - -The pre cylc-6 legacy 10-digit date-time format YYYYMMDDHH is not ISO8601 -compliant and can no longer be used as the cycle point format. For job -scripts that still require the old format, use the -\lstinline=cylc cyclepoint= utility to translate the ISO8601 cycle point -inside job scripts, e.g.: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[root]] - [[[environment]]] - CYCLE_TIME = $(cylc cyclepoint --template=%Y%m%d%H) -\end{lstlisting} - -\subsubsection[cycle point num expanded year digits]{ [cylc] \textrightarrow -cycle point num expanded year digits} -\label{cycle-point-num-expanded-year-digits} - -For years below 0 or above 9999, the ISO 8601 standard specifies that an -extra number of year digits and a sign should be used. This extra number needs -to be written down somewhere (here). - -For example, if this extra number is set to 2, 00Z on the 1st of January in -the year 10040 will be represented as \lstinline=+0100400101T0000Z= (2 extra -year digits used). With this number set to 3, 06Z on the 4th of May 1985 would -be written as \lstinline=+00019850504T0600Z=. - -This number defaults to 0 (no sign or extra digits used). 
- -\subsubsection[cycle point time zone]{ [cylc] \textrightarrow -cycle point time zone} -\label{cycle-point-time-zone} - -If you set UTC mode to True (\ref{UTC-mode}) then this will default to -\lstinline=Z=. If you use a custom cycle point format -(\ref{cycle-point-format}), you should specify the timezone choice (or null -timezone choice) here as well. - -You may set your own time zone choice here, which will be used for all -date-time cycle point dumping. Time zones should be expressed as ISO 8601 time -zone offsets from UTC, such as \lstinline=+13=, \lstinline=+1300=, -\lstinline=-0500= or \lstinline=+0645=, with \lstinline=Z= representing the -special \lstinline=+0000= case. Cycle points will be converted to the time -zone you give and will be represented with this string at the end. - -Cycle points that are input without time zones (e.g.\ as an initial cycle -point -setting) will use this time zone if set. If this isn't set (and UTC mode is -also not set), then they will default to the current local time zone. - -Note that the ISO standard also allows writing the hour and minute separated -by a ":" (e.g.\ \lstinline=+13:00=) - however, this is not recommended, given -that the time zone is used as part of task output filenames. - -\subsubsection[abort if any task fails]{[cylc] \textrightarrow abort if any task fails} - -Cylc does not normally abort if tasks fail, but if this item is turned -on it will abort with exit status 1 if any task fails. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[health check interval]{[cylc] \textrightarrow health check interval} -\label{health-check-interval} - -Specify the time interval on which a running cylc suite will check that its run -directory exists and that its contact file contains the expected information. -If not, the suite will shut itself down automatically. 
- -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT5M=, 5 minutes (note: by contrast, \lstinline=P5M= means 5 - months, so remember the \lstinline=T=!)). - \item {\em default:} PT10M -\end{myitemize} - -\subsubsection[task event mail interval]{[cylc] \textrightarrow task event mail interval} -\label{task-event-mail-interval} - -Group together all the task event mail notifications into a single email within -a given interval. This is useful to prevent flooding users' mail boxes when -many task events occur within a short period of time. - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ \lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). - \item {\em default:} PT5M -\end{myitemize} - -\subsubsection[disable automatic shutdown]{[cylc] \textrightarrow disable automatic shutdown} - -This has the same effect as the \lstinline{--no-auto-shutdown} flag for -the suite run commands: it prevents the suite server program from shutting down -normally when all tasks have finished (a suite timeout can still be used to -stop the daemon after a period of inactivity, however). This option can -make it easier to re-trigger tasks manually near the end of a suite run, -during suite development and debugging. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[log resolved dependencies]{[cylc] \textrightarrow log resolved dependencies} - -If this is turned on cylc will write the resolved dependencies of each -task to the suite log as it becomes ready to run (a list of the IDs of -the tasks that actually satisfied its prerequisites at run time). Mainly -used for cylc testing and development.
- -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[{[[}parameters{]]}]{[cylc] \textrightarrow [[parameters]]} - -Define parameter values here for use in expanding {\em parameterized tasks} - -see Section~\ref{Parameterized Tasks}. -\begin{myitemize} - \item {\em type:} list of strings, or an integer range - \lstinline=LOWER..UPPER..STEP= (two dots, inclusive bounds, STEP optional) - \item {\em default:} (none) - \item {\em examples:} - \begin{myitemize} - \item \lstinline@run = control, test1, test2@ - \item \lstinline@mem = 1..5@ (equivalent to \lstinline@1, 2, 3, 4, 5@). - \item \lstinline@mem = -11..-7..2@ (equivalent to \lstinline@-11, -9, -7@). - \end{myitemize} -\end{myitemize} - -\subsubsection[{[[}parameter templates{]]}]{[cylc] \textrightarrow [[parameter templates]]} -\label{RefParameterTemplates} - -Parameterized task names (see previous item, and Section~\ref{Parameterized -Tasks}) are expanded, for each parameter value, using string templates. You -can assign templates to parameter names here, to override the default templates. - -\begin{myitemize} - \item {\em type:} a Python-style string template - \item {\em default} for integer parameters \lstinline=p=: - \lstinline=_p%(p)0Nd= \\ - where N is the number of digits of the maximum integer value, - e.g.\ \lstinline=foo= becomes \lstinline=foo_run3= for - \lstinline@run@ value \lstinline@3@. - \item {\em default} for non-integer parameters \lstinline=p=: - \lstinline=_%(p)s= \\ - e.g.\ \lstinline=foo= becomes \lstinline=foo_top= for - \lstinline@run@ value \lstinline@top@. - \item {\em example:} \lstinline@run = -R%(run)s@ \\ - e.g.\ \lstinline=foo= becomes \lstinline=foo-R3= for - \lstinline@run@ value \lstinline@3@. -\end{myitemize} - -Note that the values of a parameter named \lstinline=p= are substituted for -\lstinline=%(p)s=. 
-In \lstinline=_run%(run)s= the first ``run'' is a string literal, and the second -gets substituted with each value of the parameter. - -\subsubsection[{[[}events{]]}]{[cylc] \textrightarrow [[events]]} -\label{SuiteEventHandling} - -Cylc has internal ``hooks'' to which you can attach handlers that are -called by the suite server program whenever certain events occur. This section -configures suite event hooks; see~\ref{TaskEventHandling} for -task event hooks. - -Event handler commands can send an email or an SMS, call a pager, intervene in -the operation of their own suite, or whatever. -They can be held in the suite bin directory, otherwise it is up to you -to ensure their location is in \lstinline=$PATH= (in the shell in which -cylc runs, on the suite host). The commands should require -very little resource to run and should return quickly. - -Each event handler can be specified as a list of command lines or command -line templates. - -A command line template may have any or all of these patterns which will be -substituted with actual values: -\begin{myitemize} - \item \%(event)s: event name (see below) - \item \%(suite)s: suite name - \item \%(suite\_url)s: suite URL - \item \%(suite\_uuid)s: suite UUID string - \item \%(message)s: event message, if any - \item any suite [meta] item, e.g.: - \begin{myitemize} - \item \%(title)s: suite title - \item \%(importance)s: example custom suite metadata - \end{myitemize} -\end{myitemize} - -Otherwise the command line will be called with the following default -arguments: -\begin{lstlisting} - %(event)s %(suite)s %(message)s -\end{lstlisting} - -{\em Note: substitution patterns should not be quoted in the template strings. -This is done automatically where required.} - -Additional information can be passed to event handlers via -[cylc] \textrightarrow [[environment]]. 
- -\paragraph[EVENT handler]{[cylc] \textrightarrow [[events]] \textrightarrow EVENT handler} - -A comma-separated list of one or more event handlers to call when one of the -following EVENTs occurs: -\begin{myitemize} - \item {\bf startup} - the suite has started running - \item {\bf shutdown} - the suite is shutting down - \item {\bf timeout} - the suite has timed out - \item {\bf stalled} - the suite has stalled - \item {\bf inactivity} - the suite is inactive -\end{myitemize} - -Default values for these can be set at the site level via the siterc file -(see~\ref{SiteCylcHooks}). - -Item details: -\begin{myitemize} - \item {\em type:} string (event handler script name) - \item {\em default:} None, unless defined at the site level. - \item {\em example:} \lstinline@startup handler = my-handler.sh@ -\end{myitemize} - -\paragraph[handlers]{[cylc] \textrightarrow [[events]] \textrightarrow handlers} - -Specify the general event handlers as a list of command lines or command line -templates. - -\begin{myitemize} - \item {\em type:} Comma-separated list of strings (event handler command line or command line templates). - \item {\em default:} (none) - \item {\em example:} \lstinline@handlers = my-handler.sh@ -\end{myitemize} - -\paragraph[handler events]{[cylc] \textrightarrow [[events]] \textrightarrow handler events} - -Specify the events for which the general event handlers should be invoked. - -\begin{myitemize} - \item {\em type:} Comma-separated list of events - \item {\em default:} (none) - \item {\em example:} \lstinline@handler events = timeout, shutdown@ -\end{myitemize} - -\paragraph[mail events]{[cylc] \textrightarrow [[events]] \textrightarrow mail events} - -Specify the suite events for which notification emails should be sent.
- -\begin{myitemize} - \item {\em type:} Comma-separated list of events - \item {\em default:} (none) - \item {\em example:} \lstinline@mail events = startup, shutdown, timeout@ -\end{myitemize} - -\paragraph[mail footer]{[cylc] \textrightarrow [[events]] \textrightarrow mail footer} - -Specify a string or string template to insert into the footers of notification emails -for both suite events and task events. - -A template string may have any or all of these patterns which will be -substituted with actual values: -\begin{myitemize} - \item \%(host)s: suite host name - \item \%(port)s: suite port number - \item \%(owner)s: suite owner name - \item \%(suite)s: suite name -\end{myitemize} - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} (none) - \item {\em example:} \lstinline@mail footer = see: http://localhost/%(owner)s/notes-on/%(suite)s/@ -\end{myitemize} - -\paragraph[mail from]{[cylc] \textrightarrow [[events]] \textrightarrow mail from} - -Specify an alternate \lstinline=from:= email address for suite event notifications. - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} None, (notifications@HOSTNAME) - \item {\em example:} \lstinline|mail from = no-reply@your-org| -\end{myitemize} - -\paragraph[mail smtp]{[cylc] \textrightarrow [[events]] \textrightarrow mail smtp} - -Specify the SMTP server for sending suite event email notifications. - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} None, (localhost:25) - \item {\em example:} \lstinline@mail smtp = smtp.yourorg@ -\end{myitemize} - -\paragraph[mail to]{[cylc] \textrightarrow [[events]] \textrightarrow mail to} - -A list of email addresses to send suite event notifications. The list can be -anything accepted by the \lstinline=mail= command.
- -\begin{myitemize} - \item {\em type:} string - \item {\em default:} None, (USER@HOSTNAME) - \item {\em example:} \lstinline@mail to = your.colleague@ -\end{myitemize} - -\paragraph[timeout]{[cylc] \textrightarrow [[events]] \textrightarrow timeout} - -If a timeout is set and the timeout event is handled, the timeout event -handler(s) will be called if the suite stays in a stalled state for some period -of time. The timer is set initially at suite start up. It is possible to set a -default for this at the site level (see ~\ref{SiteCylcHooks}). - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT5S=, 5 seconds, \lstinline=PT1S=, 1 second) - minimum 0 seconds. - \item {\em default:} (none), unless set at the site level. -\end{myitemize} - -\paragraph[inactivity]{[cylc] \textrightarrow [[events]] \textrightarrow inactivity} - -If inactivity is set and the inactivity event is handled, the inactivity event -handler(s) will be called if there is no activity in the suite for some period -of time. The timer is set initially at suite start up. It is possible to set a -default for this at the site level (see ~\ref{SiteCylcHooks}). - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT5S=, 5 seconds, \lstinline=PT1S=, 1 second) - minimum 0 seconds. - \item {\em default:} (none), unless set at the site level. -\end{myitemize} - -\paragraph[abort on stalled]{[cylc] \textrightarrow [[events]] \textrightarrow abort on stalled} - -If this is set to True it will cause the suite to abort with error status -if it stalls. A suite is considered "stalled" if there are no active, -queued or submitting tasks or tasks waiting for clock triggers to be met. It is -possible to set a default for this at the site level -(see ~\ref{SiteCylcHooks}). - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False, unless set at the site level. 
-\end{myitemize} - -\paragraph[abort on timeout]{[cylc] \textrightarrow [[events]] \textrightarrow abort on timeout} - -If a suite timer is set (above) this will cause the suite to abort with -error status if the suite times out while still running. It is possible to set -a default for this at the site level (see ~\ref{SiteCylcHooks}). - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False, unless set at the site level. -\end{myitemize} - -\paragraph[abort on inactivity]{[cylc] \textrightarrow [[events]] \textrightarrow abort on inactivity} - -If a suite inactivity timer is set (above) this will cause the suite to abort -with error status if the suite is inactive for some period while still running. -It is possible to set a default for this at the site level -(see ~\ref{SiteCylcHooks}). - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False, unless set at the site level. -\end{myitemize} - -\paragraph[abort if startup handler fails]{[cylc] \textrightarrow [[events]] \textrightarrow abort if EVENT handler fails} - -Cylc does not normally care whether an event handler succeeds or fails, -but if this is turned on the EVENT handler will be executed in the -foreground (which will block the suite while it is running) and the -suite will abort if the handler fails. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[{[[}environment{]]} ]{[cylc] \textrightarrow [[environment]]} - -Environment variables defined in this section are passed to suite and -task event handlers. - -\begin{myitemize} - \item These variables are not passed to tasks - use task runtime - variables for that. Similarly, task runtime variables are not - available to event handlers - which are executed by the suite server - program, (not by running tasks) in response to task events. 
- - \item Cylc-defined environment variables such as - \lstinline=$CYLC_SUITE_RUN_DIR= are not passed to task event - handlers by default, but you can make them available by - extracting them to the cylc environment like this: -\begin{lstlisting} -[cylc] - [[environment]] - CYLC_SUITE_RUN_DIR = $CYLC_SUITE_RUN_DIR -\end{lstlisting} - - \item These variables - unlike task execution environment variables - which are written to job scripts and interpreted by the shell at - task run time - are not interpreted by the shell prior to use - so shell variable expansion expressions cannot be used here. -\end{myitemize} - -\paragraph[\_\_VARIABLE\_\_ ]{[cylc] \textrightarrow [[environment]] \textrightarrow \_\_VARIABLE\_\_} - -Replace \_\_VARIABLE\_\_ with any number of environment variable -assignment expressions. -Values may refer to other local environment variables (order of -definition is preserved) and are not evaluated or manipulated by -cylc, so any variable assignment expression that is legal in the -shell in which cylc is running can be used (but see the warning -above on variable expansions, which will not be evaluated). -White space around the `$=$' is allowed (as far as cylc's file -parser is concerned these are just suite configuration items). - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} (none) - \item {\em examples:} - \begin{myitemize} - \item \lstinline@FOO = $HOME/foo@ - \end{myitemize} -\end{myitemize} - - -\subsubsection[{[[}reference test{]]}]{[cylc] \textrightarrow [[reference test]] } -\label{ReferenceTestConfig} - -Reference tests are finite-duration suite runs that abort with non-zero -exit status if cylc fails, if any task fails, if the suite times -out, or if a shutdown event handler that (by default) compares the test -run with a reference run reports failure. See~\ref{AutoRefTests}. 
- -\paragraph[suite shutdown event handler]{[cylc] \textrightarrow [[reference test]] \textrightarrow suite shutdown event handler} - -A shutdown event handler that should compare the test run with the -reference run, exiting with zero exit status only if the test run -verifies. - -\begin{myitemize} - \item {\em type:} string (event handler command name or path) - \item {\em default:} \lstinline=cylc hook check-triggering= -\end{myitemize} -As for any event handler, the full path can be omitted if the script is -located somewhere in \lstinline=$PATH= or in the suite bin directory. - -\paragraph[required run mode]{[cylc] \textrightarrow [[reference test]] \textrightarrow required run mode} - -If your reference test is only valid for a particular run mode, this -setting will cause cylc to abort if a reference test is attempted -in another run mode. - -\begin{myitemize} - \item {\em type:} string - \item {\em legal values:} live, dummy, dummy-local, simulation - \item {\em default:} None -\end{myitemize} - -\paragraph[allow task failures]{[cylc] \textrightarrow [[reference test]] \textrightarrow allow task failures} - -A reference test run will abort immediately if any task fails, unless -this item is set, or a list of {\em expected task failures} is provided -(below). - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\paragraph[expected task failures]{[cylc] \textrightarrow [[reference test]] \textrightarrow expected task failures} - -A reference test run will abort immediately if any task fails, unless -{\em allow task failures} is set (above) or the failed task is found -in a list of IDs of tasks that are expected to fail. - -\begin{myitemize} - \item {\em type:} Comma-separated list of strings (task IDs: \lstinline=name.cycle_point=).
- \item {\em default:} (none) - \item {\em example:} \lstinline=foo.20120808, bar.20120908= -\end{myitemize} - -\paragraph[live mode suite timeout]{[cylc] \textrightarrow [[reference test]] \textrightarrow live mode suite timeout} - -The timeout value, expressed as an ISO 8601 duration/interval, after which the -test run should be aborted if it has not finished, in live mode. Test runs -cannot be done in live mode unless you define a value for this item, because -it is not possible to arrive at a sensible default for all suites. - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation, e.g.\ - \lstinline=PT5M= is 5 minutes (note: by contrast \lstinline=P5M= means 5 - months, so remember the \lstinline=T=!). - \item {\em default:} PT1M (1 minute) -\end{myitemize} - -\paragraph[simulation mode suite timeout]{[cylc] \textrightarrow [[reference test]] \textrightarrow simulation mode suite timeout} - -The timeout value, expressed as an ISO 8601 duration/interval, after which the test run should be aborted -if it has not finished, in simulation mode. Test runs cannot be done in -simulation mode unless you define a value for this item, because it is -not possible to arrive at a sensible default for all suites. - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT5M=, 5 minutes (note: by contrast, \lstinline=P5M= means 5 - months, so remember the \lstinline=T=!)). - \item {\em default:} PT1M (1 minute) -\end{myitemize} - -\paragraph[dummy mode suite timeout]{[cylc] \textrightarrow [[reference test]] \textrightarrow dummy mode suite timeout} - -The timeout value, expressed as an ISO 8601 duration/interval, after which the -test run should be aborted if it has not finished, in dummy mode. Test runs -cannot be done in dummy mode unless you define a value for this item, because -it is not possible to arrive at a sensible default for all suites.
- -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT5M=, 5 minutes (note: by contrast, \lstinline=P5M= means 5 - months, so remember the \lstinline=T=!)). - \item {\em default:} PT1M (1 minute) -\end{myitemize} - -\subsubsection[{[[}authentication{]]}]{[cylc] \textrightarrow [[authentication]] } -\label{SuiteAuth} - -Authentication of client programs with suite server programs can be set in the -global site/user config files and overridden here if necessary. -See~\ref{GlobalAuth} for more information. - -\paragraph[public]{[cylc] \textrightarrow [[authentication]] \textrightarrow public} - -The client privilege level granted for public access - i.e.\ no suite passphrase -required. See~\ref{GlobalAuth} for legal values. - -\subsubsection[{[[}simulation{]]} ]{[cylc] \textrightarrow [[simulation]]} - -Suite-level configuration for the {\em simulation} and {\em dummy} run modes -described in Section~\ref{SimulationMode}. - -\paragraph[disable suite event handlers]{[cylc] \textrightarrow [[simulation]] \textrightarrow disable suite event handlers} - -If this is set to \lstinline=True= configured suite event handlers will not be -called in simulation or dummy modes. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} \lstinline=True= -\end{myitemize} - -\subsection{[scheduling]} - -This section allows cylc to determine when tasks are ready to run. - -\subsubsection[cycling]{ [scheduling] \textrightarrow cycling mode } -\label{cycling-mode} - -Cylc runs using the proleptic Gregorian calendar by default. This item allows -you to either run the suite using the 360 day calendar (12 months of 30 days -in a year) or using integer cycling. It also supports use of the 365 (never a -leap year) and 366 (always a leap year) calendars. 
- -\begin{myitemize} - \item {\em type:} string - \item {\em legal values:} gregorian, 360day, 365day, 366day, integer - \item {\em default:} gregorian - -\end{myitemize} - -\subsubsection[initial cycle point]{[scheduling] \textrightarrow initial cycle point} -\label{initial cycle point} - -In a cold start each cycling task (unless specifically excluded under -[special tasks]) will be loaded into the suite with this cycle point, -or with the closest subsequent valid cycle point for the task. This item can -be overridden on the command line or in the gcylc suite start panel. - -In date-time cycling, if you do not provide time zone information for this, -it will be assumed to be local time, or in UTC if~\ref{UTC-mode} is set, or in -the time zone determined by \ref{cycle-point-time-zone} if that is set. - -\begin{myitemize} - \item {\em type:} ISO 8601 date-time point representation (e.g.\ - \lstinline=CCYYMMDDThhmm=, 19951231T0630) or ``now''. - \item {\em default:} (none) -\end{myitemize} - -The string ``now'' converts to the current date-time on the suite host (adjusted -to UTC if the suite is in UTC mode but the host is not) to minute resolution. -Minutes (or hours, etc.) may be ignored depending on your cycle point format - -(\ref{cycle-point-format}). - -\paragraph[initial cycle point relative to current time]{[scheduling] \textrightarrow [[initial cycle point]] \textrightarrow initial cycle point relative to current time} - -This can be used to set the initial cycle point time relative to the current time. - -Two additional commands, \lstinline=next= and \lstinline=previous=, can be used when setting the initial cycle point. - -The syntax uses truncated ISO8601 time representations, and is of the style: \lstinline=next(Thh:mmZ)=, \lstinline=previous(T-mm)=; e.g. 
-\begin{myitemize} - \item {\lstinline.initial cycle point = next(T15:00Z).} - \item {\lstinline.initial cycle point = previous(T09:00).} - \item {\lstinline.initial cycle point = next(T12).} - \item {\lstinline.initial cycle point = previous(T-20).} -\end{myitemize} -Examples of interpretation are given in figure \ref{relative initial cycle point time syntax interpretation}. - -A list of times, separated by semicolons, can be provided, e.g. \lstinline=next(T-00;T-15;T-30;T-45)=. -At least one time is required within the brackets, and if more than one is given, the major time unit in each (hours or minutes) should all be of the same type. - -If an offset from the specified date or time is required, this should be used in the form: \lstinline=previous(Thh:mm)= \begin{math} \pm \end{math}\lstinline=PxTy= -in the same way as is used for determining cycle periods, e.g. -\begin{myitemize} - \item {\lstinline.initial cycle point = previous(T06) +P1D.} - \item {\lstinline.initial cycle point = next(T-30) -PT1H.} -\end{myitemize} -The section in the bracket attached to the next/previous command is interpreted first, and then the offset is applied. - -The offset can also be used independently without a \lstinline=next= or \lstinline=previous= command, and will be interpreted as an offset from ``now''. 
- -\begin{figure}[H] -\centering -\caption{Examples of setting relative initial cycle point for times and offsets \\ -using now = 2018-03-14T15:12Z (and UTC mode)} -\begin{tabular}{ | l | l | } -\hline -Syntax & Interpretation \\ -\hline -\lstinline=next(T-00)= & 2018-03-14T16:00Z \\ -\lstinline=previous(T-00)= & 2018-03-14T15:00Z \\ -\lstinline=next(T-00; T-15; T-30; T-45)= & 2018-03-14T15:15Z \\ -\lstinline=previous(T-00; T-15; T-30; T-45)= & 2018-03-14T15:00Z \\ -\lstinline=next(T00)= & 2018-03-15T00:00Z \\ -\lstinline=previous(T00)= & 2018-03-14T00:00Z \\ -\lstinline=next(T06:30Z)= & 2018-03-15T06:30Z \\ -\lstinline=previous(T06:30) -P1D= & 2018-03-13T06:30Z \\ -\lstinline=next(T00; T06; T12; T18) = & 2018-03-14T18:00Z \\ -\lstinline=previous(T00; T06; T12; T18) = & 2018-03-14T12:00Z \\ -\lstinline=next(T00; T06; T12; T18) +P1W = & 2018-03-21T18:00Z \\ -\lstinline=PT1H= & 2018-03-14T16:12Z \\ -\lstinline=-P1M= & 2018-02-14T15:12Z \\ -\hline -\end{tabular} -\label{relative initial cycle point time syntax interpretation} -\end{figure} - -The relative initial cycle point also works with truncated dates, including weeks and ordinal date, using ISO8601 truncated date representations. -Note that day-of-week should always be specified when using weeks. If a time is not included, the calculation of the next or previous corresponding -point will be done from midnight of the current day. -Examples of interpretation are given in figure \ref{relative initial cycle point date syntax interpretation}. 
- -\begin{figure}[H] -\centering -\caption{Examples of setting relative initial cycle point for dates \\ -using now = 2018-03-14T15:12Z (and UTC mode)} -\begin{tabular}{ | l | l | } -\hline -Syntax & Interpretation \\ -\hline -\lstinline=next(-00)= & 2100-01-01T00:00Z \\ -\lstinline=previous(--01)= & 2018-01-01T00:00Z \\ -\lstinline=next(---01)= & 2018-04-01T00:00Z \\ -\lstinline=previous(--1225)= & 2017-12-25T00:00Z \\ -\lstinline=next(-2006)= & 2020-06-01T00:00Z \\ -\lstinline=previous(-W101)= & 2018-03-05T00:00Z \\ -\lstinline=next(-W-1; -W-3; -W-5)= & 2018-03-14T00:00Z \\ -\lstinline=next(-001; -091; -181; -271)= & 2018-04-01T00:00Z \\ -\lstinline=previous(-365T12Z)= & 2017-12-31T12:00Z \\ -\hline -\end{tabular} -\label{relative initial cycle point date syntax interpretation} -\end{figure} - - -\subsubsection[final cycle point]{[scheduling] \textrightarrow final cycle point} - -Cycling tasks are held once they pass the final cycle point, if one is -specified. Once all tasks have achieved this state the suite will shut -down. If this item is provided you can override it on the command line -or in the gcylc suite start panel. - -In date-time cycling, if you do not provide time zone information for this, -it will be assumed to be local time, or in UTC if \ref{UTC-mode} is set, or in -the \ref{cycle-point-time-zone} if that is set. - -\begin{myitemize} - \item {\em type:} ISO 8601 date-time point representation (e.g.\ - \lstinline=CCYYMMDDThhmm=, 19951231T1230) or ISO 8601 date-time offset - (e.g.\ +P1D+PT6H) - \item {\em default:} (none) -\end{myitemize} - -\subsubsection[initial cycle point constraints]{[scheduling] \textrightarrow initial cycle point constraints} -\label{initial cycle point constraints} - -In a cycling suite it is possible to restrict the initial cycle point by -defining a list of truncated time points under the initial cycle point -constraints. 
- -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 truncated time point - representations (e.g.\ T00, T06, T-30). - \item {\em default:} (none) -\end{myitemize} - -\subsubsection[final cycle point constraints]{[scheduling] \textrightarrow final cycle point constraints} -\label{final cycle point constraints} - -In a cycling suite it is possible to restrict the final cycle point by -defining a list of truncated time points under the final cycle point -constraints. - -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 truncated time point - representations (e.g.\ T00, T06, T-30). - \item {\em default:} (none) -\end{myitemize} - -\subsubsection[hold after point]{[scheduling] \textrightarrow hold after point} - -Cycling tasks are held once they pass the hold after cycle point, if one is -specified. Unlike with the final cycle point, the suite will not shut down once all tasks -have passed this point. If this item is provided you can override it on the -command line or in the gcylc suite start panel. - -\subsubsection[runahead limit]{[scheduling] \textrightarrow runahead limit} -\label{runahead limit} - -Runahead limiting prevents the fastest tasks in a suite from getting too far -ahead of the slowest ones, as documented in~\ref{RunaheadLimit}. - -This config item specifies a hard limit as a cycle interval between the -slowest and fastest tasks. It is deprecated in favour of the newer default -limiting by \lstinline=max active cycle points= (\ref{max active cycle points}). - -\begin{myitemize} - \item {\em type:} Cycle interval string e.g.\ \lstinline=PT12H= - for a 12 hour limit under ISO 8601 cycling. - \item {\em default:} (none) -\end{myitemize} - -\subsubsection[max active cycle points]{[scheduling] \textrightarrow - max active cycle points} -\label{max active cycle points} - -Runahead limiting prevents the fastest tasks in a suite from getting too far -ahead of the slowest ones, as documented in~\ref{RunaheadLimit}.
- -This config item supersedes the deprecated hard \lstinline=runahead limit= -(\ref{runahead limit}). It allows up to \lstinline=N= (default 3) consecutive -cycle points to be active at any time, adjusted up if necessary for -any future triggering. - -\begin{myitemize} - \item {\em type:} integer - \item {\em default:} 3 -\end{myitemize} - -\subsubsection[spawn to max active cycle points]{[scheduling] \textrightarrow - spawn to max active cycle points} -\label{spawn to max active cycle points} - -Allows tasks to spawn out to \lstinline=max active cycle points= -(\ref{max active cycle points}), removing restriction that a task has to have -submitted before its successor can be spawned. - -{\em Important:} This should be used with care given the potential impact of -additional task proxies both in terms of memory and cpu for the cylc daemon as -well as overheads in rendering all the additional tasks in gcylc. Also, use -of the setting may highlight any issues with suite design relying on the -default behaviour where downstream tasks would otherwise be waiting on ones -upstream submitting and the suite would have stalled e.g.\ a housekeeping task -at a later cycle deleting an earlier cycle's data before that cycle has had -chance to run where previously the task would not have been spawned until its -predecessor had been submitted. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[{[[}queues{]]}]{[scheduling] \textrightarrow [[queues]]} - -Configuration of internal queues, by which the number of simultaneously -active tasks (submitted or running) can be limited, per queue. By -default a single queue called {\em default} is defined, with all tasks -assigned to it and no limit. To use a single queue for the whole suite -just set the limit on the {\em default} queue as required. -See also~\ref{InternalQueues}. 
- -\paragraph[{[[[}\_\_QUEUE\_\_{]]]}]{[scheduling] \textrightarrow [[queues]] \textrightarrow [[[\_\_QUEUE\_\_]]]} - -Section heading for configuration of a single queue. Replace -\_\_QUEUE\_\_ with a queue name, and repeat the section as required. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} ``default'' -\end{myitemize} - -\paragraph[limit]{[scheduling] \textrightarrow [[queues]] \textrightarrow [[[\_\_QUEUE\_\_]]] \textrightarrow limit} - -The maximum number of active tasks allowed at any one time, for this queue. -\begin{myitemize} -\item {\em type:} integer -\item {\em default:} 0 (i.e.\ no limit) -\end{myitemize} - -\paragraph[members]{[scheduling] \textrightarrow [[queues]] \textrightarrow [[[\_\_QUEUE\_\_]]] \textrightarrow members} - -A list of member tasks, or task family names, to assign to this queue -(assigned tasks will automatically be removed from the default queue). -\begin{myitemize} -\item {\em type:} Comma-separated list of strings (task or family names). -\item {\em default:} none for user-defined queues; all tasks for the ``default'' queue -\end{myitemize} - -\subsubsection[{[[}xtriggers{]]}]{[scheduling] \textrightarrow [[xtriggers]]} - -This section is for {\em External Trigger} function declarations - -see~\ref{External Triggers}. - -\paragraph[\_\_MANY\_\_]{[scheduling] \textrightarrow [[xtriggers]] \textrightarrow \_\_MANY\_\_} - -Replace \_\_MANY\_\_ with any user-defined event trigger function -declarations and corresponding labels for use in the graph: - -\begin{myitemize} -\item {\em type:} string: function signature followed by optional call interval -\item {\em example:} \lstinline@trig_1 = my_trigger(arg1, arg2, kwarg1, kwarg2):PT10S@ -\end{myitemize} - -(See~\ref{External Triggers} for details). - -\subsubsection[{[[}special tasks{]]}]{[scheduling] \textrightarrow [[special tasks]]} - -This section is used to identify tasks with special behaviour. 
Family names can -be used in special task lists as shorthand for listing all member tasks. - -\paragraph[clock-trigger]{[scheduling] \textrightarrow [[special tasks]] \textrightarrow clock-trigger} - -{\em NOTE: please read {\em External Triggers} (\ref{External Triggers}) before -using the older clock triggers described in this section.} - -Clock-trigger tasks (see~\ref{ClockTriggerTasks}) wait on a wall clock time -specified as an offset from their own cycle point. - -\begin{myitemize} - \item {\em type:} Comma-separated list of task or family names with - associated date-time offsets expressed as ISO8601 interval strings, - positive or negative, e.g.\ \lstinline=PT1H= for 1 hour. The offset - specification may be omitted to trigger right on the cycle point. - \item {\em default:} (none) - \item {\em example:} -\begin{lstlisting} - clock-trigger = foo(PT1H30M), bar(PT1.5H), baz -\end{lstlisting} -\end{myitemize} - -\paragraph[clock-expire]{[scheduling] \textrightarrow [[special tasks]] \textrightarrow clock-expire} -\label{ClockExpireRef} - -Clock-expire tasks enter the {\em expired} state and skip job submission if too -far behind the wall clock when they become ready to run. The expiry time is -specified as an offset from wall-clock time; typically it should be negative - -see~\ref{ClockExpireTasks}. - -\begin{myitemize} - \item {\em type:} Comma-separated list of task or family names with - associated date-time offsets expressed as ISO8601 interval strings, - positive or negative, e.g.\ \lstinline=PT1H= for 1 hour. The offset - may be omitted if it is zero. 
- \item {\em default:} (none) - \item {\em example:} -\begin{lstlisting} - clock-expire = foo(-P1D) -\end{lstlisting} -\end{myitemize} - -\paragraph[external-trigger]{[scheduling] \textrightarrow [[special tasks]] \textrightarrow external-trigger} - -{\em NOTE: please read {\em External Triggers} (\ref{External Triggers}) before -using the older mechanism described in this section.} - -Externally triggered tasks (see~\ref{Old-Style External Triggers}) wait on -external events reported via the \lstinline=cylc ext-trigger= command. To -constrain triggers to a specific cycle point, include -\lstinline=$CYLC_TASK_CYCLE_POINT= in the trigger message string and pass the -cycle point to the \lstinline=cylc ext-trigger= command. - -\begin{myitemize} - \item {\em type:} Comma-separated list of task names with associated - external trigger message strings. - \item {\em default:} (none) - \item {\em example:} (note the comma and line-continuation character) -\begin{lstlisting} - external-trigger = get-satx("new sat-X data ready"), \ - get-saty("new sat-Y data ready for $CYLC_TASK_CYCLE_POINT") -\end{lstlisting} -\end{myitemize} - - -\paragraph[sequential]{[scheduling] \textrightarrow [[special tasks]] \textrightarrow sequential} - -Sequential tasks automatically depend on their own previous-cycle instance. -This declaration is deprecated in favour of explicit inter-cycle triggers - -see~\ref{SequentialTasks}. - -\begin{myitemize} - \item {\em type:} Comma-separated list of task or family names. - \item {\em default:} (none) - \item {\em example:} \lstinline@sequential = foo, bar@ -\end{myitemize} - -\paragraph[exclude at start-up]{[scheduling] \textrightarrow [[special tasks]] \textrightarrow exclude at start-up} -\label{EASU} - -Any task listed here will be excluded from the initial task pool (this -goes for suite restarts too). If an {\em inclusion} list is also -specified, the initial pool will contain only included tasks that have -not been excluded. 
Excluded tasks can still be inserted at run time. -Other tasks may still depend on excluded tasks if they have not been -removed from the suite dependency graph, in which case some manual -triggering, or insertion of excluded tasks, may be required. - -\begin{myitemize} - \item {\em type:} Comma-separated list of task or family names. - \item {\em default:} (none) -\end{myitemize} - -\paragraph[include at start-up]{[scheduling] \textrightarrow [[special tasks]] \textrightarrow include at start-up} -\label{IASU} - -If this list is not empty, any task {\em not} listed in it will be -excluded from the initial task pool (this goes for suite restarts too). -If an {\em exclusion} list is also specified, the initial pool will -contain only included tasks that have not been excluded. Excluded tasks -can still be inserted at run time. Other tasks may still depend on -excluded tasks if they have not been removed from the suite dependency -graph, in which case some manual triggering, or insertion of excluded -tasks, may be required. - -\begin{myitemize} - \item {\em type:} Comma-separated list of task or family names. - \item {\em default:} (none) -\end{myitemize} - -\subsubsection[{[[}dependencies{]]}]{[scheduling] \textrightarrow [[dependencies]]} - -The suite dependency graph is defined under this section. You can plot -the dependency graph as you work on it, with \lstinline=cylc graph= or -by right clicking on the suite in the db viewer. See -also~\ref{ConfiguringScheduling}. - -\paragraph[graph]{ [scheduling] \textrightarrow [[dependencies]] \textrightarrow graph } - -The dependency graph for a completely non-cycling suite can go here. -See also~\ref{GraphDescrip} below and~\ref{ConfiguringScheduling}, for graph -string syntax.
-\begin{myitemize} - \item {\em type:} string - \item {\em example:} (see~\ref{GraphDescrip} below) -\end{myitemize} - -\paragraph[{[[[}\_\_RECURRENCE\_\_{]]]}]{[scheduling] \textrightarrow [[dependencies]] \textrightarrow [[[\_\_RECURRENCE\_\_]]]} - -\_\_RECURRENCE\_\_ section headings define the sequence of cycle points for -which the subsequent graph section is valid. These should be specified in -our ISO 8601 derived sequence syntax, or similar for integer cycling: -\begin{myitemize} - \item {\em examples:} - \begin{myitemize} - \item date-time cycling: - \lstinline@[[[T00,T06,T12,T18]]]@ or \lstinline@[[[PT6H]]]@ - \item integer cycling (stepped by 2): - \lstinline@[[[P2]]]@ - \end{myitemize} - \item {\em default:} (none) -\end{myitemize} - -See~\ref{GraphTypes} for more on recurrence expressions, and how multiple graph -sections combine. - -\subparagraph[graph]{[scheduling] \textrightarrow [[dependencies]] \textrightarrow [[[\_\_RECURRENCE\_\_]]] \textrightarrow graph } -\label{GraphDescrip} - -The dependency graph for a given recurrence section goes here. Syntax examples -follow; see also~\ref{ConfiguringScheduling} and~\ref{TriggerTypes}. 
- -\begin{myitemize} -\item {\em type:} string -\item {\em examples:} - \begin{lstlisting} -graph = """ - foo => bar => baz & waz # baz and waz both trigger off bar - foo[-P1D-PT6H] => bar # bar triggers off foo[-P1D-PT6H] - baz:out1 => faz # faz triggers off a message output of baz - X:start => Y # Y triggers if X starts executing - X:fail => Y # Y triggers if X fails - foo[-PT6H]:fail => bar # bar triggers if foo[-PT6H] fails - X => !Y # Y suicides if X succeeds - X | X:fail => Z # Z triggers if X succeeds or fails - X:finish => Z # Z triggers if X succeeds or fails - (A | B & C ) | D => foo # general conditional triggers - foo:submit => bar # bar triggers if foo is successfully submitted - foo:submit-fail => bar # bar triggers if submission of foo fails - # comment - """ - \end{lstlisting} -\item {\em default:} (none) -\end{myitemize} - -\subsection{[runtime]} - -This section is used to specify how, where, and what to execute when -tasks are ready to run. Common -configuration can be factored out in a multiple-inheritance hierarchy of -runtime namespaces that culminates in the tasks of the suite. Order of -precedence is determined by the C3 linearization algorithm as used to -find the {\em method resolution order} in Python language class -hierarchies. For details and examples see~\ref{NIORP}. - -\subsubsection[{[[}\_\_NAME\_\_{]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]]} - -Replace \_\_NAME\_\_ with a namespace name, or a comma-separated list of -names, and repeat as needed to define all tasks in the suite. Names may -contain letters, digits, underscores, and hyphens. A namespace -represents a group or family of tasks if other namespaces inherit from -it, or a task if no others inherit from it. - -%Names may not contain colons (which would preclude use of directory paths -%involving the registration name in \lstinline=$PATH= variables). They -%may not contain the `.' 
character (it will be interpreted as the -%namespace hierarchy delimiter, separating groups and names -huh?). - -\begin{myitemize} -\item {\em legal values:} - \begin{myitemize} - \item \lstinline=[[foo]]= - \item \lstinline=[[foo, bar, baz]]= - \end{myitemize} -\end{myitemize} - -If multiple names are listed the subsequent settings apply to each. - -All namespaces inherit initially from {\em root}, which can be -explicitly configured to provide or override default settings -for all tasks in the suite. - -\paragraph[extra log files]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow -extra log files} - -A list of user-defined log files associated with a task. Files defined here -will appear alongside the default log files in the cylc gui. Log files -must reside in the job log directory \lstinline=$CYLC_TASK_LOG_DIR= and ideally -should be named using the \lstinline=$CYLC_TASK_LOG_ROOT= prefix -(see~\ref{Task Job Script Variables}). - -\begin{myitemize} -\item {\em type:} Comma-separated list of strings (log file names). -\item {\em default:} (none) -\item {\em example:} (job.custom-log-name) -\end{myitemize} - -\paragraph[inherit]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow inherit} - -A list of the immediate parent(s) this namespace inherits from. If no -parents are listed \lstinline=root= is assumed. - -\begin{myitemize} -\item {\em type:} Comma-separated list of strings (parent namespace names). -\item {\em default:} \lstinline=root= -\end{myitemize} - -\paragraph[init-script]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow init-script} - -Custom script invoked by the task job script before the task execution environment -is configured - so it does not have access to any suite or task -environment variables. It can be an external command or script, or inlined -scripting.
The original intention for this item was to allow remote tasks to -source login scripts to configure their access to cylc, but this should no -longer be necessary (see~\ref{HowTasksGetAccessToCylc}). See also -\lstinline=env-script=, \lstinline=err-script=, \lstinline=exit-script=, -\lstinline=pre-script=, \lstinline=script=, and \lstinline=post-script=. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} \lstinline@init-script = "echo Hello World"@ -\end{myitemize} - -\paragraph[env-script]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow env-script} - -Custom script invoked by the task job script between the cylc-defined environment -(suite and task identity, etc.) and the user-defined task runtime environment - -so it has access to the cylc environment (and the task environment has -access to variables defined by this scripting). It can be an external command -or script, or inlined scripting. See also \lstinline=init-script=, -\lstinline=err-script=, \lstinline=exit-script=, \lstinline=pre-script=, -\lstinline=script=, and \lstinline=post-script=. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} \lstinline@env-script = "echo Hello World"@ -\end{myitemize} - -\paragraph[exit-script]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow exit-script} - -Custom script invoked at the very end of {\em successful} job execution, just -before the job script exits. It should execute very quickly. Companion of -\lstinline=err-script=, which is executed on job failure. It can be an external -command or script, or inlined scripting. See also \lstinline=init-script=, -\lstinline=env-script=, \lstinline=err-script=, \lstinline=pre-script=, -\lstinline=script=, and \lstinline=post-script=.
- -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} \lstinline@exit-script = "rm -f $TMP_FILES"@ -\end{myitemize} - -\paragraph[err-script]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow err-script} - -Custom script to be invoked at the end of the error trap, which is triggered -due to failure of a command in the task job script or trappable job kill. The -output of this will always be sent to STDERR and \lstinline=$1= is set to the -name of the signal caught by the error trap. The script should be fast and use -very little system resource to ensure that the error trap can return quickly. -Companion of \lstinline=exit-script=, which is executed on job success. -It can be an external command or script, or inlined scripting. See also -\lstinline=init-script=, \lstinline=env-script=, \lstinline=exit-script=, -\lstinline=pre-script=, \lstinline=script=, and \lstinline=post-script=. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} \lstinline@err-script = "printenv FOO"@ -\end{myitemize} - -\paragraph[pre-script]{ [runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow pre-script} - -Custom script invoked by the task job script immediately before the \lstinline=script= -item (just below). It can be an external command or script, or inlined scripting. -See also \lstinline=init-script=, \lstinline=env-script=, -\lstinline=err-script=, \lstinline=exit-script=, \lstinline=script=, and -\lstinline=post-script=. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em example:} - \begin{lstlisting} - pre-script = """ - . $HOME/.profile - echo Hello from suite ${CYLC_SUITE_NAME}!""" - \end{lstlisting} -\end{myitemize} - -\paragraph[script]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow script} -\label{ScriptItem} - -The main custom script invoked from the task job script.
It can be an -external command or script, or inlined scripting. See also -\lstinline=init-script=, \lstinline=env-script=, \lstinline=err-script=, -\lstinline=exit-script=, \lstinline=pre-script=, and \lstinline=post-script=. - -\begin{myitemize} -\item {\em type:} string -\item {\em root default:} (none) -\end{myitemize} - -\paragraph[post-script]{ [runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow post-script} -Custom script invoked by the task job script immediately after the -\lstinline=script= item (just above). It can be an external command or script, -or inlined scripting. See also -\lstinline=init-script=, \lstinline=env-script=, \lstinline=err-script=, -\lstinline=exit-script=, \lstinline=pre-script=, and \lstinline=script=. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\end{myitemize} - -\paragraph[work sub-directory]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow work sub-directory} -\label{worksubdirectory} - -Task job scripts are executed from within {\em work directories} created -automatically under the suite run directory. A task can get its own work -directory from \lstinline=$CYLC_TASK_WORK_DIR= (or simply \lstinline=$PWD= if -it does not \lstinline=cd= elsewhere at runtime). The default directory -path contains task name and cycle point, to provide a unique workspace for -every instance of every task. If several tasks need to exchange files and -simply read and write from their current working directory, this item -can be used to override the default to make them all use the same workspace. - -The top level share and work directory location can be changed (e.g.\ to a -large data area) by a global config setting (see~\ref{workdirectory}).
- -\begin{myitemize} -\item {\em type:} string (directory path, can contain environment variables) -\item {\em default:} \lstinline=$CYLC_TASK_CYCLE_POINT/$CYLC_TASK_NAME= -\item {\em example:} \lstinline=$CYLC_TASK_CYCLE_POINT/shared/= -\end{myitemize} - -Note that if you omit cycle point from the work sub-directory path successive -instances of the task will share the same workspace. Consider the effect on -cycle point offset housekeeping of work directories before doing this. - -\paragraph[{[[[}meta{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[meta]]]} - -Section containing metadata items for this task or family namespace. Several items -(title, description, URL) are pre-defined and are used by the GUI. Others can be -user-defined and passed to task event handlers to be interpreted according to your -needs. For example, the value of an ``importance'' item could determine how an event -handler responds to task failure events. - -Any suite meta item can now be passed to task event handlers by prefixing the -string template item name with ``suite\_'', for example : - -\begin{lstlisting} -[runtime] - [[root]] - [[[events]]] - failed handler = send-help.sh %(suite_title)s %(suite_importance)s %(title)s -\end{lstlisting} - -\subparagraph[title]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[meta]]] \textrightarrow title} - -A single line description of this namespace. It is displayed by the -\lstinline=cylc list= command and can be retrieved from running tasks -with the \lstinline=cylc show= command. - -\begin{myitemize} -\item {\em type:} single line string -\item {\em root default:} (none) -\end{myitemize} - -\subparagraph[description]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[meta]]] \textrightarrow description} - -A multi-line description of this namespace, retrievable from running tasks with the -\lstinline=cylc show= command. 
- -\begin{myitemize} -\item {\em type:} multi-line string -\item {\em root default:} (none) -\end{myitemize} - -\subparagraph[URL]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[meta]]] \textrightarrow URL} -\label{TaskURL} - -A web URL to task documentation for this suite. If present it can be browsed -with the \lstinline=cylc doc= command, or by right-clicking on the task in -gcylc. The string templates \lstinline=%(suite_name)s= and -\lstinline=%(task_name)s= will be replaced with the actual suite and task names. -See also suite URLs (\ref{SuiteURL}). - -\begin{myitemize} -\item {\em type:} string (URL) -\item {\em default:} (none) -\item {\em example:} you can set URLs to all tasks in a suite by putting - something like the following in the root namespace: - \begin{lstlisting} -[runtime] - [[root]] - [[[meta]]] - URL = http://my-site.com/suites/%(suite_name)s/%(task_name)s.html - \end{lstlisting} -\end{myitemize} - -(Note that URLs containing the comment delimiter \lstinline=#= must be -protected by quotes). - -\subparagraph[\_\_MANY\_\_]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[meta]]] \textrightarrow \_\_MANY\_\_} - -Replace \_\_MANY\_\_ with any user-defined metadata item. These, like title, URL, etc. can be passed -to task event handlers to be interpreted according to your needs. For example, the value of an -"importance" item could determine how an event handler responds to task failure events. - -\begin{myitemize} -\item {\em type:} String or integer -\item {\em default:} (none) -\item {\em example:} - \begin{lstlisting} -[runtime] - [[root]] - [[[meta]]] - importance = high - color = red - \end{lstlisting} -\end{myitemize} - -\paragraph[{[[[}job{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]]} - -This section configures the means by which cylc submits task job scripts to run. 
- -\subparagraph[batch system]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow batch system} -\label{RuntimeJobSubMethods} - -See~\ref{TaskJobSubmission} for how job submission works, and how to define -new handlers for different batch systems. Cylc has a number of built in batch system handlers: -\begin{myitemize} -\item {\em type:} string -\item {\em legal values:} - \begin{myitemize} - \item \lstinline=background= - invoke a child process - \item \lstinline=at= - the rudimentary Unix \lstinline=at= scheduler - \item \lstinline=loadleveler= - IBM LoadLeveler \lstinline=llsubmit=, with directives defined in the suite.rc file - \item \lstinline=lsf= - IBM Platform LSF \lstinline=bsub=, with directives defined in the suite.rc file - \item \lstinline=pbs= - PBS \lstinline=qsub=, with directives defined in the suite.rc file - \item \lstinline=sge= - Sun Grid Engine \lstinline=qsub=, with directives defined in the suite.rc file - \item \lstinline=slurm= - Simple Linux Utility for Resource Management \lstinline=sbatch=, with directives defined in the suite.rc file - \item \lstinline=moab= - Moab workload manager \lstinline=msub=, with directives defined in the suite.rc file - \end{myitemize} -\item {\em default:} \lstinline=background= -\end{myitemize} - -\subparagraph[execution time limit]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow execution time limit} - -Specify the execution wall clock limit for a job of the task. -For \lstinline=background= and \lstinline=at=, the job script will be invoked using the \lstinline=timeout= command. -For other batch systems, the specified time will be automatically translated into the equivalent directive for wall clock limit. - -Tasks are polled multiple times, where necessary, when they exceed their -execution time limits. -(See~\ref{ExecutionTimeLimitPollingIntervals} for how to configure the polling -intervals). 
- -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation - \item {\em example:} \lstinline=PT5M=, 5 minutes, \lstinline=PT1H=, 1 hour - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[batch submit command template]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow batch submit command template} - -This allows you to override the actual command used by the chosen batch -system. The template's \%(job)s will be substituted by the -job file path. - -\begin{myitemize} -\item {\em type:} string -\item {\em legal values:} a string template -\item {\em example:} \lstinline@llsubmit \%(job)s@ -\end{myitemize} - -\subparagraph[shell]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow shell} -\label{JobSubShell} - -Location of the command used to interpret the job script submitted by the suite -server program when a task is ready to run. This can be set to the location of -\lstinline=bash= in the job host if the shell is not installed in the standard -location. -{\em Note: It has no bearing on any sub-shells that may be called by the job script.} - -Setting this to the path of a ksh93 interpreter is deprecated; support for it -will be withdrawn in a future cylc release. Setting this to any other shell is -not supported. - -\begin{myitemize} -\item {\em type:} string -\item {\em root default:} \lstinline=/bin/bash= -\end{myitemize} - -\subparagraph[submission retry delays]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow submission retry delays} -\label{JobSubRefRetries} - -A list of durations (in ISO 8601 syntax), after which to resubmit if job -submission fails. -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval - representations, optionally {\em preceded} by multipliers.
- \item {\em example:} \lstinline=PT1M,3*PT1H, P1D= is equivalent to - \lstinline=PT1M, PT1H, PT1H, PT1H, P1D= - 1 minute, 1 hour, 1 hour, 1 - hour, 1 day. - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[execution retry delays]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow execution retry delays} -\label{RefRetries} - -See also~\ref{TaskRetries}. - -A list of ISO 8601 time duration/intervals after which to resubmit the task -if it fails. The variable \lstinline=$CYLC_TASK_TRY_NUMBER= in the task -execution environment is incremented each time, starting from 1 for the -first try - this can be used to vary task behaviour by try number. - -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval representations, - optionally {\em preceded} by multipliers. - \item {\em example:} \lstinline=PT1.5M,3*PT10M= is equivalent to - \lstinline=PT1.5M, PT10M, PT10M, PT10M= - 1.5 minutes, 10 minutes, - 10 minutes, 10 minutes. - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[submission polling intervals]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow submission polling intervals} -\label{SubmissionPollingIntervals} - -A list of intervals, expressed as ISO 8601 duration/intervals, with optional -multipliers, after which cylc will poll for status while the task is in the -submitted state. - -For the polling task communication method this overrides the default -submission polling interval in the site/user config files -(\ref{SiteAndUserConfiguration}). For default and ssh task communications, -polling is not done by default but it can still be configured here as a -regular check on the health of submitted tasks. - -Each list value is used in turn until the last, which is used repeatedly -until finished. 
- -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval - representations, optionally {\em preceded} by multipliers. - \item {\em example:} \lstinline=PT1M,3*PT1H, PT1M= is equivalent to - \lstinline=PT1M, PT1H, PT1H, PT1H, PT1M= - 1 minute, 1 hour, 1 hour, 1 - hour, 1 minute. - \item {\em default:} (none) -\end{myitemize} -A single interval value is probably appropriate for submission polling. - -\subparagraph[execution polling intervals]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[job]]] \textrightarrow execution polling intervals} -\label{ExecutionPollingIntervals} - -A list of intervals, expressed as ISO 8601 duration/intervals, with optional -multipliers, after which cylc will poll for status while the task is in the -running state. - -For the polling task communication method this overrides the default -execution polling interval in the site/user config files -(\ref{SiteAndUserConfiguration}). For default and ssh task communications, -polling is not done by default but it can still be configured here as a -regular check on the health of submitted tasks. - -Each list value is used in turn until the last, which is used repeatedly -until finished. - -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval - representations, optionally {\em preceded} by multipliers. - \item {\em example:} \lstinline=PT1M,3*PT1H, PT1M= is equivalent to - \lstinline=PT1M, PT1H, PT1H, PT1H, PT1M= - 1 minute, 1 hour, 1 hour, 1 - hour, 1 minute. - \item {\em default:} (none) -\end{myitemize} - -\paragraph[{[[[}remote{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]]} - -Configure host and username, for tasks that do not run on the suite host -account. Non-interactive ssh is used to submit the task by the configured -batch system, so you must distribute your ssh key to allow -this. 
Cylc must be installed on task remote accounts, but no external -software dependencies are required there. - -\subparagraph[host]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]] \textrightarrow host} -\label{DynamicHostSelection} - -The remote host for this namespace. This can be a static hostname, an -environment variable that holds a hostname, or a command that prints a -hostname to stdout. Host selection commands are executed just prior to -job submission. The host (static or dynamic) may have an entry in the -cylc site or user config file to specify parameters such as the location -of cylc on the remote machine; if not, the corresponding local settings -(on the suite host) will be assumed to apply on the remote host. - -\begin{myitemize} -\item {\em type:} string (a valid hostname on the network) -\item {\em default:} (none) -\item {\em examples:} - \begin{myitemize} - \item static host name: \lstinline@host = foo@ - \item fully qualified: \lstinline@host = foo.bar.baz@ - \item dynamic host selection: - \begin{myitemize} - \item shell command (1): \lstinline@host = $(host-selector.sh)@ - \item shell command (2): \lstinline@host = `host-selector.sh`@ - \item environment variable: \lstinline@host = $MY_HOST@ - \end{myitemize} - \end{myitemize} -\end{myitemize} - - -\subparagraph[owner]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]] \textrightarrow owner} - -The username of the task host account. This is (only) used in the -non-interactive ssh command invoked by the suite server program to submit the -remote task; consequently it may be defined using local environment variables -(i.e.\ the shell in which cylc runs, and [cylc] \textrightarrow [[environment]]). - -If you use dynamic host selection and have different usernames on -the different selectable hosts, you can configure your -\lstinline=$HOME/.ssh/config= to handle username translation.
- -\begin{myitemize} -\item {\em type:} string (a valid username on the remote host) -\item {\em default:} (none) -\end{myitemize} - -\subparagraph[retrieve job logs]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]] \textrightarrow retrieve job logs} -\label{runtime-remote-retrieve-job-logs} - -Remote task job logs are saved to the suite run directory on the task host, not -on the suite host. If you want the job logs pulled back to the suite host -automatically, you can set this item to \lstinline=True=. The suite will -then attempt to \lstinline=rsync= the job logs once from the remote host each -time a task job completes. E.g. if the job file is -\lstinline=~/cylc-run/tut.oneoff.remote/log/job/1/hello/01/job=, anything under -\lstinline=~/cylc-run/tut.oneoff.remote/log/job/1/hello/01/= will be retrieved. - -\begin{myitemize} -\item {\em type:} boolean -\item {\em default:} False -\end{myitemize} - -\subparagraph[retrieve job logs max size]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]] \textrightarrow retrieve job logs max size} -\label{runtime-remote-retrieve-job-logs-max-size} - -If the disk space of the suite host is limited, you may want to set the maximum -sizes of the job log files to retrieve. The value can be anything that is -accepted by the \lstinline@--max-size=SIZE@ option of the \lstinline=rsync= -command. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} None -\end{myitemize} - -\subparagraph[retrieve job logs retry delays]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]] \textrightarrow retrieve job logs retry delays} -\label{runtime-remote-retrieve-job-logs-retry-delays} - -Some batch systems have considerable delays between the time when the job -completes and when it writes the job logs in its normal location. If this is -the case, you can configure an initial delay and some retry delays between -subsequent attempts. 
The default behaviour is to attempt once without any -delay. - -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval representations, optionally {\em preceded} by multipliers. - \item {\em default:} (none) - \item {\em example:} \lstinline@retrieve job logs retry delays = PT10S, PT1M, PT5M@ -\end{myitemize} - -\subparagraph[suite definition directory]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[remote]]] \textrightarrow suite definition directory} - -The path to the suite configuration directory on the remote account, needed if -remote tasks require access to files stored there (via -\lstinline=$CYLC_SUITE_DEF_PATH=) or in the suite bin directory (via -\lstinline=$PATH=). If this item is not defined, the local suite -configuration directory path will be assumed, with the suite owner's home -directory, if present, replaced by \lstinline='$HOME'= for -interpretation on the remote account. - -\begin{myitemize} -\item {\em type:} string (a valid directory path on the remote account) -\item {\em default:} (local suite configuration path with \lstinline=$HOME= - replaced) -\end{myitemize} - - -\paragraph[{[[[}events{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]]} -\label{TaskEventHandling} - -Cylc can call nominated event handlers when certain task events occur. This -section configures specific task event handlers; see~\ref{SuiteEventHandling} -for suite events. - -Event handlers can be located in the suite \lstinline=bin/= directory, -otherwise it is up to you to ensure their location is in \lstinline=$PATH= (in -the shell in which the suite server program runs). They should require little -resource to run and return quickly. - -Each task event handler can be specified as a list of command lines or command -line templates. 
They can contain any or all of the following patterns, which -will be substituted with actual values: -\begin{myitemize} - \item \%(event)s: event name - \item \%(suite)s: suite name - \item \%(suite\_uuid)s: suite UUID string - \item \%(point)s: cycle point - \item \%(name)s: task name - \item \%(submit\_num)s: submit number - \item \%(try\_num)s: try number - \item \%(id)s: task ID (i.e.\ \%(name)s.\%(point)s) - \item \%(batch\_sys\_name)s: batch system name - \item \%(batch\_sys\_job\_id)s: batch system job ID - \item \%(message)s: event message, if any - \item any task [meta] item, e.g.: - \begin{myitemize} - \item \%(title)s: task title - \item \%(URL)s: task URL - \item \%(importance)s - example custom task metadata - \end{myitemize} - \item any suite [meta] item, prefixed with ``suite\_'', e.g.: - \begin{myitemize} - \item \%(suite\_title)s: suite title - \item \%(suite\_URL)s: suite URL - \item \%(suite\_rating)s - example custom suite metadata - \end{myitemize} -\end{myitemize} - -Otherwise, the command line will be called with the following default -arguments: -\begin{lstlisting} - %(event)s %(suite)s %(id)s %(message)s -\end{lstlisting} - -{\em Note: substitution patterns should not be quoted in the template strings. -This is done automatically where required.} - -For an explanation of the substitution syntax, see String Formatting Operations -in the Python documentation: -\url{https://docs.python.org/2/library/stdtypes.html#string-formatting}. - -Additional information can be passed to event handlers via the -[cylc] \textrightarrow [[environment]] (but not via task -runtime environments - event handlers are not called by tasks). 
- -\subparagraph[EVENT handler]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow EVENT handler} - -A list of one or more event handlers to call when one of the following EVENTs occurs: -\begin{myitemize} - \item {\bf submitted} - the job submit command was successful - \item {\bf submission failed} - the job submit command failed, or the - submitted job was killed before it started executing - \item {\bf submission retry} - job submit failed, but cylc will resubmit it - after a configured delay - \item {\bf submission timeout} - the submitted job timed out without commencing execution - - \item {\bf started} - the task reported commencement of execution - \item {\bf succeeded} - the task reported that it completed successfully - \item {\bf failed} - the task reported that it failed to complete successfully - \item {\bf retry} - the task failed, but cylc will resubmit it - after a configured delay - \item {\bf execution timeout} - the task timed out after execution commenced - \item {\bf warning} - the task reported a WARNING severity message - \item {\bf critical} - the task reported a CRITICAL severity message - \item {\bf custom} - the task reported a CUSTOM severity message - \item {\bf late} - the task is never active and is late -\end{myitemize} - -Item details: -\begin{myitemize} - \item {\em type:} Comma-separated list of strings (event handler scripts). - \item {\em default:} None - \item {\em example:} \lstinline@failed handler = my-failed-handler.sh@ -\end{myitemize} - -\subparagraph[submission timeout]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow submission timeout} -\label{runtime-event-hooks-submission-timeout} - -If a task has not started after the specified ISO 8601 duration/interval, the -{\em submission timeout} event handler(s) will be called.
-\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT30M=, 30 minutes or \lstinline=P1D=, 1 day). - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[execution timeout]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow execution timeout} -\label{runtime-event-hooks-execution-timeout} - -If a task has not finished after the specified ISO 8601 duration/interval, the -{\em execution timeout} event handler(s) will be called. -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT4H=, 4 hours or \lstinline=P1D=, 1 day). - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[handlers]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow handlers} - -Specify a list of command lines or command line templates as task event handlers. - -\begin{myitemize} - \item {\em type:} Comma-separated list of strings (event handler command line or command line templates). - \item {\em default:} (none) - \item {\em example:} \lstinline@handlers = my-handler.sh@ -\end{myitemize} - -\subparagraph[handler events]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow handler events} - -Specify the events for which the general task event handlers should be invoked. - -\begin{myitemize} - \item {\em type:} Comma-separated list of events - \item {\em default:} (none) - \item {\em example:} \lstinline@handler events = submission failed, failed@ -\end{myitemize} - -\subparagraph[handler retry delays]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow handler retry delays} -\label{runtime-events-handler-retry-delays} - -Specify an initial delay before running an event handler command and any retry -delays in case the command returns a non-zero code. The default behaviour is to -run an event handler command once without any delay. 
- -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval representations, optionally {\em preceded} by multipliers. - \item {\em default:} (none) - \item {\em example:} \lstinline@handler retry delays = PT10S, PT1M, PT5M@ -\end{myitemize} - -\subparagraph[mail events]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow mail events} - -Specify the events for which notification emails should be sent. - -\begin{myitemize} - \item {\em type:} Comma-separated list of events - \item {\em default:} (none) - \item {\em example:} \lstinline@mail events = submission failed, failed@ -\end{myitemize} - -\subparagraph[mail from]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow mail from} - -Specify an alternate \lstinline=from:= email address for event notifications. - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} None, (notifications@HOSTNAME) - \item {\em example:} \lstinline|mail from = no-reply@your-org| -\end{myitemize} - -\subparagraph[mail retry delays]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow mail retry delays} - -Specify an initial delay before running the mail notification command and any -retry delays in case the command returns a non-zero code. The default behaviour -is to run the mail notification command once without any delay. - -\begin{myitemize} - \item {\em type:} Comma-separated list of ISO 8601 duration/interval representations, optionally {\em preceded} by multipliers. - \item {\em default:} (none) - \item {\em example:} \lstinline@mail retry delays = PT10S, PT1M, PT5M@ -\end{myitemize} - -\subparagraph[mail smtp]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow mail smtp} - -Specify the SMTP server for sending email notifications. 
- -\begin{myitemize} - \item {\em type:} string - \item {\em default:} None, (localhost:25) - \item {\em example:} \lstinline@mail smtp = smtp.yourorg@ -\end{myitemize} - -\subparagraph[mail to]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[events]]] \textrightarrow mail to} - -A list of email addresses to send task event notifications. The list can be -anything accepted by the \lstinline=mail= command. - -\begin{myitemize} - \item {\em type:} string - \item {\em default:} None, (USER@HOSTNAME) - \item {\em example:} \lstinline@mail to = your.colleague@ -\end{myitemize} - -\paragraph[{[[[}environment{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[environment]]]} - -The user defined task execution environment. Variables defined here can -refer to cylc suite and task identity variables, which are exported -earlier in the task job script, and variable assignment expressions can -use cylc utility commands because access to cylc is also configured -earlier in the script. See also~\ref{TaskExecutionEnvironment}. - -\subparagraph[\_\_VARIABLE\_\_ ]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[environment]]] \textrightarrow \_\_VARIABLE\_\_} -\label{AppendixTaskExecutionEnvironment} - -Replace \_\_VARIABLE\_\_ with any number of environment variable -assignment expressions. -Order of definition is preserved so values can refer to previously -defined variables. Values are passed through to the task job script -without evaluation or manipulation by cylc, so any variable assignment -expression that is legal in the job submission shell can be used. -White space around the `$=$' is allowed (as far as cylc's suite.rc -parser is concerned these are just normal configuration items). - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em legal values:} depends to some extent on the task job - submission shell (\ref{JobSubShell}). 
-\item {\em examples}, for the bash shell: - \begin{myitemize} - \item \lstinline@FOO = $HOME/bar/baz@ - \item \lstinline@BAR = ${FOO}$GLOBALVAR@ - \item \lstinline@BAZ = $( echo "hello world" )@ - \item \lstinline@WAZ = ${FOO%.jpg}.png@ - \item \lstinline@NEXT_CYCLE = $( cylc cycle-point --offset=PT6H )@ - \item \lstinline@PREV_CYCLE = `cylc cycle-point --offset=-PT6H`@ - \item \lstinline@ZAZ = "${FOO#bar}" # <-- QUOTED to escape the suite.rc comment character@ - \end{myitemize} -\end{myitemize} - -\paragraph[{[[[}environment filter{]]]}]{ [runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[environment filter]]]} -\label{EnvironmentFilter} - -This section contains environment variable inclusion and exclusion -lists that can be used to filter the inherited environment. {\em This is -not intended as an alternative to a well-designed inheritance hierarchy -that provides each task with just the variables it needs.} Filters can, -however, improve suites with tasks that inherit a lot of environment -they don't need, by making it clear which tasks use which variables. -They can optionally be used routinely as explicit ``task environment -interfaces'' too, at some cost to brevity, because they guarantee that -variables filtered out of the inherited task environment are not used. - -Note that environment filtering is done after inheritance is completely -worked out, not at each level on the way, so filter lists in higher-level -namespaces only have an effect if they are not overridden by descendants. - -\subparagraph[include]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[environment filter]]] \textrightarrow include} - -If given, only variables named in this list will be included from the -inherited environment, others will be filtered out. Variables may also -be explicitly excluded by an \lstinline=exclude= list. - -\begin{myitemize} -\item {\em type:} Comma-separated list of strings (variable names). 
-\item {\em default:} (none) -\end{myitemize} - -\subparagraph[exclude]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[environment filter]]] \textrightarrow exclude} - -Variables named in this list will be filtered out of the inherited -environment. Variables may also be implicitly excluded by -omission from an \lstinline=include= list. - -\begin{myitemize} -\item {\em type:} Comma-separated list of strings (variable names). -\item {\em default:} (none) -\end{myitemize} - -\paragraph[{[[[}parameter environment templates{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[parameter environment templates]]]} - -The user defined task execution parameter environment templates. This is only -relevant for {\em parameterized tasks} - see Section~\ref{Parameterized Tasks}. - -\subparagraph[\_\_VARIABLE\_\_ ]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[parameter environment templates]]] \textrightarrow \_\_VARIABLE\_\_} - -Replace \_\_VARIABLE\_\_ with pairs of environment variable -name and Python string template for parameter substitution. This is only -relevant for {\em parameterized tasks} - see Section~\ref{Parameterized Tasks}. - -If specified, in addition to the standard CYLC\_TASK\_PARAM\_ variables, the -job script will also export the named variables specified here, with the -template strings substituted with the parameter values. - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\item {\em legal values:} name=string template pairs -\item {\em examples}, for the bash shell: - \begin{myitemize} - \item \lstinline@MYNUM=%(i)d@ - \item \lstinline@MYITEM=%(item)s@ - \item \lstinline@MYFILE=/path/to/%(i)03d/%(item)s@ - \end{myitemize} -\end{myitemize} - -\paragraph[{[[[}directives{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[directives]]]} - -Batch queue scheduler directives. Whether or not these are used depends -on the batch system. 
For the built-in methods that support directives -(\lstinline=loadleveler=, \lstinline=lsf=, \lstinline=pbs=, \lstinline=sge=, -\lstinline=slurm=, \lstinline=moab=), directives are written to the top of the -task job script in the correct format for the method. Specifying directives -individually like this allows use of default directives that can be -individually overridden at lower levels of the runtime namespace hierarchy. - -\subparagraph[\_\_DIRECTIVE\_\_ ]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[directives]]] \textrightarrow \_\_DIRECTIVE\_\_} - -Replace \_\_DIRECTIVE\_\_ with each directive assignment, e.g.\ -\lstinline@class = parallel@ - -\begin{myitemize} -\item {\em type:} string -\item {\em default:} (none) -\end{myitemize} - -Example directives for the built-in batch system handlers are shown -in~\ref{AvailableMethods}. - -\paragraph[{[[[}outputs{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[outputs]]]} - -Register custom task outputs for use in message triggering in this section -(\ref{MessageTriggers}) - -\subparagraph[\_\_OUTPUT\_\_ ]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[outputs]]] \textrightarrow \_\_OUTPUT\_\_} - -Replace \_\_OUTPUT\_\_ with one or more custom task output messages -(\ref{MessageTriggers}). The item name is used to select the custom output -message in graph trigger notation. -\begin{myitemize} - \item {\em type:} string - \item {\em default:} (none) - \item{ \em examples:} -\end{myitemize} -\begin{lstlisting} -out1 = "sea state products ready" -out2 = "NWP restart files completed" -\end{lstlisting} - -\paragraph[{[[[}suite state polling{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]]} - -\lstset{language=transcript} -Configure automatic suite polling tasks as described -in~\ref{SuiteStatePolling}. 
The -items in this section reflect the options and defaults of the -\lstinline=cylc suite-state= command, except that the target suite name and the -\lstinline=--task=, \lstinline=--cycle=, and \lstinline=--status= options are -taken from the graph notation. - -\subparagraph[run-dir]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow run-dir} - -For your own suites the run database location is determined by your -site/user config. For other suites, e.g.\ those owned by others, or -mirrored suite databases, use this item to specify the location -of the top level cylc run directory (the database should be a -suite-name sub-directory of this location). - -\begin{myitemize} - \item {\em type:} string (a directory path on the target suite host) - \item {\em default:} as configured by site/user config (for your own suites) -\end{myitemize} - -\subparagraph[interval]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow interval} - -Polling interval expressed as an ISO 8601 duration/interval. -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). - \item {\em default:} PT1M -\end{myitemize} - -\subparagraph[max-polls]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow max-polls} - -The maximum number of polls before timing out and entering the `failed' state. - -\begin{myitemize} - \item {\em type:} integer - \item {\em default:} 10 -\end{myitemize} - -\subparagraph[user]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow user} - -Username of an account on the suite host to which you have access. The -polling \lstinline=cylc suite-state= command will be invoked -on the remote account. 
- -\begin{myitemize} - \item {\em type:} string (username) - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[host]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow host} - -The hostname of the target suite. The polling \lstinline=cylc suite-state= command -will be invoked on the remote account. - -\begin{myitemize} - \item {\em type:} string (hostname) - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[host]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow message} - -Wait for the target task in the target suite to receive a specified message -rather than achieve a state. - -\begin{myitemize} - \item {\em type:} string (the message) - \item {\em default:} (none) -\end{myitemize} - -\subparagraph[verbose]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[suite state polling]]] \textrightarrow verbose} - -Run the polling \lstinline=cylc suite-state= command in verbose output mode. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\paragraph[{[[[}simulation{]]]}]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]]} -\label{suiterc-sim-config} - -\lstset{language=transcript} - -Task configuration for the suite {\em simulation} and {\em dummy} run modes -described in Section~\ref{SimulationMode}. - -\subparagraph[default run length]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]] \textrightarrow default run length} - -The default simulated job run length, if \lstinline=[job]execution time limit= -and \lstinline=[simulation]speedup factor= are not set. - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). 
- \item {\em default:} \lstinline=PT10S= -\end{myitemize} - -\subparagraph[speedup factor]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]] \textrightarrow speedup factor} - -If \lstinline=[job]execution time limit= is set, the task simulated run length -is computed by dividing it by this factor. - -\begin{myitemize} - \item {\em type:} float - \item {\em default:} (none) - i.e.\ do not use proportional run length - \item {\em example:} \lstinline=10.0= -\end{myitemize} - -\subparagraph[time limit buffer]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]] \textrightarrow time limit buffer} - -For dummy jobs, a new \lstinline=[job]execution time limit= is set to the -simulated task run length plus this buffer interval, to avoid job kill due to -exceeding the time limit. - -\begin{myitemize} - \item {\em type:} ISO 8601 duration/interval representation (e.g.\ - \lstinline=PT10S=, 10 seconds, or \lstinline=PT1M=, 1 minute). - \item {\em default:} PT10S -\end{myitemize} - -\subparagraph[fail cycle points]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]] \textrightarrow fail cycle points} - -Configure simulated or dummy jobs to fail at certain cycle points. - -\begin{myitemize} - \item {\em type:} list of strings (cycle points), or {\em all} - \item {\em default:} (none) - no instances of the task will fail - \item {\em examples:} - \begin{myitemize} - \item \lstinline=all= - all instance of the task will fail - \item \lstinline=2017-08-12T06, 2017-08-12T18= - these instances of the - task will fail - \end{myitemize} -\end{myitemize} - -\subparagraph[fail try 1 only]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]] \textrightarrow fail try 1 only} - -If this is set to \lstinline=True= only the first run of the task instance will -fail, otherwise retries will fail too. 
- -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} \lstinline=True= -\end{myitemize} - -\subparagraph[disable task event handlers]{[runtime] \textrightarrow [[\_\_NAME\_\_]] \textrightarrow [[[simulation]]] \textrightarrow disable task event handlers} - -If this is set to \lstinline=True= configured task event handlers will not be called -in simulation or dummy modes. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} \lstinline=True= -\end{myitemize} - -\subsection{[visualization]} - -Configuration of suite graphing for the \lstinline=cylc graph= command (graph -extent, styling, and initial family-collapsed state) and the gcylc graph view -(initial family-collapsed state). Graphviz documentation of node shapes -and so on can be found at \url{http://www.graphviz.org/documentation/}. - -\subsubsection[initial cycle point]{[visualization] \textrightarrow initial cycle point} - -The initial cycle point for graph plotting. -\begin{myitemize} - \item {\em type:} ISO 8601 date-time representation (e.g.\ CCYYMMDDThhmm) - \item {\em default:} the suite initial cycle point -\end{myitemize} -The visualization initial cycle point gets adjusted up if necessary to the -suite initial cycling point. - -\subsubsection[final cycle point]{[visualization] \textrightarrow final cycle point} - -An explicit final cycle point for graph plotting. If used, this overrides the -preferred {\em number of cycle points} (below). -\begin{myitemize} - \item {\em type:} ISO 8601 date-time representation (e.g.\ CCYYMMDDThhmm) - \item {\em default:} (none) -\end{myitemize} -The visualization final cycle point gets adjusted down if necessary to the -suite final cycle point. - -\subsubsection[number of cycle points]{[visualization] \textrightarrow number of cycle points} - -The number of cycle points to graph starting from the visualization initial -cycle point. 
This is the preferred way of defining the graph end point, but -it can be overridden by an explicit {\em final cycle point} (above). -\begin{myitemize} - \item {\em type:} integer - \item {\em default:} 3 -\end{myitemize} - -\subsubsection[collapsed families]{[visualization] \textrightarrow collapsed families} - -A list of family (namespace) names to be shown in the collapsed state -(i.e.\ the family members will be replaced by a single family node) when -the suite is first plotted in the graph viewer or the gcylc graph view. -If this item is not set, the default is to collapse all families at first. -Interactive GUI controls can then be used to group and ungroup family -nodes at will. - -\begin{myitemize} - \item {\em type:} Comma-separated list of family names. - \item {\em default:} (none) -\end{myitemize} - -\subsubsection[use node color for edges]{[visualization] \textrightarrow use node color for edges} - -Plot graph edges (dependency arrows) with the same color as the upstream -node, otherwise default to black. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[use node fillcolor for edges]{[visualization] \textrightarrow use node fillcolor for edges} - -Plot graph edges (i.e. dependency arrows) with the same fillcolor as the -upstream node, if it is filled, otherwise default to black. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - -\subsubsection[node penwidth]{[visualization] \textrightarrow node penwidth} - -Line width of node shape borders. - -\begin{myitemize} - \item {\em type:} integer - \item {\em default:} 2 -\end{myitemize} - -\subsubsection[edge penwidth]{[visualization] \textrightarrow edge penwidth} - -Line width of graph edges (dependency arrows). 
- -\begin{myitemize} - \item {\em type:} integer - \item {\em default:} 2 -\end{myitemize} - -\subsubsection[use node color for labels]{[visualization] \textrightarrow use node color for labels} - -Graph node labels can be printed in the same color as the node outline. - -\begin{myitemize} - \item {\em type:} boolean - \item {\em default:} False -\end{myitemize} - - -\subsubsection[default node attributes]{[visualization] \textrightarrow default node attributes} - -Set the default attributes (color and style etc.) of graph nodes (tasks and families). -Attribute pairs must be quoted to hide the internal \lstinline@=@ character. - -\begin{myitemize} - \item {\em type:} Comma-separated list of quoted \lstinline@'attribute=value'@ pairs. - \item {\em legal values:} see graphviz or pygraphviz documentation - \item {\em default:} \lstinline@'style=filled', 'fillcolor=yellow', 'shape=box'@ -\end{myitemize} - -\subsubsection[default edge attributes]{[visualization] \textrightarrow default edge attributes} - -Set the default attributes (color and style etc.) of graph edges -(dependency arrows). Attribute pairs must be quoted to hide the -internal \lstinline@=@ character. -\begin{myitemize} - \item {\em type:} Comma-separated list of quoted \lstinline@'attribute=value'@ pairs. - \item {\em legal values:} see graphviz or pygraphviz documentation - \item {\em default:} \lstinline@'color=black'@ -\end{myitemize} - -\subsubsection[{[[}node groups{]]}]{[visualization] \textrightarrow [[node groups]]} - -Define named groups of graph nodes (tasks and families) which can styled -en masse, by name, in [visualization] \textrightarrow [[node attributes]]. -Node groups are automatically defined for all task families, including -root, so you can style family and member nodes at once by family name. - -\paragraph[\_\_GROUP\_\_]{[visualization] \textrightarrow [[node groups]] \textrightarrow \_\_GROUP\_\_} - -Replace \_\_GROUP\_\_ with each named group of tasks or families. 
- -\begin{myitemize} - \item {\em type:} Comma-separated list of task or family names. - \item {\em default:} (none) - \item {\em example:} -\begin{lstlisting} - PreProc = foo, bar - PostProc = baz, waz -\end{lstlisting} -\end{myitemize} - -\subsubsection[{[[}node attributes{]]}]{[visualization] \textrightarrow [[node attributes]]} - -Here you can assign graph node attributes to specific nodes, or to all -members of named groups defined in [visualization] \textrightarrow [[node -groups]]. Task families are automatically node groups. Styling of a -family node applies to all member nodes (tasks and sub-families), but -precedence is determined by ordering in the suite configuration. For -example, if you style a family red and then one of its members green, -cylc will plot a red family with one green member; but if you style one -member green and then the family red, the red family styling will -override the earlier green styling of the member. - -\paragraph[\_\_NAME\_\_]{[visualization] \textrightarrow [[node attributes]] \textrightarrow \_\_NAME\_\_} - -Replace \_\_NAME\_\_ with each node or node group for style attribute -assignment. - -\begin{myitemize} - \item {\em type:} Comma-separated list of quoted \lstinline@'attribute=value'@ pairs. - \item {\em legal values:} see graphviz or pygraphviz documentation - \item {\em default:} (none) - \item {\em example:} (with reference to the node groups defined above) -\begin{lstlisting} - PreProc = 'style=filled', 'fillcolor=orange' - PostProc = 'color=red' - foo = 'style=filled' -\end{lstlisting} -\end{myitemize} diff --git a/doc/src/cylc-user-guide/titlepic.sty b/doc/src/cylc-user-guide/titlepic.sty deleted file mode 100644 index 974885c8998..00000000000 --- a/doc/src/cylc-user-guide/titlepic.sty +++ /dev/null @@ -1,68 +0,0 @@ -% titlepic.sty is a LaTeX package to show a picture on the cover produced by \maketitle. -% By Thomas ten Cate . Free software, no warranty of any kind. 
-% -% Version history: -% 1.1: now more self-contained, comes with a PDF manual -% 1.0: first release -% -% ----------------------------------------------------------------------------- - -% No idea whether it works on older LaTeXes. -\NeedsTeXFormat{LaTeX2e} - -% Package identification and version number. -\ProvidesPackage{titlepic}[2009/08/03 1.1 Package to display a picture on the title page] - -% Declare the options. -\DeclareOption{tt}{\gdef\@tptopspace{}\gdef\@tpsepspace{\vskip 3em}} -\DeclareOption{tc}{\gdef\@tptopspace{}\gdef\@tpsepspace{\vfil}} -\DeclareOption{cc}{\gdef\@tptopspace{\null\vfil}\gdef\@tpsepspace{\vskip 3em}} -\ExecuteOptions{cc} -\ProcessOptions - -% Define the sole command introduced by this package. -% Very similar to the definition of \title, etc. -\def\titlepic#1{\gdef\@titlepic{#1}} -\def\@titlepic{\@empty} % default: no picture - -% If a title page was requested from the document class (article/report/book), -% override \maketitle to show our picture. -\if@titlepage -\renewcommand\maketitle{ - \begin{titlepage}% - \let\footnotesize\small - \let\footnoterule\relax - \let \footnote \thanks - \@tptopspace% - \begin{center}% - {\LARGE \@title \par}% - \vskip 3em% - {\large - \lineskip .75em% - \begin{tabular}[t]{c}% - \@author - \end{tabular}\par% - }% - \vskip 1.5em% - {\large \@date \par}% % Set date in \large size. 
- \end{center}\par - \@tpsepspace% - {\centering\@titlepic\par} - \vfil - \@thanks - \end{titlepage}% - \setcounter{footnote}{0}% - \global\let\thanks\relax - \global\let\maketitle\relax - \global\let\@thanks\@empty - \global\let\@author\@empty - \global\let\@date\@empty - \global\let\@title\@empty - \global\let\@titlepic\@empty - \global\let\title\relax - \global\let\author\relax - \global\let\date\relax - \global\let\and\relax - \global\let\titlepic\relax -} -\fi diff --git a/doc/src/cylc-user-guide/titlepic/README b/doc/src/cylc-user-guide/titlepic/README deleted file mode 100644 index dbe4c98b89d..00000000000 --- a/doc/src/cylc-user-guide/titlepic/README +++ /dev/null @@ -1,13 +0,0 @@ -The titlepic package allows you to place a picture on the -title page (cover page) of a LaTeX document. - -Example of usage: -\usepackage[cc]{titlepic} -\usepackage{graphicx} -\titlepic{\includegraphics[width=\textwidth]{picture.png}} - -Note: the package currently only works with the document -classes article, report and book. - -Author: Thomas ten Cate -License: Public Domain \ No newline at end of file diff --git a/doc/src/cylc-user-guide/titlepic/titlepic-manual.pdf b/doc/src/cylc-user-guide/titlepic/titlepic-manual.pdf deleted file mode 100644 index 63a02124aba..00000000000 Binary files a/doc/src/cylc-user-guide/titlepic/titlepic-manual.pdf and /dev/null differ diff --git a/doc/src/cylc-user-guide/titlepic/titlepic-manual.tex b/doc/src/cylc-user-guide/titlepic/titlepic-manual.tex deleted file mode 100644 index dec9ff4100e..00000000000 --- a/doc/src/cylc-user-guide/titlepic/titlepic-manual.tex +++ /dev/null @@ -1,112 +0,0 @@ -% Manual, as well as usage example, for the titlepic.sty package. -% Run this through latex to see the output. - -\documentclass[titlepage]{article} - -\usepackage[cc]{titlepic} % Include the titlepic package. - -% Creates a placeholder for the picture (no real picture is included in the package to keep it small). 
-% The syntax is \placeholder{width}{height}. -\newcommand\placeholder[2]{% - \noindent% - {\setlength{\fboxsep}{0pt}% - \framebox[#1]{% - \parbox{0pt}{\rule{0pt}{#2}}% - \parbox{#1}{\centering The picture goes here.}% - }% - }% -} - -\title{\LaTeX{} \texttt{titlepic} Manual} -\author{Thomas ten Cate\thanks{\texttt{}}} -\date{August 5, 2008} -\titlepic{\placeholder{\textwidth}{0.75\textwidth}} % This is the magic command! - -\begin{document} - -\maketitle - -\section{Introduction} - -In \LaTeX, there is by default no way to put a picture on the title page or cover page that is produced by \verb$\maketitle$. Surprisingly, no package seemed to exist for this either, which is why I put together this very simple package named \verb$titlepic$. - -\textbf{Note:} \verb$titlepic$ only works with the default document classes \verb$article$, \verb$report$ and \verb$book$. - -\section{Installation} - -There are two ways to install the package: -\begin{itemize} - \item Simply drop \verb$titlepic.sty$ in the same directory as your \verb$.tex$ source document. This is the easiest option for casual use. - \item Put \verb$titlepic.sty$ somewhere in your \verb$texmf$ tree and rehash. The details depend on your \TeX{} distribution. This gives you a system-wide installation. -\end{itemize} - -\section{Usage} - -Include the package as normal, with: - -\begin{quote} - \verb$\usepackage{titlepic}$ -\end{quote} - -\noindent If you want to be able to include a picture, then you also need - -\begin{quote} - \verb$\usepackage{graphicx}$ -\end{quote} - -\textbf{Note:} when you use the \verb$article$ document class, be sure to pass it the \verb$titlepage$ option (\verb$\documentclass[titlepage]{article}$), because articles do not have a title page by default. 
- -Then, along with the usual \verb$\title$, \verb$\author$ and \verb$\date$, put a command like the following: - -\begin{quote} - \verb$\titlepic{\includegraphics[width=\textwidth]{cover.png}}$ -\end{quote} - -The argument to \verb$\titlepic$ will usually be an \verb$\includegraphics$ command, which produces a picture. You can change the size using the optional argument to \verb$\includegraphics$, for example, \verb$width=0.7\textwidth$. This will make the picture be 70\% as wide as the text, and scales the height accordingly. - -In fact, the argument to \verb$\titlepic$ can actually be pretty much anything. The output produced by this argument will be typeset centred on the title page when you invoke \verb$\maketitle$. - -\section{Package options} - -There are three optional arguments that control the vertical layout of the title page: - -\begin{description} -\item[\tt{tt}] - Put both the title (and author, and date) and the picture at the top of the page, separated by a fixed amount of space. -\item[\tt{tc}] - Put the title at the top of the page as with tt, but centre the picture vertically on the page. -\item[\tt{cc}] - Separate the title and the picture by a fixed amount of space, and centre both together vertically on the page. -\end{description} - -\section{Example} - -Here is a full example of what your document could look like. - -\begin{verbatim} -% Be sure to pass titlepage to the article class -\documentclass[titlepage]{article} - -% Centre the picture and the title vertically with cc -\usepackage[cc]{titlepic} - -% For \includegraphics -\usepackage{graphicx} - -\title{Example} -\author{John Doe} -\date{\today} - -% Now, put a picture on the title page! -\titlepic{\includegraphics[width=\textwidth]{picture.png}} - -\begin{document} - -\maketitle - -... 
- -\end{document} -\end{verbatim} - -\end{document} diff --git a/doc/src/cylc-user-guide/titlepic/titlepic.sty b/doc/src/cylc-user-guide/titlepic/titlepic.sty deleted file mode 100644 index 974885c8998..00000000000 --- a/doc/src/cylc-user-guide/titlepic/titlepic.sty +++ /dev/null @@ -1,68 +0,0 @@ -% titlepic.sty is a LaTeX package to show a picture on the cover produced by \maketitle. -% By Thomas ten Cate . Free software, no warranty of any kind. -% -% Version history: -% 1.1: now more self-contained, comes with a PDF manual -% 1.0: first release -% -% ----------------------------------------------------------------------------- - -% No idea whether it works on older LaTeXes. -\NeedsTeXFormat{LaTeX2e} - -% Package identification and version number. -\ProvidesPackage{titlepic}[2009/08/03 1.1 Package to display a picture on the title page] - -% Declare the options. -\DeclareOption{tt}{\gdef\@tptopspace{}\gdef\@tpsepspace{\vskip 3em}} -\DeclareOption{tc}{\gdef\@tptopspace{}\gdef\@tpsepspace{\vfil}} -\DeclareOption{cc}{\gdef\@tptopspace{\null\vfil}\gdef\@tpsepspace{\vskip 3em}} -\ExecuteOptions{cc} -\ProcessOptions - -% Define the sole command introduced by this package. -% Very similar to the definition of \title, etc. -\def\titlepic#1{\gdef\@titlepic{#1}} -\def\@titlepic{\@empty} % default: no picture - -% If a title page was requested from the document class (article/report/book), -% override \maketitle to show our picture. -\if@titlepage -\renewcommand\maketitle{ - \begin{titlepage}% - \let\footnotesize\small - \let\footnoterule\relax - \let \footnote \thanks - \@tptopspace% - \begin{center}% - {\LARGE \@title \par}% - \vskip 3em% - {\large - \lineskip .75em% - \begin{tabular}[t]{c}% - \@author - \end{tabular}\par% - }% - \vskip 1.5em% - {\large \@date \par}% % Set date in \large size. 
- \end{center}\par - \@tpsepspace% - {\centering\@titlepic\par} - \vfil - \@thanks - \end{titlepage}% - \setcounter{footnote}{0}% - \global\let\thanks\relax - \global\let\maketitle\relax - \global\let\@thanks\@empty - \global\let\@author\@empty - \global\let\@date\@empty - \global\let\@title\@empty - \global\let\@titlepic\@empty - \global\let\title\relax - \global\let\author\relax - \global\let\date\relax - \global\let\and\relax - \global\let\titlepic\relax -} -\fi diff --git a/doc/src/external-triggers.rst b/doc/src/external-triggers.rst new file mode 100644 index 00000000000..286a0e4f805 --- /dev/null +++ b/doc/src/external-triggers.rst @@ -0,0 +1,496 @@ +.. _External Triggers: + +External Triggers +================= + +.. warning:: + + This is a new capability and its suite configuration + interface may change somewhat in future releases - see Current + Limitations below in :ref:`Current Trigger Function Limitations`. + +External triggers allow tasks to trigger directly off of external events, which +is often preferable to implementing long-running polling tasks in the workflow. +The triggering mechanism described in this section replaces an older and less +powerful one documented in :ref:`Old-Style External Triggers`. + +If you can write a Python function to check the status of an external +condition or event, the suite server program can call it at configurable +intervals until it reports success, at which point dependent tasks can trigger +and data returned by the function will be passed to the job environments of +those tasks. Functions can be written for triggering off of almost anything, +such as delivery of a new dataset, creation of a new entry in a database +table, or appearance of new data availability notifications in a message +broker. + +External triggers are visible in suite visualizations as bare graph nodes (just +the trigger names). They are plotted against all dependent tasks, not in a +cycle point specific way like tasks. 
This is because external triggers may or +may not be cycle point (or even task name) specific - it depends on the +arguments passed to the corresponding trigger functions. For example, if an +external trigger does not depend on task name or cycle point it will only be +called once - albeit repeatedly until satisfied - for the entire suite run, +after which the function result will be remembered for all dependent tasks +throughout the suite run. + +Several built-in external trigger functions are located in +``/lib/cylc/xtriggers/``: + +- clock triggers - see :ref:`Built-in Clock Triggers` +- inter-suite triggers - see :ref:`Built-in Suite State Triggers` + +Trigger functions are normal Python functions, with certain constraints as +described below in: + +- custom trigger functions - see :ref:`Custom Trigger Functions` + + +.. _Built-in Clock Triggers: + +Built-in Clock Triggers +----------------------- + +These are more transparent (exposed in the graph) and efficient (shared among +dependent tasks) than the older clock triggers described +in :ref:`ClockTriggerTasks`. (However we don't recommend wholesale conversion +to the new method yet, until its interface has stabilized - +see :ref:`Current Trigger Function Limitations`.) + +Clock triggers, unlike other trigger functions, are executed synchronously in +the main process. The clock trigger function signature looks like this: + +.. code-block:: python + + wall_clock(offset=None) + +The ``offset`` argument is a date-time duration (``PT1H`` is 1 +hour) relative to the dependent task's cycle point (automatically passed to the +function via a second argument not shown above). + +In the following suite, task ``foo`` has a daily cycle point sequence, +and each task instance can trigger once the wall clock time has passed its +cycle point value by one hour: + +.. 
code-block:: cylc + + [scheduling] + initial cycle point = 2018-01-01 + [[xtriggers]] + clock_1 = wall_clock(offset=PT1H):PT10S + [[dependencies]] + [[[P1D]]] + graph = "@clock_1 => foo" + [runtime] + [[foo]] + script = run-foo.sh + +Notice that the short label ``clock_1`` is used to represent the +trigger function in the graph. The function call interval, which determines how +often the suite server program checks the clock, is optional. Here it is +``PT10S`` (i.e. 10 seconds, which is also the default value). + +Argument keywords can be omitted if called in the right order, so the +``clock_1`` trigger can also be declared like this: + +.. code-block:: cylc + + [[xtriggers]] + clock_1 = wall_clock(PT1H) + +Finally, a zero-offset clock trigger does not need to be declared under +the ``[xtriggers]`` section: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2018-01-01 + [[dependencies]] + [[[P1D]]] + # zero-offset clock trigger: + graph = "@wall_clock => foo" + [runtime] + [[foo]] + script = run-foo.sh + + +.. _Built-in Suite State Triggers: + +Built-in Suite State Triggers +----------------------------- + +These can be used instead of the older suite state polling tasks described +in :ref:`SuiteStatePolling` for inter-suite triggering - i.e. to trigger local +tasks off of remote task statuses or messages in other suites. (However we +don't recommend wholesale conversion to the new method yet, until its +interface has stabilized - see :ref:`Current Trigger Function Limitations`.) + +The suite state trigger function signature looks like this: + +.. code-block:: python + + suite_state(suite, task, point, offset=None, status='succeeded', + message=None, cylc_run_dir=None, debug=False) + +The first three arguments are compulsory; they single out the target suite name +(``suite``), task name (``task``), and cycle point +(``point``).
The function arguments mirror the arguments and options of +the ``cylc suite-state`` command - see +``cylc suite-state --help`` for documentation. + +As a simple example, consider the suites in +``<cylc-dir>/etc/dev-suites/xtrigger/suite_state/``. The "upstream" +suite (which we want to trigger off of) looks like this: + +.. literalinclude:: ../../etc/dev-suites/xtrigger/suite_state/upstream/suite.rc + :language: cylc + +It must be registered and run under the name *up*, as referenced in the +"downstream" suite that depends on it: + +.. literalinclude:: ../../etc/dev-suites/xtrigger/suite_state/downstream/suite.rc + :language: cylc + +Try starting the downstream suite first, then the upstream, and +watch what happens. +In each cycle point the ``@upstream`` trigger in the downstream suite +waits on the task ``foo`` (with the same cycle point) in the upstream +suite to emit the *data ready* message. + +Some important points to note about this: + +- the function call interval, which determines how often the suite + server program checks the upstream suite, is optional. Here it is + ``PT10S`` (i.e. 10 seconds, which is also the default value). +- the ``suite_state`` trigger function, like the + ``cylc suite-state`` command, must have read-access to the upstream + suite's public database. +- the cycle point argument is supplied by a string template + ``%(point)s``. The string templates available to trigger function + arguments are described in :ref:`Custom Trigger Functions`. + +The return value of the ``suite_state`` trigger function looks like +this: + +.. code-block:: python + + results = { + 'suite': suite, + 'task': task, + 'point': point, + 'offset': offset, + 'status': status, + 'message': message, + 'cylc_run_dir': cylc_run_dir + } + return (satisfied, results) + +The ``satisfied`` variable is boolean (value True or False, depending +on whether or not the trigger condition was found to be satisfied).
The +``results`` dictionary contains the names and values of all of the +target suite state parameters. Each item in it gets qualified with the +unique trigger label ("upstream" here) and passed to the environment of +dependent task jobs (the members of the ``FAM`` family in this case). +To see this, take a look at the job script for one of the downstream tasks: + +.. code-block:: bash + + % cylc cat-log -f j dn f2.2011 + ... + cylc__job__inst__user_env() { + # TASK RUNTIME ENVIRONMENT: + export upstream_suite upstream_cylc_run_dir upstream_offset \ + upstream_message upstream_status upstream_point upstream_task + upstream_suite="up" + upstream_cylc_run_dir="/home/vagrant/cylc-run" + upstream_offset="None" + upstream_message="data ready" + upstream_status="succeeded" + upstream_point="2011" + upstream_task="foo" + } + ... + +.. note:: + + The task has to know the name (label) of the external trigger that it + depends on - "upstream" in this case - in order to use this information. + However the name could be given to the task environment in the suite + configuration. + + +.. _Custom Trigger Functions: + +Custom Trigger Functions +------------------------ + +Trigger functions are just normal Python functions, with a few special +properties: + +- they must be defined in a module with the same name as the function +- they can be located in: + - ``<cylc-dir>/lib/cylc/xtriggers/`` + - ``<suite-dir>/lib/python/`` + - (or anywhere in your Python library path) +- they can take arbitrary positional and keyword arguments +- suite and task identity, and cycle point, can be passed to trigger + functions by using string templates in function arguments (see below) +- integer, float, boolean, and string arguments will be recognized and + passed to the function as such +- if a trigger function depends on files or directories (for example) + that might not exist when the function is first called, just return + unsatisfied until everything required does exist. + +..
note:: + + Trigger functions cannot store data Pythonically between invocations + because each call is executed in an independent process in the process + pool. If necessary the filesystem can be used for this purpose. + +The following string templates are available for use, if the trigger function +needs any of this information, in function arguments in the suite configuration: + +- ``%(name)s`` - name of the dependent task +- ``%(id)s`` - identity of the dependent task (name.cycle-point) +- ``%(point)s`` - cycle point of the dependent task +- ``%(debug)s`` - suite debug mode + +and less commonly needed: + +- ``%(user_name)s`` - suite owner's user name +- ``%(suite_name)s`` - registered suite name +- ``%(suite_run_dir)s`` - suite run directory +- ``%(suite_share_dir)s`` - suite share directory + +Function return values should be as follows: + +- if the trigger condition is *not satisfied*: + + - return ``(False, {})`` + +- if the trigger condition is *satisfied*: + + - return ``(True, results)`` + +where ``results`` is an arbitrary dictionary of information to be +passed to dependent tasks. How this looks to these tasks is described above +in :ref:`Built-in Suite State Triggers`. 
+ +The suite server program manages trigger functions as follows: + +- they are called asynchronously in the process pool + - (except for clock triggers, which are called from the main process) +- they are called repeatedly on a configurable interval, until satisfied + - the call interval defaults to ``PT10S`` (10 seconds) + - repeat calls are not made until the previous call has returned +- they are subject to the normal process pool command time out - if they + take too long to return, the process will be killed +- they are shared for efficiency: a single call will be made for all + triggers that share the same function signature - i.e. the same function + name and arguments +- their return status and results are stored in the suite DB and persist across + suite restarts +- their stdout, if any, is redirected to stderr and will be visible in + the suite log in debug mode (stdout is needed to communicate return values + from the sub-process in which the function executes) + + +Toy Examples +^^^^^^^^^^^^ + +A couple of toy examples in ``<cylc-dir>/lib/cylc/xtriggers/`` may +be a useful aid to understanding trigger functions and how they work. + + +echo +"""" + +The ``echo`` function is a trivial one that takes any number of +positional and keyword arguments (from the suite configuration) and simply +prints them to stdout, and then returns False (i.e. trigger condition not +satisfied). Here it is in its entirety: + +.. code-block:: python + + def echo(*args, **kwargs): + print "echo: ARGS:", args + print "echo: KWARGS:", kwargs + return (False, {}) + +Here's an example echo trigger suite: + +..
code-block:: cylc + + [scheduling] + initial cycle point = now + [[xtriggers]] + echo_1 = echo(hello, 99, qux=True, point=%(point)s, foo=10) + [[dependencies]] + [[[PT1H]]] + graph = "@echo_1 => foo" + [runtime] + [[foo]] + script = exit 1 + +To see the result, run this suite in debug mode and take a look at the +suite log (or run ``cylc run --debug --no-detach <suite>`` and watch +your terminal). + + +xrandom +""""""" + +The ``xrandom`` function sleeps for a configurable amount of time +(useful for testing the effect of a long-running trigger function - which +should be avoided) and has a configurable random chance of success. The +function signature is: + +.. code-block:: python + + xrandom(percent, secs=0, _=None, debug=False) + +The ``percent`` argument sets the odds of success in any given call; +``secs`` is the number of seconds to sleep before returning; and the +``_`` argument (underscore is a conventional name for a variable +that is not used, in Python) is provided to allow specialization of the trigger +to (for example) task name, task ID, or cycle point (just use the appropriate +string templates in the suite configuration for this). + +An example xrandom trigger suite is +``<cylc-dir>/etc/dev-suites/xtriggers/xrandom/``. + + +.. _Current Trigger Function Limitations: + +Current Limitations +------------------- + +The following issues may be addressed in future Cylc releases: + +- trigger labels cannot currently be used in conditional (OR) expressions + in the graph; attempts to do so will fail validation. +- aside from the predefined zero-offset ``wall_clock`` trigger, all + unique trigger function calls must be declared *with all of + their arguments* under the ``[scheduling][xtriggers]`` section, and + referred to by label alone in the graph.
It would be convenient (and less + verbose, although no more functional) if we could just declare a label + against the *common* arguments, and give remaining arguments (such as + different wall clock offsets in clock triggers) as needed in the graph. +- we may move away from the string templating method for providing suite + and task attributes to trigger function arguments. + + +Filesystem Events? +------------------ + +Cylc does not have built-in support for triggering off of filesystem events +such as ``inotify`` on Linux. There is no cross-platform standard for +this, and in any case filesystem events are not very useful in HPC cluster +environments where events can only be detected at the specific node on which +they were generated. + + +Continuous Event Watchers? +-------------------------- + +For some applications a persistent process that continually monitors the +external world is better than discrete periodic checking. This would be more +difficult to support as a plugin mechanism in Cylc, but we may decide to do it +in the future. In the meantime, consider implementing a small daemon process as +the watcher (e.g. to watch continuously for filesystem events) and have your +Cylc trigger functions interact with it. + + +.. _Old-Style External Triggers: + +Old-Style External Triggers (Deprecated) +---------------------------------------- + +.. note:: + + This mechanism is now technically deprecated by the newer external + trigger functions (:ref:`External Triggers`). (However we don't recommend + wholesale conversion to the new method yet, until its interface has + stabilized - see :ref:`Current Trigger Function Limitations`.) 
+ +These old-style external triggers are hidden task prerequisites that must be +satisfied by using the ``cylc ext-trigger`` client command to send an +associated pre-defined event message to the suite along with an ID string that +distinguishes one instance of the event from another (the name of the target +task and its current cycle point are not required). The event ID is just an +arbitrary string to Cylc, but it can be used to identify something associated +with the event to the suite - such as the filename of a new +externally-generated dataset. When the suite server program receives the event +notification it will trigger the next instance of any task waiting on that +trigger (whatever its cycle point) and then broadcast +(see :ref:`cylc-broadcast`) the event ID to the cycle point of the triggered +task as ``$CYLC_EXT_TRIGGER_ID``. Downstream tasks with the same cycle +point therefore know the new event ID too and can use it, if they need to, to +identify the same new dataset. In this way a whole workflow can be associated +with each new dataset, and multiple datasets can be processed in parallel if +they happen to arrive in quick succession. + +An externally-triggered task must register the event it waits on in the suite +scheduling section: + +.. code-block:: cylc + + # suite "sat-proc" + [scheduling] + cycling mode = integer + initial cycle point = 1 + [[special tasks]] + external-trigger = get-data("new sat X data avail") + [[dependencies]] + [[[P1]]] + graph = get-data => conv-data => products + +Then, each time a new dataset arrives the external detection system should +notify the suite like this: + +.. code-block:: bash + + $ cylc ext-trigger sat-proc "new sat X data avail" passX12334a + +where "sat-proc" is the suite name and "passX12334a" is the ID string for +the new event. The suite passphrase must be installed on the triggering account. + +.. note:: + + Only one task in a suite can trigger off a particular external message.
+ Other tasks can trigger off the externally triggered task as required, + of course. + +``<cylc-dir>/etc/examples/satellite/ext-triggers/suite.rc`` is a working +example of a simulated satellite processing suite. + +External triggers are not normally needed in date-time cycling suites driven +by real time data that comes in at regular intervals. In these cases a data +retrieval task can be clock-triggered (and have appropriate retry intervals) to +submit at the expected data arrival time, so little time is wasted in polling. +However, if the arrival time of the cycle-point-specific data is highly +variable, external triggering may be used with the cycle point embedded in the +message: + +.. code-block:: cylc + + # suite "data-proc" + [scheduling] + initial cycle point = 20150125T00 + final cycle point = 20150126T00 + [[special tasks]] + external-trigger = get-data("data arrived for $CYLC_TASK_CYCLE_POINT") + [[dependencies]] + [[[T00]]] + graph = init-process => get-data => post-process + +Once the variable-length waiting is finished, an external detection system +should notify the suite like this: + +.. code-block:: bash + + $ cylc ext-trigger data-proc "data arrived for 20150126T00" passX12334a + +where "data-proc" is the suite name, the cycle point has replaced the +variable in the trigger string, and "passX12334a" is the ID string for +the new event. The suite passphrase must be installed on the triggering +account. In this case, the event will trigger for the second cycle point but +not the first because of the cycle-point matching. diff --git a/doc/src/global-site-user-conf.rst b/doc/src/global-site-user-conf.rst new file mode 100644 index 00000000000..dc3e03ecd09 --- /dev/null +++ b/doc/src/global-site-user-conf.rst @@ -0,0 +1,28 @@ +.. _SiteAndUserConfiguration: + +Global (Site, User) Configuration Files +======================================= + +Cylc site and user global configuration files contain settings that affect all +suites.
Some of these, such as the range of network ports used by cylc, +should be set at site level. Legal items, values, and system defaults are +documented in :ref:`SiteRCReference`. + +.. code-block:: bash + + # cylc site global config file + <cylc-dir>/etc/global.rc + +Others, such as the preferred text editor for suite configurations, +can be overridden by users: + +.. code-block:: bash + + # cylc user global config file + ~/.cylc/$(cylc --version)/global.rc # e.g. ~/.cylc/7.7.0/global.rc + +The file ``<cylc-dir>/etc/global.rc.eg`` contains instructions on how +to generate and install site and user global config files: + +.. literalinclude:: ../../etc/global.rc.eg + :language: none diff --git a/doc/src/graphics/cylc-favicon.ico b/doc/src/graphics/cylc-favicon.ico new file mode 100644 index 00000000000..b225083b917 Binary files /dev/null and b/doc/src/graphics/cylc-favicon.ico differ diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlRunning.png b/doc/src/graphics/png/orig/QuickStartA-ControlRunning.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlRunning.png rename to doc/src/graphics/png/orig/QuickStartA-ControlRunning.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlStalled.png b/doc/src/graphics/png/orig/QuickStartA-ControlStalled.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlStalled.png rename to doc/src/graphics/png/orig/QuickStartA-ControlStalled.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlStart00.png b/doc/src/graphics/png/orig/QuickStartA-ControlStart00.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlStart00.png rename to doc/src/graphics/png/orig/QuickStartA-ControlStart00.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlStart06.png b/doc/src/graphics/png/orig/QuickStartA-ControlStart06.png similarity index 100%
rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ControlStart06.png rename to doc/src/graphics/png/orig/QuickStartA-ControlStart06.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ModelState.png b/doc/src/graphics/png/orig/QuickStartA-ModelState.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-ModelState.png rename to doc/src/graphics/png/orig/QuickStartA-ModelState.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-graph18.png b/doc/src/graphics/png/orig/QuickStartA-graph18.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartA-graph18.png rename to doc/src/graphics/png/orig/QuickStartA-graph18.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/QuickStartB-graph18.png b/doc/src/graphics/png/orig/QuickStartB-graph18.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/QuickStartB-graph18.png rename to doc/src/graphics/png/orig/QuickStartB-graph18.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/conditional-triggers.png b/doc/src/graphics/png/orig/conditional-triggers.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/conditional-triggers.png rename to doc/src/graphics/png/orig/conditional-triggers.png diff --git a/doc/src/graphics/png/orig/cylc-favicon.png b/doc/src/graphics/png/orig/cylc-favicon.png new file mode 100644 index 00000000000..5a09687e42e Binary files /dev/null and b/doc/src/graphics/png/orig/cylc-favicon.png differ diff --git a/doc/src/cylc-logo.png b/doc/src/graphics/png/orig/cylc-logo.png similarity index 100% rename from doc/src/cylc-logo.png rename to doc/src/graphics/png/orig/cylc-logo.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/cylc-review-screenshot.png b/doc/src/graphics/png/orig/cylc-review-screenshot.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/cylc-review-screenshot.png rename to 
doc/src/graphics/png/orig/cylc-review-screenshot.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/dep-eg-1.png b/doc/src/graphics/png/orig/dep-eg-1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/dep-eg-1.png rename to doc/src/graphics/png/orig/dep-eg-1.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/dep-multi-cycle.png b/doc/src/graphics/png/orig/dep-multi-cycle.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/dep-multi-cycle.png rename to doc/src/graphics/png/orig/dep-multi-cycle.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/dep-one-cycle.png b/doc/src/graphics/png/orig/dep-one-cycle.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/dep-one-cycle.png rename to doc/src/graphics/png/orig/dep-one-cycle.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/dep-two-cycles-linked.png b/doc/src/graphics/png/orig/dep-two-cycles-linked.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/dep-two-cycles-linked.png rename to doc/src/graphics/png/orig/dep-two-cycles-linked.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/dep-two-cycles.png b/doc/src/graphics/png/orig/dep-two-cycles.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/dep-two-cycles.png rename to doc/src/graphics/png/orig/dep-two-cycles.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/ecox-1.png b/doc/src/graphics/png/orig/ecox-1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/ecox-1.png rename to doc/src/graphics/png/orig/ecox-1.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/eg2-dynamic.png b/doc/src/graphics/png/orig/eg2-dynamic.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/eg2-dynamic.png rename to doc/src/graphics/png/orig/eg2-dynamic.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/eg2-static.png 
b/doc/src/graphics/png/orig/eg2-static.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/eg2-static.png rename to doc/src/graphics/png/orig/eg2-static.png diff --git a/doc/src/suite-design-guide/resources/png/failure-recovery.png b/doc/src/graphics/png/orig/failure-recovery.png similarity index 100% rename from doc/src/suite-design-guide/resources/png/failure-recovery.png rename to doc/src/graphics/png/orig/failure-recovery.png diff --git a/doc/src/suite-design-guide/resources/png/fam-to-fam-1.png b/doc/src/graphics/png/orig/fam-to-fam-1.png similarity index 100% rename from doc/src/suite-design-guide/resources/png/fam-to-fam-1.png rename to doc/src/graphics/png/orig/fam-to-fam-1.png diff --git a/doc/src/suite-design-guide/resources/png/fam-to-fam-2.png b/doc/src/graphics/png/orig/fam-to-fam-2.png similarity index 100% rename from doc/src/suite-design-guide/resources/png/fam-to-fam-2.png rename to doc/src/graphics/png/orig/fam-to-fam-2.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/gcylc-graph-and-dot-views.png b/doc/src/graphics/png/orig/gcylc-graph-and-dot-views.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/gcylc-graph-and-dot-views.png rename to doc/src/graphics/png/orig/gcylc-graph-and-dot-views.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/gcylc-text-view.png b/doc/src/graphics/png/orig/gcylc-text-view.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/gcylc-text-view.png rename to doc/src/graphics/png/orig/gcylc-text-view.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/ghost-node-example.png b/doc/src/graphics/png/orig/ghost-node-example.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/ghost-node-example.png rename to doc/src/graphics/png/orig/ghost-node-example.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/gscan.png b/doc/src/graphics/png/orig/gscan.png similarity index 100% rename from 
doc/src/cylc-user-guide/graphics/png/orig/gscan.png rename to doc/src/graphics/png/orig/gscan.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/inherit-2.png b/doc/src/graphics/png/orig/inherit-2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/inherit-2.png rename to doc/src/graphics/png/orig/inherit-2.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/inherit-3.png b/doc/src/graphics/png/orig/inherit-3.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/inherit-3.png rename to doc/src/graphics/png/orig/inherit-3.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/inherit-4.png b/doc/src/graphics/png/orig/inherit-4.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/inherit-4.png rename to doc/src/graphics/png/orig/inherit-4.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/inherit-5.png b/doc/src/graphics/png/orig/inherit-5.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/inherit-5.png rename to doc/src/graphics/png/orig/inherit-5.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/inherit-6.png b/doc/src/graphics/png/orig/inherit-6.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/inherit-6.png rename to doc/src/graphics/png/orig/inherit-6.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/inherit-7.png b/doc/src/graphics/png/orig/inherit-7.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/inherit-7.png rename to doc/src/graphics/png/orig/inherit-7.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/jinja2-ensemble-graph.png b/doc/src/graphics/png/orig/jinja2-ensemble-graph.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/jinja2-ensemble-graph.png rename to doc/src/graphics/png/orig/jinja2-ensemble-graph.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/jinja2-suite-graph.png 
b/doc/src/graphics/png/orig/jinja2-suite-graph.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/jinja2-suite-graph.png rename to doc/src/graphics/png/orig/jinja2-suite-graph.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/logo.png b/doc/src/graphics/png/orig/logo.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/logo.png rename to doc/src/graphics/png/orig/logo.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/niwa-colour-small.png b/doc/src/graphics/png/orig/niwa-colour-small.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/niwa-colour-small.png rename to doc/src/graphics/png/orig/niwa-colour-small.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/niwa-colour.png b/doc/src/graphics/png/orig/niwa-colour.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/niwa-colour.png rename to doc/src/graphics/png/orig/niwa-colour.png diff --git a/doc/src/suite-design-guide/resources/png/param-1.png b/doc/src/graphics/png/orig/param-1.png similarity index 100% rename from doc/src/suite-design-guide/resources/png/param-1.png rename to doc/src/graphics/png/orig/param-1.png diff --git a/doc/src/suite-design-guide/resources/png/param-2.png b/doc/src/graphics/png/orig/param-2.png similarity index 100% rename from doc/src/suite-design-guide/resources/png/param-2.png rename to doc/src/graphics/png/orig/param-2.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/params1.png b/doc/src/graphics/png/orig/params1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/params1.png rename to doc/src/graphics/png/orig/params1.png diff --git a/doc/src/suite-design-guide/resources/png/rose-logo.png b/doc/src/graphics/png/orig/rose-logo.png similarity index 100% rename from doc/src/suite-design-guide/resources/png/rose-logo.png rename to doc/src/graphics/png/orig/rose-logo.png diff --git 
a/doc/src/cylc-user-guide/graphics/png/orig/satellite.png b/doc/src/graphics/png/orig/satellite.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/satellite.png rename to doc/src/graphics/png/orig/satellite.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/suicide.png b/doc/src/graphics/png/orig/suicide.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/suicide.png rename to doc/src/graphics/png/orig/suicide.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/suite-log.png b/doc/src/graphics/png/orig/suite-log.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/suite-log.png rename to doc/src/graphics/png/orig/suite-log.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/suite-output.png b/doc/src/graphics/png/orig/suite-output.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/suite-output.png rename to doc/src/graphics/png/orig/suite-output.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/suiterc-jinja2.png b/doc/src/graphics/png/orig/suiterc-jinja2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/suiterc-jinja2.png rename to doc/src/graphics/png/orig/suiterc-jinja2.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/task-pool.png b/doc/src/graphics/png/orig/task-pool.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/task-pool.png rename to doc/src/graphics/png/orig/task-pool.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/test1.png b/doc/src/graphics/png/orig/test1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/test1.png rename to doc/src/graphics/png/orig/test1.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/test2.png b/doc/src/graphics/png/orig/test2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/test2.png rename to doc/src/graphics/png/orig/test2.png diff --git 
a/doc/src/cylc-user-guide/graphics/png/orig/test4.png b/doc/src/graphics/png/orig/test4.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/test4.png rename to doc/src/graphics/png/orig/test4.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/test5.png b/doc/src/graphics/png/orig/test5.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/test5.png rename to doc/src/graphics/png/orig/test5.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/test6.png b/doc/src/graphics/png/orig/test6.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/test6.png rename to doc/src/graphics/png/orig/test6.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-one-a.png b/doc/src/graphics/png/orig/timeline-one-a.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/timeline-one-a.png rename to doc/src/graphics/png/orig/timeline-one-a.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-one-c.png b/doc/src/graphics/png/orig/timeline-one-c.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/timeline-one-c.png rename to doc/src/graphics/png/orig/timeline-one-c.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-one.png b/doc/src/graphics/png/orig/timeline-one.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/timeline-one.png rename to doc/src/graphics/png/orig/timeline-one.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-three.png b/doc/src/graphics/png/orig/timeline-three.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/timeline-three.png rename to doc/src/graphics/png/orig/timeline-three.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-two-cycles-optimal.png b/doc/src/graphics/png/orig/timeline-two-cycles-optimal.png similarity index 100% rename from 
doc/src/cylc-user-guide/graphics/png/orig/timeline-two-cycles-optimal.png rename to doc/src/graphics/png/orig/timeline-two-cycles-optimal.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-two.png b/doc/src/graphics/png/orig/timeline-two.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/timeline-two.png rename to doc/src/graphics/png/orig/timeline-two.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/timeline-zero.png b/doc/src/graphics/png/orig/timeline-zero.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/timeline-zero.png rename to doc/src/graphics/png/orig/timeline-zero.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-cyc-int.png b/doc/src/graphics/png/orig/tut-cyc-int.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-cyc-int.png rename to doc/src/graphics/png/orig/tut-cyc-int.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-four.png b/doc/src/graphics/png/orig/tut-four.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-four.png rename to doc/src/graphics/png/orig/tut-four.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-hello-multi-1.png b/doc/src/graphics/png/orig/tut-hello-multi-1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-hello-multi-1.png rename to doc/src/graphics/png/orig/tut-hello-multi-1.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-hello-multi-2.png b/doc/src/graphics/png/orig/tut-hello-multi-2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-hello-multi-2.png rename to doc/src/graphics/png/orig/tut-hello-multi-2.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-hello-multi-3.png b/doc/src/graphics/png/orig/tut-hello-multi-3.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-hello-multi-3.png rename to 
doc/src/graphics/png/orig/tut-hello-multi-3.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-one.png b/doc/src/graphics/png/orig/tut-one.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-one.png rename to doc/src/graphics/png/orig/tut-one.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-three.png b/doc/src/graphics/png/orig/tut-three.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-three.png rename to doc/src/graphics/png/orig/tut-three.png diff --git a/doc/src/cylc-user-guide/graphics/png/orig/tut-two.png b/doc/src/graphics/png/orig/tut-two.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/orig/tut-two.png rename to doc/src/graphics/png/orig/tut-two.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlRunning.png b/doc/src/graphics/png/scaled/QuickStartA-ControlRunning.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlRunning.png rename to doc/src/graphics/png/scaled/QuickStartA-ControlRunning.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlStalled.png b/doc/src/graphics/png/scaled/QuickStartA-ControlStalled.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlStalled.png rename to doc/src/graphics/png/scaled/QuickStartA-ControlStalled.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlStart00.png b/doc/src/graphics/png/scaled/QuickStartA-ControlStart00.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlStart00.png rename to doc/src/graphics/png/scaled/QuickStartA-ControlStart00.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlStart06.png b/doc/src/graphics/png/scaled/QuickStartA-ControlStart06.png similarity index 100% rename from 
doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ControlStart06.png rename to doc/src/graphics/png/scaled/QuickStartA-ControlStart06.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ModelState.png b/doc/src/graphics/png/scaled/QuickStartA-ModelState.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-ModelState.png rename to doc/src/graphics/png/scaled/QuickStartA-ModelState.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-graph18.png b/doc/src/graphics/png/scaled/QuickStartA-graph18.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/QuickStartA-graph18.png rename to doc/src/graphics/png/scaled/QuickStartA-graph18.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/QuickStartB-graph18.png b/doc/src/graphics/png/scaled/QuickStartB-graph18.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/QuickStartB-graph18.png rename to doc/src/graphics/png/scaled/QuickStartB-graph18.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/conditional-triggers.png b/doc/src/graphics/png/scaled/conditional-triggers.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/conditional-triggers.png rename to doc/src/graphics/png/scaled/conditional-triggers.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/dep-eg-1.png b/doc/src/graphics/png/scaled/dep-eg-1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/dep-eg-1.png rename to doc/src/graphics/png/scaled/dep-eg-1.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/dep-multi-cycle.png b/doc/src/graphics/png/scaled/dep-multi-cycle.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/dep-multi-cycle.png rename to doc/src/graphics/png/scaled/dep-multi-cycle.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/dep-one-cycle.png 
b/doc/src/graphics/png/scaled/dep-one-cycle.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/dep-one-cycle.png rename to doc/src/graphics/png/scaled/dep-one-cycle.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/dep-two-cycles-linked.png b/doc/src/graphics/png/scaled/dep-two-cycles-linked.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/dep-two-cycles-linked.png rename to doc/src/graphics/png/scaled/dep-two-cycles-linked.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/dep-two-cycles.png b/doc/src/graphics/png/scaled/dep-two-cycles.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/dep-two-cycles.png rename to doc/src/graphics/png/scaled/dep-two-cycles.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/ecox-1.png b/doc/src/graphics/png/scaled/ecox-1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/ecox-1.png rename to doc/src/graphics/png/scaled/ecox-1.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/eg2-dynamic.png b/doc/src/graphics/png/scaled/eg2-dynamic.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/eg2-dynamic.png rename to doc/src/graphics/png/scaled/eg2-dynamic.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/eg2-static.png b/doc/src/graphics/png/scaled/eg2-static.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/eg2-static.png rename to doc/src/graphics/png/scaled/eg2-static.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/gcylc-graph-and-dot-views.png b/doc/src/graphics/png/scaled/gcylc-graph-and-dot-views.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/gcylc-graph-and-dot-views.png rename to doc/src/graphics/png/scaled/gcylc-graph-and-dot-views.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/gcylc-text-view.png 
b/doc/src/graphics/png/scaled/gcylc-text-view.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/gcylc-text-view.png rename to doc/src/graphics/png/scaled/gcylc-text-view.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/gscan.png b/doc/src/graphics/png/scaled/gscan.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/gscan.png rename to doc/src/graphics/png/scaled/gscan.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/inherit-2.png b/doc/src/graphics/png/scaled/inherit-2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/inherit-2.png rename to doc/src/graphics/png/scaled/inherit-2.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/inherit-3.png b/doc/src/graphics/png/scaled/inherit-3.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/inherit-3.png rename to doc/src/graphics/png/scaled/inherit-3.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/inherit-4.png b/doc/src/graphics/png/scaled/inherit-4.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/inherit-4.png rename to doc/src/graphics/png/scaled/inherit-4.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/inherit-5.png b/doc/src/graphics/png/scaled/inherit-5.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/inherit-5.png rename to doc/src/graphics/png/scaled/inherit-5.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/inherit-6.png b/doc/src/graphics/png/scaled/inherit-6.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/inherit-6.png rename to doc/src/graphics/png/scaled/inherit-6.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/inherit-7.png b/doc/src/graphics/png/scaled/inherit-7.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/inherit-7.png rename to 
doc/src/graphics/png/scaled/inherit-7.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/jinja2-ensemble-graph.png b/doc/src/graphics/png/scaled/jinja2-ensemble-graph.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/jinja2-ensemble-graph.png rename to doc/src/graphics/png/scaled/jinja2-ensemble-graph.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/jinja2-suite-graph.png b/doc/src/graphics/png/scaled/jinja2-suite-graph.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/jinja2-suite-graph.png rename to doc/src/graphics/png/scaled/jinja2-suite-graph.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/logo.png b/doc/src/graphics/png/scaled/logo.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/logo.png rename to doc/src/graphics/png/scaled/logo.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/niwa-colour-small.png b/doc/src/graphics/png/scaled/niwa-colour-small.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/niwa-colour-small.png rename to doc/src/graphics/png/scaled/niwa-colour-small.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/niwa-colour.png b/doc/src/graphics/png/scaled/niwa-colour.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/niwa-colour.png rename to doc/src/graphics/png/scaled/niwa-colour.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/params1.png b/doc/src/graphics/png/scaled/params1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/params1.png rename to doc/src/graphics/png/scaled/params1.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/satellite.png b/doc/src/graphics/png/scaled/satellite.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/satellite.png rename to doc/src/graphics/png/scaled/satellite.png diff --git 
a/doc/src/cylc-user-guide/graphics/png/scaled/suicide.png b/doc/src/graphics/png/scaled/suicide.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/suicide.png rename to doc/src/graphics/png/scaled/suicide.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/suite-log.png b/doc/src/graphics/png/scaled/suite-log.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/suite-log.png rename to doc/src/graphics/png/scaled/suite-log.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/suite-output.png b/doc/src/graphics/png/scaled/suite-output.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/suite-output.png rename to doc/src/graphics/png/scaled/suite-output.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/suiterc-jinja2.png b/doc/src/graphics/png/scaled/suiterc-jinja2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/suiterc-jinja2.png rename to doc/src/graphics/png/scaled/suiterc-jinja2.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/task-pool.png b/doc/src/graphics/png/scaled/task-pool.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/task-pool.png rename to doc/src/graphics/png/scaled/task-pool.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/test1.png b/doc/src/graphics/png/scaled/test1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/test1.png rename to doc/src/graphics/png/scaled/test1.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/test2.png b/doc/src/graphics/png/scaled/test2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/test2.png rename to doc/src/graphics/png/scaled/test2.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/test4.png b/doc/src/graphics/png/scaled/test4.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/test4.png rename to 
doc/src/graphics/png/scaled/test4.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/test5.png b/doc/src/graphics/png/scaled/test5.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/test5.png rename to doc/src/graphics/png/scaled/test5.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/test6.png b/doc/src/graphics/png/scaled/test6.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/test6.png rename to doc/src/graphics/png/scaled/test6.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-one-a.png b/doc/src/graphics/png/scaled/timeline-one-a.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-one-a.png rename to doc/src/graphics/png/scaled/timeline-one-a.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-one-c.png b/doc/src/graphics/png/scaled/timeline-one-c.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-one-c.png rename to doc/src/graphics/png/scaled/timeline-one-c.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-one.png b/doc/src/graphics/png/scaled/timeline-one.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-one.png rename to doc/src/graphics/png/scaled/timeline-one.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-three.png b/doc/src/graphics/png/scaled/timeline-three.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-three.png rename to doc/src/graphics/png/scaled/timeline-three.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-two-cycles-optimal.png b/doc/src/graphics/png/scaled/timeline-two-cycles-optimal.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-two-cycles-optimal.png rename to doc/src/graphics/png/scaled/timeline-two-cycles-optimal.png diff --git 
a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-two.png b/doc/src/graphics/png/scaled/timeline-two.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-two.png rename to doc/src/graphics/png/scaled/timeline-two.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/timeline-zero.png b/doc/src/graphics/png/scaled/timeline-zero.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/timeline-zero.png rename to doc/src/graphics/png/scaled/timeline-zero.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-cyc-int.png b/doc/src/graphics/png/scaled/tut-cyc-int.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-cyc-int.png rename to doc/src/graphics/png/scaled/tut-cyc-int.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-four.png b/doc/src/graphics/png/scaled/tut-four.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-four.png rename to doc/src/graphics/png/scaled/tut-four.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-hello-multi-1.png b/doc/src/graphics/png/scaled/tut-hello-multi-1.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-hello-multi-1.png rename to doc/src/graphics/png/scaled/tut-hello-multi-1.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-hello-multi-2.png b/doc/src/graphics/png/scaled/tut-hello-multi-2.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-hello-multi-2.png rename to doc/src/graphics/png/scaled/tut-hello-multi-2.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-hello-multi-3.png b/doc/src/graphics/png/scaled/tut-hello-multi-3.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-hello-multi-3.png rename to doc/src/graphics/png/scaled/tut-hello-multi-3.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-one.png 
b/doc/src/graphics/png/scaled/tut-one.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-one.png rename to doc/src/graphics/png/scaled/tut-one.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-three.png b/doc/src/graphics/png/scaled/tut-three.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-three.png rename to doc/src/graphics/png/scaled/tut-three.png diff --git a/doc/src/cylc-user-guide/graphics/png/scaled/tut-two.png b/doc/src/graphics/png/scaled/tut-two.png similarity index 100% rename from doc/src/cylc-user-guide/graphics/png/scaled/tut-two.png rename to doc/src/graphics/png/scaled/tut-two.png diff --git a/doc/src/cylc-user-guide/graphics/scale-images.sh b/doc/src/graphics/scale-images.sh similarity index 95% rename from doc/src/cylc-user-guide/graphics/scale-images.sh rename to doc/src/graphics/scale-images.sh index 6b25ed832cc..a4d9fdbb036 100755 --- a/doc/src/cylc-user-guide/graphics/scale-images.sh +++ b/doc/src/graphics/scale-images.sh @@ -1,7 +1,7 @@ #!/bin/bash # THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. +# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors. 
# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/doc/src/graphics/sdg-images/failure-recovery.png b/doc/src/graphics/sdg-images/failure-recovery.png new file mode 100644 index 00000000000..625a11bd3c4 Binary files /dev/null and b/doc/src/graphics/sdg-images/failure-recovery.png differ diff --git a/doc/src/graphics/sdg-images/fam-to-fam-1.png b/doc/src/graphics/sdg-images/fam-to-fam-1.png new file mode 100644 index 00000000000..e263198d12a Binary files /dev/null and b/doc/src/graphics/sdg-images/fam-to-fam-1.png differ diff --git a/doc/src/graphics/sdg-images/fam-to-fam-2.png b/doc/src/graphics/sdg-images/fam-to-fam-2.png new file mode 100644 index 00000000000..a0c2b97b3bc Binary files /dev/null and b/doc/src/graphics/sdg-images/fam-to-fam-2.png differ diff --git a/doc/src/graphics/sdg-images/param-1.png b/doc/src/graphics/sdg-images/param-1.png new file mode 100644 index 00000000000..6f839b0bd28 Binary files /dev/null and b/doc/src/graphics/sdg-images/param-1.png differ diff --git a/doc/src/graphics/sdg-images/param-2.png b/doc/src/graphics/sdg-images/param-2.png new file mode 100644 index 00000000000..5c5245176f4 Binary files /dev/null and b/doc/src/graphics/sdg-images/param-2.png differ diff --git a/doc/src/graphics/sdg-images/rose-logo.png b/doc/src/graphics/sdg-images/rose-logo.png new file mode 100644 index 00000000000..5007efebe9c Binary files /dev/null and b/doc/src/graphics/sdg-images/rose-logo.png differ diff --git a/doc/src/cylc-user-guide/graphics/vector/README.txt b/doc/src/graphics/vector/README.txt similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/README.txt rename to doc/src/graphics/vector/README.txt diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/dep-multi-cycle.eps b/doc/src/graphics/vector/eps/dep-multi-cycle.eps similarity index 100% rename from 
doc/src/cylc-user-guide/graphics/vector/eps/dep-multi-cycle.eps rename to doc/src/graphics/vector/eps/dep-multi-cycle.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/dep-one-cycle.eps b/doc/src/graphics/vector/eps/dep-one-cycle.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/dep-one-cycle.eps rename to doc/src/graphics/vector/eps/dep-one-cycle.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/dep-two-cycles-linked.eps b/doc/src/graphics/vector/eps/dep-two-cycles-linked.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/dep-two-cycles-linked.eps rename to doc/src/graphics/vector/eps/dep-two-cycles-linked.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/dep-two-cycles.eps b/doc/src/graphics/vector/eps/dep-two-cycles.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/dep-two-cycles.eps rename to doc/src/graphics/vector/eps/dep-two-cycles.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/task-pool.eps b/doc/src/graphics/vector/eps/task-pool.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/task-pool.eps rename to doc/src/graphics/vector/eps/task-pool.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-one-a.eps b/doc/src/graphics/vector/eps/timeline-one-a.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/timeline-one-a.eps rename to doc/src/graphics/vector/eps/timeline-one-a.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-one-c.eps b/doc/src/graphics/vector/eps/timeline-one-c.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/timeline-one-c.eps rename to doc/src/graphics/vector/eps/timeline-one-c.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-one.eps b/doc/src/graphics/vector/eps/timeline-one.eps similarity index 100% rename from 
doc/src/cylc-user-guide/graphics/vector/eps/timeline-one.eps rename to doc/src/graphics/vector/eps/timeline-one.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-three.eps b/doc/src/graphics/vector/eps/timeline-three.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/timeline-three.eps rename to doc/src/graphics/vector/eps/timeline-three.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-two-cycles-optimal.eps b/doc/src/graphics/vector/eps/timeline-two-cycles-optimal.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/timeline-two-cycles-optimal.eps rename to doc/src/graphics/vector/eps/timeline-two-cycles-optimal.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-two.eps b/doc/src/graphics/vector/eps/timeline-two.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/timeline-two.eps rename to doc/src/graphics/vector/eps/timeline-two.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/eps/timeline-zero.eps b/doc/src/graphics/vector/eps/timeline-zero.eps similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/eps/timeline-zero.eps rename to doc/src/graphics/vector/eps/timeline-zero.eps diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/dep-multi-cycle.svg b/doc/src/graphics/vector/svg/dep-multi-cycle.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/dep-multi-cycle.svg rename to doc/src/graphics/vector/svg/dep-multi-cycle.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/dep-one-cycle.svg b/doc/src/graphics/vector/svg/dep-one-cycle.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/dep-one-cycle.svg rename to doc/src/graphics/vector/svg/dep-one-cycle.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/dep-two-cycles-linked.svg b/doc/src/graphics/vector/svg/dep-two-cycles-linked.svg similarity index 100% rename 
from doc/src/cylc-user-guide/graphics/vector/svg/dep-two-cycles-linked.svg rename to doc/src/graphics/vector/svg/dep-two-cycles-linked.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/dep-two-cycles.svg b/doc/src/graphics/vector/svg/dep-two-cycles.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/dep-two-cycles.svg rename to doc/src/graphics/vector/svg/dep-two-cycles.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/task-pool.svg b/doc/src/graphics/vector/svg/task-pool.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/task-pool.svg rename to doc/src/graphics/vector/svg/task-pool.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-one-a.svg b/doc/src/graphics/vector/svg/timeline-one-a.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-one-a.svg rename to doc/src/graphics/vector/svg/timeline-one-a.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-one-c.svg b/doc/src/graphics/vector/svg/timeline-one-c.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-one-c.svg rename to doc/src/graphics/vector/svg/timeline-one-c.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-one.svg b/doc/src/graphics/vector/svg/timeline-one.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-one.svg rename to doc/src/graphics/vector/svg/timeline-one.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-three-0.svg b/doc/src/graphics/vector/svg/timeline-three-0.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-three-0.svg rename to doc/src/graphics/vector/svg/timeline-three-0.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-three.svg b/doc/src/graphics/vector/svg/timeline-three.svg similarity index 100% rename from 
doc/src/cylc-user-guide/graphics/vector/svg/timeline-three.svg rename to doc/src/graphics/vector/svg/timeline-three.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-two-cycles-optimal.svg b/doc/src/graphics/vector/svg/timeline-two-cycles-optimal.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-two-cycles-optimal.svg rename to doc/src/graphics/vector/svg/timeline-two-cycles-optimal.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-two.svg b/doc/src/graphics/vector/svg/timeline-two.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-two.svg rename to doc/src/graphics/vector/svg/timeline-two.svg diff --git a/doc/src/cylc-user-guide/graphics/vector/svg/timeline-zero.svg b/doc/src/graphics/vector/svg/timeline-zero.svg similarity index 100% rename from doc/src/cylc-user-guide/graphics/vector/svg/timeline-zero.svg rename to doc/src/graphics/vector/svg/timeline-zero.svg diff --git a/doc/src/index.rst b/doc/src/index.rst new file mode 100644 index 00000000000..960362885cc --- /dev/null +++ b/doc/src/index.rst @@ -0,0 +1,45 @@ +.. cylc documentation master file. + +Cylc documentation +================== + +**The Cylc Suite Engine** + +Current release: |release| + +*Released Under the GNU GPL v3.0 Software License* + +*Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors.* + +----------- + +Cylc ("*silk*") is a workflow engine for cycling systems - it orchestrates +distributed suites of interdependent cycling tasks that may continue to run +indefinitely. + +----------- + +**Table of Contents**: + +.. 
toctree:: + :maxdepth: 3 + :numbered: + + introduction + screenshots + installation + terminology + workflows + global-site-user-conf + tutorial + suite-name-reg + suite-config + task-implementation + task-job-submission + external-triggers + running-suites + suite-storage-etc + + appendices/appendices-master + + suite-design-guide/suite-design-guide-master diff --git a/doc/src/installation.rst b/doc/src/installation.rst new file mode 100644 index 00000000000..966417297ca --- /dev/null +++ b/doc/src/installation.rst @@ -0,0 +1,279 @@ +.. _Requirements: + +Installation +============ + +Cylc runs on Linux. It is tested quite thoroughly on modern RHEL and Ubuntu +distros. Some users have also managed to make it work on other Unix variants +including Apple OS X, but they are not officially tested and supported. + +Third-Party Software Packages +----------------------------- + +**Python 2** ``>=`` **2.6** is required. **Python 2** ``>=`` **2.7.9** is +recommended for the best security. `Python <https://www.python.org/>`_ 2 should +already be installed on your Linux system. + +For Cylc's HTTPS communications layer: + +- `OpenSSL <https://www.openssl.org/>`_ +- `pyOpenSSL <https://www.pyopenssl.org/>`_ +- `python-requests <https://requests.readthedocs.io/>`_ +- **python-urllib3** - should be bundled with python-requests + +The following packages are highly recommended, but are technically optional as +you can construct and run suites without dependency graph visualisation or +the Cylc GUIs: + +- `PyGTK <http://www.pygtk.org/>`_ - GUI toolkit. + + .. note:: + + PyGTK typically comes with your system Python. It is allegedly quite + difficult to install if you need to do so for another Python version. + +- `Graphviz <https://www.graphviz.org/>`_ - graph layout engine (tested 2.36.0) +- `Pygraphviz <https://pygraphviz.github.io/>`_ - Python Graphviz interface + (tested 1.2). To build this you may need some *devel* packages too: + + - python-devel + - graphviz-devel + +The Cylc Review service does not need any additional packages. 
+ +The following packages are necessary for running all the tests in Cylc: + +- `mock <https://mock.readthedocs.io/>`_ + +To generate the HTML User Guide, you will need: + +- `Sphinx <https://www.sphinx-doc.org/>`_ of a compatible version, + ``>=`` **1.5.3** and ``<=`` **1.7.9**. + +In most modern Linux distributions all of the software above can be installed +via the system package manager. Otherwise download packages manually and follow +their native installation instructions. To check that all packages +are installed properly: + +.. code-block:: none + + $ cylc check-software + Checking your software... + + Individual results: + =============================================================================== + Package (version requirements) Outcome (version found) + =============================================================================== + *REQUIRED SOFTWARE* + Python (2.6+, <3).....................FOUND & min. version MET (2.7.12.final.0) + + *OPTIONAL SOFTWARE for the GUI & dependency graph visualisation* + Python:pygraphviz (any)...........................................NOT FOUND (-) + graphviz (any)...................................................FOUND (2.26.0) + Python:pygtk (2.0+)...............................................NOT FOUND (-) + + *OPTIONAL SOFTWARE for the HTTPS communications layer* + Python:requests (2.4.2+)......................FOUND & min. version MET (2.11.1) + Python:urllib3 (any)..............................................NOT FOUND (-) + Python:OpenSSL (any)..............................................NOT FOUND (-) + + *OPTIONAL SOFTWARE for the configuration templating* + Python:EmPy (any).................................................NOT FOUND (-) + + *OPTIONAL SOFTWARE for the HTML documentation* + Python:sphinx (1.5.3+).........................FOUND & min. 
version MET (1.7.0) + =============================================================================== + + Summary: + **************************** + Core requirements: ok + Full-functionality: not ok + **************************** + +If errors are reported then the packages concerned are either not installed or +not in your Python search path. + +.. note:: + + ``cylc check-software`` has become quite trivial as we've removed or + bundled some former dependencies, but in future we intend to make it + print a comprehensive list of library versions etc. to include with + bug reports. + +To check for specific packages only, supply these as arguments to the +``check-software`` command, either in the form used in the output of +the bare command, without any parent package prefix and colon, or +alternatively all in lower-case, should the given form contain capitals. For +example: + +.. code-block:: bash + + $ cylc check-software graphviz Python urllib3 + +With arguments, check-software provides an exit status indicating a +collective pass (zero) or a failure of that number of packages to satisfy +the requirements (non-zero integer). + +Software Bundled With Cylc +-------------------------- + +Cylc bundles several third party packages which do not need to be installed +separately. + +- `cherrypy <https://cherrypy.org/>`_ **6.0.2** (slightly modified): a pure + Python HTTP framework that we use as a web server for communication between + server processes (suite server programs) and client programs (running tasks, + GUIs, CLI commands). + + - Client communication is via the Python + `requests <https://requests.readthedocs.io/>`_ library if available + (recommended) or else pure Python via **urllib2**. + +- `Jinja2 <https://jinja.palletsprojects.com/>`_ **2.10**: a full featured template + engine for Python, and its dependency + `MarkupSafe <https://markupsafe.palletsprojects.com/>`_ **0.23**; both + BSD licensed. + +- the `xdot <https://github.com/jrfonseca/xdot.py>`_ graph viewer (modified), + LGPL licensed. + + +.. _InstallCylc: + +Installing Cylc +--------------- + +Cylc releases can be downloaded from `GitHub <https://cylc.github.io/>`_. 
+ +The wrapper script ``usr/bin/cylc`` should be installed to +the system executable search path (e.g. ``/usr/local/bin/``) and +modified slightly to point to a location such as ``/opt`` where +successive Cylc releases will be unpacked side by side. + +To install Cylc, unpack the release tarball in the right location, e.g. +``/opt/cylc-7.7.0``, type ``make`` inside the release +directory, and set site defaults - if necessary - in a site global config file +(below). + +Make a symbolic link from ``cylc`` to the latest installed version: +``ln -s /opt/cylc-7.7.0 /opt/cylc``. This will be invoked by the +central wrapper if a specific version is not requested. Otherwise, the +wrapper will attempt to invoke the Cylc version specified in +``$CYLC_VERSION``, e.g. ``CYLC_VERSION=7.7.0``. This variable +is automatically set in task job scripts to ensure that jobs use the same Cylc +version as their parent suite server program. It can also be set by users, +manually or in login scripts, to fix the Cylc version in their environment. + +Installing subsequent releases is just a matter of unpacking the new tarballs +next to the previous releases, running ``make`` in them, and copying +in (possibly with modifications) the previous site global config file. + + +.. _LocalInstall: + +Local User Installation +^^^^^^^^^^^^^^^^^^^^^^^ + +It is easy to install Cylc under your own user account if you don't have +root or sudo access to the system: just put the central Cylc wrapper in +``$HOME/bin/`` (making sure that is in your ``$PATH``) and +modify it to point to a directory such as ``$HOME/cylc/`` where you +will unpack and install release tarballs. Local installation of third party +dependencies like Graphviz is also possible, but that depends on the particular +installation methods used and is outside of the scope of this document. 
+ +Create A Site Config File +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Site and user global config files define some important parameters that affect +all suites, some of which may need to be customized for your site. +See :ref:`SiteAndUserConfiguration` for how to generate an initial site file and +where to install it. All legal site and user global config items are defined +in :ref:`SiteRCReference`. + + +.. _Configure Site Environment on Job Hosts: + +Configure Site Environment on Job Hosts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your users submit task jobs to hosts other than the hosts they use to run +their suites, you should ensure that the job hosts have the correct environment +for running cylc. A cylc suite generates task job scripts that normally invoke +``bash -l``, i.e. it will invoke bash as a login shell to run the job +script. Users and sites should ensure that their bash login profiles are able +to set up the correct environment for running cylc and their task jobs. + +Your site administrator may customise the environment for all task jobs by +adding a ``/etc/job-init-env.sh`` file and populate it with the +appropriate contents. If customisation is still required, you can add your own +``${HOME}/.cylc/job-init-env.sh`` file and populate it with the +appropriate contents. + +- ``${HOME}/.cylc/job-init-env.sh`` +- ``/etc/job-init-env.sh`` + +The job will attempt to source the first of these files it finds to set up its +environment. + + +.. _ConfiguringCylcReviewApache: + +Configuring Cylc Review Under Apache +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Cylc Review web service displays suite job logs and other information in +web pages - see :ref:`ViewingSuiteLogsCylcReview` and +:numref:`fig-review-screenshot`. It can run under a WSGI server (e.g. +Apache with ``mod_wsgi``) as a service for all users, or as an ad hoc +service under your own user account. 
+ +To run Cylc Review under Apache, install ``mod_wsgi`` and configure it +as follows, with paths modified appropriately: + +.. code-block:: apacheconf + + # Apache mod_wsgi config file, e.g.: + # Red Hat Linux: /etc/httpd/conf.d/cylc-wsgi.conf + # Ubuntu Linux: /etc/apache2/mods-available/wsgi.conf + # E.g. for /opt/cylc-7.8.1/ + WSGIPythonPath /opt/cylc-7.8.1/lib + WSGIScriptAlias /cylc-review /opt/cylc-7.8.1/bin/cylc-review + +(Note the ``WSGIScriptAlias`` determines the service URL under the +server root). + +And allow Apache access to the Cylc library: + +.. code-block:: apacheconf + + # Directory access, in main Apache config file, e.g.: + # Red Hat Linux: /etc/httpd/conf/httpd.conf + # Ubuntu Linux: /etc/apache2/apache2.conf + # E.g. for /opt/cylc-7.8.1/ + <Directory /opt/cylc-7.8.1/> + AllowOverride None + Require all granted + </Directory> + +The host running the Cylc Review web service, and the service itself (or the +user that it runs as) must be able to view the ``~/cylc-run`` directory +of all Cylc users. + +Use the web server log, e.g. ``/var/log/httpd/`` or ``/var/log/apache2/``, to +debug problems. + + +.. _RTAST: + +Automated Tests +^^^^^^^^^^^^^^^ + +The cylc test battery is primarily intended for developers to check that +changes to the source code don't break existing functionality. + +.. note:: + + Some test failures can be expected to result from suites timing out, + even if nothing is wrong, if you run too many tests in parallel. See + ``cylc test-battery --help``. diff --git a/doc/src/introduction.rst b/doc/src/introduction.rst new file mode 100644 index 00000000000..340bb225d48 --- /dev/null +++ b/doc/src/introduction.rst @@ -0,0 +1,340 @@ +.. _HowCylcWorks: + +Introduction: How Cylc Works +============================ + + +.. _SchedulingForecastSuites: + +Scheduling Forecast Suites +-------------------------- + +Environmental forecasting suites generate forecast products from a +potentially large group of interdependent scientific models and +associated data processing tasks. 
They are constrained by availability +of external driving data: typically one or more tasks will wait on real +time observations and/or model data from an external system, and these +will drive other downstream tasks, and so on. The dependency diagram for +a single forecast cycle point in such a system is a *Directed Acyclic Graph* +as shown in :numref:`fig-dep-one` (in our terminology, a +*forecast cycle point* is comprised of all tasks with a common *cycle point*, +which is the nominal analysis time or start time of the forecast +models in the group). In real time operation processing will consist of +a series of distinct forecast cycle points that are each initiated, after a +gap, by arrival of the new cycle point's external driving data. + +From a job scheduling perspective task execution order in such a system +must be carefully controlled in order to avoid dependency violations. +Ideally, each task should be queued for execution at the instant its +last prerequisite is satisfied; this is the best that can be done even +if queued tasks are not able to execute immediately because of resource +contention. + + +.. _EcoConnect: + +EcoConnect +---------- + +Cylc was developed for the EcoConnect Forecasting System at NIWA +(National Institute of Water and Atmospheric Research, New Zealand). +EcoConnect takes real time atmospheric and stream flow observations, and +operational global weather forecasts from the Met Office (UK), and uses +these to drive global sea state and regional data assimilating weather +models, which in turn drive regional sea state, storm surge, and +catchment river models, plus tide prediction, and a large number of +associated data collection, quality control, preprocessing, +post-processing, product generation, and archiving tasks [1]_. +The global sea state forecast runs once daily. 
The regional +weather forecast runs four times daily but +it supplies surface winds and pressure to several downstream models that +run only twice daily, and precipitation accumulations to catchment river +models that run on an hourly cycle assimilating real time stream flow +observations and using the most recently available regional weather +forecast. EcoConnect runs on heterogeneous distributed hardware, +including a massively parallel supercomputer and several Linux servers. + + +Dependence Between Tasks +------------------------ + + +.. _IntracycleDependence: + +Intra-cycle Dependence +^^^^^^^^^^^^^^^^^^^^^^ + + +Most dependence between tasks applies within a single forecast cycle +point. :numref:`fig-dep-one` shows the dependency diagram for a single +forecast cycle point of a simple example suite of three forecast models +(*a*, *b*, and *c*) and three post processing or product generation +tasks (*d*, *e* and *f*). A scheduler capable of handling this +must manage, within a single forecast cycle point, multiple parallel +streams of execution that branch when one task generates output for +several downstream tasks, and merge when one task takes input from several +upstream tasks. + +.. _fig-dep-one: + +.. figure:: graphics/png/orig/dep-one-cycle.png + :align: center + + A single cycle point dependency graph for a simple suite. + The dependency graph for a single forecast cycle point of a simple + example suite. Tasks *a*, *b*, and *c* represent forecast models, + *d*, *e* and *f* are post processing or product generation + tasks, and *x* represents external data that the upstream + forecast model depends on. + +.. _fig-time-one: + +.. figure:: graphics/png/orig/timeline-one.png + :align: center + + A single cycle point job schedule for real time operation. + The optimal job schedule for two consecutive cycle points of our + example suite during real time operation, assuming that all tasks + trigger off upstream tasks finishing completely. 
The horizontal + extent of a task bar represents its execution time, and the vertical + blue lines show when the external driving data becomes available. + +:numref:`fig-time-one` shows the optimal job schedule for two +consecutive cycle points of the example suite in real time operation, given +execution times represented by the horizontal extent of the task bars. +There is a time gap between cycle points as the suite waits on new external +driving data. Each task in the example suite happens to trigger off +upstream tasks *finishing*, rather than off any intermediate output +or event; this is merely a simplification that makes for clearer +diagrams. + +.. _fig-dep-two-linked: + +.. figure:: graphics/png/orig/dep-two-cycles-linked.png + :align: center + + What if the external driving data is available early? If the external + driving data is available in advance, can we start running the next cycle + point early? + +.. _fig-overlap: + +.. figure:: graphics/png/orig/timeline-one-c.png + :align: center + + Attempted overlap of consecutive single-cycle-point job + schedules. A naive attempt to overlap two consecutive cycle + points using the single-cycle-point dependency graph. The red shaded + tasks will fail because of dependency violations (or will not be able to + run because of upstream dependency violations). + +.. _fig-job-no-overlap: + +.. figure:: graphics/png/orig/timeline-one-a.png + :align: center + + The only safe multi-cycle-point job schedule? The best that can be done + *in general* when inter-cycle dependence is ignored. + +Now the question arises, what happens if the external driving data for +upcoming cycle points is available in advance, as it would be after a +significant delay in operations, or when running a historical case +study? 
While the forecast model *a* appears to depend only on the +external data *x* at this stage of the discussion, in fact it would +typically also depend on its own previous instance for the model +*background state* used in initializing the new forecast. Thus, as +alluded to in :numref:`fig-dep-two-linked`, task *a* could in principle +start as soon as its predecessor has finished. :numref:`fig-overlap` +shows, however, that starting a whole new cycle point at this point is +dangerous - it results in dependency violations in half of the tasks in +the example suite. In fact the situation could be even worse than this +- imagine that task *b* in the first cycle point is delayed for some +reason *after* the second cycle point has been launched. Clearly we must +consider handling inter-cycle dependence explicitly or else agree not to +start the next cycle point early, as is illustrated in +:numref:`fig-job-no-overlap`. + + +.. _InterCyclePointDependence: + +Inter-Cycle Dependence +^^^^^^^^^^^^^^^^^^^^^^ + +Forecast models typically depend on their own most recent previous +forecast for background state or restart files of some kind (this is +called *warm cycling*) but there can also be inter-cycle dependence +between different tasks. In an atmospheric forecast analysis suite, for +instance, the weather model may generate background states for observation +processing and data-assimilation tasks in the next cycle point as well as for +the next forecast model run. In real time operation inter-cycle +dependence can be ignored because it is automatically satisfied when one cycle +point finishes before the next begins. If it is not ignored it drastically +complicates the dependency graph by blurring the clean boundary between +cycle points. :numref:`fig-dep-multi` illustrates the problem for our +simple example suite assuming minimal inter-cycle dependence: the warm +cycled models (*a*, *b*, and *c*) each depend on their own previous instances. 
+ +For this reason, and because we tend to see forecasting suites in terms of +their real time characteristics, other metaschedulers have ignored +inter-cycle dependence and are thus restricted to running entire cycle +points in sequence at all times. This does not affect normal real time +operation but it can be a serious impediment when advance availability of +external driving data makes it possible, in principle, to run some tasks from +upcoming cycle points before the current cycle point is finished - as was +suggested at the end of the previous section. This can occur, for instance, +after operational delays (late arrival of external data, system maintenance, +etc.) and to an even greater extent in historical case studies and parallel +test suites started behind a real time operation. It can be a serious problem +for suites that have little downtime between forecast cycle points and +therefore take many cycle points to catch up after a delay. Without taking +account of inter-cycle dependence, the best that can be done, in +general, is to reduce the gap between cycle points to zero as shown in +:numref:`fig-job-no-overlap`. A limited crude overlap of the single +cycle point job schedule may be possible for specific task sets but the +allowable overlap may change if new tasks are added, and it is still dangerous: +it amounts to running different parts of a dependent system as if they were not +dependent and as such it cannot be guaranteed that some unforeseen delay in +one cycle point, after the next cycle point has begun, (e.g. due to resource +contention or task failures) won't result in dependency violations. + +.. _fig-dep-multi: + +.. figure:: graphics/png/orig/dep-multi-cycle.png + :align: center + + The complete multi-cycle-point dependency graph. + The complete dependency graph for the example suite, assuming + the least possible inter-cycle dependence: the forecast models (*a*, + *b*, and *c*) depend on their own previous instances. 
The dashed arrows + show connections to previous and subsequent forecast cycle points. + +.. _fig-optimal-two: + +.. figure:: graphics/png/orig/timeline-two-cycles-optimal.png + :align: center + + The optimal two-cycle-point job schedule. The optimal two cycle job + schedule when the next cycle's driving data is available in + advance, possible in principle when inter-cycle dependence is + handled explicitly. + +:numref:`fig-optimal-two` shows, in contrast to +:numref:`fig-overlap`, the optimal two cycle point job schedule +obtained by respecting all inter-cycle dependence. This assumes no delays due +to resource contention or otherwise - i.e. every task runs +as soon as it is ready to run. The scheduler running +this suite must be able to adapt dynamically to external conditions +that impact on multi-cycle-point scheduling in the presence of +inter-cycle dependence or else, again, risk bringing the system down +with dependency violations. + +.. _fig-time-three: + +.. figure:: graphics/png/orig/timeline-three.png + :align: center + + Comparison of job schedules after a delay. Job + schedules for the example suite after a delay of almost one whole + forecast cycle point, when inter-cycle dependence is + taken into account (above the time axis), and when it is not + (below the time axis). The colored lines indicate the time that + each cycle point is delayed, and normal "caught up" cycle points + are shaded gray. + +.. _fig-time-two: + +.. figure:: graphics/png/orig/timeline-two.png + :align: center + + Optimal job schedule when all external data is + available. Job schedules for the example suite in case study + mode, or after a long delay, when the external driving data are + available many cycle points in advance. Above the time axis is the optimal + schedule obtained when the suite is constrained only by its true + dependencies, as in :numref:`fig-dep-two-linked`, and underneath + is the best that can be done, in general, when inter-cycle + dependence is ignored. 
+ +To further illustrate the potential benefits of proper inter-cycle +dependency handling, :numref:`fig-time-three` shows an operational +delay of almost one whole cycle point in a suite with little downtime between +cycle points. Above the time axis is the optimal schedule that is possible in +principle when inter-cycle dependence is taken into account, and below +it is the only safe schedule possible *in general* when it is ignored. +In the former case, even the cycle point immediately after the delay is hardly +affected, and subsequent cycle points are all on time, whilst in the latter +case it takes five full cycle points to catch up to normal real time +operation [2]_. + +Similarly, :numref:`fig-time-two` shows example suite job schedules +for an historical case study, or when catching up after a very long +delay; i.e. when the external driving data are available many cycle +points in advance. Task *a*, which as the most upstream forecast +model is likely to be a resource intensive atmosphere or ocean model, +has no upstream dependence on co-temporal tasks and can therefore run +continuously, regardless of how much downstream processing is yet to be +completed in its own, or any previous, forecast cycle point (actually, +task *a* does depend on co-temporal task *x* which waits on the +external driving data, but that returns immediately when the data is +available in advance, so the result stands). The other forecast models +can also cycle continuously or with a short gap between, and some +post processing tasks, which have no previous-instance dependence, can +run continuously or even overlap (e.g. *e* in this case). Thus, +even for this very simple example suite, tasks from three or four +different cycle points can in principle run simultaneously at any given time. 
+ +In fact, if our tasks are able to trigger off internal outputs of +upstream tasks (message triggers) rather than waiting on full completion, +then successive instances of the forecast models could overlap as well +(because model restart outputs are generally completed early in the forecast) +for an even more efficient job schedule [3]_. + + +.. _TheCylcSchedulingAlgorithm: + +The Cylc Scheduling Algorithm +----------------------------- + +.. _fig-task-pool: + +.. figure:: graphics/png/orig/task-pool.png + :align: center + + The cylc task pool. How cylc sees a suite, in contrast to the + multi-cycle-point dependency graph of :numref:`fig-dep-multi`. + Task colors represent different cycle points, and the small squares + and circles represent different prerequisites and outputs. A task + can run when its prerequisites are satisfied by the outputs + of other tasks in the pool. + +Cylc manages a pool of proxy objects that represent the real tasks in a +suite. Task proxies know how to run the real tasks that they represent, +and they receive progress messages from the tasks as they run (usually +reports of completed outputs). There is no global cycling mechanism to +advance the suite; instead individual task proxies have their own +private cycle point and spawn their own successors when the time is +right. Task proxies are self-contained - they know their own +prerequisites and outputs but are not aware of the wider suite. +Inter-cycle dependence is not treated as special, and the task pool can +be populated with tasks with many different cycle points. The task pool +is illustrated in :numref:`fig-task-pool`. *Whenever any task +changes state due to completion of an output, every task checks to see +if its own prerequisites have been satisfied* [4]_. +In effect, cylc gets a pool of tasks to self-organize by negotiating +their own dependencies so that optimal scheduling, as described in the +previous section, emerges naturally at run time. + + +.. 
[1] Future plans for EcoConnect include additional deterministic regional + weather forecasts and a statistical ensemble. +.. [2] Note that simply overlapping the single cycle point schedules of + :numref:`fig-time-one` from the same start point would have + resulted in dependency violation by task *c*. +.. [3] Finally, we note again that a good job scheduler should be able to + dynamically adapt to delays in any part of the suite due to resource + contention, varying run times, or anything else that will inevitably + modify the depicted job schedules. +.. [4] In fact this dependency negotiation goes through a broker + object (rather than every task literally checking every other task) + which scales as *n* (rather than *n*:sup:`2`) where *n* is the number + of task proxies in the pool. diff --git a/doc/src/make-index.sh b/doc/src/make-index.sh deleted file mode 100755 index 98fcdd6e13f..00000000000 --- a/doc/src/make-index.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash - -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# Install to 'install/' and create an HTML index page to Cylc docs. 
- -set -e - -OUT=install -rm -rf $OUT -mkdir -p $OUT -cp src/index.css $OUT -cp -r src/cylc-user-guide/graphics $OUT -cp src/cylc-logo.png $OUT/graphics - -CYLC_VERSION=$($(dirname $0)/../../bin/cylc --version) -INDEX=$OUT/index.html - -CUG_PDF=src/cylc-user-guide/pdf/cug-pdf.pdf -CUG_HTML_SINGLE=src/cylc-user-guide/html/single/ -CUG_HTML_MULTI=src/cylc-user-guide/html/multi/ -SDG_PDF=src/suite-design-guide/document.pdf - -cat > $INDEX <<__END__ - - - - Cylc-${CYLC_VERSION} - - - - -

-
- -

Cylc Documentation

- -

cylc-${CYLC_VERSION}

- -
-

Command Help

-
-cylc --help
-cylc COMMAND --help
-
-

Misc.

- -
- -
-

User Guide

-
    -__END__ - -if [[ -f $CUG_PDF ]]; then - cp $CUG_PDF $OUT/cylc-user-guide.pdf - cat >> $INDEX <<__END__ -
  • PDF
  • -__END__ -else - cat >> $INDEX <<__END__ -
  • PDF (not generated)
  • -__END__ -fi - -mkdir -p $OUT/html -if [[ -f $CUG_HTML_SINGLE/cug-html.html ]]; then - cp -r $CUG_HTML_SINGLE $OUT/html/single - cat >> $INDEX <<__END__ -
  • HTML (single page)
  • -__END__ -else - cat >> $INDEX <<__END__ -
  • HTML single page (not generated)
  • -__END__ -fi - -if [[ -f $CUG_HTML_MULTI/cug-html.html ]]; then - cp -r $CUG_HTML_MULTI $OUT/html/multi - cat >> $INDEX <<__END__ -
  • HTML (multi page)
  • -__END__ -else - cat >> $INDEX <<__END__ -
  • HTML multi page (not generated)
  • -__END__ -fi - -cat >> $INDEX <<__END__ -
-
- -
-

Suite Design Guide

-
    -__END__ - -if [[ -f $SDG_PDF ]]; then - cp $SDG_PDF $OUT/suite-design-guide.pdf - cat >> $INDEX <<__END__ -
  • PDF
  • -__END__ -else - cat >> $INDEX <<__END__ -
  • PDF (not generated)
  • -__END__ -fi - -cat >> $INDEX <<__END__ -
-
- - -
- -
-

Document generation:

-
    -
  • user: -__END__ -whoami >> $INDEX -cat >> $INDEX <<__END__ -
  • -
  • host: -__END__ -hostname -f >> $INDEX -cat >> $INDEX <<__END__ -
  • -
  • date: -__END__ -date >> $INDEX - -cat >> $INDEX <<__END__ -
-
- - - -__END__ diff --git a/doc/src/running-suites.rst b/doc/src/running-suites.rst new file mode 100644 index 00000000000..78b4a8f6a8d --- /dev/null +++ b/doc/src/running-suites.rst @@ -0,0 +1,1634 @@ +.. _RunningSuites: + +Running Suites +============== + +This chapter currently features a diverse collection of topics related +to running suites. Please also see :ref:`Tutorial` and +:ref:`CommandReference`, and experiment with plenty of examples. + + +.. _SuiteStartUp: + +Suite Start-Up +-------------- + +There are three ways to start a suite running: *cold start* and *warm start*, +which start from scratch; and *restart*, which starts from a prior +suite state checkpoint. The only difference between cold starts and warm starts +is that warm starts start from a point beyond the suite initial cycle point. + +Once a suite is up and running it is typically a restart that is needed most +often (but see also ``cylc reload``). *Be aware that cold and warm +starts wipe out prior suite state, so you can't go back to a restart if you +decide you made a mistake.* + + +.. _Cold Start: + +Cold Start +^^^^^^^^^^ + +A cold start is the primary way to start a suite run from scratch: + +.. code-block:: bash + + $ cylc run SUITE [INITIAL_CYCLE_POINT] + +The initial cycle point may be specified on the command line or in the suite.rc +file. The scheduler starts by loading the first instance of each task at the +suite initial cycle point, or at the next valid point for the task. + + +.. _Warm Start: + +Warm Start +^^^^^^^^^^ + +A warm start runs a suite from scratch like a cold start, but from the +beginning of a given cycle point that is beyond the suite initial cycle point. +This is generally inferior to a *restart* (which loads a previously +recorded suite state - see :ref:`RestartingSuites`) because it may result in +some tasks rerunning. However, a warm start may be required if a restart is not +possible, e.g. because the suite run database was accidentally deleted. 
The +warm start cycle point must be given on the command line: + +.. code-block:: bash + + $ cylc run --warm SUITE [START_CYCLE_POINT] + +The original suite initial cycle point is preserved, but all tasks and +dependencies before the given warm start cycle point are ignored. + +The scheduler starts by loading a first instance of each task at the warm +start cycle point, or at the next valid point for the task. +``R1``-type tasks behave exactly the same as other tasks - if their +cycle point is at or later than the given start cycle point, they will run; if +not, they will be ignored. + + +.. _RestartingSuites: + +Restart and Suite State Checkpoints +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +At restart (see ``cylc restart --help``) a suite server program +initializes its task pool from a previously recorded checkpoint state. By +default the latest automatic checkpoint - which is updated with every task +state change - is loaded so that the suite can carry on exactly as it was just +before being shut down or killed. + +.. code-block:: bash + + $ cylc restart SUITE + +Tasks recorded in the "submitted" or "running" states are automatically polled +(see :ref:`Task Job Polling`) at start-up to determine what happened to +them while the suite was down. + + +Restart From Latest Checkpoint +"""""""""""""""""""""""""""""" + +To restart from the latest checkpoint simply invoke the ``cylc restart`` +command with the suite name (or select "restart" in the GUI suite start dialog +window): + +.. code-block:: bash + + $ cylc restart SUITE + + +Restart From Another Checkpoint +""""""""""""""""""""""""""""""" + +Suite server programs automatically update the "latest" checkpoint every time +a task changes state, and at every suite restart, but you can also take +checkpoints at other times. To tell a suite server program to checkpoint its +current state: + +.. 
code-block:: bash + + $ cylc checkpoint SUITE-NAME CHECKPOINT-NAME + +The 2nd argument is a name to identify the checkpoint later with: + +.. code-block:: bash + + $ cylc ls-checkpoints SUITE-NAME + +For example, with checkpoints named "bob", "alice", and "breakfast": + +.. code-block:: bash + + $ cylc ls-checkpoints SUITE-NAME + ####################################################################### + # CHECKPOINT ID (ID|TIME|EVENT) + 1|2017-11-01T15:48:34+13|bob + 2|2017-11-01T15:48:47+13|alice + 3|2017-11-01T15:49:00+13|breakfast + ... + 0|2017-11-01T17:29:19+13|latest + +To see the actual task state content of a given checkpoint ID (if you need to), +for the moment you have to interrogate the suite DB, e.g.: + +.. code-block:: bash + + $ sqlite3 ~/cylc-run/SUITE-NAME/log/db \ + 'select * from task_pool_checkpoints where id == 3;' + 3|2012|model|1|running| + 3|2013|pre|0|waiting| + 3|2013|post|0|waiting| + 3|2013|model|0|waiting| + 3|2013|upload|0|waiting| + +.. note:: + + A checkpoint captures the instantaneous state of every task in the + suite, including any tasks that are currently active, so you may want + to be careful where you do it. Tasks recorded as active are polled + automatically on restart to determine what happened to them. + +The checkpoint ID 0 (zero) is always used for latest state of the suite, which +is updated continuously as the suite progresses. The checkpoint IDs of earlier +states are positive integers starting from 1, incremented each time a new +checkpoint is stored. Currently suites automatically store checkpoints before +and after reloads, and on restarts (using the latest checkpoints before the +restarts). + +Once you have identified the right checkpoint, restart the suite like this: + +.. code-block:: bash + + $ cylc restart --checkpoint=CHECKPOINT-ID SUITE + +or enter the checkpoint ID in the space provided in the GUI restart window. 
+ + +Checkpointing With A Task +""""""""""""""""""""""""" + +Checkpoints can be generated automatically at particular points in the +workflow by coding tasks that run the ``cylc checkpoint`` command: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[PT6H]]] + graph = "pre => model => post => checkpointer" + [runtime] + # ... + [[checkpointer]] + script = """ + wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true + cylc checkpoint ${CYLC_SUITE_NAME} CP-${CYLC_TASK_CYCLE_POINT} + """ + +.. note:: + + We need to "wait" on the "task started" message - which + is sent in the background to avoid holding tasks up in a network + outage - to ensure that the checkpointer task is correctly recorded + as running in the checkpoint (at restart the suite server program will + poll to determine that that task job finished successfully). Otherwise + it may be recorded in the waiting state and, if its upstream dependencies + have already been cleaned up, it will need to be manually reset from waiting + to succeeded after the restart to avoid stalling the suite. + + +Behaviour of Tasks on Restart +""""""""""""""""""""""""""""" + +All tasks are reloaded in exactly their checkpointed states. Failed tasks are +not automatically resubmitted at restart in case the underlying problem has not +been addressed yet. + +Tasks recorded in the submitted or running states are automatically polled on +restart, to see if they are still waiting in a batch queue, still running, or +if they succeeded or failed while the suite was down. The suite state will be +updated automatically according to the poll results. + +Existing instances of tasks removed from the suite configuration before restart +are not removed from the task pool automatically, but they will not spawn new +instances. They can be removed manually if necessary, +with ``cylc remove``. 
+ +Similarly, instances of new tasks added to the suite configuration before +restart are not inserted into the task pool automatically, because it is +very difficult in general to automatically determine the cycle point of +the first instance. Instead, the first instance of a new task should be +inserted manually at the right cycle point, with ``cylc insert``. + + +Reloading The Suite Configuration At Runtime +-------------------------------------------- + +The ``cylc reload`` command tells a suite server program to reload its +suite configuration at run time. This is an alternative to shutting a +suite down and restarting it after making changes. + +As for a restart, existing instances of tasks removed from the suite +configuration before reload are not removed from the task pool +automatically, but they will not spawn new instances. They can be removed +manually if necessary, with ``cylc remove``. + +Similarly, instances of new tasks added to the suite configuration before +reload are not inserted into the pool automatically. The first instance of each +must be inserted manually at the right cycle point, with ``cylc insert``. + + +.. _HowTasksGetAccessToCylc: + +Task Job Access To Cylc +----------------------- + +Task jobs need access to Cylc on the job host, primarily for task messaging, +but also to allow user-defined task scripting to run other Cylc commands. + +Cylc should be installed on job hosts as on suite hosts, with different +releases installed side-by-side and invoked via the central Cylc +wrapper according to the value of ``$CYLC_VERSION`` - see +:ref:`InstallCylc`. Task job scripts set ``$CYLC_VERSION`` to the +version of the parent suite server program, so that the right Cylc will +be invoked by jobs on the job host. + +Access to the Cylc executable (preferably the central wrapper as just +described) for different job hosts can be configured using site and user +global configuration files (on the suite host). 
If the environment for running +the Cylc executable is only set up correctly in a login shell for a given host, +you can set ``[hosts][HOST]use login shell = True`` for the relevant +host (this is the default, to cover more sites automatically). If the +environment is already correct without the login shell, but the Cylc executable +is not in ``$PATH``, then ``[hosts][HOST]cylc executable`` can +be used to specify the direct path to the executable. + +To customize the environment more generally for Cylc on job hosts, +use of ``job-init-env.sh`` is described in +:ref:`Configure Environment on Job Hosts`. + + +.. _The Suite Contact File: + +The Suite Contact File +---------------------- + +At start-up, suite server programs write a *suite contact file* +``$HOME/cylc-run/SUITE/.service/contact`` that records suite host, +user, port number, process ID, Cylc version, and other information. Client +commands can read this file, if they have access to it, to find the target +suite server program. + + +.. _Task Job Polling: + +Task Job Polling +---------------- + +At any point after job submission task jobs can be *polled* to check that +their true state conforms to what is currently recorded by the suite server +program. See ``cylc poll --help`` for how to poll one or more tasks +manually, or right-click poll a task or family in the GUI. + +Polling may be necessary if, for example, a task job gets killed by the +untrappable SIGKILL signal (e.g. ``kill -9 PID``), or if a network +outage prevents task success or failure messages getting through, or if the +suite server program itself is down when tasks finish execution. + +To poll a task job the suite server program interrogates the +batch system, and the ``job.status`` file, on the job host. This +information is enough to determine the final task status even if the +job finished while the suite server program was down or unreachable on +the network. 
+ + +Routine Polling +^^^^^^^^^^^^^^^ + +Task jobs are automatically polled at certain times: once on job submission +timeout; several times on exceeding the job execution time limit; and at suite +restart any tasks recorded as active in the suite state checkpoint are polled +to find out what happened to them while the suite was down. + +Finally, if necessary, routine polling can be configured as a way to track job +status on job hosts that do not allow network routing back to the suite host +for task messaging by HTTPS or ssh. See :ref:`Polling To Track Job Status`. + + +.. _TaskComms: + +Tracking Task State +------------------- + +Cylc supports three ways of tracking task state on job hosts: + +- task-to-suite messaging via HTTPS +- task-to-suite messaging via non-interactive ssh to the suite host, + then local HTTPS +- regular polling by the suite server program + +These can be configured per job host in the Cylc global config file - see +:ref:`SiteRCReference`. + +If your site prohibits HTTPS and ssh back from job hosts to +suite hosts, before resorting to the polling method you should +consider installing dedicated Cylc servers or +VMs inside the HPC trust zone (where HTTPS and ssh should be allowed). + +It is also possible to run Cylc suite server programs on HPC login +nodes, but this is not recommended for load, run duration, +and GUI reasons. + +Finally, it has been suggested that *port forwarding* may provide another +solution - but that is beyond the scope of this document. + + +HTTPS Task Messaging +^^^^^^^^^^^^^^^^^^^^ + +Task job wrappers automatically invoke ``cylc message`` to report +progress back to the suite server program when they begin executing, +at normal exit (success) and abnormal exit (failure). + +By default the messaging occurs via an authenticated, HTTPS connection to the +suite server program. This is the preferred task communications +method - it is efficient and direct. 
+ +Suite server programs automatically install suite contact information +and credentials on job hosts. Users only need to do this manually +for remote access to suites on other hosts, or suites owned by other +users - see :ref:`RemoteControl`. + + +Ssh Task Messaging +^^^^^^^^^^^^^^^^^^ + +Cylc can be configured to re-invoke task messaging commands on the +suite host via non-interactive ssh (from job host to suite host). +Then a local HTTPS connection is made to the suite server program. + +(User-invoked client commands (aside from the GUI, which requires HTTPS) +can do the same thing with the ``--use-ssh`` command option). + +This is less efficient than direct HTTPS messaging, but it may be useful at +sites where the HTTPS ports are blocked but non-interactive ssh is allowed. + + +.. _Polling To Track Job Status: + +Polling to Track Job Status +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Finally, suite server programs can actively poll task jobs at +configurable intervals, via non-interactive ssh to the job host. + +Polling is the least efficient task communications method because task state is +updated only at intervals, not when task events actually occur. However, it +may be needed at sites that do not allow HTTPS or non-interactive ssh from job +host to suite host. + +Be careful to avoid spamming task hosts with polling commands. Each poll +opens (and then closes) a new ssh connection. + +Polling intervals are configurable under ``[runtime]`` because +they may depend on the expected execution time. For instance, a +task that typically takes an hour to run might be polled every 10 +minutes initially, and then every minute toward the end of its run. +Interval values are used in turn until the last value, which is used +repeatedly until finished: + +.. 
code-block:: cylc + + [runtime] + [[foo]] + [[[job]]] + # poll every minute in the 'submitted' state: + submission polling intervals = PT1M + # poll one minute after foo starts running, then every 10 + # minutes for 50 minutes, then every minute until finished: + execution polling intervals = PT1M, 5*PT10M, PT1M + +A list of intervals with optional multipliers can be used for both +submission and execution polling, although a single value is probably +sufficient for submission polling. If these items are not configured +default values from site and user global config will be used for the polling +task communication method; polling is not done by default under the +other task communications methods (but it can still be used if you +like). + + +Task Communications Configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +.. _The Suite Service Directory: + +The Suite Service Directory +--------------------------- + +At registration time a *suite service directory*, +``$HOME/cylc-run/<SUITE>/.service/``, is created and populated +with a private passphrase file (containing random text), a self-signed +SSL certificate (see :ref:`ConnectionAuthentication`), and a symlink to the +suite source directory. An existing passphrase file will not be overwritten +if a suite is re-registered. + +At run time, the private suite run database is also written to the service +directory, along with a *suite contact file* that records the host, +user, port number, process ID, Cylc version, and other information about the +suite server program. Client commands automatically read daemon targeting +information from the contact file, if they have access to it. + + +File-Reading Commands +--------------------- + +Some Cylc commands and GUI actions parse suite configurations or read +other files +from the suite host account, rather than communicate with a suite server +program over the network. 
In future we plan to have suite server program serve +up these files to clients, but for the moment this functionality requires +read-access to the relevant files on the suite host. + +If you are logged into the suite host account, file-reading commands will just +work. + + +Remote Host, Shared Home Directory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you are logged into another host with shared home directories (shared +filesystems are common in HPC environments) file-reading commands will just +work because suite files will look "local" on both hosts. + + +Remote Host, Different Home Directory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you are logged into another host with no shared home directory, file-reading +commands require non-interactive ssh to the suite host account, and use of the +``--host`` and ``--user`` options to re-invoke the command +on the suite account. + + +Same Host, Different User Account +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +(This is essentially the same as *Remote Host, Different Home Directory*.) + + +.. _ConnectionAuthentication: + +Client-Server Interaction +------------------------- + +Cylc server programs listen on dedicated network ports for +HTTPS communications from Cylc clients (task jobs, and user-invoked commands +and GUIs). + +Use ``cylc scan`` to see which suites are listening on which ports on +scanned hosts (this lists your own suites by default, but it can show others +too - see ``cylc scan --help``). + +Cylc supports two kinds of access to suite server programs: + +- *public* (non-authenticated) - the amount of information + revealed is configurable, see :ref:`PublicAccess` +- *control* (authenticated) - full control, suite passphrase + required, see :ref:`passphrases` + + +.. 
_PublicAccess: + +Public Access - No Auth Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Without a suite passphrase the amount of information revealed by a suite +server program is determined by the public access privilege level set in global +site/user config (:ref:`GlobalAuth`) and optionally overridden in suites +(:ref:`SuiteAuth`): + +- *identity* - only suite and owner names revealed +- *description* - identity plus suite title and description +- *state-totals* - identity, description, and task state totals +- *full-read* - full read-only access for monitor and GUI +- *shutdown* - full read access plus shutdown, but no other control. + +The default public access level is *state-totals*. + +The ``cylc scan`` command and the ``cylc gscan`` GUI can print +descriptions and task state totals in addition to basic suite identity, if +that information is revealed publicly. + + +.. _passphrases: + +Full Control - With Auth Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Suite auth files (passphrase and SSL certificate) give full control. They are +loaded from the suite service directory by the suite server program at +start-up, and used to authenticate subsequent client connections. Passphrases +are used in a secure encrypted challenge-response scheme, never sent in plain +text over the network. + +If two users need access to the same suite server program, they must both +possess the passphrase file for that suite. Fine-grained access to a single +suite server program via distinct user accounts is not currently supported. + +Suite server programs automatically install their auth and contact files to job +hosts via ssh, to enable task jobs to connect back to the suite server program +for task messaging. + +Client programs invoked by the suite owner automatically load the passphrase, +SSL certificate, and contact file too, for automatic connection to suites. 
+ +*Manual installation of suite auth files is only needed for remote control, +if you do not have a shared filesystem - see below.* + + +.. _GUI-to-Suite Interaction: + +GUI-to-Suite Interaction +------------------------ + +The gcylc GUI is mainly a network client to retrieve and display suite status +information from the suite server program, but it can also invoke file-reading +commands to view and graph the suite configuration and so on. This is entirely +transparent if the GUI is running on the suite host account, but full +functionality for remote suites requires either a shared filesystem, or +(see :ref:`RemoteControl`) auth file installation *and* non-interactive ssh +access to the suite host. Without the auth files you will not be able +to connect to the suite, and without ssh you will see "permission denied" +errors on attempting file access. + + +.. _RemoteControl: + +Remote Control +-------------- + +Cylc client programs - command line and GUI - can interact with suite server +programs running on other accounts or hosts. How this works depends on whether +or not you have: + +- a *shared filesystem* such that you see the same home directory on + both hosts. +- *non-interactive ssh* from the client account to the server + account. + +With a shared filesystem, a suite registered on the remote (server) host is +also - in effect - registered on the local (client) host. In this case you +can invoke client commands without the ``--host`` option; the client +will automatically read the host and port from the contact file in the +suite service directory. + +To control suite server programs running under other user accounts or on other +hosts without a shared filesystem, the suite SSL certificate and passphrase +must be installed under your ``$HOME/.cylc/`` directory: + +.. 
code-block:: bash + + $HOME/.cylc/auth/OWNER@HOST/SUITE/ + ssl.cert + passphrase + contact # (optional - see below) + +where ``OWNER@HOST`` is the suite host account and ``SUITE`` +is the suite name. Client commands should then be invoked with the +``--user`` and ``--host`` options, e.g.: + +.. code-block:: bash + + $ cylc gui --user=OWNER --host=HOST SUITE + +.. note:: + + Remote suite auth files do not need to be installed for read-only + access - see :ref:`PublicAccess` - via the GUI or monitor. + +The suite contact file (see :ref:`The Suite Contact File`) is not needed if +you have read-access to the remote suite run directory via the local +filesystem or non-interactive ssh to the suite host account - client commands +will automatically read it. If you do install the contact file in your auth +directory note that the port number will need to be updated if the suite gets +restarted on a different port. Otherwise use ``cylc scan`` to determine +the suite port number and use the ``--port`` client command option. + +.. warning:: + + Possession of a suite passphrase gives full control over the + target suite, including edit run functionality - which lets you run + arbitrary scripting on job hosts as the suite owner. Further, + non-interactive ssh gives full access to the target user account, so we + recommend that this is only used to interact with suites running on + accounts to which you already have full access. + + +.. _Scan And Gscan: + +Scan And Gscan +-------------- + +Both ``cylc scan`` and the ``cylc gscan`` GUI can display +suites owned by other users on other hosts, including task state totals if the +public access level permits that (see :ref:`PublicAccess`). Clicking on a +remote suite in ``gscan`` will open a ``cylc gui`` to connect to that +suite. This will give you full control, if you have the suite auth files +installed; or it will display full read only information if the public access +level allows that. 
+ + +Task States Explained +--------------------- + +As a suite runs, its task proxies may pass through the following states: + +- **waiting** - still waiting for prerequisites (e.g. dependence on + other tasks, and clock triggers) to be satisfied. +- **held** - will not be submitted to run even if all prerequisites + are satisfied, until released/un-held. +- **queued** - ready to run (prerequisites satisfied) but + temporarily held back by an *internal cylc queue* + (see :ref:`InternalQueues`). +- **ready** - ready to run (prerequisites satisfied) and + handed to cylc's job submission sub-system. +- **submitted** - submitted to run, but not executing yet + (could be waiting in an external batch scheduler queue). +- **submit-failed** - job submission failed *or* + submitted job killed (cancelled) before commencing execution. +- **submit-retrying** - job submission failed, but a submission retry + was configured. Will only enter the *submit-failed* state if all + configured submission retries are exhausted. +- **running** - currently executing (a *task started* + message was received, or the task polled as running). +- **succeeded** - finished executing successfully (a *task + succeeded* message was received, or the task polled as succeeded). +- **failed** - aborted execution due to some error condition (a + *task failed* message was received, or the task polled as failed). +- **retrying** - job execution failed, but an execution retry + was configured. Will only enter the *failed* state if all configured + execution retries are exhausted. +- **runahead** - will not have prerequisites checked (and so + automatically held, in effect) until the rest of the suite catches up + sufficiently. The amount of runahead allowed is configurable - see + :ref:`RunaheadLimit`. +- **expired** - will not be submitted to run, due to falling too far + behind the wall-clock relative to its cycle point - + see :ref:`ClockExpireTasks`. 
+ + +What The Suite Control GUI Shows +-------------------------------- + +The GUI Text-tree and Dot Views display the state of every task proxy present +in the task pool. Once a task has succeeded and Cylc has determined that it can +no longer be needed to satisfy the prerequisites of other tasks, its proxy will +be cleaned up (removed from the pool) and it will disappear from the GUI. To +rerun a task that has disappeared from the pool, you need to re-insert its task +proxy and then re-trigger it. + +The Graph View is slightly different: it displays the complete dependency graph +over the range of cycle points currently present in the task pool. This often +includes some greyed-out *base* or *ghost nodes* that are empty - i.e. +there are no corresponding task proxies currently present in the pool. Base +nodes just flesh out the graph structure. Groups of them may be cut out and +replaced by single *scissor nodes* in sections of the graph that are +currently inactive. + + +Network Connection Timeouts +--------------------------- + +A connection timeout can be set in site and user global config files +(see :ref:`SiteAndUserConfiguration`) so that messaging commands +cannot hang indefinitely if the suite is not responding (this can be +caused by suspending a suite with Ctrl-Z) thereby preventing the task +from completing. The same can be done on the command line for other +suite-connecting user commands, with the ``--comms-timeout`` option. + + +.. _RunaheadLimit: + +Runahead Limiting +----------------- + +Runahead limiting prevents the fastest tasks in a suite from getting too far +ahead of the slowest ones. Newly spawned tasks are released to the task pool +only when they fall below the runahead limit. A low runahead limit can prevent +cylc from interleaving cycles, but it will not stall a suite unless it fails to +extend out past a future trigger (see :ref:`InterCyclePointTriggers`). 
+A high runahead limit may allow fast tasks that are not constrained by +dependencies or clock-triggers to spawn far ahead of the pack, which could have +performance implications for the suite server program when running very large +suites. Succeeded and failed tasks are ignored when computing the runahead +limit. + +The preferred runahead limiting mechanism restricts the number of consecutive +active cycle points. The default value is three active cycle points; +see :ref:`max active cycle points`. Alternatively the interval between the +slowest and fastest tasks can be specified as a hard limit; +see :ref:`runahead limit`. + + +.. _InternalQueues: + +Limiting Activity With Internal Queues +-------------------------------------- + +Large suites can potentially overwhelm task hosts by submitting too many +tasks at once. You can prevent this with *internal queues*, which +limit the number of tasks that can be active (submitted or running) +at the same time. + +Internal queues behave in the first-in-first-out (FIFO) manner, i.e. tasks are +released from a queue in the same order that they were queued. + +A queue is defined by a *name*; a *limit*, which is the maximum +number of active tasks allowed for the queue; and a list of *members*, +assigned by task or family name. + +Queue configuration is done under the ``[scheduling]`` section of the suite.rc +file (like dependencies, internal queues constrain *when* a task runs). + +By default every task is assigned to the *default* queue, which by default +has a zero limit (interpreted by cylc as no limit). To use a single queue for +the whole suite just set the default queue limit: + +.. code-block:: cylc + + [scheduling] + [[ queues]] + # limit the entire suite to 5 active tasks at once + [[[default]]] + limit = 5 + +To use additional queues just name each one, set their limits, and assign +members: + +.. 
code-block:: cylc + + [scheduling] + [[ queues]] + [[[q_foo]]] + limit = 5 + members = foo, bar, baz + +Any tasks not assigned to a particular queue will remain in the default +queue. The *queues* example suite illustrates how queues work by +running two task trees side by side (as seen in the graph GUI) each +limited to 2 and 3 tasks respectively: + +.. literalinclude:: ../../etc/examples/queues/suite.rc + :language: cylc + + +.. _TaskRetries: + +Automatic Task Retry On Failure +------------------------------- + +See also :ref:`RefRetries`. + +Tasks can be configured with a list of "retry delay" intervals, as +ISO 8601 durations. If the task job fails it will go into the *retrying* +state and resubmit after the next configured delay interval. An example is +shown in the suite listed below under :ref:`EventHandling`. + +If a task with configured retries is *killed* (by ``cylc kill`` or +via the GUI) it goes to the *held* state so that the operator can decide +whether to release it and continue the retry sequence or to abort the retry +sequence by manually resetting it to the *failed* state. + + +.. _EventHandling: + +Task Event Handling +------------------- + +See also :ref:`SuiteEventHandling` and :ref:`TaskEventHandling`. + +Cylc can call nominated event handlers - to do whatever you like - when certain +suite or task events occur. This facilitates centralized alerting and automated +handling of critical events. Event handlers can be used to send a message, call +a pager, or whatever; they can even intervene in the operation of their own +suite using cylc commands. + +To send an email, use the built-in setting ``[[[events]]]mail events`` +to specify a list of events for which notifications should be sent. (The +name of a registered task output can also be used as an event name in +this case.) E.g. to send an email on (submission) failed and retry: + +.. 
code-block:: cylc + + [runtime] + [[foo]] + script = """ + test ${CYLC_TASK_TRY_NUMBER} -eq 3 + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" 'oopsy daisy' + """ + [[[events]]] + mail events = submission failed, submission retry, failed, retry, oops + [[[job]]] + execution retry delays = PT0S, PT30S + [[[outputs]]] + oops = oopsy daisy + +By default, the emails will be sent to the current user with: + +- ``to:`` set as ``$USER`` +- ``from:`` set as ``notifications@$(hostname)`` +- SMTP server at ``localhost:25`` + +These can be configured using the settings: + +- ``[[[events]]]mail to`` (list of email addresses), +- ``[[[events]]]mail from`` +- ``[[[events]]]mail smtp``. + +By default, a cylc suite will send you no more than one task event email every +5 minutes - this is to prevent your inbox from being flooded by emails should a +large group of tasks all fail at a similar time. +See :ref:`task-event-mail-interval` for details. + +Event handlers can be located in the suite ``bin/`` directory; +otherwise it is up to you to ensure their location is in ``$PATH`` (in +the shell in which the suite server program runs). They should require little +resource and return quickly - see :ref:`Managing External Command Execution`. 
+ +Task event handlers can be specified using the +``[[[events]]]<event> handler`` settings, where +``<event>`` is one of: + +- 'submitted' - the job submit command was successful +- 'submission failed' - the job submit command failed +- 'submission timeout' - task job submission timed out +- 'submission retry' - task job submission failed, but will retry after + a configured delay +- 'started' - the task reported commencement of execution +- 'succeeded' - the task reported successful completion +- 'warning' - the task reported a WARNING severity message +- 'critical' - the task reported a CRITICAL severity message +- 'custom' - the task reported a CUSTOM severity message +- 'late' - the task is never active and is late +- 'failed' - the task failed +- 'retry' - the task failed but will retry after a configured delay +- 'execution timeout' - task execution timed out + +The value of each setting should be a list of command lines or command line +templates (see below). + +Alternatively you can use ``[[[events]]]handlers`` and +``[[[events]]]handler events``, where the former is a list of command +lines or command line templates (see below) and the latter is a list of events +for which these commands should be invoked. (The name of a registered task +output can also be used as an event name in this case.) + +Event handler arguments can be constructed from various templates +representing suite name; task ID, name, cycle point, message, and submit +number name; and any suite or task ``[meta]`` item. +See :ref:`SuiteEventHandling` and :ref:`TaskEventHandling` for options. + +If no template arguments are supplied the following default command line +will be used: + +.. code-block:: none + + %(event)s %(suite)s %(id)s %(message)s + +.. note:: + + Substitution patterns should not be quoted in the template strings. + This is done automatically where required. 
+ +For an explanation of the substitution syntax, see +`String Formatting Operations +<https://docs.python.org/2/library/stdtypes.html#string-formatting>`_ +in the Python documentation. 
+ +The retry event occurs if a task fails and has any remaining retries +configured (see :ref:`TaskRetries`). +The event handler will be called as soon as the task fails, not after +the retry delay period when it is resubmitted. + +.. note:: + + Event handlers are called by the suite server program, not by + task jobs. If you wish to pass additional information to them use + ``[cylc] -> [[environment]]``, not task runtime environment. + +The following two ``suite.rc`` snippets are examples on how to specify +event handlers using the alternate methods: + +.. code-block:: cylc + + [runtime] + [[foo]] + script = test ${CYLC_TASK_TRY_NUMBER} -eq 2 + [[[events]]] + retry handler = "echo '!!!!!EVENT!!!!!' " + failed handler = "echo '!!!!!EVENT!!!!!' " + [[[job]]] + execution retry delays = PT0S, PT30S + +.. code-block:: cylc + + [runtime] + [[foo]] + script = """ + test ${CYLC_TASK_TRY_NUMBER} -eq 2 + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" 'oopsy daisy' + """ + [[[events]]] + handlers = "echo '!!!!!EVENT!!!!!' " + # Note: task output name can be used as an event in this method + handler events = retry, failed, oops + [[[job]]] + execution retry delays = PT0S, PT30S + [[[outputs]]] + oops = oopsy daisy + +The handler command here - specified with no arguments - is called with the +default arguments, like this: + +.. code-block:: bash + + echo '!!!!!EVENT!!!!!' %(event)s %(suite)s %(id)s %(message)s + + +.. _Late Events: + +Late Events +^^^^^^^^^^^ + +You may want to be notified when certain tasks are running late in a real time +production system - i.e. when they have not triggered by *the usual time*. +Tasks of primary interest are not normally clock-triggered however, so their +trigger times are mostly a function of how the suite runs in its environment, +and even external factors such as contention with other suites [3]_ . 
+ +But if your system is reasonably stable from one cycle to the next such that a +given task has consistently triggered by some interval beyond its cycle point, +you can configure Cylc to emit a *late event* if it has not triggered by +that time. For example, if a task ``forecast`` normally triggers by 30 +minutes after its cycle point, configure late notification for it like this: + +.. code-block:: cylc + + [runtime] + [[forecast]] + script = run-model.sh + [[[events]]] + late offset = PT30M + late handler = my-handler %(message)s + +*Late offset intervals are not computed automatically so be careful +to update them after any change that affects triggering times.* + +.. note:: + + Cylc can only check for lateness in tasks that it is currently aware + of. If a suite gets delayed over many cycles the next tasks coming up + can be identified as late immediately, and subsequent tasks can be + identified as late as the suite progresses to subsequent cycle points, + until it catches up to the clock. + + +.. _Managing External Command Execution: + +Managing External Command Execution +----------------------------------- + +Job submission commands, event handlers, and job poll and kill commands, are +executed by the suite server program in a "pool" of asynchronous +subprocesses, in order to avoid holding the suite up. The process pool is +actively managed to limit it to a configurable size (:ref:`process pool size`). +Custom event handlers should be light-weight and quick-running because they +will tie up a process pool member until they complete, and the suite will +appear to stall if the pool is saturated with long-running processes. Processes +are killed after a configurable timeout (:ref:`process pool timeout`) however, +to guard against rogue commands that hang indefinitely. All process kills are +logged by the suite server program. For killed job submissions the associated +tasks also go to the *submit-failed* state. + + +.. 
_PreemptionHPC: + +Handling Job Preemption +----------------------- + +Some HPC facilities allow job preemption: the resource manager can kill +or suspend running low priority jobs in order to make way for high +priority jobs. The preempted jobs may then be automatically restarted +by the resource manager, from the same point (if suspended) or requeued +to run again from the start (if killed). + +Suspended jobs will poll as still running (their job status file says they +started running, and they still appear in the resource manager queue). +Loadleveler jobs that are preempted by kill-and-requeue ("job vacation") are +automatically returned to the submitted state by Cylc. This is possible +because Loadleveler sends the SIGUSR1 signal before SIGKILL for preemption. +Other batch schedulers just send SIGTERM before SIGKILL as normal, so Cylc +cannot distinguish a preemption job kill from a normal job kill. After this the +job will poll as failed (correctly, because it was killed, and the job status +file records that). To handle this kind of preemption automatically you could +use a task failed or retry event handler that queries the batch scheduler queue +(after an appropriate delay if necessary) and then, if the job has been +requeued, uses ``cylc reset`` to reset the task to the submitted state. + + +Manual Task Triggering and Edit-Run +----------------------------------- + +Any task proxy currently present in the suite can be manually triggered at any +time using the ``cylc trigger`` command, or from the right-click task +menu in gcylc. If the task belongs to a limited internal queue +(see :ref:`InternalQueues`), this will queue it; if not, or if it is already +queued, it will submit immediately. + +With ``cylc trigger --edit`` (also in the gcylc right-click task menu) +you can edit the generated task job script to make one-off changes before the +task submits. + + +.. 
_cylc-broadcast: + +Cylc Broadcast +-------------- + +The ``cylc broadcast`` command overrides ``[runtime]`` +settings in a running suite. This can +be used to communicate information to downstream tasks by broadcasting +environment variables (communication of information from one task to +another normally takes place via the filesystem, i.e. the input/output +file relationships embodied in inter-task dependencies). Variables (and +any other runtime settings) may be broadcast to all subsequent tasks, +or targeted specifically at a specific task, all subsequent tasks with a +given name, or all tasks with a given cycle point; see broadcast command help +for details. + +Broadcast settings targeted at a specific task ID or cycle point expire and +are forgotten as the suite moves on. Un-targeted variables and those +targeted at a task name persist throughout the suite run, even across +restarts, unless manually cleared using the broadcast command - and so +should be used sparingly. + + +The Meaning And Use Of Initial Cycle Point +------------------------------------------ + +When a suite is started with the ``cylc run`` command (cold or +warm start) the cycle point at which it starts can be given on the command +line or hardwired into the suite.rc file: + +.. code-block:: bash + + cylc run foo 20120808T06Z + +or: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 20100808T06Z + +An initial cycle given on the command line will override one in the +suite.rc file. + + +The Environment Variable CYLC\_SUITE\_INITIAL\_CYCLE\_POINT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the case of a *cold start only* the initial cycle point is passed +through to task execution environments as +``$CYLC_SUITE_INITIAL_CYCLE_POINT``. 
The value is then stored in +suite database files and persists across restarts, but it does get wiped out +(set to ``None``) after a warm start, because a warm start is really an +implicit restart in which all state information is lost (except that the +previous cycle is assumed to have completed). + +The ``$CYLC_SUITE_INITIAL_CYCLE_POINT`` variable allows tasks to +determine if they are running in the initial cold-start cycle point, when +different behaviour may be required, or in a normal mid-run cycle point. +Note however that an initial ``R1`` graph section is now the preferred +way to get different behaviour at suite start-up. + + +.. _SimulationMode: + +Simulating Suite Behaviour +-------------------------- + +Several suite run modes allow you to simulate suite behaviour quickly without +running the suite's real jobs - which may be long-running and resource-hungry: + +- *dummy mode* - runs dummy tasks as background jobs on configured + job hosts. + + - simulates scheduling, job host connectivity, and + generates all job files on suite and job hosts. + +- *dummy-local mode* - runs real dummy tasks as background jobs on + the suite host, which allows dummy-running suites from other sites. + + - simulates scheduling and generates all job files on the + suite host. + +- *simulation mode* - does not run any real tasks. + + - simulates scheduling without generating any job files. + +Set the run mode (default *live*) in the GUI suite start dialog box, or on +the command line: + +.. code-block:: bash + + $ cylc run --mode=dummy SUITE + $ cylc restart --mode=dummy SUITE + +You can get specified tasks to fail in these modes, for more flexible suite +testing. See :ref:`suiterc-sim-config` for simulation configuration. + + +Proportional Simulated Run Length +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If task ``[job]execution time limit`` is set, Cylc divides it by +``[simulation]speedup factor`` (default ``10.0``) to compute +simulated task run lengths (default 10 seconds). 
+ + +Limitations Of Suite Simulation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Dummy mode ignores batch scheduler settings because Cylc does not know which +job resource directives (requested memory, number of compute nodes, etc.) would +need to be changed for the dummy jobs. If you need to dummy-run jobs on a +batch scheduler manually comment out ``script`` items and modify +directives in your live suite, or else use a custom live mode test suite. + +.. note:: + + The dummy modes ignore all configured task ``script`` items + including ``init-script``. If your ``init-script`` is required + to run even dummy tasks on a job host, note that host environment + setup should be done + elsewhere - see :ref:`Configure Site Environment on Job Hosts`. + + +Restarting Suites With A Different Run Mode? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The run mode is recorded in the suite run database files. Cylc will not let +you *restart* a non-live mode suite in live mode, or vice versa. To +test a live suite in simulation mode just take a quick copy of it and run +the copy in simulation mode. + + +.. _AutoRefTests: + +Automated Reference Test Suites +------------------------------- + +Reference tests are finite-duration suite runs that abort with non-zero +exit status if any of the following conditions occur (by default): + +- cylc fails +- any task fails +- the suite times out (e.g. a task dies without reporting failure) +- a nominated shutdown event handler exits with error status + +The default shutdown event handler for reference tests is +``cylc hook check-triggering`` which compares task triggering +information (what triggers off what at run time) in the test run suite +log to that from an earlier reference run, disregarding the timing and +order of events - which can vary according to the external queueing +conditions, runahead limit, and so on.
+ +To prepare a reference log for a suite, run it with the +``--reference-log`` option, and manually verify the +correctness of the reference run. + +To reference test a suite, just run it (in dummy mode for the most +comprehensive test without running real tasks) with the +``--reference-test`` option. + +A battery of automated reference tests is used to test cylc before +posting a new release version. Reference tests can also be used to check that +a cylc upgrade will not break your own complex +suites - the triggering check will catch any bug that causes a task to +run when it shouldn't, for instance; even in a dummy mode reference +test the full task job script (sans ``script`` items) executes on the +proper task host by the proper batch system. + +Reference tests can be configured with the following settings: + +.. code-block:: cylc + + [cylc] + [[reference test]] + suite shutdown event handler = cylc check-triggering + required run mode = dummy + allow task failures = False + live mode suite timeout = PT5M + dummy mode suite timeout = PT2M + simulation mode suite timeout = PT2M + + +Roll-your-own Reference Tests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the default reference test is not sufficient for your needs, firstly +note that you can override the default shutdown event handler, and +secondly that the ``--reference-test`` option is merely a short +cut to the following suite.rc settings which can also be set manually if +you wish: + +.. code-block:: cylc + + [cylc] + abort if any task fails = True + [[events]] + shutdown handler = cylc check-triggering + timeout = PT5M + abort if shutdown handler fails = True + abort on timeout = True + + +.. _SuiteStatePolling: + +Triggering Off Of Tasks In Other Suites +--------------------------------------- + +.. note:: + + Please read :ref:`External Triggers` before using + the older inter-suite triggering mechanism described in this section. + +The ``cylc suite-state`` command interrogates suite run databases. 
It +has a polling mode that waits for a given task in the target suite to achieve a +given state, or receive a given message. This can be used to make task +scripting wait for a remote task to succeed (for example). + +Automatic suite-state polling tasks can be defined in the graph. They get +automatically-generated task scripting that uses ``cylc suite-state`` +appropriately (it is an error to give your own ``script`` item for these +tasks). + +Here's how to trigger a task ``bar`` off a task ``foo`` in +a remote suite called ``other.suite``: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00, T12]]] + graph = "my-foo<other.suite::foo> => bar" + +Local task ``my-foo`` will poll for the success of ``foo`` +in suite ``other.suite``, at the same cycle point, succeeding only when +or if it succeeds. Other task states can also be polled: + +.. code-block:: cylc + + graph = "my-foo<other.suite::foo:fail> => bar" + +The default polling parameters (e.g. maximum number of polls and the interval +between them) are printed by ``cylc suite-state --help`` and can be +configured if necessary under the local polling task runtime section: + +.. code-block:: cylc + + [scheduling] + [[ dependencies]] + [[[T00,T12]]] + graph = "my-foo<other.suite::foo> => bar" + [runtime] + [[my-foo]] + [[[suite state polling]]] + max-polls = 100 + interval = PT10S + +To poll for the target task to receive a message rather than achieve a state, +give the message in the runtime configuration (in which case the task status +inferred from the graph syntax will be ignored): + +.. code-block:: cylc + + [runtime] + [[my-foo]] + [[[suite state polling]]] + message = "the quick brown fox" + +For suites owned by others, or those with run databases in non-standard +locations, use the ``--run-dir`` option, or in-suite: + +.. 
code-block:: cylc + + [runtime] + [[my-foo]] + [[[suite state polling]]] + run-dir = /path/to/top/level/cylc/run-directory + +If the remote task has a different cycling sequence, just arrange for the +local polling task to be on the same sequence as the remote task that it +represents. For instance, if local task ``cat`` cycles 6-hourly at +``0,6,12,18`` but needs to trigger off a remote task ``dog`` +at ``3,9,15,21``: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T03,T09,T15,T21]]] + graph = "my-dog<other.suite::dog>" + [[[T00,T06,T12,T18]]] + graph = "my-dog[-PT3H] => cat" + +For suite-state polling, the cycle point is automatically converted to the +cycle point format of the target suite. + +The remote suite does not have to be running when polling commences because the +command interrogates the suite run database, not the suite server program. + +.. note:: + + The graph syntax for suite polling tasks cannot be combined with + cycle point offsets, family triggers, or parameterized task notation. + This does not present a problem because suite polling tasks can be put on + the same cycling sequence as the remote-suite target task (as recommended + above), and there is no point in having multiple tasks (family members or + parameterized tasks) performing the same polling operation. Task state + triggers can be used with suite polling, e.g. to trigger another task if + polling fails after 10 tries at 10 second intervals: + + .. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "poller<other-suite::foo:succeed>:fail => another-task" + [runtime] + [[my-foo]] + [[[suite state polling]]] + max-polls = 10 + interval = PT10S + + +.. _Suite Server Logs: + +Suite Server Logs +----------------- + +Each suite maintains its own log of time-stamped events under the *suite +server log directory*: + +.. 
code-block:: bash + + $HOME/cylc-run/SUITE-NAME/log/suite/ + +By way of example, we will show the complete server log generated (at +cylc-7.2.0) by a small suite that runs two 30-second dummy tasks +``foo`` and ``bar`` for a single cycle point +``2017-01-01T00Z`` before shutting down: + +.. code-block:: cylc + + [cylc] + cycle point format = %Y-%m-%dT%HZ + [scheduling] + initial cycle point = 2017-01-01T00Z + final cycle point = 2017-01-01T00Z + [[dependencies]] + graph = "foo => bar" + [runtime] + [[foo]] + script = sleep 30; /bin/false + [[bar]] + script = sleep 30; /bin/true + +By the task scripting defined above, this suite will stall when ``foo`` +fails. Then, the suite owner *vagrant@cylon* manually resets the failed +task's state to *succeeded*, allowing ``bar`` to trigger and the +suite to finish and shut down. Here's the complete suite log for this run: + +.. code-block:: none + + $ cylc cat-log SUITE-NAME + 2017-03-30T09:46:10Z INFO - Suite starting: server=localhost:43086 pid=3483 + 2017-03-30T09:46:10Z INFO - Run mode: live + 2017-03-30T09:46:10Z INFO - Initial point: 2017-01-01T00Z + 2017-03-30T09:46:10Z INFO - Final point: 2017-01-01T00Z + 2017-03-30T09:46:10Z INFO - Cold Start 2017-01-01T00Z + 2017-03-30T09:46:11Z INFO - [foo.2017-01-01T00Z] -submit_method_id=3507 + 2017-03-30T09:46:11Z INFO - [foo.2017-01-01T00Z] -submission succeeded + 2017-03-30T09:46:11Z INFO - [foo.2017-01-01T00Z] -(current:submitted)> started at 2017-03-30T09:46:10Z + 2017-03-30T09:46:41Z CRITICAL - [foo.2017-01-01T00Z] -(current:running)> failed/EXIT at 2017-03-30T09:46:40Z + 2017-03-30T09:46:42Z WARNING - suite stalled + 2017-03-30T09:46:42Z WARNING - Unmet prerequisites for bar.2017-01-01T00Z: + 2017-03-30T09:46:42Z WARNING - * foo.2017-01-01T00Z succeeded + 2017-03-30T09:47:58Z INFO - [client-command] reset_task_states vagrant@cylon:cylc-reset 1e0d8e9f-2833-4dc9-a0c8-9cf263c4c8c3 + 2017-03-30T09:47:58Z INFO - [foo.2017-01-01T00Z] -resetting state to succeeded + 
2017-03-30T09:47:58Z INFO - Command succeeded: reset_task_states([u'foo.2017'], state=succeeded) + 2017-03-30T09:47:59Z INFO - [bar.2017-01-01T00Z] -submit_method_id=3565 + 2017-03-30T09:47:59Z INFO - [bar.2017-01-01T00Z] -submission succeeded + 2017-03-30T09:47:59Z INFO - [bar.2017-01-01T00Z] -(current:submitted)> started at 2017-03-30T09:47:58Z + 2017-03-30T09:48:29Z INFO - [bar.2017-01-01T00Z] -(current:running)> succeeded at 2017-03-30T09:48:28Z + 2017-03-30T09:48:30Z INFO - Waiting for the command process pool to empty for shutdown + 2017-03-30T09:48:30Z INFO - Suite shutting down - AUTOMATIC + +The information logged here includes: + +- event timestamps, at the start of each line +- suite server host, port and process ID +- suite initial and final cycle points +- suite start type (cold start in this case) +- task events (task started, succeeded, failed, etc.) +- suite stalled warning (in this suite nothing else can run when + ``foo`` fails) +- the client command issued by *vagrant@cylon* to reset + ``foo`` to *succeeded* +- job IDs - in this case process IDs for background jobs (or PBS job IDs + etc.) +- state changes due to incoming task progress message ("started at ..." + etc.) +- suite shutdown time and reasons (AUTOMATIC means "all tasks finished + and nothing else to do") + +.. note:: + + Suite log files are primarily intended for human eyes. If you need + to have an external system to monitor suite events automatically, + interrogate the sqlite *suite run database* + (see :ref:`Suite Run Databases`) rather than parse the log files. + + +.. _Suite Run Databases: + +Suite Run Databases +------------------- + +Suite server programs maintain two ``sqlite`` databases to record +restart checkpoints and various other aspects of run history: + +.. code-block:: bash + + $HOME/cylc-run/SUITE-NAME/log/db # public suite DB + $HOME/cylc-run/SUITE-NAME/.service/db # private suite DB + +The private DB is for use only by the suite server program. 
The identical +public DB is provided for use by external commands such as +``cylc suite-state``, ``cylc ls-checkpoints``, and +``cylc report-timings``. If the public DB gets locked for too long by +an external reader, the suite server program will eventually delete it and +replace it with a new copy of the private DB, to ensure that both correctly +reflect the suite state. + +You can interrogate the public DB with the ``sqlite3`` command line tool, +the ``sqlite3`` module in the Python standard library, or any other +sqlite interface. + +.. code-block:: bash + + $ sqlite3 ~/cylc-run/foo/log/db << _END_ + > .headers on + > select * from task_events where name is "foo"; + > _END_ + name|cycle|time|submit_num|event|message + foo|1|2017-03-12T11:06:09Z|1|submitted| + foo|1|2017-03-12T11:06:09Z|1|output completed|started + foo|1|2017-03-12T11:06:09Z|1|started| + foo|1|2017-03-12T11:06:19Z|1|output completed|succeeded + foo|1|2017-03-12T11:06:19Z|1|succeeded| + + +.. _Disaster Recovery: + +Disaster Recovery +----------------- + +If a suite run directory gets deleted or corrupted, the options for recovery +are: + +- restore the run directory from back-up, and restart the suite +- re-install from source, and warm start from the beginning of the + current cycle point + +A warm start (see :ref:`Warm Start`) does not need a suite state +checkpoint, but it wipes out prior run history, and it could re-run +a significant number of tasks that had already completed. + +To restart the suite, the critical Cylc files that must be restored are: + +.. 
code-block:: bash + + # On the suite host: + ~/cylc-run/SUITE-NAME/ + suite.rc # live suite configuration (located here in Rose suites) + log/db # public suite DB (can just be a copy of the private DB) + log/rose-suite-run.conf # (needed to restart a Rose suite) + .service/db # private suite DB + .service/source -> PATH-TO-SUITE-DIR # symlink to live suite directory + + # On job hosts (if no shared filesystem): + ~/cylc-run/SUITE-NAME/ + log/job/CYCLE-POINT/TASK-NAME/SUBMIT-NUM/job.status + +.. note:: + + This discussion does not address restoration of files generated and + consumed by task jobs at run time. How suite data is stored and recovered + in your environment is a matter of suite and system design. + +In short, you can simply restore the suite service directory, the log +directory, and the suite.rc file that is the target of the symlink in the +service directory. The service and log directories will come with extra files +that aren't strictly needed for a restart, but that doesn't matter - although +depending on your log housekeeping the ``log/job`` directory could be +huge, so you might want to be selective about that. (Also in a Rose suite, the +``suite.rc`` file does not need to be restored if you restart with +``rose suite-run`` - which re-installs suite source files to the run +directory). + +The public DB is not strictly required for a restart - the suite server program +will recreate it if need be - but it is required by +``cylc ls-checkpoints`` if you need to identify the right restart +checkpoint. + +The job status files are only needed if the restart suite state checkpoint +contains active tasks that need to be polled to determine what happened to them +while the suite was down. Without them, polling will fail and those tasks will +need to be manually set to the correct state. + +.. warning:: + + It is not safe to copy or rsync a potentially-active sqlite DB - the copy + might end up corrupted. 
It is best to stop the suite before copying + a DB, or else write a back-up utility using the + `official sqlite backup API <https://www.sqlite.org/backup.html>`_. + + +.. _auto-stop-restart: + +Auto Stop-Restart +----------------- + +Cylc has the ability to automatically stop suites running on a particular host +and optionally, restart them on a different host. +This is useful if a host needs to be taken off-line e.g. for +scheduled maintenance. + +This functionality is configured via the following site configuration settings: + +- ``[run hosts][suite servers]auto restart delay`` +- ``[run hosts][suite servers]condemned hosts`` +- ``[run hosts][suite servers]run hosts`` + +The auto stop-restart feature has two modes: + +- [Normal Mode] + + - When a host is added to the ``condemned hosts`` list, any suites + running on that host will automatically shutdown then restart selecting a + new host from ``run hosts``. + - For safety, before attempting to stop the suite cylc will first wait + for any jobs running locally (under background or at) to complete. + - *In order for Cylc to be able to successfully restart suites the + ``run hosts`` must all be on a shared filesystem.* + +- [Force Mode] + + - If a host is suffixed with an exclamation mark then Cylc will not attempt + to automatically restart the suite and any local jobs (running under + background or at) will be left running. + +For example in the following configuration any suites running on +``foo`` will attempt to restart on ``pub`` whereas any suites +running on ``bar`` will stop immediately, making no attempt to restart. + +.. code-block:: cylc + + [suite servers] + run hosts = pub + condemned hosts = foo, bar! + +To prevent large numbers of suites attempting to restart simultaneously the +``auto restart delay`` setting defines a period of time in seconds. +Suites will wait for a random period of time between zero and +``auto restart delay`` seconds before attempting to stop and restart. 
+ +At present the auto shutdown-restart functionality can only operate provided +that the user hasn't specified any behaviour which is not preserved by +``cylc restart`` (e.g. user specified hold point or run mode). This +caveat will be removed in a future version, currently Cylc will not attempt to +auto shutdown-restart suites which meet this criterion but will log a critical +error message to alert the user. + +See the ``[suite servers]`` configuration section +(:ref:`global-suite-servers`) for more details. + + +.. [3] Late notification of clock-triggered tasks is not very useful in + any case because they typically do not depend on other tasks, and as + such they can often trigger on time even if the suite is delayed to + the point that downstream tasks are late due to their dependence on + previous-cycle tasks that are delayed. diff --git a/doc/src/screenshots.rst b/doc/src/screenshots.rst new file mode 100644 index 00000000000..7598a694cf7 --- /dev/null +++ b/doc/src/screenshots.rst @@ -0,0 +1,30 @@ +Cylc Screenshots +================ + +.. _fig-gcylc-1: + +.. figure:: graphics/png/orig/gcylc-graph-and-dot-views.png + :align: center + + gcylc graph and dot views. + +.. _fig-gcylc-2: + +.. figure:: graphics/png/orig/gcylc-text-view.png + :align: center + + gcylc text view. + +.. _fig-gscan: + +.. figure:: graphics/png/orig/gscan.png + :align: center + + gscan multi-suite state summary GUI. + +.. _fig-ecox-1: + +.. figure:: graphics/png/orig/ecox-1.png + :align: center + + A large-ish suite graphed by cylc. diff --git a/doc/src/suite-config.rst b/doc/src/suite-config.rst new file mode 100644 index 00000000000..4303bdf868d --- /dev/null +++ b/doc/src/suite-config.rst @@ -0,0 +1,3772 @@ +.. _SuiteDefinition: + +Suite Configuration +=================== + +Cylc suites are defined in structured, validated, *suite.rc* files +that concisely specify the properties of, and the relationships +between, the various tasks managed by the suite. 
This section of the +User Guide deals with the format and content of the suite.rc file, +including task definition. Task implementation - what's required of the +real commands, scripts, or programs that do the processing that the +tasks represent - is covered in :ref:`TaskImplementation`; and +task job submission - how tasks are submitted to run - is +in :ref:`TaskJobSubmission`. + + +.. _SuiteDefinitionDirectories: + +Suite Configuration Directories +------------------------------- + +A cylc *suite configuration directory* contains: + +- **A suite.rc file**: this is the suite configuration. + + - And any include-files used in it (see below; may be + kept in sub-directories). + +- **A** ``bin/`` **sub-directory** (optional) + + - For scripts and executables that implement, or are + used by, suite tasks. + - Automatically added to ``$PATH`` in task + execution environments. + - Alternatively, tasks can call external + commands, scripts, or programs; or they can be scripted + entirely within the suite.rc file. + +- **A** ``lib/python/`` **sub-directory** (optional) + + - For custom job submission modules + (see :ref:`CustomJobSubmissionMethods`) + and local Python modules imported by custom Jinja2 filters, + tests and globals (see :ref:`CustomJinja2Filters`). + +- **Any other sub-directories and files** - documentation, + control files, etc. (optional) + + - Holding everything in one place makes proper suite + revision control possible. + - Portable access to files here, for running tasks, is + provided through ``$CYLC_SUITE_DEF_PATH`` + (see :ref:`TaskExecutionEnvironment`). + - Ignored by cylc, but the entire suite configuration + directory tree is copied when you copy a + suite using cylc commands. + +A typical example: + +.. code-block:: bash + + /path/to/my/suite # suite configuration directory + suite.rc # THE SUITE CONFIGURATION FILE + bin/ # scripts and executables used by tasks + foo.sh + bar.sh + ... 
+ # (OPTIONAL) any other suite-related files, for example: + inc/ # suite.rc include-files + nwp-tasks.rc + globals.rc + ... + doc/ # documentation + control/ # control files + ancil/ # ancillary files + ... + + +.. _SuiteRCFile: + +Suite.rc File Overview +---------------------- + +Suite.rc files are an extended-INI format with section nesting. + +Embedded template processor expressions may also be used in the file, to +programmatically generate the final suite configuration seen by +cylc. Currently the `Jinja2 <https://jinja.palletsprojects.com/>`_ and +`EmPy <http://www.alcyone.com/software/empy>`_ template processors are +supported; see :ref:`Jinja` and :ref:`EmPylabel` for examples. In the future +cylc may provide a plug-in interface to allow use of other template +engines too. + + +.. _Syntax: + +Syntax +^^^^^^ + +The following defines legal suite.rc syntax: + +- **Items** are of the form ``item = value``. +- **[Section]** headings are enclosed in square brackets. +- **Sub-section [[nesting]]** is defined by repeated square brackets. + + - Sections are **closed** by the next section heading. + +- **Comments** (line and trailing) follow a hash character: ``#`` +- **List values** are comma-separated. +- **Single-line string values** can be single-, double-, or un-quoted. +- **Multi-line string values** are triple-quoted (using + single or double quote characters). +- **Boolean values** are capitalized: True, False. +- **Leading and trailing whitespace** is ignored. +- **Indentation** is optional but should be used for clarity. +- **Continuation lines** follow a trailing backslash: ``\`` +- **Duplicate sections** add their items to those previously + defined under the same section. +- **Duplicate items** override, *except for dependency + ``graph`` strings, which are additive*. +- **Include-files** ``%include inc/foo.rc`` can be + used as a verbatim inlining mechanism. 
+ +Suites that embed templating code (see :ref:`Jinja` and :ref:`EmPylabel`) must +process to raw suite.rc syntax. 
+ + +Include-Files +^^^^^^^^^^^^^ + +Cylc has native support for suite.rc include-files, which may help to +organize large suites. Inclusion boundaries are completely arbitrary - +you can think of include-files as chunks of the suite.rc file simply +cut-and-pasted into another file. Include-files may be included +multiple times in the same file, and even nested. Include-file paths +can be specified portably relative to the suite configuration directory, +e.g.: + +.. code-block:: cylc + + # include the file $CYLC_SUITE_DEF_PATH/inc/foo.rc: + %include inc/foo.rc + + +Editing Temporarily Inlined Suites +"""""""""""""""""""""""""""""""""" + +Cylc's native file inclusion mechanism supports optional inlined +editing: + +.. code-block:: bash + + $ cylc edit --inline SUITE + +The suite will be split back into its constituent include-files when you +exit the edit session. While editing, the inlined file becomes the +official suite configuration so that changes take effect whenever you save +the file. See ``cylc prep edit --help`` for more information. + + +Include-Files via Jinja2 +"""""""""""""""""""""""" + +Jinja2 (:ref:`Jinja`) also has template inclusion functionality. + + +.. _SyntaxHighlighting: + +Syntax Highlighting For Suite Configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Cylc comes with syntax files for a number of text editors: + +.. code-block:: bash + + /etc/syntax/cylc.vim # vim + /etc/syntax/cylc-mode.el # emacs + /etc/syntax/cylc.lang # gedit (and other gtksourceview programs) + /etc/syntax/cylc.xml # kate + +Refer to comments at the top of each file to see how to use them. + + +Gross File Structure +^^^^^^^^^^^^^^^^^^^^ + +Cylc suite.rc files consist of a suite title and description followed by +configuration items grouped under several top level section headings: + +- **[cylc]** - *non task-specific suite configuration* +- **[scheduling]** - *determines when tasks are ready to run* + + - tasks with special behaviour, e.g. 
clock-trigger tasks + - the dependency graph, which defines the relationships + between tasks + +- **[runtime]** - *determines how, where, and what to + execute when tasks are ready* + + - script, environment, job submission, remote hosting, etc. + - suite-wide defaults in the *root* namespace + - a nested family hierarchy with common properties + inherited by related tasks + +- **[visualization]** - suite graph styling + + +.. _Validation: + +Validation +^^^^^^^^^^ + +Cylc suite.rc files are automatically validated against a specification +that defines all legal entries, values, options, and defaults. This +detects formatting errors, typographic errors, illegal items and illegal +values prior to run time. Some values are complex strings that require +further parsing by cylc to determine their correctness (this is also +done during validation). All legal entries are documented in +(:ref:`SuiteRCReference`). + +The validator reports the line numbers of detected errors. Here's an +example showing a section heading with a missing right bracket: + +.. code-block:: bash + + $ cylc validate my.suite + [[special tasks] + 'Section bracket mismatch, line 19' + +If the suite.rc file uses include-files ``cylc view`` will +show an inlined copy of the suite with correct line numbers +(you can also edit suites in a temporarily inlined state with +``cylc edit --inline``). + +Validation does not check the validity of chosen batch systems. + +.. todo:: + + This is to allow users to extend cylc with their own job submission + methods, which are by definition unknown to the suite.rc spec. + + +.. _ConfiguringScheduling: + +Scheduling - Dependency Graphs +------------------------------ + +The ``[scheduling]`` section of a suite.rc file defines the +relationships between tasks in a suite - the information that allows +cylc to determine when tasks are ready to run. The most important +component of this is the suite dependency graph. 
Cylc graph notation +makes clear textual graph representations that are very concise because +sections of the graph that repeat at different hours of the day, say, +only have to be defined once. Here's an example with dependencies that +vary depending on the particular cycle point: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 20200401 + final cycle point = 20200405 + [[dependencies]] + [[[T00,T06,T12,T18]]] # validity (hours) + graph = """ + A => B & C # B and C trigger off A + A[-PT6H] => A # Model A restart trigger + """ + [[[T06,T18]]] # hours + graph = "C => X" + +:numref:`fig-dep-eg-1` shows the complete suite.rc listing alongside +the suite graph. This is a complete, valid, runnable suite (it will +use default task runtime properties such as ``script``). + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-dep-eg-1: + + .. figure:: graphics/png/orig/dep-eg-1.png + :align: center + + .. container:: caption + + Example Suite + + .. code-block:: cylc + + [meta] + title = "Dependency Example 1" + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 20200401 + final cycle point = 20200405 + [[dependencies]] + [[[T00,T06,T12,T18]]] # validity (hours) + graph = """ + A => B & C # B and C trigger off A + A[-PT6H] => A # Model A restart trigger + """ + [[[T06,T18]]] # hours + graph = "C => X" + [visualization] + initial cycle point = 20200401 + final cycle point = 20200401T06 + [[node attributes]] + X = "color=red" + + +Graph String Syntax +^^^^^^^^^^^^^^^^^^^ + +Multiline graph strings may contain: + +- **blank lines** +- **arbitrary white space** +- **internal comments**: following the ``#`` character +- **conditional task trigger expressions** - see below. 
+ + +Interpreting Graph Strings +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Suite dependency graphs can be broken down into pairs in which the left +side (which may be a single task or family, or several that are +conditionally related) defines a trigger for the task or family on the +right. For instance the "word graph" *C triggers off B which +triggers off A* can be deconstructed into pairs *C triggers off B* +and *B triggers off A*. In this section we use only the default +trigger type, which is to trigger off the upstream task succeeding; +see :ref:`TriggerTypes` for other available triggers. + +In the case of cycling tasks, the triggers defined by a graph string are +valid for cycle points matching the list of hours specified for the +graph section. For example this graph: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T12]]] + graph = "A => B" + +implies that B triggers off A for cycle points in which the hour matches ``00`` +or ``12``. + +To define inter-cycle dependencies, attach an offset indicator to the +left side of a pair: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T12]]] + graph = "A[-PT12H] => B" + +This means B[time] triggers off A[time-PT12H] (12 hours before) for cycle +points with hours matching ``00`` or ``12``. ``time`` is implicit because +this keeps graphs clean and concise, given that the +majority of tasks will typically +depend only on others with the same cycle point. Cycle point offsets can only +appear on the left of a pair, because pairs define triggers for the right +task at cycle point ``time``. However, ``A => B[-PT6H]``, which is +illegal, can be reformulated as a *future trigger* +``A[+PT6H] => B`` (see :ref:`InterCyclePointTriggers`). It is also +possible to combine multiple offsets within a cycle point offset e.g. + +.. 
code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T12]]] + graph = "A[-P1D-PT12H] => B" + +This means that B[time] triggers off A[time-P1D-PT12H] (1 day and 12 hours +before). + +Triggers can be chained together. This graph: + +.. code-block:: cylc + + graph = """A => B # B triggers off A + B => C # C triggers off B""" + +is equivalent to this: + +.. code-block:: cylc + + graph = "A => B => C" + +*Each trigger in the graph must be unique* but *the same task +can appear in multiple pairs or chains*. Separately defined triggers +for the same task have an AND relationship. So this: + +.. code-block:: cylc + + graph = """A => X # X triggers off A + B => X # X also triggers off B""" + +is equivalent to this: + +.. code-block:: cylc + + graph = "A & B => X" # X triggers off A AND B + +In summary, the branching tree structure of a dependency graph can +be partitioned into lines (in the suite.rc graph string) of pairs +or chains, in any way you like, with liberal use of internal white space +and comments to make the graph structure as clear as possible. + +.. code-block:: cylc + + # B triggers if A succeeds, then C and D trigger if B succeeds: + graph = "A => B => C & D" + # which is equivalent to this: + graph = """A => B => C + B => D""" + # and to this: + graph = """A => B => D + B => C""" + # and to this: + graph = """A => B + B => C + B => D""" + # and it can even be written like this: + graph = """A => B # blank line follows: + + B => C # comment ... + B => D""" + + +Splitting Up Long Graph Lines +""""""""""""""""""""""""""""" + +It is not necessary to use the general line continuation marker +``\`` to split long graph lines. Just break at dependency arrows, +or split long chains into smaller ones. This graph: + +.. code-block:: cylc + + graph = "A => B => C" + +is equivalent to this: + +.. code-block:: cylc + + graph = """A => B => + C""" + +and also to this: + +.. code-block:: cylc + + graph = """A => B + B => C""" + + +..
_GraphTypes: + +Graph Types +^^^^^^^^^^^ + +A suite configuration can contain multiple graph strings that are combined +to generate the final graph. + + +One-off (Non-Cycling) +""""""""""""""""""""" + +:numref:`fig-test1` shows a small suite of one-off non-cycling +tasks; these all share a single cycle point (``1``) and don't spawn +successors (once they're all finished the suite just exits). The integer +``1`` attached to each graph node is just an arbitrary label here. + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-test1: + + .. figure:: graphics/png/orig/test1.png + :align: center + + .. container:: caption + + One-off (Non-Cycling) Tasks. + + .. code-block:: cylc + + [meta] + title = some one-off tasks + [scheduling] + [[dependencies]] + graph = "foo => bar & baz => qux" + + +Cycling Graphs +"""""""""""""" + +For cycling tasks the graph section heading defines a sequence of cycle points +for which the subsequent graph section is valid. :numref:`fig-test2` +shows a small suite of cycling tasks. + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-test2: + + .. figure:: graphics/png/orig/test2.png + :align: center + + .. container:: caption + + Cycling Tasks. + + .. code-block:: cylc + + [meta] + title = some cycling tasks + # (no dependence between cycle points) + [scheduling] + [[dependencies]] + [[[T00,T12]]] + graph = "foo => bar & baz => qux" + + +Graph Section Headings +^^^^^^^^^^^^^^^^^^^^^^ + +Graph section headings define recurrence expressions, the graph within a graph +section heading defines a workflow at each point of the recurrence. For +example in the following scenario: + +.. 
code-block:: cylc + + [scheduling] + [[dependencies]] + [[[ T06 ]]] # A graph section heading + graph = foo => bar + +``T06`` means "Run every day starting at 06:00 after the +initial cycle point". Cylc allows you to start (or end) at any particular +time, repeat at whatever frequency you like, and even optionally limit the +number of repetitions. + +Graph section headings can also be used with integer cycling; see +:ref:`IntegerCycling`. + + +Syntax Rules +"""""""""""" + +Date-time cycling information is made up of a starting *date-time*, an +*interval*, and an optional *limit*. + +The time is assumed to be in the local time zone unless you set +``[cylc]cycle point time zone`` or ``[cylc]UTC mode``. The +calendar is assumed to be the proleptic Gregorian calendar unless you set +``[scheduling]cycling mode``. + +The syntax for representations is based on the ISO 8601 date-time standard. +This includes the representation of *date-time* and *interval*. What we +define for cylc's cycling syntax is our own optionally-heavily-condensed form +of ISO 8601 recurrence syntax. The most common full form is: +``R[limit?]/[date-time]/[interval]``. However, we allow omitting +information that can be guessed from the context (rules below). This means +that it can be written as: + +.. code-block:: none + + R[limit?]/[date-time] + R[limit?]//[interval] + [date-time]/[interval] + R[limit?] # Special limit of 1 case + [date-time] + [interval] + +with example graph headings for each form being: + +.. code-block:: cylc + + [[[ R5/T00 ]]] # Run 5 times at 00:00 every day + [[[ R//PT1H ]]] # Run every hour (Note the R// is redundant) + [[[ 20000101T00Z/P1D ]]] # Run every day starting at 00:00 1st Jan 2000 + [[[ R1 ]]] # Run once at the initial cycle point + [[[ R1/20000101T00Z ]]] # Run once at 00:00 1st Jan 2000 + [[[ P1Y ]]] # Run every year + +.. note:: + + ``T00`` is an example of ``[date-time]``, with an + inferred 1 day period and no limit.
+ +Where some or all *date-time* information is omitted, it is inferred to +be relative to the initial date-time cycle point. For example, ``T00`` +by itself would mean the next occurrence of midnight that follows, or is, the +initial cycle point. Entering ``+PT6H`` would mean 6 hours after the +initial cycle point. Entering ``-P1D`` would mean 1 day before the +initial cycle point. Entering no information for the *date-time* implies +the initial cycle point date-time itself. + +Where the *interval* is omitted and some (but not all) *date-time* +information is omitted, it is inferred to be a single unit above +the largest given specific *date-time* unit. For example, the largest +given specific unit in ``T00`` is hours, so the inferred interval is +1 day (daily), ``P1D``. + +Where the *limit* is omitted, unlimited cycling is assumed. This will be +bounded by the final cycle point's date-time if given. + +Another supported form of ISO 8601 recurrence is: +``R[limit?]/[interval]/[date-time]``. This form uses the +*date-time* as the end of the cycling sequence rather than the start. +For example, ``R3/P5D/20140430T06`` means: + +.. code-block:: none + + 20140420T06 + 20140425T06 + 20140430T06 + +This kind of form can be used for specifying special behaviour near the end of +the suite, at the final cycle point's date-time. We can also represent this in +cylc with a collapsed form: + +.. code-block:: none + + R[limit?]/[interval] + R[limit?]//[date-time] + [interval]/[date-time] + +So, for example, you can write: + +.. code-block:: cylc + + [[[ R1//+P0D ]]] # Run once at the final cycle point + [[[ R5/P1D ]]] # Run 5 times, every 1 day, ending at the final + # cycle point + [[[ P2W/T00 ]]] # Run every 2 weeks ending at 00:00 following + # the final cycle point + [[[ R//T00 ]]] # Run every 1 day ending at 00:00 following the + # final cycle point + + +.. 
_referencing-the-initial-and-final-cycle-points: + +Referencing The Initial And Final Cycle Points +"""""""""""""""""""""""""""""""""""""""""""""" + +For convenience the caret and dollar symbols may be used as shorthand for the +initial and final cycle points. Using this shorthand you can write: + +.. code-block:: cylc + + [[[ R1/^+PT12H ]]] # Repeat once 12 hours after the initial cycle point + # R[limit]/[date-time] + # Equivalent to [[[ R1/+PT12H ]]] + [[[ R1/$ ]]] # Repeat once at the final cycle point + # R[limit]/[date-time] + # Equivalent to [[[ R1//+P0D ]]] + [[[ $-P2D/PT3H ]]] # Repeat 3 hourly starting two days before the + # final cycle point + # [date-time]/[interval] + +.. note:: + + There can be multiple ways to write the same headings, for instance + the following all run once at the final cycle point: + + .. code-block:: cylc + + [[[ R1/P0Y ]]] # R[limit]/[interval] + [[[ R1/P0Y/$ ]]] # R[limit]/[interval]/[date-time] + [[[ R1/$ ]]] # R[limit]/[date-time] + + +.. _excluding-dates: + +Excluding Dates +""""""""""""""" + +Date-times can be excluded from a recurrence by an exclamation mark; for +example, ``[[[ PT1D!20000101 ]]]`` means run daily except on the +first of January 2000. + +This syntax can be used to exclude one or multiple date-times from a +recurrence. Multiple date-times are excluded using the syntax +``[[[ PT1D!(20000101,20000102,...) ]]]``. All date-times listed within +the parentheses after the exclamation mark will be excluded. + +.. note:: + + The ``^`` and ``$`` symbols (shorthand for the initial + and final cycle points) are both date-times so ``[[[ T12!$-PT1D ]]]`` + is valid. + +If using a run limit in combination with an exclusion, the heading might not +run the number of times specified in the limit. For example in the following +suite ``foo`` will only run once as its second run has been excluded. + +..
code-block:: cylc + + [scheduling] + initial cycle point = 20000101T00Z + final cycle point = 20000105T00Z + [[dependencies]] + [[[ R2/P1D!20000102 ]]] + graph = foo + + +Advanced exclusion syntax +""""""""""""""""""""""""" + +In addition to excluding isolated date-time points or lists of date-time points +from recurrences, exclusions themselves may be date-time recurrence sequences. +Any partial date-time or sequence given after the exclamation mark will be +excluded from the main sequence. + +For example, partial date-times can be excluded using the syntax: + +.. code-block:: cylc + + [[[ PT1H ! T12 ]]] # Run hourly but not at 12:00 from the initial + # cycle point. + [[[ T-00 ! (T00, T06, T12, T18) ]]] # Run hourly but not at 00:00, 06:00, + # 12:00, 18:00. + [[[ PT5M ! T-15 ]]] # Run 5-minutely but not at 15 minutes past the + # hour from the initial cycle point. + [[[ T00 ! W-1T00 ]]] # Run daily at 00:00 except on Mondays. + +It is also valid to use sequences for exclusions. For example: + +.. code-block:: cylc + + [[[ PT1H ! PT6H ]]] # Run hourly from the initial cycle point but + # not 6-hourly from the initial cycle point. + [[[ T-00 ! PT6H ]]] # Run hourly on the hour but not 6-hourly + # on the hour. + # Same as [[[ T-00 ! T-00/PT6H ]]] (T-00 context is implied) + # Same as [[[ T-00 ! (T00, T06, T12, T18) ]]] + # Same as [[[ PT1H ! (T00, T06, T12, T18) ]]] Initial cycle point dependent + + [[[ T12 ! T12/P15D ]]] # Run daily at 12:00 except every 15th day. + + [[[ R/^/P1H ! R5/20000101T00/P1D ]]] # Any valid recurrence may be used to + # determine exclusions. This example + # translates to: Repeat every hour from + # the initial cycle point, but exclude + # 00:00 for 5 days from the 1st January + # 2000. + +You can combine exclusion sequences and single point exclusions within a +comma separated list enclosed in parentheses: + +.. code-block:: cylc + + [[[ T-00 ! 
(20000101T07, PT2H) ]]] # Run hourly on the hour but not at 07:00 + # on the 1st Jan, 2000 and not 2-hourly + # on the hour. + + +.. _HowMultipleGraphStringsCombine: + +How Multiple Graph Strings Combine +"""""""""""""""""""""""""""""""""" + +For a cycling graph with multiple validity sections for different +hours of the day, the different sections *add* to generate the +complete graph. Different graph sections can overlap (i.e. the same +hours may appear in multiple section headings) and the same tasks may +appear in multiple sections, but individual dependencies should be +unique across the entire graph. For example, the following graph defines +a duplicate prerequisite for task C: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T06,T12,T18]]] + graph = "A => B => C" + [[[T06,T18]]] + graph = "B => C => X" + # duplicate prerequisite: B => C already defined at T06, T18 + +This does not affect scheduling, but for the sake of clarity and brevity +the graph should be written like this: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T06,T12,T18]]] + graph = "A => B => C" + [[[T06,T18]]] + # X triggers off C only at 6 and 18 hours + graph = "C => X" + + +.. _AdvancedCycling: + +Advanced Examples +""""""""""""""""" + +The following examples show the various ways of writing graph headings in cylc. + +.. 
code-block:: cylc + + [[[ R1 ]]] # Run once at the initial cycle point + [[[ P1D ]]] # Run every day starting at the initial cycle point + [[[ PT5M ]]] # Run every 5 minutes starting at the initial cycle + # point + [[[ T00/P2W ]]] # Run every 2 weeks starting at 00:00 after the + # initial cycle point + [[[ +P5D/P1M ]]] # Run every month, starting 5 days after the initial + # cycle point + [[[ R1/T06 ]]] # Run once at 06:00 after the initial cycle point + [[[ R1/P0Y ]]] # Run once at the final cycle point + [[[ R1/$ ]]] # Run once at the final cycle point (alternative + # form) + [[[ R1/$-P3D ]]] # Run once three days before the final cycle point + [[[ R3/T0830 ]]] # Run 3 times, every day at 08:30 after the initial + # cycle point + [[[ R3/01T00 ]]] # Run 3 times, every month at 00:00 on the first + # of the month after the initial cycle point + [[[ R5/W-1/P1M ]]] # Run 5 times, every month starting on Monday + # following the initial cycle point + [[[ T00!^ ]]] # Run at the first occurrence of T00 that isn't the + # initial cycle point + [[[ PT1D!20000101 ]]] # Run every day excluding 1st Jan 2000 + [[[ 20140201T06/P1D ]]] # Run every day starting at 20140201T06 + [[[ R1/min(T00,T06,T12,T18) ]]] # Run once at the first instance + # of either T00, T06, T12 or T18 + # starting at the initial cycle + # point + + +.. _AdvancedStartingUp: + +Advanced Starting Up +"""""""""""""""""""" + +Dependencies that are only valid at the initial cycle point can be written +using the ``R1`` notation (e.g. as in :ref:`initial-non-repeating-r1-tasks`). +For example: + +.. code-block:: cylc + + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 20130808T00 + final cycle point = 20130812T00 + [[dependencies]] + [[[R1]]] + graph = "prep => foo" + [[[T00]]] + graph = "foo[-P1D] => foo => bar" + +In the example above, ``R1`` implies ``R1/20130808T00``, so +``prep`` only runs once at that cycle point (the initial cycle point).
+At that cycle point, ``foo`` will have a dependence on +``prep`` - but not at subsequent cycle points. + +However, it is possible to have a suite that has multiple effective initial +cycles - for example, one starting at ``T00`` and another starting +at ``T12``. What if they need to share an initial task? + +Let's suppose that we add the following section to the suite example above: + +.. code-block:: cylc + + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 20130808T00 + final cycle point = 20130812T00 + [[dependencies]] + [[[R1]]] + graph = "prep => foo" + [[[T00]]] + graph = "foo[-P1D] => foo => bar" + [[[T12]]] + graph = "baz[-P1D] => baz => qux" + +We'll also say that there should be a starting dependence between +``prep`` and our new task ``baz`` - but we still want to have +a single ``prep`` task, at a single cycle. + +We can write this using a special case of the ``task[-interval]`` syntax - +if the interval is null, this implies the task at the initial cycle point. + +For example, we can write our suite like :numref:`fig-test4`. + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-test4: + + .. figure:: graphics/png/orig/test4.png + :align: center + + .. container:: caption + + Staggered Start Suite + + .. 
code-block:: cylc + + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 20130808T00 + final cycle point = 20130812T00 + [[dependencies]] + [[[R1]]] + graph = "prep" + [[[R1/T00]]] + # ^ implies the initial cycle point: + graph = "prep[^] => foo" + [[[R1/T12]]] + # ^ is initial cycle point, as above: + graph = "prep[^] => baz" + [[[T00]]] + graph = "foo[-P1D] => foo => bar" + [[[T12]]] + graph = "baz[-P1D] => baz => qux" + [visualization] + initial cycle point = 20130808T00 + final cycle point = 20130810T00 + [[node attributes]] + foo = "color=red" + bar = "color=orange" + baz = "color=green" + qux = "color=blue" + + +This neatly expresses what we want - a task running at the initial cycle point +that has one-off dependencies with other task sets at different cycles. + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-test5: + + .. figure:: graphics/png/orig/test5.png + :align: center + + .. container:: caption + + Restricted First Cycle Point Suite + + .. code-block:: cylc + + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 20130808T00 + final cycle point = 20130808T18 + [[dependencies]] + [[[R1]]] + graph = "setup_foo => foo" + [[[+PT6H/PT6H]]] + graph = """ + foo[-PT6H] => foo + foo => bar + """ + [visualization] + initial cycle point = 20130808T00 + final cycle point = 20130808T18 + [[node attributes]] + foo = "color=red" + bar = "color=orange" + + +A different kind of requirement is displayed in :numref:`fig-test5`. +Usually, we want to specify additional tasks and dependencies at the initial +cycle point. What if we want our first cycle point to be entirely special, +with some tasks missing compared to subsequent cycle points? + +In :numref:`fig-test5`, ``bar`` will not be run at the initial +cycle point, but will still run at subsequent cycle points. 
+``[[[+PT6H/PT6H]]]`` means start at ``+PT6H`` (6 hours after +the initial cycle point) and then repeat every ``PT6H`` (6 hours). + +Some suites may have staggered start-up sequences where different tasks need +running once but only at specific cycle points, potentially due to differing +data sources at different cycle points with different possible initial cycle +points. To allow this cylc provides a ``min( )`` function that can be +used as follows: + +.. code-block:: cylc + + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 20100101T03 + [[dependencies]] + [[[R1/min(T00,T12)]]] + graph = "prep1 => foo" + [[[R1/min(T06,T18)]]] + graph = "prep2 => foo" + [[[T00,T06,T12,T18]]] + graph = "foo => bar" + + +In this example the initial cycle point is ``20100101T03``, so the +``prep1`` task will run once at ``20100101T12`` and the +``prep2`` task will run once at ``20100101T06`` as these are +the first cycle points after the initial cycle point in the respective +``min( )`` entries. + + +.. _IntegerCycling: + +Integer Cycling +""""""""""""""" + +In addition to non-repeating and date-time cycling workflows, cylc can do +integer cycling for repeating workflows that are not date-time based. + +To construct an integer cycling suite, set +``[scheduling]cycling mode = integer``, and specify integer values for +the initial and (optional) final cycle points. The notation for intervals, +offsets, and recurrences (sequences) is similar to the date-time cycling +notation, except for the simple integer values. + +The full integer recurrence expressions supported are: + +- ``Rn/start-point/interval # e.g. R3/1/P2`` +- ``Rn/interval/end-point # e.g. R3/P2/9`` + +But, as for date-time cycling, sequence start and end points can be omitted +where suite initial and final cycle points can be assumed. Some examples: + +.. code-block:: cylc + + [[[ R1 ]]] # Run once at the initial cycle point + # (short for R1/initial-point/?) 
+ [[[ P1 ]]] # Repeat with step 1 from the initial cycle point + # (short for R/initial-point/P1) + [[[ P5 ]]] # Repeat with step 5 from the initial cycle point + # (short for R/initial-point/P5) + [[[ R2//P2 ]]] # Run twice with step 2 from the initial cycle point + # (short for R2/initial-point/P2) + [[[ R/+P1/P2 ]]] # Repeat with step 2, from 1 after the initial cycle point + [[[ R2/P2 ]]] # Run twice with step 2, to the final cycle point + # (short for R2/P2/final-point) + [[[ R1/P0 ]]] # Run once at the final cycle point + # (short for R1/P0/final-point) + + +Example +''''''' + +The tutorial illustrates integer cycling in :ref:`TutInteger`, and +``/etc/examples/satellite/`` is a +self-contained example of a realistic use for integer cycling. It simulates +the processing of incoming satellite data: each new dataset arrives after a +random (as far as the suite is concerned) interval, and is labeled by an +arbitrary (as far as the suite is concerned) ID in the filename. A task called +``get_data`` at the top of the repeating workflow waits on the next +dataset and, when it finds one, moves it to a cycle-point-specific shared +workspace for processing by the downstream tasks. When ``get_data.1`` +finishes, ``get_data.2`` triggers and begins waiting for the next +dataset at the same time as the downstream tasks in cycle point 1 are +processing the first one, and so on. In this way multiple datasets can be +processed at once if they happen to come in quickly. A single shutdown task +runs at the end of the final cycle to collate results. The suite graph is +shown in :numref:`fig-satellite`. + +.. _fig-satellite: + +.. figure:: graphics/png/orig/satellite.png + :align: center + + The ``etc/examples/satellite`` integer suite.
+ + +Advanced Integer Cycling Syntax +''''''''''''''''''''''''''''''' + +The same syntax used to reference the initial and final cycle points +(introduced in :ref:`referencing-the-initial-and-final-cycle-points`) for +use with date-time cycling can also be used for integer cycling. For +example you can write: + +.. code-block:: cylc + + [[[ R1/^ ]]] # Run once at the initial cycle point + [[[ R1/$ ]]] # Run once at the final cycle point + [[[ R3/^/P2 ]]] # Run three times with step two starting at the + # initial cycle point + +Likewise the syntax introduced in :ref:`excluding-dates` for excluding +a particular point from a recurrence also works for integer cycling. For +example: + +.. code-block:: cylc + + [[[ R/P4!8 ]]] # Run with step 4, to the final cycle point + # but not at point 8 + [[[ R3/3/P2!5 ]]] # Run with step 2 from point 3 but not at + # point 5 + [[[ R/+P1/P6!14 ]]] # Run with step 6 from 1 step after the + # initial cycle point but not at point 14 + +Multiple integer exclusions are also valid in the same way as the syntax +in :ref:`excluding-dates`. Integer exclusions may be a list of single +integer points, an integer sequence, or a combination of both: + +.. code-block:: cylc + + [[[ R/P1!(2,3,7) ]]] # Run with step 1 to the final cycle point, + # but not at points 2, 3, or 7. + [[[ P1 ! P2 ]]] # Run with step 1 from the initial to final + # cycle point, skipping every other step from + # the initial cycle point. + [[[ P1 ! +P1/P2 ]]] # Run with step 1 from the initial cycle point, + # excluding every other step beginning one step + # after the initial cycle point. + [[[ P1 !(P2,6,8) ]]] # Run with step 1 from the initial cycle point, + # excluding every other step, and also excluding + # steps 6 and 8. + + +.. _TriggerTypes: + +Task Triggering +^^^^^^^^^^^^^^^ + +A task is said to "trigger" when it submits its job to run, as soon as all of +its dependencies (also known as its separate "triggers") are met. 
Tasks can +be made to trigger off of the state of other tasks (indicated by a +``:state`` qualifier on the upstream task (or family) +name in the graph), off the clock, and off arbitrary external events. + +External triggering is relatively more complicated, and is documented +separately in :ref:`External Triggers`. + + +Success Triggers +"""""""""""""""" + +The default, with no trigger type specified, is to trigger off the +upstream task succeeding: + +.. code-block:: cylc + + # B triggers if A SUCCEEDS: + graph = "A => B" + +For consistency and completeness, however, the success trigger can be +explicit: + +.. code-block:: cylc + + # B triggers if A SUCCEEDS: + graph = "A => B" + # or: + graph = "A:succeed => B" + + +Failure Triggers +"""""""""""""""" + +To trigger off the upstream task reporting failure: + +.. code-block:: cylc + + # B triggers if A FAILS: + graph = "A:fail => B" + +*Suicide triggers* can be used to remove task ``B`` here if +``A`` does not fail, see :ref:`SuicideTriggers`. + + +Start Triggers +"""""""""""""" + +To trigger off the upstream task starting to execute: + +.. code-block:: cylc + + # B triggers if A STARTS EXECUTING: + graph = "A:start => B" + +This can be used to trigger tasks that monitor other tasks once they +(the target tasks) start executing. Consider a long-running forecast model, +for instance, that generates a sequence of output files as it runs. A +postprocessing task could be launched with a start trigger on the model +(``model:start => post``) to process the model output as it +becomes available.
Note, however, that there are several alternative +ways of handling this scenario: both tasks could be triggered at the +same time (``foo => model & post``), but depending on +external queue delays this could result in the monitoring task starting +to execute first; or a different postprocessing task could be +triggered off a message output for each data file +(``model:out1 => post1`` etc.; see :ref:`MessageTriggers`), but this +may not be practical if the +number of output files is large or if it is difficult to add cylc +messaging calls to the model. + + +Finish Triggers +""""""""""""""" + +To trigger off the upstream task succeeding or failing, i.e. finishing +one way or the other: + +.. code-block:: cylc + + # B triggers if A either SUCCEEDS or FAILS: + graph = "A | A:fail => B" + # or + graph = "A:finish => B" + + +.. _MessageTriggers: + +Message Triggers +"""""""""""""""" + +Tasks can also trigger off custom output messages. These must be registered in +the ``[runtime]`` section of the emitting task, and reported using the +``cylc message`` command in task scripting. The graph trigger notation +refers to the item name of the registered output message. +The example suite ``/etc/examples/message-triggers`` illustrates +message triggering. + +.. literalinclude:: ../../etc/examples/message-triggers/suite.rc + :language: cylc + + +Job Submission Triggers +""""""""""""""""""""""" + +It is also possible to trigger off a task submitting, or failing to submit: + +.. code-block:: cylc + + # B triggers if A submits successfully: + graph = "A:submit => B" + # D triggers if C fails to submit successfully: + graph = "C:submit-fail => D" + +A possible use case for submit-fail triggers: if a task goes into the +submit-failed state, possibly after several job submission retries, +another task that inherits the same runtime but sets a different job +submission method and/or host could be triggered to, in effect, run the +same job on a different platform. 
+ + +Conditional Triggers +"""""""""""""""""""" + +AND operators (``&``) can appear on both sides of an arrow. They +provide a concise alternative to defining multiple triggers separately: + +.. code-block:: cylc + + # 1/ this: + graph = "A & B => C" + # is equivalent to: + graph = """A => C + B => C""" + # 2/ this: + graph = "A => B & C" + # is equivalent to: + graph = """A => B + A => C""" + # 3/ and this: + graph = "A & B => C & D" + # is equivalent to this: + graph = """A => C + B => C + A => D + B => D""" + +OR operators (``|``) which result in true conditional triggers, +can only appear on the left [1]_ : + +.. code-block:: cylc + + # C triggers when either A or B finishes: + graph = "A | B => C" + +Forecasting suites typically have simple conditional +triggering requirements, but any valid conditional expression can be +used, as shown in :numref:`fig-conditional` +(conditional triggers are plotted with open arrow heads). + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-conditional: + + .. figure:: graphics/png/orig/conditional-triggers.png + :align: center + + .. container:: caption + + Conditional triggers, which are plotted with open arrow heads. + + .. code-block:: cylc + + graph = """ + # D triggers if A or (B and C) succeed + A | B & C => D + # just to align the two graph sections + D => W + # Z triggers if (W or X) and Y succeed + (W|X) & Y => Z + """ + + +.. _SuicideTriggers: + +Suicide Triggers +"""""""""""""""" + +Suicide triggers take tasks out of the suite. This can be used for +automated failure recovery. The suite.rc listing and accompanying +graph in :numref:`fig-suicide` show how to define a chain of failure +recovery tasks that trigger if they're needed but +otherwise remove themselves from the +suite (you can run the *AutoRecover.async* example suite to see how +this works). 
The dashed graph edges ending in solid dots indicate +suicide triggers, and the open arrowheads indicate conditional triggers +as usual. Suicide triggers are ignored by default in the graph view, unless +you toggle them on with *View* ``->`` *Options* ``->`` +*Ignore Suicide Triggers*. + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. container:: twocol + + .. container:: image + + .. _fig-suicide: + + .. figure:: graphics/png/orig/suicide.png + :align: center + + .. container:: caption + + Automated failure recovery via suicide triggers. + + .. code-block:: cylc + + [meta] + title = automated failure recovery + description = """ + Model task failure triggers diagnosis + and recovery tasks, which take themselves + out of the suite if model succeeds. Model + post processing triggers off model OR + recovery tasks. + """ + [scheduling] + [[dependencies]] + graph = """ + pre => model + model:fail => diagnose => recover + model => !diagnose & !recover + model | recover => post + """ + [runtime] + [[model]] + # UNCOMMENT TO TEST FAILURE: + # script = /bin/false + +.. note:: + + Multiple suicide triggers combine in the same way as other + triggers, so this: + + .. code-block:: cylc + + foo => !baz + bar => !baz + + is equivalent to this: + + .. code-block:: cylc + + foo & bar => !baz + + i.e. both ``foo`` and ``bar`` must succeed for + ``baz`` to be taken out of the suite. If you really want a task + to be taken out if any one of several events occurs then be careful to + write it that way: + + .. code-block:: cylc + + foo | bar => !baz + +.. warning:: + + A word of warning on the meaning of "bare suicide triggers". Consider + the following suite: + + .. 
code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "foo => !bar" + + Task ``bar`` has a suicide trigger but no normal prerequisites + (a suicide trigger is not a task triggering prerequisite, it is a task + removal prerequisite) so this is entirely equivalent to: + + .. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + foo & bar + foo => !bar + """ + + In other words both tasks will trigger immediately, at the same time, + and then ``bar`` will be removed if ``foo`` succeeds. + +If an active task proxy (currently in the submitted or running states) +is removed from the suite by a suicide trigger, a warning will be logged. + + +.. _FamilyTriggers: + +Family Triggers +""""""""""""""" + +Families defined by the namespace inheritance hierarchy +(:ref:`NIORP`) can be used in the graph to trigger whole groups of +tasks at the same time (e.g. forecast model ensembles and groups of +tasks for processing different observation types at the same time) and +for triggering downstream tasks off families as a whole. Higher level +families, i.e. families of families, can also be used, and are reduced +to the lowest level member tasks. + +.. note:: + + Tasks can also trigger off individual family members if necessary. + +To trigger an entire task family at once: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "foo => FAM" + [runtime] + [[FAM]] # a family (because others inherit from it) + [[m1,m2]] # family members (inherit from namespace FAM) + inherit = FAM + +This is equivalent to: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "foo => m1 & m2" + [runtime] + [[FAM]] + [[m1,m2]] + inherit = FAM + +To trigger other tasks off families we have to specify whether +to trigger off *all members* starting, succeeding, failing, +or finishing, or off *any* members (doing the same). Legal family +triggers are thus: + +..
code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + # all-member triggers: + FAM:start-all => one + FAM:succeed-all => one + FAM:fail-all => one + FAM:finish-all => one + # any-member triggers: + FAM:start-any => one + FAM:succeed-any => one + FAM:fail-any => one + FAM:finish-any => one + """ + +Here's how to trigger downstream processing if one or more family +members succeed, but only after all members have finished (succeeded or +failed): + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + FAM:finish-all & FAM:succeed-any => foo + """ + + +.. _EfficientInterFamilyTriggering: + +Efficient Inter-Family Triggering +""""""""""""""""""""""""""""""""" + +While cylc allows writing dependencies between two families it is important to +consider the number of dependencies this will generate. In the following +example, each member of ``FAM2`` has dependencies pointing at all the +members of ``FAM1``. + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + FAM1:succeed-any => FAM2 + """ + +Expanding this out, you generate ``N * M`` dependencies, where +``N`` is the number of members of ``FAM1`` and ``M`` is +the number of members of ``FAM2``. This can result in high memory use +as the number of members of these families grows, potentially rendering the +suite impractical for running on some systems. + +You can greatly reduce the number of dependencies generated in these situations +by putting dummy tasks in the graph to represent the state of the family you +want to trigger off. For example, if ``FAM2`` should trigger off any +member of ``FAM1`` succeeding you can create a dummy task +``FAM1_succeed_any_marker`` and place a dependency on it as follows: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + FAM1:succeed-any => FAM1_succeed_any_marker => FAM2 + """ + [runtime] + # ... + [[FAM1_succeed_any_marker]] + script = true + # ... 
+ +This graph generates only ``N + M`` dependencies, which takes +significantly less memory and CPU to store and evaluate. + + +.. _InterCyclePointTriggers: + +Inter-Cycle Triggers +"""""""""""""""""""" + +Typically most tasks in a suite will trigger off others in the same +cycle point, but some may depend on others with other cycle points. +This notably applies to warm-cycled forecast models, which depend on +their own previous instances (see below); but other kinds of inter-cycle +dependence are possible too [2]_ . Here's how to express this +kind of relationship in cylc: + +.. code-block:: cylc + + [dependencies] + [[PT6H]] + # B triggers off A in the previous cycle point + graph = "A[-PT6H] => B" + +Inter-cycle and trigger type (or message trigger) notation can be +combined: + +.. code-block:: cylc + + # B triggers if A in the previous cycle point fails: + graph = "A[-PT6H]:fail => B" + +At suite start-up inter-cycle triggers refer to a previous cycle point +that does not exist. This does not cause the dependent task to wait +indefinitely, however, because cylc ignores triggers that reach back +beyond the initial cycle point. That said, the presence of an +inter-cycle trigger does normally imply that something special has to +happen at start-up. If a model depends on its own previous instance for +restart files, for instance, then an initial set of restart files has to be +generated somehow or the first model task will presumably fail with +missing input files. There are several ways to handle this in cylc +using different kinds of one-off (non-cycling) tasks that run at suite +start-up. They are illustrated in :ref:`TutInterCyclePointTriggers`; to +summarize here briefly: + +- ``R1`` tasks (recommended): + + .. 
code-block:: cylc + + [scheduling] + [[dependencies]] + [[[R1]]] + graph = "prep" + [[[R1/T00,R1/T12]]] + graph = "prep[^] => foo" + [[[T00,T12]]] + graph = "foo[-PT12H] => foo => bar" + +``R1``, or ``R1/date-time`` tasks are the recommended way to +specify unusual start up conditions. They allow you to specify a clean +distinction between the dependencies of initial cycles and the dependencies +of the subsequent cycles. + +Initial tasks can be used for real model cold-start processes, whereby a +warm-cycled model at any given cycle point can in principle have its inputs +satisfied by a previous instance of itself, *or* by an initial task with +(nominally) the same cycle point. + +In effect, the ``R1`` task masquerades as the previous-cycle-point trigger +of its associated cycling task. At suite start-up initial tasks will +trigger the first cycling tasks, and thereafter the inter-cycle trigger +will take effect. + +If a task has a dependency on another task in a different cycle point, the +dependency can be written using the ``[offset]`` syntax such as +``[-PT12H]`` in ``foo[-PT12H] => foo``. This means that +``foo`` at the current cycle point depends on a previous instance of +``foo`` at 12 hours before the current cycle point. Unlike the +cycling section headings (e.g. ``[[[T00,T12]]]``), dependencies +assume that relative times are relative to the current cycle point, not the +initial cycle point. + +However, it can be useful to have specific dependencies on tasks at or near +the initial cycle point. You can switch the context of the offset to be +the initial cycle point by using the caret symbol: ``^``. + +For example, you can write ``foo[^]`` to mean foo at the initial +cycle point, and ``foo[^+PT6H]`` to mean foo 6 hours after the initial +cycle point. Usually, this kind of dependency will only apply in a limited +number of cycle points near the start of the suite, so you may want to write +it in ``R1``-based cycling sections. 
Here's the example inter-cycle +``R1`` suite from above again. + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[R1]]] + graph = "prep" + [[[R1/T00,R1/T12]]] + graph = "prep[^] => foo" + [[[T00,T12]]] + graph = "foo[-PT12H] => foo => bar" + +You can see there is a dependence on the initial ``R1`` task +``prep`` for ``foo`` at the first ``T00`` cycle point, +and at the first ``T12`` cycle point. Thereafter, ``foo`` just +depends on its previous (12 hours ago) instance. + +Finally, it is also possible to have a dependency on a task at a specific cycle +point. + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[R1/20200202]]] + graph = "baz[20200101] => qux" + +However, in a long running suite, a repeating cycle should avoid having a +dependency on a task with a specific cycle point (including the initial cycle +point) - as it can currently cause performance issues. In the following example, +all instances of ``qux`` will depend on ``baz.20200101``, which +will never be removed from the task pool: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2010 + [[dependencies]] + # Can cause performance issue! + [[[P1D]]] + graph = "baz[20200101] => qux" + + +.. _SequentialTasks: + +Special Sequential Tasks +"""""""""""""""""""""""" + +Tasks that depend on their own previous-cycle instance can be declared as +*sequential*: + +.. code-block:: cylc + + [scheduling] + [[special tasks]] + # foo depends on its previous instance: + sequential = foo # deprecated - see below! + [[dependencies]] + [[[T00,T12]]] + graph = "foo => bar" + +*The sequential declaration is deprecated* however, in favor of explicit +inter-cycle triggers which clearly expose the same scheduling behaviour in the +graph: + +.. 
code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T12]]] + # foo depends on its previous instance: + graph = "foo[-PT12H] => foo => bar" + +The sequential declaration is arguably convenient in one unusual situation +though: if a task has a non-uniform cycling sequence then multiple explicit +triggers, + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T03,T11]]] + graph = "foo => bar" + [[[T00]]] + graph = "foo[-PT13H] => foo" + [[[T03]]] + graph = "foo[-PT3H] => foo" + [[[T11]]] + graph = "foo[-PT8H] => foo" + +can be replaced by a single sequential declaration, + +.. code-block:: cylc + + [scheduling] + [[special tasks]] + sequential = foo + [[dependencies]] + [[[T00,T03,T11]]] + graph = "foo => bar" + + +Future Triggers +""""""""""""""" + +Cylc also supports inter-cycle triggering off tasks "in the future" (with +respect to cycle point - which has no bearing on wall-clock job submission time +unless the task has a clock trigger): + +.. code-block:: cylc + + [[dependencies]] + [[[T00,T06,T12,T18]]] + graph = """ + # A runs in this cycle: + A + # B in this cycle triggers off A in the next cycle. + A[PT6H] => B + """ + +Future triggers present a problem at suite shutdown rather than at start-up. +Here, ``B`` at the final cycle point wants to trigger off an instance +of ``A`` that will never exist because it is beyond the suite stop +point. Consequently Cylc prevents tasks from spawning successors that depend on +other tasks beyond the final point. + + +.. _ClockTriggerTasks: + +Clock Triggers +"""""""""""""" + +.. note:: + + Please read External Triggers (:ref:`External Triggers`) before + using the older clock triggers described in this section. + +By default, date-time cycle points are not connected to the real time "wall +clock". They are just labels that are passed to task jobs (e.g. to +initialize an atmospheric model run with a particular date-time value). 
In real +time cycling systems, however, some tasks - typically those near the top of the +graph in each cycle - need to trigger at or near the time when their cycle point +is equal to the real clock date-time. + +So *clock triggers* allow tasks to trigger at (or after, depending on other +triggers) a wall clock time expressed as an offset from cycle point: + +.. code-block:: cylc + + [scheduling] + [[special tasks]] + clock-trigger = foo(PT2H) + [[dependencies]] + [[[T00]]] + graph = foo + +Here, ``foo[2015-08-23T00]`` would trigger (other dependencies allowing) +when the wall clock time reaches ``2015-08-23T02``. Clock-trigger +offsets are normally positive, to trigger some time *after* the wall-clock +time is equal to task cycle point. + +Clock-triggers have no effect on scheduling if a suite is running sufficiently +far behind the clock (e.g. after a delay, or because it is processing archived +historical data) that the trigger times, which are relative to task cycle +point, have already passed. + + +.. _ClockExpireTasks: + +Clock-Expire Triggers +""""""""""""""""""""" + +Tasks can be configured to *expire* - i.e. to skip job submission and +enter the *expired* state - if they are too far behind the wall clock when +they become ready to run, and other tasks can trigger off this. As a possible +use case, consider a cycling task that copies the latest of a set of files to +overwrite the previous set: if the task is delayed by more than one cycle there +may be no point in running it because the freshly copied files will just be +overwritten immediately by the next task instance as the suite catches back up +to real time operation. Clock-expire tasks are configured like clock-trigger +tasks, with a date-time offset relative to cycle point (:ref:`ClockExpireRef`). +The offset should be positive to make the task expire if the wall-clock time +has gone beyond the cycle point. 
Triggering off an expired task typically +requires suicide triggers to remove the workflow that runs if the task has not +expired. Here a task called ``copy`` expires, and its downstream +workflow is skipped, if it is more than one day behind the wall-clock (see also +``etc/examples/clock-expire``): + +.. code-block:: cylc + + [cylc] + cycle point format = %Y-%m-%dT%H + [scheduling] + initial cycle point = 2015-08-15T00 + [[special tasks]] + clock-expire = copy(-P1D) + [[dependencies]] + [[[P1D]]] + graph = """ + model[-P1D] => model => copy => proc + copy:expired => !proc""" + + +External Triggers +""""""""""""""""" + +This is a substantial topic, documented in :ref:`External Triggers`. + + +.. _ModelRestartDependencies: + +Model Restart Dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Warm-cycled forecast models generate *restart files*, e.g. model +background fields, to initialize the next forecast. This kind of +dependence requires an inter-cycle trigger: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T06,T12,T18]]] + graph = "A[-PT6H] => A" + +If your model is configured to write out additional restart files +to allow one or more cycle points to be skipped in an emergency *do not +represent these potential dependencies in the suite graph* as they +should not be used under normal circumstances. For example, the +following graph would result in task ``A`` erroneously +triggering off ``A[T-24]`` as a matter of course, instead of +off ``A[T-6]``, because ``A[T-24]`` will always +be finished first: + +.. 
code-block:: cylc + + [scheduling] + [[dependencies]] + [[[T00,T06,T12,T18]]] + # DO NOT DO THIS (SEE ACCOMPANYING TEXT): + graph = "A[-PT24H] | A[-PT18H] | A[-PT12H] | A[-PT6H] => A" + + +How The Graph Determines Task Instantiation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A graph trigger pair like ``foo => bar`` determines the existence and +prerequisites (dependencies) of the downstream task ``bar``, for +the cycle points defined by the associated graph section heading. In general it +does not say anything about the dependencies or existence of the upstream task +``foo``. However *if the trigger has no cycle point offset* Cylc +will infer that ``bar`` must exist at the same cycle points as +``foo``. This is a convenience to allow this: + +.. code-block:: cylc + + graph = "foo => bar" + +to be written as shorthand for this: + +.. code-block:: cylc + + graph = """foo + foo => bar""" + +(where ``foo`` by itself means `` => foo``, i.e. the +task exists at these cycle points but has no prerequisites - although other +prerequisites may be defined for it in other parts of the graph). + +*Cylc does not infer the existence of the upstream task in offset +triggers* like ``foo[-P1D] => bar`` because, as explained in +:ref:`cylc-6-migration-implicit-cycling`, a typo in the offset interval +should generate an error rather than silently creating tasks on an erroneous +cycling sequence. + +As a result you need to be careful not to define inter-cycle dependencies that +cannot be satisfied at run time. Suite validation catches this kind of error if +the existence of the cycle offset task is not defined anywhere at all: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020 + [[dependencies]] + [[[P1Y]]] + # ERROR + graph = "foo[-P1Y] => bar" + +.. code-block:: bash + + $ cylc validate SUITE + 'ERROR: No cycling sequences defined for foo' + +To fix this, use another line in the graph to tell Cylc to define +``foo`` at each cycle point: + +.. 
code-block:: cylc + + [scheduling] + initial cycle point = 2020 + [[dependencies]] + [[[P1Y]]] + graph = """ + foo + foo[-P1Y] => bar""" + +But validation does not catch this kind of error if the offset task +is defined only on a different cycling sequence: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020 + [[dependencies]] + [[[P2Y]]] + graph = """foo + # ERROR + foo[-P1Y] => bar""" + +This suite will validate OK, but it will stall at runtime with ``bar`` +waiting on ``foo[-P1Y]`` at the intermediate years where it does not +exist. The offset ``[-P1Y]`` is presumably an error (it should be +``[-P2Y]``), or else another graph line is needed to generate +``foo`` instances on the yearly sequence: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020 + [[dependencies]] + [[[P1Y]]] + graph = "foo" + [[[P2Y]]] + graph = "foo[-P1Y] => bar" + +Similarly the following suite will validate OK, but it will stall at +runtime with ``bar`` waiting on ``foo[-P1Y]`` in +every cycle point, when only a single instance of it exists, at the initial +cycle point: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020 + [[dependencies]] + [[[R1]]] + graph = foo + [[[P1Y]]] + # ERROR + graph = foo[-P1Y] => bar + +.. note:: + + ``cylc graph`` will display un-satisfiable inter-cycle + dependencies as "ghost nodes". :numref:`ghost-node-screenshot` + is a screenshot of cylc graph displaying the above example with the + un-satisfiable task (foo) displayed as a "ghost node". + +.. _ghost-node-screenshot: + +.. figure:: graphics/png/orig/ghost-node-example.png + :align: center + + Screenshot of ``cylc graph`` showing one task as a "ghost node". + + +.. 
_NIORP: + +Runtime - Task Configuration +---------------------------- + +The ``[runtime]`` section of a suite configuration configures what +to execute (and where and how to execute it) when each task is ready to +run, in a *multiple inheritance hierarchy* of *namespaces* culminating in +individual tasks. This allows all common configuration detail to be +factored out and defined in one place. + +Any namespace can configure any or all of the items defined in +:ref:`SuiteRCReference`. + +Namespaces that do not explicitly inherit from others automatically +inherit from the *root* namespace (below). + +Nested namespaces define *task families* that can be used in the +graph as convenient shorthand for triggering all member tasks at once, +or for triggering other tasks off all members at once - +see :ref:`FamilyTriggers`. Nested namespaces can be +progressively expanded and collapsed in the dependency graph viewer, and +in the gcylc graph and text views. Only the first parent of each +namespace (as for single-inheritance) is used for suite visualization +purposes. + + +Namespace Names +^^^^^^^^^^^^^^^ + +Namespace names may contain letters, digits, underscores, and hyphens. + +.. note:: + + *Task names need not be hardwired into task implementations* + because task and suite identity can be extracted portably from the task + execution environment supplied by the suite server program + (:ref:`TaskExecutionEnvironment`) - then to rename a task you can + just change its name in the suite configuration. + + +Root - Runtime Defaults +^^^^^^^^^^^^^^^^^^^^^^^ + +The root namespace, at the base of the inheritance hierarchy, +provides default configuration for all tasks in the suite. +Most root items are unset by default, but some have default values +sufficient to allow test suites to be defined by dependency graph alone. +The *script* item, for example, defaults to code that +prints a message then sleeps for between 1 and 15 seconds and +exits. 
Default values are documented with each item in +:ref:`SuiteRCReference`. You can override the defaults or +provide your own defaults by explicitly configuring the root namespace. + + +.. _MultiTaskDef: + +Defining Multiple Namespaces At Once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If a namespace section heading is a comma-separated list of names +then the subsequent configuration applies to each list member. +Particular tasks can be singled out at run time using the +``$CYLC_TASK_NAME`` variable. + +As an example, consider a suite containing an ensemble of closely +related tasks that each invokes the same script but with a unique +argument that identifies the calling task name: + +.. code-block:: cylc + + [runtime] + [[ENSEMBLE]] + script = "run-model.sh $CYLC_TASK_NAME" + [[m1, m2, m3]] + inherit = ENSEMBLE + +For large ensembles template processing can be used to +automatically generate the member names and associated dependencies +(see :ref:`Jinja` and :ref:`EmPylabel`). + + +Runtime Inheritance - Single +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following listing of the *inherit.single.one* example suite +illustrates basic runtime inheritance with single parents. + +.. literalinclude:: ../../etc/examples/inherit/single/one/suite.rc + :language: cylc + + +Runtime Inheritance - Multiple +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If a namespace inherits from multiple parents the linear order of +precedence (which namespace overrides which) is determined by the +so-called *C3 algorithm* used to find the linear *method +resolution order* for class hierarchies in Python and several other +object oriented programming languages. The result of this should be +fairly obvious for typical use of multiple inheritance in cylc suites, +but for detailed documentation of how the algorithm works refer to the +`official Python documentation +<https://www.python.org/download/releases/2.3/mro/>`_. + +The *inherit.multi.one* example suite, listed here, makes use of +multiple inheritance: + +.. 
literalinclude:: ../../etc/examples/inherit/multi/one/suite.rc + :language: cylc + +``cylc get-suite-config`` provides an easy way to check the result of +inheritance in a suite. You can extract specific items, e.g.: + +.. code-block:: bash + + $ cylc get-suite-config --item '[runtime][var_p2]script' \ + inherit.multi.one + echo ``RUN: run-var.sh'' + +or use the ``--sparse`` option to print entire namespaces +without obscuring the result with the dense runtime structure obtained +from the root namespace: + +.. code-block:: bash + + $ cylc get-suite-config --sparse --item '[runtime]ops_s1' inherit.multi.one + script = echo ``RUN: run-ops.sh'' + inherit = ['OPS', 'SERIAL'] + [directives] + job_type = serial + + +Suite Visualization And Multiple Inheritance +"""""""""""""""""""""""""""""""""""""""""""" + +The first parent inherited by a namespace is also used as the +collapsible family group when visualizing the suite. If this is not what +you want, you can demote the first parent for visualization purposes, +without affecting the order of inheritance of runtime properties: + +.. code-block:: cylc + + [runtime] + [[BAR]] + # ... + [[foo]] + # inherit properties from BAR, but stay under root for visualization: + inherit = None, BAR + + +How Runtime Inheritance Works +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The linear precedence order of ancestors is computed for each namespace +using the C3 algorithm. Then any runtime items that are explicitly +configured in the suite configuration are "inherited" up the linearized +hierarchy for each task, starting at the root namespace: if a particular +item is defined at multiple levels in the hierarchy, the level nearest +the final task namespace takes precedence. Finally, root namespace +defaults are applied for every item that has not been configured in the +inheritance process (this is more efficient than carrying the full dense +namespace structure through from root from the beginning). + + +.. 
_TaskExecutionEnvironment: + +Task Execution Environment +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The task execution environment contains suite and task identity variables +provided by the suite server program, and user-defined environment variables. +The environment is explicitly exported (by the task job script) prior to +executing the task ``script`` (see :ref:`TaskJobSubmission`). + +Suite and task identity are exported first, so that user-defined +variables can refer to them. Order of definition is preserved throughout +so that variable assignment expressions can safely refer to previously +defined variables. + +Additionally, access to cylc itself is configured prior to the user-defined +environment, so that variable assignment expressions can make use of +cylc utility commands: + +.. code-block:: cylc + + [runtime] + [[foo]] + [[[environment]]] + REFERENCE_TIME = $( cylc util cycletime --offset-hours=6 ) + + +User Environment Variables +"""""""""""""""""""""""""" + +A task's user-defined environment results from its inherited +``[[[environment]]]`` sections: + +.. code-block:: cylc + + [runtime] + [[root]] + [[[environment]]] + COLOR = red + SHAPE = circle + [[foo]] + [[[environment]]] + COLOR = blue # root override + TEXTURE = rough # new variable + +This results in a task *foo* with ``SHAPE=circle``, ``COLOR=blue``, +and ``TEXTURE=rough`` in its environment. + + +Overriding Environment Variables +"""""""""""""""""""""""""""""""" + +When you override inherited namespace items the original parent +item definition is *replaced* by the new definition. This applies to +all items including those in the environment sub-sections which, +strictly speaking, are not "environment variables" until they are +written, post inheritance processing, to the task job script that +executes the associated task. Consequently, if you override an +environment variable you cannot also access the original parent value: + +.. 
code-block:: cylc + + [runtime] + [[FOO]] + [[[environment]]] + COLOR = red + [[bar]] + inherit = FOO + [[[environment]]] + tmp = $COLOR # !! ERROR: $COLOR is undefined here + COLOR = dark-$tmp # !! as this overrides COLOR in FOO. + +The compressed variant of this, ``COLOR = dark-$COLOR``, is +also in error for the same reason. To achieve the desired result you +must use a different name for the parent variable: + +.. code-block:: cylc + + [runtime] + [[FOO]] + [[[environment]]] + FOO_COLOR = red + [[bar]] + inherit = FOO + [[[environment]]] + COLOR = dark-$FOO_COLOR # OK + + +.. _Task Job Script Variables: + +Task Job Script Variables +""""""""""""""""""""""""" + +These are variables that can be referenced (but should not be modified) in a +task job script. + +The task job script may export the following environment variables: + +.. code-block:: bash + + CYLC_DEBUG # Debug mode, true or not defined + CYLC_DIR # Location of cylc installation used + CYLC_VERSION # Version of cylc installation used + + CYLC_CYCLING_MODE # Cycling mode, e.g. gregorian + CYLC_SUITE_FINAL_CYCLE_POINT # Final cycle point + CYLC_SUITE_INITIAL_CYCLE_POINT # Initial cycle point + CYLC_SUITE_NAME # Suite name + CYLC_UTC # UTC mode, True or False + CYLC_VERBOSE # Verbose mode, True or False + TZ # Set to "UTC" in UTC mode or not defined + + CYLC_SUITE_RUN_DIR # Location of the suite run directory in + # job host, e.g. ~/cylc-run/foo + CYLC_SUITE_DEF_PATH # Location of the suite configuration directory in + # job host, e.g. ~/cylc-run/foo + CYLC_SUITE_HOST # Host running the suite process + CYLC_SUITE_OWNER # User ID running the suite process + CYLC_SUITE_DEF_PATH_ON_SUITE_HOST + # Location of the suite configuration directory in + # suite host, e.g. ~/cylc-run/foo + CYLC_SUITE_SHARE_DIR # Suite (or task!) 
shared directory (see below) + CYLC_SUITE_UUID # Suite UUID string + CYLC_SUITE_WORK_DIR # Suite work directory (see below) + + CYLC_TASK_JOB # Task job identifier expressed as + # CYCLE-POINT/TASK-NAME/SUBMIT-NUM + # e.g. 20110511T1800Z/t1/01 + CYLC_TASK_CYCLE_POINT # Cycle point, e.g. 20110511T1800Z + CYLC_TASK_NAME # Job's task name, e.g. t1 + CYLC_TASK_SUBMIT_NUMBER # Job's submit number, e.g. 1, + # increments with every submit + CYLC_TASK_TRY_NUMBER # Number of execution tries, e.g. 1 + # increments with automatic retry-on-fail + CYLC_TASK_ID # Task instance identifier expressed as + # TASK-NAME.CYCLE-POINT + # e.g. t1.20110511T1800Z + CYLC_TASK_LOG_DIR # Location of the job log directory + # e.g. ~/cylc-run/foo/log/job/20110511T1800Z/t1/01/ + CYLC_TASK_LOG_ROOT # The task job file path + # e.g. ~/cylc-run/foo/log/job/20110511T1800Z/t1/01/job + CYLC_TASK_WORK_DIR # Location of task work directory (see below) + # e.g. ~/cylc-run/foo/work/20110511T1800Z/t1 + CYLC_TASK_NAMESPACE_HIERARCHY # Linearised family namespace of the task, + # e.g. root postproc t1 + CYLC_TASK_DEPENDENCIES # List of met dependencies that triggered the task + # e.g. foo.1 bar.1 + + CYLC_TASK_COMMS_METHOD # Set to "ssh" if communication method is "ssh" + CYLC_TASK_SSH_LOGIN_SHELL # With "ssh" communication, if set to "True", + # use login shell on suite host + +There are also some global shell variables that may be defined in the task job +script (but not exported to the environment). These include: + +.. 
code-block:: bash + + CYLC_FAIL_SIGNALS # List of signals trapped by the error trap + CYLC_VACATION_SIGNALS # List of signals trapped by the vacation trap + CYLC_SUITE_WORK_DIR_ROOT # Root directory above the suite work directory + # in the job host + CYLC_TASK_MESSAGE_STARTED_PID # PID of "cylc message" job started" command + CYLC_TASK_WORK_DIR_BASE # Alternate task work directory, + # relative to the suite work directory + + +Suite Share Directories +""""""""""""""""""""""" + +A *suite share directory* is created automatically under the suite run +directory as a share space for tasks. The location is available to tasks as +``$CYLC_SUITE_SHARE_DIR``. In a cycling suite, output files are +typically held in cycle point sub-directories of the suite share directory. + +The top level share and work directory (below) location can be changed +(e.g. to a large data area) by a global config setting +(see :ref:`workdirectory`). + + +Task Work Directories +""""""""""""""""""""" + +Task job scripts are executed from within *work directories* created +automatically under the suite run directory. A task can get its own work +directory from ``$CYLC_TASK_WORK_DIR`` (or simply ``$PWD`` if +it does not ``cd`` elsewhere at runtime). By default the location +contains task name and cycle point, to provide a unique workspace for every +instance of every task. This can be overridden in the suite configuration, +however, to get several tasks to share the same work directory +(see :ref:`worksubdirectory`). + +The top level work and share directory (above) location can be changed +(e.g. to a large data area) by a global config setting +(see :ref:`workdirectory`). + + +Environment Variable Evaluation +""""""""""""""""""""""""""""""" + +Variables in the task execution environment are not evaluated in the +shell in which the suite is running prior to submitting the task. 
They +are written in unevaluated form to the job script that is submitted by +cylc to run the task (:ref:`JobScripts`) and are therefore +evaluated when the task begins executing under the task owner account +on the task host. Thus ``$HOME``, for instance, evaluates at +run time to the home directory of the task owner on the task host. + + +How Tasks Get Access To The Suite Directory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Tasks can use ``$CYLC_SUITE_DEF_PATH`` to access suite files on +the task host, and the suite bin directory is automatically added to +``$PATH``. If a remote suite configuration directory is not +specified the local (suite host) path will be assumed with the local +home directory, if present, swapped for literal ``$HOME`` for +evaluation on the task host. + + +.. _RunningTasksOnARemoteHost: + +Remote Task Hosting +^^^^^^^^^^^^^^^^^^^ + +If a task declares an owner other than the suite owner and/or +a host other than the suite host, cylc will use non-interactive ssh to +execute the task on the ``owner@host`` account by the configured +batch system: + +.. code-block:: cylc + + [runtime] + [[foo]] + [[[remote]]] + host = orca.niwa.co.nz + owner = bob + [[[job]]] + batch system = pbs + +For this to work: + +- non-interactive ssh is required from the suite host to the remote + task accounts. +- cylc must be installed on task hosts. + + - Optional software dependencies such as graphviz and + Jinja2 are not needed on task hosts. + - If polling task communication is used, there is no other + requirement. + - If SSH task communication is configured, non-interactive ssh is + required from the task host to the suite host. + - If (default) task communication is configured, the task host + should have access to the port on the suite host. + +- the suite configuration directory, or some fraction of its + content, can be installed on the task host, if needed. + +To learn how to give remote tasks access to cylc, +see :ref:`HowTasksGetAccessToCylc`. 
+ +Tasks running on the suite host under another user account are treated as +remote tasks. + +Remote hosting, like all namespace settings, can be declared globally in +the root namespace, or per family, or for individual tasks. + + +Dynamic Host Selection +"""""""""""""""""""""" + +Instead of hardwiring host names into the suite configuration you can +specify a shell command that prints a hostname, or an environment +variable that holds a hostname, as the value of the host config item. +See :ref:`DynamicHostSelection`. + + +Remote Task Log Directories +""""""""""""""""""""""""""" + +Task stdout and stderr streams are written to log files in a +suite-specific sub-directory of the *suite run directory*, as +explained in :ref:`WhitherStdoutAndStderr`. For remote tasks +the same directory is used, but *on the task host*. +Remote task log directories, like local ones, are created on the fly, if +necessary, during job submission. + + +.. _viso: + +Visualization +------------- + +The visualization section of a suite configuration is used to configure +suite graphing, principally graph node (task) and edge (dependency +arrow) style attributes. Tasks can be grouped for the purpose of +applying common style attributes. See :ref:`SuiteRCReference` for details. + + +Collapsible Families In Suite Graphs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: cylc + + [visualization] + collapsed families = family1, family2 + +Nested families from the runtime inheritance hierarchy can be expanded +and collapsed in suite graphs and the gcylc graph view. All families +are displayed in the collapsed state at first, unless +``[visualization]collapsed families`` is used to single out +specific families for initial collapsing. + +In the gcylc graph view, nodes outside of the main graph (such as the +members of collapsed families) are plotted as rectangular nodes to +the right if they are doing anything interesting (submitted, running, +failed). 
+ +:numref:`fig-namespaces` illustrates successive expansion of nested +task families in the *namespaces* example suite. + +.. todo:: + + Create sub-figures if possible: for now hacked as separate figures with + link and caption on final displayed figure. + +.. figure:: graphics/png/orig/inherit-2.png + :align: center + +.. figure:: graphics/png/orig/inherit-3.png + :align: center + +.. figure:: graphics/png/orig/inherit-4.png + :align: center + +.. figure:: graphics/png/orig/inherit-5.png + :align: center + +.. figure:: graphics/png/orig/inherit-6.png + :align: center + +.. _fig-namespaces: + +.. figure:: graphics/png/orig/inherit-7.png + :align: center + + Graphs of the *namespaces* example suite showing various states of + expansion of the nested namespace family hierarchy, from all families + collapsed (top left) through to all expanded (bottom right). This + can also be done by right-clicking on tasks in the gcylc graph view. + + +.. _Parameterized Tasks Label: + +Parameterized Tasks +^^^^^^^^^^^^^^^^^^^ + +Cylc can automatically generate tasks and dependencies by expanding +parameterized task names over lists of parameter values. Uses for this +include: + +- generating an ensemble of similar model runs +- generating chains of tasks to process similar datasets +- replicating an entire workflow, or part thereof, over several runs +- splitting a long model run into smaller steps or ``chunks`` + (parameterized cycling) + +.. note:: + + This can be done with Jinja2 loops too (:ref:`Jinja`) + but parameterization is much cleaner (nested loops can seriously reduce + the clarity of a suite configuration).* + + +Parameter Expansion +^^^^^^^^^^^^^^^^^^^ + +Parameter values can be lists of strings, or lists of integers and +integer ranges (with inclusive bounds). Numeric values in a list of strings are +considered strings. It is not possible to mix strings with integer ranges. + +For example: + +.. 
code-block:: cylc + + [cylc] + [[parameters]] + # parameters: "ship", "buoy", "plane" + # default task suffixes: _ship, _buoy, _plane + obs = ship, buoy, plane + + # parameters: 1, 2, 3, 4, 5 + # default task suffixes: _run1, _run2, _run3, _run4, _run5 + run = 1..5 + + # parameters: 1, 3, 5, 7, 9 + # default task suffixes: _idx1, _idx3, _idx5, _idx7, _idx9 + idx = 1..9..2 + + # parameters: -11, -1, 9 + # default task suffixes: _idx-11, _idx-01, _idx+09 + idx = -11..9..10 + + # parameters: 1, 3, 5, 10, 11, 12, 13 + # default task suffixes: _i01, _i03, _i05, _i10, _i11, _i12, _i13 + i = 1..5..2, 10, 11..13 + + # parameters: "0", "1", "e", "pi", "i" + # default task suffixes: _0, _1, _e, _pi, _i + item = 0, 1, e, pi, i + + # ERROR: mix strings with int range + p = one, two, 3..5 + +Then angle brackets denote use of these parameters throughout the suite +configuration. For the values above, this parameterized name: + +.. code-block:: none + + model # for run = 1..2 + +expands to these concrete task names: + +.. code-block:: none + + model_run1, model_run2 + +and this parameterized name: + +.. code-block:: none + + proc # for obs = ship, buoy, plane + +expands to these concrete task names: + +.. code-block:: none + + proc_ship, proc_buoy, proc_plane + +By default, to avoid any ambiguity, the parameter name appears in the expanded +task names for integer values, but not for string values. For example, +``model_run1`` for ``run = 1``, but ``proc_ship`` for +``obs = ship``. However, the default expansion templates can be +overridden if need be: + +.. code-block:: cylc + + [cylc] + [[parameters]] + obs = ship, buoy, plane + run = 1..5 + [[parameter templates]] + run = -R%(run)s # Make foo expand to foo-R1 etc. + +(See :ref:`RefParameterTemplates` for more on the string template syntax.) + +Any number of parameters can be used at once. This parameterization: + +.. code-block:: none + + model # for run = 1..2 and obs = ship, buoy, plane + +expands to these tasks names: + +.. 
code-block:: none + + model_run1_ship, model_run1_buoy, model_run1_plane, + model_run2_ship, model_run2_buoy, model_run2_plane + +Here's a simple but complete example suite: + +.. code-block:: cylc + + [cylc] + [[parameters]] + run = 1..2 + [scheduling] + [[dependencies]] + graph = "prep => model<run>" + [runtime] + [[model<run>]] + # ... + +The result, post parameter expansion, is this: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "prep => model_run1 & model_run2" + [runtime] + [[model_run1]] + # ... + [[model_run2]] + # ... + +Here's a more complex graph using two parameters (``[runtime]`` omitted): + +.. code-block:: cylc + + [cylc] + [[parameters]] + run = 1..2 + mem = cat, dog + [scheduling] + [[dependencies]] + graph = """prep => init<run> => model<run,mem> => + post<run,mem> => wrap<run> => done""" + +.. todo:: + + \.\.\. which expands to: + + [scheduling] + [[dependencies]] + graph = """ + prep => init_run1 => model_run1_cat => post_run1_cat => wrap_run1 => done + init_run1 => model_run1_dog => post_run1_dog => wrap_run1 + prep => init_run2 => model_run2_cat => post_run2_cat => wrap_run2 => done + init_run2 => model_run2_dog => post_run2_dog => wrap_run2""" + +:numref:`fig-params-1` shows the result as visualized by +``cylc graph``. + +.. _fig-params-1: + +.. figure:: graphics/png/orig/params1.png + :align: center + + Parameter expansion example. + + +Zero-Padded Integer Values +"""""""""""""""""""""""""" + +Integer parameter values are given a default template for generating task +suffixes that are zero-padded according to the longest size of their values. +For example, the default template for ``p = 9..10`` would be +``_p%(p)02d``, so that ``foo<p>`` would become ``foo_p09, foo_p10``. +If negative values are present in the parameter list, the +default template will include the sign. +For example, the default template for ``p = -1..1`` would be +``_p%(p)+02d``, so that ``foo<p>`` would become +``foo_p-1, foo_p+0, foo_p+1``. + +To get thicker padding and/or alternate suffixes, use a template. E.g.: + +.. code-block:: cylc + + [cylc] + [[parameters]] + i = 1..9 + p = 3..14 + [[parameter templates]] + i = _i%(i)02d # suffixes = _i01, _i02, ..., _i09 + # A double-percent gives a literal percent character + p = %%p%(p)03d # suffixes = %p003, %p004, ..., %p013, %p014 + + +Parameters as Full Task Names +""""""""""""""""""""""""""""" + +Parameter values can be used as full task names, but the default template +should be overridden to remove the initial underscore. For example: + +.. code-block:: cylc + + [cylc] + [[parameters]] + i = 1..4 + obs = ship, buoy, plane + [[parameter templates]] + i = i%(i)d # task name must begin with an alphabetic character + obs = %(obs)s + [scheduling] + [[dependencies]] + graph = """ + foo => <i> # foo => i1 & i2 & i3 & i4 + <obs> => bar # ship & buoy & plane => bar + """ + + +Passing Parameter Values To Tasks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parameter values are passed as environment variables to tasks generated by +parameter expansion. For example, if we have: + +.. code-block:: cylc + + [cylc] + [[parameters]] + obs = ship, buoy, plane + run = 1..5 + [scheduling] + [[dependencies]] + graph = model<run,obs> + +Then task ``model_run2_ship`` would get the following standard +environment variables: + +.. code-block:: bash + + # In a job script of an instance of the "model_run2_ship" task: + export CYLC_TASK_PARAM_run="2" + export CYLC_TASK_PARAM_obs="ship" + +These variables allow tasks to determine which member of a parameterized +group they are, and so to vary their behaviour accordingly. + +You can also define custom variables and string templates for parameter value +substitution. For example, if we add this to the above configuration: + +..
code-block:: cylc + + [runtime] + [[model<run,obs>]] + [[[parameter environment templates]]] + MYNAME = %(obs)sy-mc%(obs)sface + MYFILE = /path/to/run%(run)03d/%(obs)s + +Then task ``model_run2_ship`` would get the following custom +environment variables: + +.. code-block:: bash + + # In a job script of an instance of the "model_run2_ship" task: + export MYNAME=shipy-mcshipface + export MYFILE=/path/to/run002/ship + + +Selecting Specific Parameter Values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Specific parameter values can be singled out in the graph and under +``[runtime]`` with the notation ``<p=5>`` (for example). +Here's how to make a special task trigger off just the first of a +set of model runs: + +.. code-block:: cylc + + [cylc] + [[parameters]] + run = 1..5 + [scheduling] + [[dependencies]] + graph = """ model<run> => post_proc<run> # general case + model<run=1> => check_first_run """ # special case + [runtime] + [[model<run>]] + # config for all "model" runs... + [[model<run=1>]] + # special config (if any) for the first model run... + #... + + +Selecting Partial Parameter Ranges +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The parameter notation does not currently support partial range selection such +as ``foo<p=5..10>``, but you can achieve the same result by defining a +second parameter that covers the partial range and giving it the same expansion +template as the full-range parameter. For example: + +.. code-block:: cylc + + [cylc] + [[parameters]] + run = 1..10 # 1, 2, ..., 10 + runx = 1..3 # 1, 2, 3 + [[parameter templates]] + run = _R%(run)02d # _R01, _R02, ..., _R10 + runx = _R%(runx)02d # _R01, _R02, _R03 + [scheduling] + [[dependencies]] + graph = """model<run> => post<run> + model<runx> => checkx<runx>""" + [runtime] + [[model<run>]] + # ... + #... + + +Parameter Offsets In The Graph +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A negative offset notation ``<NAME-1>`` is interpreted as the previous +value in the ordered list of parameter values, while a positive offset is +interpreted as the next value.
For example, to split a model run into multiple +steps with each step depending on the previous one, either of these graphs: + +.. code-block:: cylc + + graph = "model<run-1> => model<run>" # for run = 1, 2, 3 + graph = "model<run> => model<run+1>" # for run = 1, 2, 3 + +expands to: + +.. code-block:: cylc + + graph = """model_run1 => model_run2 + model_run2 => model_run3""" + + # or equivalently: + + graph = "model_run1 => model_run2 => model_run3" + +And this graph: + +.. code-block:: cylc + + graph = "proc<size-1> => proc<size>" # for size = small, big, huge + +expands to: + +.. code-block:: cylc + + graph = """proc_small => proc_big + proc_big => proc_huge""" + + # or equivalently: + + graph = "proc_small => proc_big => proc_huge" + +However, a quirk in the current system means that you should avoid mixing +conditional logic in these statements. For example, the following will do the +unexpected: + +.. code-block:: cylc + + graph = foo<m-1> & baz => foo<m> # for m = cat, dog + +currently expands to: + +.. code-block:: cylc + + graph = foo_cat & baz => foo_dog + + # when users may expect it to be: + # graph = foo_cat => foo_dog + # graph = baz => foo_cat & foo_dog + +For the time being, writing out the logic explicitly will give you the correct +graph. + +.. code-block:: cylc + + graph = """foo<m-1> => foo<m> # for m = cat, dog + baz => foo<m>""" + + +Task Families And Parameterization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Task family members can be generated by parameter expansion: + +.. code-block:: cylc + + [runtime] + [[FAM]] + [[member<r>]] + inherit = FAM + # Result: family FAM contains member_r1, member_r2, etc. + + +Family names can be parameterized too, just like task names: + +.. code-block:: cylc + + [runtime] + [[RUN<r>]] + [[model<r>]] + inherit = RUN<r> + [[post_proc<r>]] + inherit = RUN<r> + # Result: family RUN_r1 contains model_r1 and post_proc_r1, + # family RUN_r2 contains model_r2 and post_proc_r2, etc. + +As described in :ref:`FamilyTriggers` family names can be used to +trigger all members at once: + +..
code-block:: cylc + + graph = "foo => FAMILY" + +or to trigger off all members: + +.. code-block:: cylc + + graph = "FAMILY:succeed-all => bar" + +or to trigger off any members: + +.. code-block:: cylc + + graph = "FAMILY:succeed-any => bar" + +If the members of ``FAMILY`` were generated with parameters, you can +also trigger them all at once with parameter notation: + +.. code-block:: cylc + + graph = "foo => member" + +Similarly, to trigger off all members: + +.. code-block:: cylc + + graph = "member => bar" + # (member:fail etc., for other trigger types) + +Family names are still needed in the graph, however, to succinctly express +"succeed-any" triggering semantics, and all-to-all or any-to-all triggering: + +.. code-block:: cylc + + graph = "FAM1:succeed-any => FAM2" + +(Direct all-to-all and any-to-all family triggering is not recommended for +efficiency reasons though - see :ref:`EfficientInterFamilyTriggering`). + +For family *member-to-member* triggering use parameterized members. +For example, if family ``OBS_GET`` has members ``get`` and +family ``OBS_PROC`` has members ``proc`` then this graph: + +.. code-block:: cylc + + graph = "get => proc" # for obs = ship, buoy, plane + +expands to: + +.. code-block:: none + + get_ship => proc_ship + get_buoy => proc_buoy + get_plane => proc_plane + + +.. _Parameterized Cycling: + +Parameterized Cycling +^^^^^^^^^^^^^^^^^^^^^ + +Two ways of constructing cycling systems are described and contrasted in +:ref:`Workflows For Cycling Systems`. For most purposes use of +a proper *cycling workflow* is recommended, wherein cylc incrementally +generates the date-time sequence and extends the workflow, potentially +indefinitely, at run time. For smaller systems of finite duration, however, +parameter expansion can be used to generate a sequence of pre-defined tasks +as a proxy for cycling. + +Here's a cycling workflow of two-monthly model runs for one year, +with previous-instance model dependence (e.g. 
for model restart files): + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020-01 + final cycle point = 2020-12 + [[dependencies]] + [[[R1]]] # Run once, at the initial point. + graph = "prep => model" + [[[P2M]]] # Run at 2-month intervals between the initial and final points. + graph = "model[-P2M] => model => post_proc & archive" + [runtime] + [[model]] + script = "run-model $CYLC_TASK_CYCLE_POINT" + +And here's how to do the same thing with parameterized tasks: + +.. code-block:: cylc + + [cylc] + [[parameters]] + chunk = 1..6 + [scheduling] + [[dependencies]] + graph = """prep => model<chunk=1> + model<chunk-1> => model<chunk> => + post_proc<chunk> & archive<chunk>""" + [runtime] + [[model<chunk>]] + script = """ + # Compute start date from chunk index and interval, then run the model. + INITIAL_POINT=2020-01 + INTERVAL_MONTHS=2 + OFFSET_MONTHS=$(( (CYLC_TASK_PARAM_chunk - 1)*INTERVAL_MONTHS )) + OFFSET=P${OFFSET_MONTHS}M # e.g. P4M for chunk=3 + run-model $(cylc cyclepoint --offset=$OFFSET $INITIAL_POINT)""" + +The two workflows are shown together in :numref:`fig-eg2`. +They both achieve the same result, and both can +include special tasks at the start, end, or +anywhere in between. But as noted earlier the parameterized version has +several disadvantages: it must be finite in extent and not too large; the +date-time arithmetic has to be done by the user; and the full extent of the +workflow will be visible at all times as the suite runs. + +.. todo:: + Create sub-figures if possible: for now hacked as separate figures with + link and caption on final displayed figure. + +.. figure:: graphics/png/orig/eg2-static.png + :align: center + +.. _fig-eg2: + +.. figure:: graphics/png/orig/eg2-dynamic.png + :align: center + + Parameterized (top) and cycling (bottom) versions of the same + workflow. The first three cycle points are shown in the + cycling case. The parameterized case does not have "cycle points".
+ +Here's a yearly-cycling suite with four parameterized chunks in each cycle +point: + +.. code-block:: cylc + + [cylc] + [[parameters]] + chunk = 1..4 + [scheduling] + initial cycle point = 2020-01 + [[dependencies]] + [[[P1Y]]] + graph = """model => model + model[-P1Y] => model""" + +.. note:: + + The inter-cycle trigger connects the first chunk in each cycle point + to the last chunk in the previous cycle point. Of course it would be simpler + to just use 3-monthly cycling: + + .. code-block:: cylc + + [scheduling] + initial cycle point = 2020-01 + [[dependencies]] + [[[P3M]]] + graph = "model[-P3M] => model" + +Here's a possible valid use-case for mixed cycling: consider a portable +date-time cycling workflow of model jobs that can each take too long to run on +some supported platforms. This could be handled without changing the cycling +structure of the suite by splitting the run (at each cycle point) into a +variable number of shorter steps, using more steps on less powerful hosts. + + +Cycle Point And Parameter Offsets At Start-Up +""""""""""""""""""""""""""""""""""""""""""""" + +In cycling workflows cylc ignores anything earlier than the suite initial +cycle point. So this graph: + +.. code-block:: cylc + + graph = "model[-P1D] => model" + +simplifies at the initial cycle point to this: + +.. code-block:: cylc + + graph = "model" + +Similarly, parameter offsets are ignored if they extend beyond the start +of the parameter value list. So this graph: + +.. code-block:: cylc + + graph = "model => model" + +simplifies for ``chunk=1`` to this: + +.. code-block:: cylc + + graph = "model_chunk1" + +.. note:: + + The initial cut-off applies to every parameter list, but only + to cycle point sequences that start at the suite initial cycle point. + Therefore it may be somewhat easier to use parameterized cycling if you + need multiple date-time sequences *with different start points* in the + same suite. 
We plan to allow this sequence-start simplification for any + date-time sequence in the future, not just at the suite initial point, + but it needs to be optional because delayed-start cycling tasks + sometimes need to trigger off earlier cycling tasks. + + +.. _Jinja: + +Jinja2 +------ + +.. note:: + + This section needs to be revised - the Parameterized Task feature + introduced in cylc-6.11.0 (see :ref:`Parameterized Tasks Label`) provides + a cleaner way to auto-generate tasks without coding messy Jinja2 loops. + +Cylc has built in support for the Jinja2 template processor in suite +configurations. Jinja2 variables, mathematical expressions, loop control +structures, conditional logic, etc., are automatically processed to +generate the final suite configuration seen by cylc. + +The need for Jinja2 processing must be declared with a hash-bang +comment as the first line of the suite.rc file: + +.. code-block:: cylc + + #!jinja2 + # ... + +Potential uses for this include automatic generation of repeated groups +of similar tasks and dependencies, and inclusion or exclusion of entire +suite sections according to the value of a single flag. Consider a +large complicated operational suite and several related parallel test +suites with slightly different task content and structure (the parallel +suites, for instance, might take certain large input files from the +operation or the archive rather than downloading them again) - these can +now be maintained as a single master suite configuration that reconfigures +itself according to the value of a flag variable indicating the intended use. + +Template processing is the first thing done on parsing a suite +configuration so Jinja2 expressions can appear anywhere in the file (inside +strings and namespace headings, for example). + +Jinja2 is `well documented `_, so here +we just provide an example suite that uses it. 
The meaning of the +embedded Jinja2 code should be reasonably self-evident to anyone familiar +with standard programming techniques. + +.. _fig-jinja2-ensemble: + +.. figure:: graphics/png/orig/jinja2-ensemble-graph.png + :align: center + + The Jinja2 ensemble example suite graph. + + +The ``jinja2.ensemble`` example, graphed in +:numref:`fig-jinja2-ensemble`, shows an ensemble of similar tasks +generated using Jinja2: + +.. code-block:: cylc + + #!jinja2 + {% set N_MEMBERS = 5 %} + [scheduling] + [[dependencies]] + graph = """{# generate ensemble dependencies #} + {% for I in range( 0, N_MEMBERS ) %} + foo => mem_{{ I }} => post_{{ I }} => bar + {% endfor %}""" + +Here is the generated suite configuration, after Jinja2 processing: + +.. code-block:: cylc + + #!jinja2 + [scheduling] + [[dependencies]] + graph = """ + foo => mem_0 => post_0 => bar + foo => mem_1 => post_1 => bar + foo => mem_2 => post_2 => bar + foo => mem_3 => post_3 => bar + foo => mem_4 => post_4 => bar + """ + +And finally, the ``jinja2.cities`` example uses variables, +includes or excludes special cleanup tasks according to the value of a +logical flag, and it automatically generates all dependencies and family +relationships for a group of tasks that is repeated for each city in the +suite. To add a new city and associated tasks and dependencies simply +add the city name to list at the top of the file. The suite is graphed, +with the New York City task family expanded, in +:numref:`fig-jinja2-cities`. + +.. literalinclude:: ../../etc/examples/jinja2/cities/suite.rc + :language: cylc + +.. _fig-jinja2-cities: + +.. figure:: graphics/png/orig/jinja2-suite-graph.png + :align: center + + The Jinja2 cities example suite graph, with the + New York City task family expanded. 
+ + +Accessing Environment Variables With Jinja2 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This functionality is not provided by Jinja2 by default, but cylc +automatically imports the user environment to the template's global namespace +(see :ref:`CustomJinja2Filters`) in a dictionary structure called +*environ*. A usage example: + +.. code-block:: cylc + + #!Jinja2 + #... + [runtime] + [[root]] + [[[environment]]] + SUITE_OWNER_HOME_DIR_ON_SUITE_HOST = {{environ['HOME']}} + +This example emphasizes that *the environment is read on the suite +host at the time the suite configuration is parsed* - it is not, for +instance, read at task run time on the task host. + + +.. _CustomJinja2Filters: + +Custom Jinja2 Filters, Tests and Globals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Jinja2 has three different namespaces used to separate "globals", +"filters" and "tests". Globals are template-wide accessible variables +and functions. Cylc extends this namespace with the "environ" dictionary and +"raise" and "assert" functions for raising exceptions +(see :ref:`Jinja2RaisingExceptions`). + +Filters can be used to modify variable values and are applied using pipe +notation. For example, the built-in ``trim`` filter strips leading +and trailing white space from a string: + +.. code-block:: cylc + + {% set MyString = " dog " %} + {{ MyString | trim() }} # "dog" + +Additionally, variable values can be tested using the "is" keyword followed by +the name of the test, e.g. ``VARIABLE is defined``. +See the official Jinja2 documentation for available built-in globals, filters +and tests. + +Cylc also supports custom Jinja2 globals, filters and tests.
A custom global, +filter or test is a single Python function in a source file with the same name +as the function (plus ".py" extension) and stored in one of the following +locations: + +- ``<cylc-dir>/lib/Jinja2[namespace]/`` +- ``[suite configuration directory]/Jinja2[namespace]/`` +- ``$HOME/.cylc/Jinja2[namespace]/`` + +where ``[namespace]/`` is one of ``Globals/``, ``Filters/`` or ``Tests/``. + +In the argument list of a filter or test function, the first argument is +the variable value to be "filtered" or "tested", respectively, and +subsequent arguments can be whatever else is needed. Currently there are three +custom filters: + + +pad +""" + +The "pad" filter is for padding string values to some +constant length with a fill character - useful for generating task names +and related values in ensemble suites: + +.. code-block:: cylc + + {% for i in range(0,100) %} # 0, 1, ..., 99 + {% set j = i | pad(2,'0') %} + [[A_{{j}}]] # [[A_00]], [[A_01]], ..., [[A_99]] + {% endfor %} + + +strftime +"""""""" + +The "strftime" filter can be used to format ISO8601 date-time strings using +an strftime string. + +.. code-block:: cylc + + {% set START_CYCLE = '10661014T08+01' %} + {{ START_CYCLE | strftime('%H') }} # 08 + +Examples: + +- ``{{START_CYCLE | strftime('%Y')}}`` - 1066 +- ``{{START_CYCLE | strftime('%m')}}`` - 10 +- ``{{START_CYCLE | strftime('%d')}}`` - 14 +- ``{{START_CYCLE | strftime('%H:%M:%S %z')}}`` - 08:00:00 +01 + +It is also possible to parse non-standard date-time strings by passing a +strptime string as the second argument. + +Examples: + +- ``{{'12,30,2000' | strftime('%m', '%m,%d,%Y')}}`` - 12 +- ``{{'1066/10/14 08:00:00' | strftime('%Y%m%dT%H', '%Y/%m/%d %H:%M:%S')}}`` - 10661014T08 + + +duration\_as +"""""""""""" + +The "duration\_as" filter can be used to format ISO8601 duration +strings as a floating-point number of several different units.
Units +for the conversion can be specified in a case-insensitive short or long +form: + +- Seconds - "s" or "seconds" +- Minutes - "m" or "minutes" +- Hours - "h" or "hours" +- Days - "d" or "days" +- Weeks - "w" or "weeks" + +Within the suite, this becomes: + +.. code-block:: cylc + + {% set CYCLE_INTERVAL = 'PT1D' %} + {{ CYCLE_INTERVAL | duration_as('h') }} # 24.0 + {% set CYCLE_SUBINTERVAL = 'PT30M' %} + {{ CYCLE_SUBINTERVAL | duration_as('hours') }} # 0.5 + {% set CYCLE_INTERVAL = 'PT1D' %} + {{ CYCLE_INTERVAL | duration_as('s') }} # 86400.0 + {% set CYCLE_SUBINTERVAL = 'PT30M' %} + {{ CYCLE_SUBINTERVAL | duration_as('seconds') }} # 1800.0 + +While the filtered value is a floating-point number, it is often required to +supply an integer to suite entities (e.g. environment variables) that require +it. This is accomplished by chaining filters: + +- ``{{CYCLE_INTERVAL | duration_as('h') | int}}`` - 24 +- ``{{CYCLE_SUBINTERVAL | duration_as('h') | int}}`` - 0 +- ``{{CYCLE_INTERVAL | duration_as('s') | int}}`` - 86400 +- ``{{CYCLE_SUBINTERVAL | duration_as('s') | int}}`` - 1800 + + +Associative Arrays In Jinja2 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Associative arrays (*dicts* in Python) can be very useful. +Here's an example, from ``/etc/examples/jinja2/dict``: + +.. code-block:: cylc + + #!Jinja2 + {% set obs_types = ['airs', 'iasi'] %} + {% set resource = { 'airs':'ncpus=9', 'iasi':'ncpus=20' } %} + + [scheduling] + [[dependencies]] + graph = OBS + [runtime] + [[OBS]] + [[[job]]] + batch system = pbs + {% for i in obs_types %} + [[ {{i}} ]] + inherit = OBS + [[[directives]]] + -I = {{ resource[i] }} + {% endfor %} + +Here's the result: + +.. code-block:: bash + + $ cylc get-suite-config -i [runtime][airs]directives SUITE + -I = ncpus=9 + + +Jinja2 Default Values And Template Inputs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The values of Jinja2 variables can be passed in from the cylc command +line rather than hardwired in the suite configuration. 
+Here's an example, from ``<cylc-dir>/etc/examples/jinja2/defaults``: + +.. code-block:: cylc + + #!Jinja2 + + [meta] + + title = "Jinja2 example: use of defaults and external input" + + description = """ + The template variable FIRST_TASK must be given on the cylc command line + using --set or --set-file=FILE; two other variables, LAST_TASK and + N_MEMBERS can be set similarly, but if not they have default values.""" + + {% set LAST_TASK = LAST_TASK | default( 'baz' ) %} + {% set N_MEMBERS = N_MEMBERS | default( 3 ) | int %} + + {# input of FIRST_TASK is required - no default #} + + [scheduling] + initial cycle point = 20100808T00 + final cycle point = 20100816T00 + [[dependencies]] + [[[0]]] + graph = """{{ FIRST_TASK }} => ENS + ENS:succeed-all => {{ LAST_TASK }}""" + [runtime] + [[ENS]] + {% for I in range( 0, N_MEMBERS ) %} + [[ mem_{{ I }} ]] + inherit = ENS + {% endfor %} + +Here's the result: + +.. code-block:: bash + + $ cylc list SUITE + Jinja2 Template Error + 'FIRST_TASK' is undefined + cylc-list foo failed: 1 + + $ cylc list --set FIRST_TASK=bob foo + bob + baz + mem_2 + mem_1 + mem_0 + + $ cylc list --set FIRST_TASK=bob --set LAST_TASK=alice foo + bob + alice + mem_2 + mem_1 + mem_0 + + $ cylc list --set FIRST_TASK=bob --set N_MEMBERS=10 foo + mem_9 + mem_8 + mem_7 + mem_6 + mem_5 + mem_4 + mem_3 + mem_2 + mem_1 + mem_0 + baz + bob + +Note also that ``cylc view --set FIRST_TASK=bob --jinja2 SUITE`` +will show the suite with the Jinja2 variables as set. + +.. note:: + + Suites started with template variables set on the command + line will *restart* with the same settings. However, you can set + them again on the ``cylc restart`` command line if they need to + be overridden. + + +Jinja2 Variable Scope +^^^^^^^^^^^^^^^^^^^^^ + +Jinja2 variable scoping rules may be surprising. Variables set inside a +*for loop* block, for instance, are not accessible outside of the block, +so the following will print ``# FOO is False``, not ``# FOO is True``: + +..
code-block:: cylc + + {% set FOO = false %} + {% for item in items %} + {% if item.check_something() %} + {% set FOO = true %} + {% endif %} + {% endfor %} + # FOO is {{FOO}} + +Jinja2 documentation suggests using alternative constructs like the loop else +block or the special ``loop`` variable. More complex use cases can be +handled using ``namespace`` objects which allow propagating of changes +across scopes: + +.. code-block:: cylc + + {% set ns = namespace(foo=false) %} + {% for item in items %} + {% if item.check_something() %} + {% set ns.foo = true %} + {% endif %} + {% endfor %} + # FOO is {{ns.foo}} + +For detail, see +`Jinja2 Template Designer Documentation \> +Assignments `_ + + +.. _Jinja2RaisingExceptions: + +Raising Exceptions +^^^^^^^^^^^^^^^^^^ + +Cylc provides two functions for raising exceptions using Jinja2. These +exceptions are raised when the suite.rc file is loaded and will prevent a suite +from running. + +.. note:: + + These functions must be contained within ``{{`` Jinja2 + blocks as opposed to ``{%`` blocks. + + +Raise +""""" + +The "raise" function will result in an error containing the provided text. + +.. code-block:: cylc + + {% if not VARIABLE is defined %} + {{ raise('VARIABLE must be defined for this suite.') }} + {% endif %} + + +Assert +"""""" + +The "assert" function will raise an exception containing the text provided in +the second argument providing that the first argument evaluates as False. The +following example is equivalent to the "raise" example above. + +.. code-block:: cylc + + {{ assert(VARIABLE is defined, 'VARIABLE must be defined for this suite.') }} + + +Importing additional Python modules +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Jinja2 allows to gather variable and macro definitions in a separate template +that can be imported into (and thus shared among) other templates. + +.. 
code-block:: cylc + + {% import "suite-utils.rc" as utils %} + {% from "suite-utils.rc" import VARIABLE as ALIAS %} + {{ utils.VARIABLE is equalto(ALIAS) }} + +Cylc extends this functionality to allow import of arbitrary Python modules. + +.. code-block:: cylc + + {% from "itertools" import product %} + [runtime] + {% for group, member in product(['a', 'b'], [0, 1, 2]) %} + [[{{group}}_{{member}}]] + {% endfor %} + +For better clarity and disambiguation Python modules can be prefixed with +``__python__``: + +.. code-block:: cylc + + {% from "__python__.itertools" import product %} + + +.. _EmPylabel: + +EmPy +^^^^ + +In addition to Jinja2, Cylc supports the EmPy template processor in suite +configurations. Similarly to Jinja2, EmPy provides variables, mathematical +expressions, loop control structures, conditional logic, etc., that are +expanded to generate the final suite configuration seen by Cylc. See the +`EmPy documentation <http://www.alcyone.com/software/empy/>`_ for more +details on its templating features and how to use them. + +.. note:: + + EmPy is not bundled with Cylc and must be installed separately. It + should be available to Python through the standard ``import em``. Please also + note that there is another Python package called "em" that provides + a conflicting module of the same name. You can run the + ``cylc check-software`` command to check your installation. + +The need for EmPy processing must be declared with a hash-bang comment as +the first line of the suite.rc file: + +.. code-block:: cylc + + #!empy + # ... + +An example suite ``empy.cities`` demonstrating its use is shown below. +It is a translation of the ``jinja2.cities`` example from +:ref:`Jinja` and can be directly compared against it. + +.. literalinclude:: ../../etc/examples/empy/cities/suite.rc + :language: cylc + +For basic usage the difference between Jinja2 and EmPy amounts to a different +markup syntax with little else to distinguish them.
EmPy might be preferable, +however, in cases where more complicated processing logic has to be +implemented. + +EmPy is a system for embedding Python expressions and statements in template +text. It makes the full power of the Python language and its ecosystem easily +accessible from within the template. This might be desirable for several +reasons: + +- no need to learn a different language and its idiosyncrasies just for + writing template logic +- availability of lambda functions, list and dictionary comprehensions + can make template code smaller and more readable compared to Jinja2 +- natural and straightforward integration with the Python package ecosystem +- no two-language barrier between writing template logic and processing + extensions makes it easier to refactor and maintain the template code + as its complexity grows - inline pieces of Python code can be + gathered into subroutines and eventually into separate modules and + packages in a seamless manner. + + +Omitting Tasks At Runtime +^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is sometimes convenient to omit certain tasks from the suite at +runtime without actually deleting their definitions from the suite. + +Defining ``[runtime]`` properties for tasks that do not appear in the suite +graph results in verbose-mode validation warnings that the tasks are +disabled. They cannot be used because the suite graph is what defines +their dependencies and valid cycle points. Nevertheless, it is legal to +leave these orphaned runtime sections in the suite configuration because it +allows you to temporarily remove tasks from the suite by simply +commenting them out of the graph. + +To omit a task from the suite at runtime but still leave it fully +defined and available for use (by insertion or ``cylc submit``) +use one or both of the ``[scheduling][[special tasks]]`` lists, *include at +start-up* or *exclude at start-up* (documented in :ref:`IASU` +and :ref:`EASU`). 
Then the graph still defines the +validity of the tasks and their dependencies, but they are not actually +loaded into the suite at start-up. Other tasks that depend on the +omitted ones, if any, will have to wait on their insertion at a later +time or otherwise be triggered manually. + +Finally, with Jinja2 (:ref:`Jinja`) you can radically alter +suite structure by including or excluding tasks from the ``[scheduling]`` +and ``[runtime]`` sections according to the value of a single logical flag +defined at the top of the suite. + + +Naked Dummy Tasks And Strict Validation +--------------------------------------- + +A *naked dummy task* appears in the suite graph but has no +explicit runtime configuration section. Such tasks automatically +inherit the default "dummy task" configuration from the root +namespace. This is very useful because it allows functional suites to +be mocked up quickly for test and demonstration purposes by simply +defining the graph. It is somewhat dangerous, however, because there +is no way to distinguish an intentional naked dummy task from one +generated by typographic error: misspelling a task name in the graph +results in a new naked dummy task replacing the intended task in the +affected trigger expression; and misspelling a task name in a runtime +section heading results in the intended task becoming a dummy task +itself (by divorcing it from its intended runtime config section). + +To avoid this problem any dummy task used in a real suite should not be +naked - i.e. it should have an explicit entry under the runtime +section of the suite configuration, even if the section is empty. This +results in exactly the same dummy task behaviour, via implicit +inheritance from root, but it allows use of +``cylc validate --strict`` +to catch errors in task names by failing the suite if any naked dummy +tasks are detected. + + +.. 
[1] An OR operator on the right doesn't make much sense: if "B or C" + triggers off A, what exactly should cylc do when A finishes? +.. [2] In NWP forecast analysis suites parts of the observation + processing and data assimilation subsystem will typically also + depend on model background fields generated by the previous forecast. diff --git a/doc/src/suite-design-guide/Makefile b/doc/src/suite-design-guide/Makefile deleted file mode 100644 index 26d48f05792..00000000000 --- a/doc/src/suite-design-guide/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/make -f - -# THIS FILE IS PART OF THE CYLC SUITE ENGINE. -# Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
- -.PHONY: all clean - -all: document.pdf - -document.pdf: *.tex - - pdflatex document.tex - - pdflatex document.tex - - pdflatex document.tex - -clean: - rm -f *.aux *.out *.toc *.log *.pdf diff --git a/doc/src/suite-design-guide/document.tex b/doc/src/suite-design-guide/document.tex deleted file mode 100644 index d74d896fdc2..00000000000 --- a/doc/src/suite-design-guide/document.tex +++ /dev/null @@ -1,11 +0,0 @@ -\include{preamble} -\begin{document} -\include{title-page} -\tableofcontents -\include{introduction} -\include{style-guide} -\include{general-principles} -\include{efficiency} -\include{portable-suites} -\include{roadmap} -\end{document} diff --git a/doc/src/suite-design-guide/efficiency.rst b/doc/src/suite-design-guide/efficiency.rst new file mode 100644 index 00000000000..a8c35680841 --- /dev/null +++ b/doc/src/suite-design-guide/efficiency.rst @@ -0,0 +1,368 @@ +.. _Efficiency And Maintainability: + +Efficiency And Maintainability +============================== + +Efficiency (in the sense of *economy of suite definition*) and +maintainability go hand in hand. This section describes techniques for clean +and efficient construction of complex workflows that are easy to understand, +maintain, and modify. + + +.. _The Task Family Hierarchy: + +The Task Family Hierarchy +------------------------- + +A properly designed family hierarchy fulfils three purposes in Cylc: + +- efficient sharing of all configuration common to groups of related + tasks +- efficient bulk triggering, for clear scheduling graphs +- clean suite visualization and monitoring, because families are + collapsible in the GUIs + + +.. _Sharing By Inheritance: + +Sharing By Inheritance +^^^^^^^^^^^^^^^^^^^^^^ + +Duplication is a maintenance risk because changes have to be repeated in +multiple places without mistakes. On the other hand, unnecessary sharing of +items via global variables is also bad because it is hard to be sure which +tasks are using which variables. 
A properly designed runtime inheritance +hierarchy can give every task exactly what it needs, and nothing that it +doesn't need. + +If a group of related tasks has some configuration in common, it can be +factored out into a task family inherited by all. + +.. code-block:: cylc + + [runtime] + [[OBSPROC]] + # Settings common to all obs processing tasks. + [[obs1]] + inherit = OBSPROC + [[obs2]] + inherit = OBSPROC + +If several families have settings in common, they can in turn inherit +from higher-level families. + +Multiple inheritance allows efficient sharing even for overlapping categories +of tasks. For example, consider that some obs processing tasks in the following +suite run parallel jobs and some serial: + +.. code-block:: cylc + + [runtime] + [[SERIAL]] + # Serial job settings. + [[PARALLEL]] + # Parallel job settings. + [[OBSPROC]] + # Settings for all obs processing tasks. + [[obs1, obs2, obs3]] + # Serial obs processing tasks. + inherit = OBSPROC, SERIAL + [[obs4, obs5]] + # Parallel obs processing tasks. + inherit = OBSPROC, PARALLEL + +Note that suite parameters should really be used to define family members +efficiently - see :ref:`Generating Tasks`. + +Cylc provides tools to help make sense of your inheritance hierarchy: + +- ``cylc graph -n/--namespaces`` - plot the full multiple + inheritance graph (not the dependency graph) +- ``cylc get-config SUITE`` - print selected sections or items + after inheritance processing +- ``cylc graph SUITE`` - plot the dependency graph, with + collapsible first-parent families + (see :ref:`Task Families And Visualization`) +- ``cylc list -t/--tree SUITE`` - print the first-parent + inheritance hierarchy +- ``cylc list -m/--mro SUITE`` - print the inheritance + precedence order for each runtime namespace + + +Family Triggering +^^^^^^^^^^^^^^^^^ + +Task families can be used to simplify the scheduling graph wherever many +tasks need to trigger at once: + +.. 
code-block:: cylc + + [scheduling] + [[dependencies]] + graph = pre => MODELS + [runtime] + [[MODELS]] + [[model1, model2, model3, ...]] + inherit = MODELS + +To trigger *off of* many tasks at once, family names need to be qualified +by ``-all`` or ``-any`` to indicate the desired +member-triggering semantics: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """pre => MODELS + MODELS:succeed-all => post""" + +Note that this can be simplified further because Cylc ignores trigger +qualifiers like ``:succeed-all`` on the right of trigger arrows +to allow chaining of dependencies: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = pre => MODELS:succeed-all => post + + +Family-to-Family Triggering +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = BIG_FAM_1:succeed-all => BIG_FAM_2 + +This means every member of ``BIG_FAM_2`` depends on every member +of ``BIG_FAM_1`` succeeding. For very large families this can create so +many dependencies that it affects the performance of Cylc at run time, as +well as cluttering graph visualizations with unnecessary edges. Instead, +interpose a dummy task that signifies completion of the first family: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = BIG_FAM_1:succeed-all => big_fam_1_done => BIG_FAM_2 + +For families with ``M`` and ``N`` members respectively, this +reduces the number of dependencies from ``M*N`` to ``M+N`` +without affecting the scheduling. + +.. image:: ../graphics/png/orig/fam-to-fam-1.png + +.. image:: ../graphics/png/orig/fam-to-fam-2.png + + +.. _Task Families And Visualization: + +Task Families And Visualization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +*First parents* in the inheritance hierarchy double as collapsible summary +groups for visualization and monitoring. 
Tasks should generally be grouped into +visualization families that reflect their logical purpose in the suite rather +than technical detail such as inherited job submission or host settings. So in +the example under :ref:`Sharing By Inheritance` above all +``obs`` tasks collapse into ``OBSPROC`` but not into +``SERIAL`` or ``PARALLEL``. + +If necessary you can introduce new namespaces just for visualization: + +.. code-block:: cylc + + [runtime] + [[MODEL]] + # (No settings here - just for visualization). + [[model1, model2]] + inherit = MODEL, HOSTX + [[model3, model4]] + inherit = MODEL, HOSTY + +To stop a solo parent being used in visualization, demote it to secondary with +a null parent like this: + +.. code-block:: cylc + + [runtime] + [[SERIAL]] + [[foo]] + # Inherit settings from SERIAL but don't use it in visualization. + inherit = None, SERIAL + + +.. _Generating Tasks: + +Generating Tasks Automatically +------------------------------ + +Groups of tasks that are closely related such as an ensemble of model runs or +a family of obs processing tasks, or sections of workflow that are repeated +with minor variations, can be generated automatically by iterating over +some integer range (e.g. ``model`` for ``n = 1..10``) or +list of strings (e.g. ``obs`` for +``type = ship, buoy, radiosonde, ...``). + + +Jinja2 Loops +^^^^^^^^^^^^ + +Task generation was traditionally done in Cylc with explicit Jinja2 loops, +like this: + +.. code-block:: cylc + + # Task generation the old way: Jinja2 loops (NO LONGER RECOMMENDED!) + {% set PARAMS = range(1,11) %} + [scheduling] + [[dependencies]] + graph = """ + {% for P in PARAMS %} + pre => model_p{{P}} => post + {% if P == 5 %} + model_p{{P}} => check + {% endif %} + {% endfor %} """ + [runtime] + {% for P in PARAMS %} + [[model_p{{P}}]] + script = echo "my parameter value is {{P}}" + {% if P == 1 %} + # special case... 
+ {% endif %} + {% endfor %} + +Unfortunately this makes a mess of the suite definition, particularly the +scheduling graph, and it gets worse with nested loops over multiple parameters. + +.. image:: ../graphics/png/orig/param-1.png + + +.. _SDG Parameterized Tasks: + +Parameterized Tasks +^^^^^^^^^^^^^^^^^^^ + +Cylc-6.11 introduced built-in *suite parameters* for generating tasks +without destroying the clarity of the base suite definition. Here's the same +example using suite parameters instead of Jinja2 loops: + +.. code-block:: cylc + + # Task generation the new way: suite parameters. + [cylc] + [[parameters]] + p = 1..10 + [scheduling] + [[dependencies]] + graph = """pre => model<p> => post + model<p=5> => check""" + [runtime] + [[model<p>]] + script = echo "my parameter value is ${CYLC_TASK_PARAM_p}" + [[model<p=1>]] + # special case ... + +Here ``model<p>`` expands to ``model_p7`` for ``p=7``, +and so on, via the default expansion template for integer-valued parameters, +but custom templates can be defined if necessary. Parameters can also be +defined as lists of strings, and you can define dependencies between different +values: ``chunk<p-1> => chunk<p>``. Here's a multi-parameter example: + +.. code-block:: cylc + + [cylc] + [[parameters]] + run = a, b, c + m = 1..5 + [scheduling] + [[dependencies]] + graph = pre => init<run> => sim<run,m> => close<run> => post + [runtime] + [[sim<run,m>]] + +.. image:: ../graphics/png/orig/param-2.png + +If family members are defined by suite parameters, then parameterized +trigger expressions are equivalent to family ``:<state>-all`` triggers. +For example, this: + +.. code-block:: cylc + + [cylc] + [[parameters]] + n = 1..5 + [scheduling] + [[dependencies]] + graph = pre => model<n> => post + [runtime] + [[MODELS]] + [[model<n>]] + inherit = MODELS + +is equivalent to this: + +.. code-block:: cylc + + [cylc] + [[parameters]] + n = 1..5 + [scheduling] + [[dependencies]] + graph = pre => MODELS:succeed-all => post + [runtime] + [[MODELS]] + [[model<n>]] + inherit = MODELS + +(but future plans for family triggering may make the second case more +efficient for very large families). + +For more information on parameterized tasks see the Cylc user guide. + + +.. _Optional App Config Files: + +Optional App Config Files +------------------------- + +Closely related tasks with few configuration differences between them - such as +multiple UM forecast and reconfiguration apps in the same suite - should use +the same Rose app configuration with the differences supplied by optional +configs, rather than duplicating the entire app for each task. + +Optional app configs should be valid on top of the main app config and not +dependent on the use of other optional app configs. This ensures they will +work correctly with macros and can therefore be upgraded automatically. + +.. note:: + + Currently optional configs don't work very well with UM STASH + configuration - see :ref:`UM STASH in Optional App Configs`. + +Optional app configs can be loaded by command line switch: + +.. code-block:: bash + + rose task-run -O key1 -O key2 + +or by environment variable: + +.. 
code-block:: bash + + ROSE_APP_OPT_CONF_KEYS = key1 key2 + +The environment variable is generally preferred in suites because you don't +have to repeat and override the root-level script configuration: + +.. code-block:: cylc + + [runtime] + [[root]] + script = rose task-run -v + [[foo]] + [[[environment]]] + ROSE_APP_OPT_CONF_KEYS = key1 key2 diff --git a/doc/src/suite-design-guide/efficiency.tex b/doc/src/suite-design-guide/efficiency.tex deleted file mode 100644 index 252904ff0c2..00000000000 --- a/doc/src/suite-design-guide/efficiency.tex +++ /dev/null @@ -1,355 +0,0 @@ -\section{Efficiency And Maintainability} -\label{Efficiency And Maintainability} - -Efficiency (in the sense of {\em economy of suite definition}) and -maintainability go hand in hand. This section describes techniques for clean -and efficient construction of complex workflows that are easy to understand, -maintain, and modify. - -\subsection{The Task Family Hierarchy} -\label{The Task Family Hierarchy} - -A properly designed family hierarchy fulfills three purposes in Cylc: - -\begin{itemize} - \item efficient sharing of all configuration common to groups of related - tasks - \item efficient bulk triggering, for clear scheduling graphs - \item clean suite visualization and monitoring, because families are - collapsible in the GUIs -\end{itemize} - -\subsubsection{Sharing By Inheritance} -\label{Sharing By Inheritance} - -Duplication is a maintenance risk because changes have to be repeated in -multiple places without mistakes. On the other hand, unnecessary sharing of -items via global variables is also bad because it is hard to be sure which -tasks are using which variables. A properly designed runtime inheritance -hierarchy can give every task exactly what it needs, and nothing that it -doesn't need. - -If a group of related tasks has some configuration in common, it can be -factored out into a task family inherited by all. 
- -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[OBSPROC]] - # Settings common to all obs processing tasks. - [[obs1]] - inherit = OBSPROC - [[obs2]] - inherit = OBSPROC -\end{lstlisting} - -If several families have settings in common, they can in turn can inherit -from higher-level families. - -Multiple inheritance allows efficient sharing even for overlapping categories -of tasks. For example consider that some obs processing tasks in the following -suite run parallel jobs and some serial: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[SERIAL]] - # Serial job settings. - [[PARALLEL]] - # Parallel job settings. - [[OBSPROC]] - # Settings for all obs processing tasks. - [[obs1, obs2, obs3]] - # Serial obs processing tasks. - inherit = OBSPROC, SERIAL - [[obs4, obs5]] - # Parallel obs processing tasks. - inherit = OBSPROC, PARALLEL -\end{lstlisting} - -Note that suite parameters should really be used to define family members -efficiently - see Section~\ref{Generating Tasks}. 
- -Cylc provides tools to help make sense of your inheritance hierarchy: - -\begin{itemize} - \item \lstinline=cylc graph -n/--namespaces= - plot the full multiple - inheritance graph (not the dependency graph) - \item \lstinline=cylc get-config SUITE= - print selected sections or items - after inheritance processing - \item \lstinline=cylc graph SUITE= - plot the dependency graph, with - collapsible first-parent families (see~\ref{Task Families And Visualization}) - \item \lstinline=cylc list -t/--tree SUITE= - print the first-parent - inheritance hierarchy - \item \lstinline=cylc list -m/--mro SUITE= - print the inheritance - precedence order for each runtime namespace -\end{itemize} - -\subsubsection{Family Triggering} - -Task families can be used to simplify the scheduling graph wherever many -tasks need to trigger at once: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = pre => MODELS -[runtime] - [[MODELS]] - [[model1, model2, model3, ...]] - inherit = MODELS -\end{lstlisting} - -To trigger {\em off of} many tasks at once, family names need to be qualified -by \lstinline@-all@ or \lstinline@-any@ to indicate the desired -member-triggering semantics: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """pre => MODELS - MODELS:succeed-all => post""" -\end{lstlisting} - -Note that this can be simplified further because Cylc ignores trigger -qualifiers like \lstinline=:succeed-all= on the right of trigger arrows -to allow chaining of dependencies: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = pre => MODELS:succeed-all => post -\end{lstlisting} - -\subsubsection{Family-to-Family Triggering} - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = BIG_FAM_1:succeed-all => BIG_FAM_2 -\end{lstlisting} - -This means every member of \lstinline=BIG_FAM_2= depends on every member -of \lstinline=BIG_FAM_1= 
succeeding. For very large families this can create so -many dependencies that it affects the performance of Cylc at run time, as -well as cluttering graph visualizations with unnecessary edges. Instead, -interpose a dummy task that signifies completion of the first family: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = BIG_FAM_1:succeed-all => big_fam_1_done => BIG_FAM_2 -\end{lstlisting} - -For families with \lstinline=M= and \lstinline=N= members respectively, this -reduces the number of dependencies from \lstinline=M*N= to \lstinline=M+N= -without affecting the scheduling. - -\includegraphics[width=\textwidth]{resources/png/fam-to-fam-1.png} -\includegraphics[width=\textwidth]{resources/png/fam-to-fam-2.png} - -\subsubsection{Task Families And Visualization} -\label{Task Families And Visualization} - -{\em First parents} in the inheritance hierarchy double as collapsible summary -groups for visualization and monitoring. Tasks should generally be grouped into -visualization families that reflect their logical purpose in the suite rather -than technical detail such as inherited job submission or host settings. So in -the example under Section~\ref{Sharing By Inheritance} above all -\lstinline=obs= tasks collapse into \lstinline=OBSPROC= but not into -\lstinline=SERIAL= or \lstinline=PARALLEL=. - -If necessary you can introduce new namespaces just for visualization: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[MODEL]] - # (No settings here - just for visualization). - [[model1, model2]] - inherit = MODEL, HOSTX - [[model3, model4]] - inherit = MODEL, HOSTY -\end{lstlisting} - -To stop a solo parent being used in visualization, demote it to secondary with -a null parent like this: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[SERIAL]] - [[foo]] - # Inherit settings from SERIAL but don't use it in visualization. 
- inherit = None, SERIAL -\end{lstlisting} - -\subsection{Generating Tasks Automatically} -\label{Generating Tasks} - -Groups of tasks that are closely related such as an ensemble of model runs or -a family of obs processing tasks, or sections of workflow that are repeated -with minor variations, can be generated automatically by iterating over -some integer range (e.g.\ \lstinline=model= for \lstinline@n = 1..10@) or -list of strings (e.g.\ \lstinline=obs= for -\lstinline@type = ship, buoy, radiosonde, ...@). - -\subsubsection{Jinja2 Loops} - -Task generation was traditionally done in Cylc with explicit Jinja2 loops, -like this: -\lstset{language=suiterc} -\begin{lstlisting} -# Task generation the old way: Jinja2 loops (NO LONGER RECOMMENDED!) -{% set PARAMS = range(1,11) %} -[scheduling] - [[dependencies]] - graph = """ -{% for P in PARAMS %} - pre => model_p{{P}} => post - {% if P == 5 %} - model_p{{P}} => check - {% endif %} -{% endfor %} """ -[runtime] -{% for P in PARAMS %} - [[model_p{{P}}]] - script = echo "my parameter value is {{P}}" - {% if P == 1 %} - # special case... - {% endif %} -{% endfor %} -\end{lstlisting} - -Unfortunately this makes a mess of the suite definition, particularly the -scheduling graph, and it gets worse with nested loops over multiple parameters. - -\includegraphics[width=\textwidth]{resources/png/param-1.png} - -\subsubsection{Parameterized Tasks} -\label{Parameterized Tasks} - -Cylc-6.11 introduced built-in {\em suite parameters} for generating tasks -without destroying the clarity of the base suite definition. Here's the same -example using suite parameters instead of Jinja2 loops: - -\lstset{language=suiterc} -\begin{lstlisting} -# Task generation the new way: suite parameters. -[cylc] - [[parameters]] - p = 1..10 -[scheduling] - [[dependencies]] - graph = """pre => model

=> post - model => check""" -[runtime] - [[model

]] - script = echo "my parameter value is ${CYLC_TASK_PARAM_p}" - [[model]] - # special case ... -\end{lstlisting} - -Here \lstinline@model

@ expands to \lstinline@model_p7@ for \lstinline@p=7@, -and so on, via the default expansion template for integer-valued parameters, -but custom templates can be defined if necessary. Parameters can also be -defined as lists of strings, and you can define dependencies between different -values: \lstinline@chunk => chunk

@. Here's a multi-parameter example: - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - [[parameters]] - run = a, b, c - m = 1..5 -[scheduling] - [[dependencies]] - graph = pre => init => sim => close => post -[runtime] - [[sim]] -\end{lstlisting} - -\includegraphics[width=\textwidth]{resources/png/param-2.png} - -If family members are defined by suite parameters, then parameterized -trigger expressions are equivalent to family \lstinline=:-all= triggers. -For example, this: - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - [[parameters]] - n = 1..5 -[scheduling] - [[dependencies]] - graph = pre => model => post -[runtime] - [[MODELS]] - [[model]] - inherit = MODELS -\end{lstlisting} - -is equivalent to this: - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - [[parameters]] - n = 1..5 -[scheduling] - [[dependencies]] - graph = pre => MODELS:succeed-all => post -[runtime] - [[MODELS]] - [[model]] - inherit = MODELS -\end{lstlisting} - -(but future plans for family triggering may make the second case more -efficient for very large families). - -For more information on parameterized tasks see the Cylc user guide. - -\subsection{Optional App Config Files} -\label{Optional App Config Files} - -Closely related tasks with few configuration differences between them - such as -multiple UM forecast and reconfiguration apps in the same suite - should use -the same Rose app configuration with the differences supplied by optional -configs, rather than duplicating the entire app for each task. - -Optional app configs should be valid on top of the main app config and not -dependent on the use of other optional app configs. This ensures they will -work correctly with macros and can therefore be upgraded automatically. 
- -\note{Currently optional configs don't work very well with UM STASH - configuration - see Section~\ref{UM STASH in Optional App Configs}.} - -Optional app configs can be loaded by command line switch: - -\begin{lstlisting} -rose task-run -O key1 -O key2 -\end{lstlisting} - -or by environment variable: - -\begin{lstlisting} -ROSE_APP_OPT_CONF_KEYS = key1 key2 -\end{lstlisting} - -The environment variable is generally preferred in suites because you don't -have to repeat and override the root-level script configuration: - -\begin{lstlisting} -[runtime] - [[root]] - script = rose task-run -v - [[foo]] - [[[environment]]] - ROSE_APP_OPT_CONF_KEYS = key1 key2 -\end{lstlisting} diff --git a/doc/src/suite-design-guide/general-principles.rst b/doc/src/suite-design-guide/general-principles.rst new file mode 100644 index 00000000000..0cc1af198c7 --- /dev/null +++ b/doc/src/suite-design-guide/general-principles.rst @@ -0,0 +1,757 @@ +.. _Basic Principles: + +Basic Principles +================ + +This section covers general principles that should be kept in mind when +writing any suite. More advanced topics are covered later: +:ref:`Efficiency And Maintainability` and :ref:`Portable Suites Label`. + + +UTC Mode +-------- + +Cylc has full timezone support if needed, but real time NWP suites should use +UTC mode to avoid problems at the transition between local standard time and +daylight saving time, and to enable the same suite to run the same way in +different timezones. + +.. code-block:: cylc + + [cylc] + UTC mode = True + + +Fine Or Coarse-Grained Suites +----------------------------- + +Suites can have many small simple tasks, fewer large complex tasks, or anything +in between. A task that runs many distinct processes can be split into many +distinct tasks. The fine-grained approach is more transparent and it allows +more task level concurrency and quicker failure recovery - you can rerun just +what failed without repeating anything unnecessarily. 
+ + +rose bunch +^^^^^^^^^^ + +One caveat to our fine-graining advice is that submitting a large number of +small tasks at once may be a problem on some platforms. If you have many +similar concurrent jobs you can use ``rose bunch`` to pack them into a +single task with incremental rerun capability: retriggering the task will rerun +just the component jobs that did not successfully complete earlier. + + +.. _Monolithic Or Interdependent Suites: + +Monolithic Or Interdependent Suites +----------------------------------- + +When writing suites from scratch you may need to decide between putting +multiple loosely connected sub-workflows into a single large suite, or +constructing a more modular system of smaller suites that depend on each other +through inter-suite triggering. Each approach has its pros and cons, depending +on your requirements and preferences with respect to the complexity and +manageability of the resulting system. + +The ``cylc gscan`` GUI lets you monitor multiple suites at a time, and +you can define virtual groups of suites that collapse into a single state +summary. + + +Inter-Suite Triggering +^^^^^^^^^^^^^^^^^^^^^^ + +A task in one suite can explicitly trigger off of a task in another suite. The +full range of possible triggering conditions is supported, including custom +message triggers. Remote triggering involves repeatedly querying ("polling") +the remote suite run database, not the suite server program, so it works even +if the other suite is down at the time. + +There is special graph syntax to support triggering off of a task in another +suite, or you can call the underlying ``cylc suite-state`` command +directly in task scripting. + +In real time suites you may want to use clock-triggers to delay the onset of +inter-suite polling until roughly the expected completion time of the remote +task. + + +.. 
_Self-Contained Suites: + +Self-Contained Suites +--------------------- + +All files generated by Cylc during a suite run are confined to the *suite +run directory* ``$HOME/cylc-run/<SUITE>``. However, Cylc has no control +over the locations of the programs, scripts, and files that are executed, +read, or generated by your tasks at runtime. It is up to you to ensure that +all of this is confined to the suite run directory too, as far as possible. + +Self-contained suites are more robust, easier to work with, and more portable. +Multiple instances of the same suite (with different suite names) should be +able to run concurrently under the same user account without mutual +interference. + + +Avoiding External Files +^^^^^^^^^^^^^^^^^^^^^^^ + +Suites that use external scripts, executables, and files beyond the essential +system libraries and utilities are vulnerable to external changes: someone +else might interfere with these files without telling you. + +In some cases you may need to symlink to large external files anyway, if space +or copy speed is a problem, but otherwise suites with private copies of all the +files they need are more robust. + + +Installing Files At Start-up +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use ``rose suite-run`` *file creation mode* or ``R1`` +install tasks to copy files to the self-contained suite run directory at +start-up. Install tasks are preferred for time-consuming installations because +they don't slow the suite start-up process, they can be monitored in the GUI, +they can run directly on target platforms, and you can rerun them later without +restarting the suite. If you are using symbolic links to install files under +your suite directory it is recommended that the linking should be set up to +fail if the source is missing, e.g. by using *mode=symlink+* for file +installation in a rose app. + + +Confining Output To The Run Directory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Output files should be confined to the suite run directory tree. 
Then all +output is easy to find, multiple instances of the same suite can run +concurrently without interference, and other users should be able to copy and +run your suite with few modifications. Cylc provides a ``share`` +directory for generated files that are used by several tasks in a suite +(see :ref:`Shared Task IO Paths`). Archiving tasks can use ``rose arch`` +to copy or move selected files to external locations as needed (see +:ref:`Suite Housekeeping`). + + +Task Host Selection +------------------- + +At sites with multiple task hosts to choose from, use +``rose host-select`` to dynamically select appropriate task hosts +rather than hard coding particular hostnames. This enables your suite to +adapt to particular machines being down or heavily overloaded by selecting +from a group of hosts based on a series of criteria. +``rose host-select`` will only return hosts that can be contacted by +non-interactive SSH. + + +Task Scripting +-------------- + +Non-trivial task scripting should be held in external files rather than +inlined in the suite.rc. This keeps the suite definition tidy, and it +allows proper shell-mode text editing and independent testing of task scripts. + +For automatic access by task jobs, task-specific scripts should be kept in +Rose app bin directories, and shared scripts kept in (or installed to) the +suite bin directory. + + +Coding Standards +^^^^^^^^^^^^^^^^ + +When writing your own task scripts make consistent use of appropriate coding +standards such as: + +- `PEP8 for Python <https://www.python.org/dev/peps/pep-0008/>`_ +- `Google Shell Style Guide for + Bash <https://google.github.io/styleguide/shell.xml>`_ + + +Basic Functionality +^^^^^^^^^^^^^^^^^^^ + +In consideration of future users who may not be expert on the internals of your +suite and its tasks, all task scripts should: + +- Print clear usage information if invoked incorrectly (and via the + standard options ``-h, --help``). +- Print useful diagnostic messages in case of error.
For example, if a + file was not found, the error message should contain the full path to the + expected location. +- Always return correct shell exit status - zero for success, non-zero + for failure. This is used by Cylc job wrapper code to detect success and + failure and report it back to the suite server program. +- In shell scripts use ``set -u`` to abort on any reference to + an undefined variable. If you really need an undefined variable to evaluate + to an empty string, make it explicit: ``FOO=${FOO:-}``. +- In shell scripts use ``set -e`` to abort on any error without + having to failure-check each command explicitly. +- In shell scripts use ``set -o pipefail`` to abort on any error + within a pipeline. Note that all commands in the pipeline will still + run, it will just exit with the rightmost non-zero exit status. + +.. note:: + + Examples and more details `are available <https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/>`_ + for the above three ``set`` commands. + + +Rose Apps +--------- + +Rose apps allow all non-shared task configuration - which is not relevant to +workflow automation - to be moved from the suite definition into app config +files. This makes suites tidier and easier to understand, and it allows +``rose edit`` to provide a unified metadata-enhanced view of the suite +and its apps (see :ref:`Rose Metadata Compliance`). + +Rose apps are a clear winner for tasks with complex configuration requirements. +It matters less for those with little configuration, but for consistency and to +take full advantage of ``rose edit`` it makes sense to use Rose apps +for most tasks. + +When most tasks are Rose apps, set the app-run command as a root-level default, +and override it for the occasional non Rose app task: + +.. code-block:: cylc + + [runtime] + [[root]] + script = rose task-run -v + [[rose-app1]] + #... + [[rose-app2]] + #... + [[hello-world]] # Not a Rose app. + script = echo "Hello World" + + +..
_Rose Metadata Compliance: + +Rose Metadata Compliance +------------------------ + +Rose metadata drives page layout and sort order in ``rose edit``, plus +help information, input validity checking, macros for advanced checking and app +version upgrades, and more. + +To ensure the suite and its constituent applications are being run as intended +it should be valid against any provided metadata: launch the +``rose edit`` GUI or run ``rose macro --validate`` on the +command line to highlight any errors, and correct them prior to use. If errors +are flagged incorrectly you should endeavour to fix the metadata. + +When writing a new suite or application, consider creating metadata to +facilitate ease of use by others. + + +Task Independence +----------------- + +Essential dependencies must be encoded in the suite graph, but +tasks should not rely unnecessarily on the action of other tasks. +For example, tasks should create their own output directories if they don't +already exist, even if they would normally be created by an earlier task +in the workflow. This makes it easier to run tasks alone during +development and testing. + + +.. _Clock-Triggered Tasks: + +Clock-Triggered Tasks +--------------------- + +Tasks that wait on real time data should use clock-triggers to delay job +submission until the expected data arrival time: + +.. code-block:: cylc + + [scheduling] + initial cycle point = now + [[special tasks]] + # Trigger 5 min after wall-clock time is equal to cycle point. + clock-trigger = get-data(PT5M) + [[dependencies]] + [[[T00]]] + graph = get-data => process-data + +Clock-triggered tasks typically have to handle late data arrival.
Task +execution *retry delays* can be used to simply retrigger the task at +intervals until the data is found, but frequently retrying small tasks probably +should not go to a batch scheduler, and multiple task failures will be logged +for what is essentially a normal condition (at least it is normal until the +data is really late). + +Rather than using task execution retry delays to repeatedly trigger a task that +checks for a file, it may be better to have the task itself repeatedly poll for +the data (see :ref:`Rose App File Polling` for example). + + +.. _Rose App File Polling: + +Rose App File Polling +--------------------- + +Rose apps have built-in polling functionality to check repeatedly for the +existence of files before executing the main app. See the ``[poll]`` +section in Rose app config documentation. This is a good way to implement +check-and-wait functionality in clock-triggered tasks +(:ref:`Clock-Triggered Tasks`), for example. + +It is important to note that frequent polling may be bad for some filesystems, +so be sure to configure a reasonable interval between polls. + + +Task Execution Time Limits +-------------------------- + +Instead of setting job wall clock limits directly in batch scheduler +directives, use the ``execution time limit`` suite config item. +Cylc automatically derives the correct batch scheduler directives from this, +and it is also used to run ``background`` and ``at`` jobs via +the ``timeout`` command, and to poll tasks that haven't reported as +finished by the configured time limit. + + +..
_Restricting Suite Activity: + +Restricting Suite Activity +-------------------------- + +It may be possible for large suites to overwhelm a job host by submitting too +many jobs at once: + +- Large suites that are not sufficiently limited by real time clock + triggering or inter-cycle dependence may generate a lot of *runahead* + (this refers to Cylc's ability to run multiple cycles at once, restricted + only by the dependencies of individual tasks). +- Some suites may have large families of tasks whose members all + become ready at the same time. + +These problems can be avoided with *runahead limiting* and *internal +queues*, respectively. + + +.. _Runahead Limiting: + +Runahead Limiting +^^^^^^^^^^^^^^^^^ + +By default Cylc allows a maximum of three cycle points to be active at the same +time, but this value is configurable: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020-01-01T00 + # Don't allow any cycle interleaving: + max active cycle points = 1 + + +Internal Queues +^^^^^^^^^^^^^^^ + +Tasks can be assigned to named internal queues that limit the number of members +that can be active (i.e. submitted or running) at the same time: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2020-01-01T00 + [[queues]] + # Allow only 2 members of BIG_JOBS to run at once: + [[[big_jobs_queue]]] + limit = 2 + members = BIG_JOBS + [[dependencies]] + [[[T00]]] + graph = pre => BIG_JOBS + [runtime] + [[BIG_JOBS]] + [[foo, bar, baz, ...]] + inherit = BIG_JOBS + + +.. _Suite Housekeeping: + +Suite Housekeeping +------------------ + +Ongoing cycling suites can generate an enormous number of output files and logs +so regular housekeeping is very important. Special housekeeping tasks, +typically the last tasks in each cycle, should be included to archive selected +important files and then delete everything at some offset from the current +cycle point. + +The Rose built-in apps ``rose_arch`` and ``rose_prune`` +provide an easy way to do this. 
They can be configured easily with +file-matching patterns and cycle point offsets to perform various housekeeping +operations on matched files. + + +Complex Jinja2 Code +------------------- + +The Jinja2 template processor provides general programming constructs, +extensible with custom Python filters, that can be used to *generate* the +suite definition. This makes it possible to write flexible multi-use +suites with structure and content that varies according to various input +switches. There is a cost to this flexibility however: excessive use of Jinja2 +can make a suite hard to understand and maintain. It is difficult to say +exactly where to draw the line, but we recommend erring on the side of +simplicity and clarity: write suites that are easy to understand and therefore +easy to modify for other purposes, rather than extremely complicated suites +that attempt to do everything out of the box but are hard to maintain and modify. + +Note that use of Jinja2 loops for generating tasks is now deprecated in favour +of built-in parameterized tasks - see :ref:`Parameterized Tasks Label`. + + +Shared Configuration +-------------------- + +Configuration that is common to multiple tasks should be defined in one +place and used by all, rather than duplicated in each task. Duplication is +a maintenance risk because changes have to be made consistently in several +places at once. + + +Jinja2 Variables +^^^^^^^^^^^^^^^^ + +In simple cases you can share by passing a Jinja2 variable to all the tasks +that need it: + +.. code-block:: cylc + + {% set JOB_VERSION = 'A23' %} + [runtime] + [[foo]] + script = run-foo --version={{JOB_VERSION}} + [[bar]] + script = run-bar --version={{JOB_VERSION}} + + +Inheritance +^^^^^^^^^^^ + +Sharing by inheritance of task families is recommended when more than a few +configuration items are involved. + +The simplest application of inheritance is to set global defaults in the +``[runtime][[root]]`` namespace that is inherited by all tasks.
+However, this should only be done for settings that really are used +by the vast majority of tasks. Over-sharing via root, particularly of +environment variables, is a maintenance risk because it can be very +difficult to be sure which tasks are *using* which global variables. + +Any ``[runtime]`` settings can be shared - scripting, host +and batch scheduler configuration, environment variables, and so on - from +single items up to complete task or app configurations. At the latter extreme, +it is quite common to have several tasks that inherit the same complete +job configuration followed by minor task-specific additions: + +.. code-block:: cylc + + [runtime] + [[FILE-CONVERT]] + script = convert-netcdf + #... + [[convert-a]] + inherit = FILE-CONVERT + [[[environment]]] + FILE_IN = file-a + [[convert-b]] + inherit = FILE-CONVERT + [[[environment]]] + FILE_IN = file-b + +Inheritance is covered in more detail from an efficiency perspective in +:ref:`The Task Family Hierarchy`. + + +.. _Shared Task IO Paths: + +Shared Task IO Paths +^^^^^^^^^^^^^^^^^^^^ + +If one task uses files generated by another task (and both see the same +filesystem) a common IO path should normally be passed to both tasks via a +shared environment variable. As far as Cylc is concerned this is no different +to other shared configuration items, but there are some additional aspects +of usage worth addressing here. + +Primarily, for self-containment (see :ref:`Self-Contained Suites`) shared IO +paths should be under the *suite share directory*, the location of which is +passed to all tasks as ``$CYLC_SUITE_SHARE_PATH``. + +The ``rose task-env`` utility can provide additional environment +variables that refer to static and cyclepoint-specific locations under the +suite share directory. + +..
code-block:: cylc + + [runtime] + [[my-task]] + env-script = $(eval rose task-env -T P1D -T P2D) + +For a current cycle point of ``20170105`` this will make the following +variables available to tasks: + +.. code-block:: bash + + ROSE_DATA=$CYLC_SUITE_SHARE_PATH/data + ROSE_DATAC=$CYLC_SUITE_SHARE_PATH/cycle/20170105 + ROSE_DATACP1D=$CYLC_SUITE_SHARE_PATH/cycle/20170104 + ROSE_DATACP2D=$CYLC_SUITE_SHARE_PATH/cycle/20170103 + +Subdirectories of ``$ROSE_DATAC`` etc. should be agreed between +different sub-systems of the suite; typically they are named for the +file-generating tasks, and the file-consuming tasks should know to look there. + +The share-not-duplicate rule can be relaxed for shared files whose names are +agreed by convention, so long as their locations under the share directory are +proper shared suite variables. For instance the Unified Model uses a large +number of files whose conventional names (``glu_snow``, for example) +can reasonably be expected not to change, so they are typically hardwired into +app configurations (as ``$ROSE_DATA/glu_snow``, for example) to avoid +cluttering the suite definition. + +Here two tasks share a workspace under the suite share directory +by inheritance: + +.. code-block:: cylc + + # Sharing an I/O location via inheritance. + [scheduling] + [[dependencies]] + graph = write_data => read_data + [runtime] + [[root]] + env-script = $(eval rose task-env) + [[WORKSPACE]] + [[[environment]]] + DATA_DIR = ${ROSE_DATA}/png + [[write_data]] + inherit = WORKSPACE + script = """ + mkdir -p $DATA_DIR + write-data.exe -o ${DATA_DIR}""" + [[read_data]] + inherit = WORKSPACE + script = read-data.exe -i ${DATA_DIR} + +In simple cases where an appropriate family does not already exist paths can +be shared via Jinja variables: + +.. code-block:: cylc + + # Sharing an I/O location with Jinja2. 
+ {% set DATA_DIR = '$ROSE_DATA/stuff' %} + [scheduling] + [[dependencies]] + graph = write_data => read_data + [runtime] + [[write_data]] + script = """ + mkdir -p {{DATA_DIR}} + write-data.exe -o {{DATA_DIR}}""" + [[read_data]] + script = read-data.exe -i {{DATA_DIR}} + +For completeness we note that it is also possible to configure multiple tasks +to use the same work directory so they can all share files in ``$PWD``. +(Cylc executes task jobs in special work directories that by default are unique +to each task). This may simplify the suite slightly, and it may be useful if +you are unfortunate enough to have executables that are designed for IO in +``$PWD``, *but it is not recommended*. There is a higher risk +of interference between tasks; it will break ``rose task-run`` +incremental file creation mode; and ``rose task-run --new`` will in +effect delete the work directories of tasks other than its intended target. + +.. code-block:: cylc + + # Shared work directory: tasks can read and write in $PWD - use with caution! + [scheduling] + initial cycle point = 2018 + [[dependencies]] + [[[P1Y]]] + graph = write_data => read_data + [runtime] + [[WORKSPACE]] + work sub-directory = $CYLC_TASK_CYCLE_POINT/datadir + [[write_data]] + inherit = WORKSPACE + script = write-data.exe + [[read_data]] + inherit = WORKSPACE + script = read-data.exe + + +Varying Behaviour By Cycle Point +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make a cycling job behave differently at different cycle points you +*could* use a single task with scripting that reacts to the cycle point it finds +itself running at, but it is better to use different tasks (in different +cycling sections) that inherit the same base job configuration. This results +in a more transparent suite that can be understood just by inspecting the +graph: + +.. code-block:: cylc + + # Run the same job differently at different cycle points. 
+ [scheduling] + initial cycle point = 2020-01-01T00 + [[dependencies]] + [[[T00]]] + graph = pre => long_fc => post + [[[T12]]] + graph = pre => short_fc => post + [runtime] + [[MODEL]] + script = run-model.sh + [[long_fc]] + inherit = MODEL + [[[job]]] + execution time limit = PT30M + [[[environment]]] + RUN_LEN = PT48H + [[short_fc]] + inherit = MODEL + [[[job]]] + execution time limit = PT10M + [[[environment]]] + RUN_LEN = PT12H + +The few differences between ``short_fc`` and ``long_fc``, +including batch scheduler resource requests, can be configured after common +settings are inherited. + +At Start-Up +^^^^^^^^^^^ + +Similarly, if a cycling job needs special behaviour at the initial (or any +other) cycle point, just use a different logical task in an ``R1`` graph and +have it inherit the same job as the general cycling task, not a single task +with scripting that behaves differently if it finds itself running at the +initial cycle point. + + +Automating Failure Recovery +--------------------------- + + +Job Submission Retries +^^^^^^^^^^^^^^^^^^^^^^ + +When submitting jobs to a remote host, use job submission retries to +automatically resubmit tasks in the event of network outages. Note this is +distinct from job retries for job execution failure (just below). + +Job submission retries should normally be host (or host-group for +``rose host-select``) specific, not task-specific, so configure them in +a host (or host-group) specific family. The following suite.rc fragment +configures all HPC jobs to retry on job submission failure up to 10 +times at 1 minute intervals, then another 5 times at 1 hour intervals: + +.. code-block:: cylc + + [runtime] + [[HPC]] # Inherited by all jobs submitted to HPC. + [[[job]]] + submission retry delays = 10*PT1M, 5*PT1H + + +Job Execution Retries +^^^^^^^^^^^^^^^^^^^^^ + +Automatic retry on job execution failure is useful if you have good reason to +believe that a simple retry will usually succeed. 
This may be the case if the +job host is known to be flaky, or if the job only ever fails for one known +reason that can be fixed on a retry. For example, if a model fails occasionally +with a numerical instability that can be remedied with a short timestep rerun, +then an automatic retry may be appropriate: + +.. code-block:: cylc + + [runtime] + [[model]] + script = """ + if [[ $CYLC_TASK_TRY_NUMBER > 1 ]]; then + SHORT_TIMESTEP=true + else + SHORT_TIMESTEP=false + fi + model.exe""" + [[[job]]] + execution retry delays = 1*PT0M + + +Failure Recovery Workflows +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For recovery from failures that require explicit diagnosis you can configure +alternate routes through the workflow, together with *suicide triggers* +that remove the unused route. In the following example, if the model fails a +diagnosis task will trigger; if it determines the cause of the failure is a +known numerical instability (e.g. by parsing model job logs) it will succeed, +triggering a short timestep run. Postprocessing can proceed from either the +original or the short-step model run, and suicide triggers remove the unused +path from the workflow: + +.. Need to use a 'container' directive to get centered image with + left-aligned caption (as required for code block text). + +.. _fig-failure-rec: + +.. container:: twocol + + .. container:: image + + .. figure:: ../graphics/png/orig/failure-recovery.png + :align: center + + .. container:: caption + + .. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + model | model_short => postproc + model:fail => diagnose => model_short + # Clean up with suicide triggers: + model => ! diagnose & ! model_short + model_short => ! model""" + + +Include Files +------------- + +Include-files should not be overused, but they can sometimes be useful +(e.g. see :ref:`Portable Suites Label`): + +.. code-block:: cylc + + #... + {% include 'inc/foo.rc' %} + +(Technically this inserts a Jinja2-rendered file template). 
Cylc also has a +native include mechanism that pre-dates Jinja2 support and literally inlines +the include-file: + +.. code-block:: cylc + + #... + %include 'inc/foo.rc' + +The two methods normally produce the same result, but use the Jinja2 version if +you need to construct an include-file name from a variable (because Cylc +include-files get inlined before Jinja2 processing is done): + +.. code-block:: cylc + + #... + {% include 'inc/' ~ SITE ~ '.rc' %} diff --git a/doc/src/suite-design-guide/general-principles.tex b/doc/src/suite-design-guide/general-principles.tex deleted file mode 100644 index 808d0b18a6c..00000000000 --- a/doc/src/suite-design-guide/general-principles.tex +++ /dev/null @@ -1,692 +0,0 @@ -\section{Basic Principles} -\label{Basic Principles} - -This section covers general principles that should be kept in mind when writing -any suite. More advanced topics are covered later: {\em Efficiency And -Maintainability} (section~\ref{Efficiency And Maintainability}) and {\em -Portable Suites} (section~\ref{Portable Suites}). - -\subsection{UTC Mode} - -Cylc has full timezone support if needed, but real time NWP suites should use -UTC mode to avoid problems at the transition between local standard time and -daylight saving time, and to enable the same suite to run the same way in -different timezones. - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -\end{lstlisting} -\subsection{Fine Or Coarse-Grained Suites} - -Suites can have many small simple tasks, fewer large complex tasks, or anything -in between. A task that runs many distinct processes can be split into many -distinct tasks. The fine-grained approach is more transparent and it allows -more task level concurrency and quicker failure recovery - you can rerun just -what failed without repeating anything unnecessarily. 
- -\subsubsection{rose bunch} - -One caveat to our fine-graining advice is that submitting a large number of -small tasks at once may be a problem on some platforms. If you have many -similar concurrent jobs you can use \lstinline=rose bunch= to pack them into a -single task with incremental rerun capability: retriggering the task will rerun -just the component jobs that did not successfully complete earlier. - -\subsection{Monolithic Or Interdependent Suites} -\label{Monolithic Or Interdependent Suites} - -When writing suites from scratch you may need to decide between putting -multiple loosely connected sub-workflows into a single large suite, or -constructing a more modular system of smaller suites that depend on each other -through inter-suite triggering. Each approach has its pros and cons, depending -on your requirements and preferences with respect to the complexity and -manageability of the resulting system. - -The \lstinline=cylc gscan= GUI lets you monitor multiple suites at a time, and -you can define virtual groups of suites that collapse into a single state -summary. - -\subsubsection{Inter-Suite Triggering} - -A task in one suite can explicitly trigger off of a task in another suite. The -full range of possible triggering conditions is supported, including custom -message triggers. Remote triggering involves repeatedly querying (``polling'') -the remote suite run database, not the suite server program, so it works even -if the other suite is down at the time. - -There is special graph syntax to support triggering off of a task in another -suite, or you can call the underlying \lstinline=cylc suite-state= command -directly in task scripting. - -In real time suites you may want to use clock-triggers to delay the onset of -inter-suite polling until roughly the expected completion time of the remote -task. 
- -\subsection{Self-Contained Suites} -\label{Self-Contained Suites} - -All files generated by Cylc during a suite run are confined to the {\em suite -run directory} \lstinline=$HOME/cylc-run/=. However, Cylc has no control -over the locations of the programs, scripts, and files, that are executed, -read, or generated by your tasks at runtime. It is up to you to ensure that -all of this is confined to the suite run directory too, as far as possible. - -Self-contained suites are more robust, easier to work with, and more portable. -Multiple instances of the same suite (with different suite names) should be -able to run concurrently under the same user account without mutual -interference. - -\subsubsection{Avoiding External Files} - -Suites that use external scripts, executables, and files beyond the essential -system libraries and utilities are vulnerable to external changes: someone -else might interfere with these files without telling you. - -In some case you may need to symlink to large external files anyway, if space -or copy speed is a problem, but otherwise suites with private copies of all the -files they need are more robust. - -\subsubsection{Installing Files At Start-up} - -Use \lstinline=rose suite-run= {\em file creation mode} or \lstinline=R1= -install tasks to copy files to the self-contained suite run directory at -start-up. Install tasks are preferred for time-consuming installations because -they don't slow the suite start-up process, they can be monitored in the GUI, -they can run directly on target platforms, and you can rerun them later without -restarting the suite. If you are using symbolic links to install files under -your suite directory it is recommended that the linking should be set up to -fail if the source is missing e.g. by using {\em mode=symlink+} for file -installation in a rose app. - -\subsubsection{Confining Ouput To The Run Directory} - -Output files should be confined to the suite run directory tree. 
Then all -output is easy to find, multiple instances of the same suite can run -concurrently without interference, and other users should be able to copy and -run your suite with few modifications. Cylc provides a \lstinline@share@ -directory for generated files that are used by several tasks in a suite -(see~\ref{Shared Task IO Paths}). Archiving tasks can use \lstinline=rose arch= -to copy or move selected files to external locations as needed (see~\ref{Suite -Housekeeping}). - -\subsection{Task Host Selection} - -At sites with multiple task hosts to choose from, use -\lstinline=rose host-select= to dynamically select appropriate task hosts -rather than hard coding particular hostnames. This enables your suite to -adapt to particular machines being down or heavily overloaded by selecting -from a group of hosts based on a series of criteria. -\lstinline=rose host-select= will only return hosts that can be contacted by -non-interactive SSH. - -\subsection{Task Scripting} - -Non-trivial task scripting should be held in external files rather than -inlined in the suite.rc. This keeps the suite definition tidy, and it -allows proper shell-mode text editing and independent testing of task scripts. - -For automatic access by task jobs, task-specific scripts should be kept in -Rose app bin directories, and shared scripts kept in (or installed to) the -suite bin directory. 
- -\subsubsection{Coding Standards} - -When writing your own task scripts make consistent use of appropriate coding -standards such as: - -\begin{itemize} - \item PEP8 for Python - \url{https://www.python.org/dev/peps/pep-0008/} - \item Google Shell Style Guide for Bash - - \url{https://google.github.io/styleguide/shell.xml} -\end{itemize} - -\subsubsection{Basic Functionality} - -In consideration of future users who may not be expert on the internals of your -suite and its tasks, all task scripts should: - -\begin{itemize} - \item Print clear usage information if invoked incorrectly (and via the - standard options \lstinline=-h, --help=). - \item Print useful diagnostic messages in case of error. For example, if a - file was not found, the error message should contain the full path to the - expected location. - \item Always return correct shell exit status - zero for success, non-zero - for failure. This is used by Cylc job wrapper code to detect success and - failure and report it back to the suite server program. - \item In shell scripts use \lstinline=set -u= to abort on any reference to - an undefined variable. If you really need an undefined variable to evaluate - to an empty string, make it explicit: \lstinline@FOO=${FOO:-}@. - \item In shell scripts use \lstinline=set -e= to abort on any error without - having to failure-check each command explicitly. - \item In shell scripts use \lstinline=set -o pipefail= to abort on any error - within a pipe line. Note that all commands in the pipe line will still - run, it will just exit with the right most non-zero exit status. - \item For examples and more details on the above three \lstinline=set= commands, see - \url{https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/} - -\end{itemize} - - -\subsection{Rose Apps} - -Rose apps allow all non-shared task configuration - which is not relevant to -workflow automation - to be moved from the suite definition into app config -files. 
This makes suites tidier and easier to understand, and it allows -\lstinline=rose edit= to provide a unified metadata-enhanced view of the suite -and its apps (see~\ref{Rose Metadata Compliance}). - -Rose apps are a clear winner for tasks with complex configuration requirements. -It matters less for those with little configuration, but for consistency and to -take full advantage of \lstinline=rose edit= it makes sense to use Rose apps -for most tasks. - -When most tasks are Rose apps, set the app-run command as a root-level default, -and override it for the occasional non Rose app task: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[root]] - script = rose task-run -v - [[rose-app1]] - #... - [[rose-app2]] - #... - [[hello-world]] # Not a Rose app. - script = echo "Hello World" -\end{lstlisting} - -\subsection{Rose Metadata Compliance} -\label{Rose Metadata Compliance} - -Rose metadata drives page layout and sort order in \lstinline=rose edit=, plus -help information, input validity checking, macros for advanced checking and app -version upgrades, and more. - -To ensure the suite and its constituent applications are being run as intended -it should be valid against any provided metadata: launch the -\lstinline=rose edit= GUI or run \lstinline=rose macro --validate= on the -command line to highlight any errors, and correct them prior to use. If errors -are flagged incorrectly you should endeavour to fix the metadata. - -When writing a new suite or application, consider creating metadata to -facilitate ease of use by others. - -\subsection{Task Independence} - -Essential dependencies must be encoded in the suite graph, but tasks should -not -rely unnecessarily on the action of other tasks. For example, tasks should -create their own output directories if they don't already exist, even if they -would normally be created by an earlier task in the workflow. This makes it is -easier to run tasks alone during development and testing. 
- - -\subsection{Clock-Triggered Tasks} -\label{Clock-Triggered Tasks} -Tasks that wait on real time data should use clock-triggers to delay job -submission until the expected data arrival time: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = now - [[special tasks]] - # Trigger 5 min after wall-clock time is equal to cycle point. - clock-trigger = get-data(PT5M) - [[dependencies]] - [[[T00]]] - graph = get-data => process-data -\end{lstlisting} - -Clock-triggered tasks typically have to handle late data arrival. Task -execution {\em retry delays} can be used to simply retrigger the task at -intervals until the data is found, but frequently retrying small tasks probably -should not go to a batch scheduler, and multiple task failures will be logged -for what is a essentially a normal condition (at least it is normal until the -data is really late). - -Rather than using task execution retry delays to repeatedly trigger a task that -checks for a file, it may be better to have the task itself repeatedly poll for -the data (see~\ref{Rose App File Polling} for example). - -\subsection{Rose App File Polling} -\label{Rose App File Polling} - -Rose apps have built-in polling functionality to check repeatedly for the -existence of files before executing the main app. See the \lstinline=[poll]= -section in Rose app config documentation. This is a good way to implement -check-and-wait functionality in clock-triggered tasks (\ref{Clock-Triggered -Tasks}), for example. - -It is important to note that frequent polling may be bad for some filesystems, -so be sure to configure a reasonable interval between polls. - -\subsection{Task Execution Time Limits} - -Instead of setting job wall clock limits directly in batch scheduler -directives, use the \lstinline=execution time limit= suite config item. 
-Cylc automatically derives the correct batch scheduler directives from this, -and it is also used to run \lstinline=background= and \lstinline=at= jobs via -the \lstinline=timeout= command, and to poll tasks that haven't reported in -finished by the configured time limit. - -\subsection{Restricting Suite Activity} -\label{Restricting Suite Activity} - -It may be possible for large suites to overwhelm a job host by submitting too -many jobs at once: - -\begin{itemize} - \item Large suites that are not sufficiently limited by real time clock - triggering or inter-cycle dependence may generate a lot of {\em runahead} - (this refers to Cylc's ability to run multiple cycles at once, restricted - only by the dependencies of individual tasks). - \item Some suites may have large families of tasks whose members all - become ready at the same time. -\end{itemize} - -These problems can be avoided with {\em runahead limiting} and {\em internal -queues}, respectively. - -\subsubsection{Runahead Limiting} -\label{Runahead Limiting} - -By default Cylc allows a maximum of three cycle points to be active at the same -time, but this value is configurable: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020-01-01T00 - # Don't allow any cycle interleaving: - max active cycle points = 1 -\end{lstlisting} - -\subsubsection{Internal Queues} - -Tasks can be assigned to named internal queues that limit the number of members -that can be active (i.e.\ submitted or running) at the same time: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - initial cycle point = 2020-01-01T00 - [[queues]] - # Allow only 2 members of BIG_JOBS to run at once: - [[[big_jobs_queue]]] - limit = 2 - members = BIG_JOBS - [[dependencies]] - [[[T00]]] - graph = pre => BIG_JOBS -[runtime] - [[BIG_JOBS]] - [[foo, bar, baz, ...]] - inherit = BIG_JOBS -\end{lstlisting} - -\subsection{Suite Housekeeping} -\label{Suite Housekeeping} - -Ongoing cycling suites can 
generate an enormous number of output files and logs -so regular housekeeping is very important. Special housekeeping tasks, -typically the last tasks in each cycle, should be included to archive selected -important files and then delete everything at some offset from the current -cycle point. - -The Rose built-in apps \lstinline=rose_arch= and \lstinline=rose_prune= -provide an easy way to do this. They can be configured easily with -file-matching patterns and cycle point offsets to perform various housekeeping -operations on matched files. - -\subsection{Complex Jinja2 Code} - -The Jinja2 template processor provides general programming constructs, -extensible with custom Python filters, that can be used to {\em generate} the -suite definition. This makes it possible to write flexible multi-use -suites with structure and content that varies according to various input -switches. There is a cost to this flexibility however: excessive use of Jinja2 -can make a suite hard to understand and maintain. It is difficult to say -exactly where to draw the line, but we recommend erring on the side of -simplicity and clarity: write suites that are easy to understand and therefore -easy to modify for other purposes, rather than extremely complicated suites -that attempt do everything out of the box but are hard to maintain and modify. - -Note that use of Jinja2 loops for generating tasks is now deprecated in favour -of built-in parameterized tasks - see~\ref{Parameterized Tasks}. - -\subsection{Shared Configuration} - -Configuration that is common to multiple tasks should be defined in one -place and used by all, rather than duplicated in each task. Duplication is -a maintenance risk because changes have to be made consistently in several -places at once. 
- -\subsubsection{Jinja2 Variables} - -In simple cases you can share by passing a Jinja2 variable to all the tasks -that need it: - -\lstset{language=suiterc} -\begin{lstlisting} -{% set JOB_VERSION = 'A23' %} -[runtime] - [[foo]] - script = run-foo --version={{JOB_VERSION}} - [[bar]] - script = run-bar --version={{JOB_VERSION}} -\end{lstlisting} - -\subsubsection{Inheritance} - -Sharing by inheritance of task families is recommended when more than a few -configuration items are involved. - -The simplest application of inheritance is to set global defaults in the -\lstinline=[[runtime]][root]= namespace that is inherited by all tasks. -However, this should only be done for settings that really are used -by the vast majority of tasks. Over-sharing of via root, particularly of -environment variables, is a maintenance risk because it can be very -difficult to be sure which tasks are {\em using} which global variables. - -Any \lstinline=[runtime]= settings can be shared - scripting, host -and batch scheduler configuration, environment variables, and so on - from -single items up to complete task or app configurations. At the latter extreme, -it is quite common to have several tasks that inherit the same complete -job configuration followed by minor task-specific additions: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[FILE-CONVERT]] - script = convert-netcdf - #... - [[convert-a]] - inherit = FILE-CONVERT - [[[environment]]] - FILE_IN = file-a - [[convert-b]] - inherit = FILE-CONVERT - [[[environment]]] - FILE_IN = file-b -\end{lstlisting} - -Inheritance is covered in more detail from an efficiency perspective in -Section~\ref{The Task Family Hierarchy}. - -\subsubsection{Shared Task IO Paths} -\label{Shared Task IO Paths} - -If one task uses files generated by another task (and both see the same -filesystem) a common IO path should normally be passed to both tasks via a -shared environment variable. 
As far as Cylc is concerned this is no different -to other shared configuration items, but there are some additional aspects -of usage worth addressing here. - -Primarily, for self-containment (see~\ref{Self-Contained Suites}) shared IO -paths should be under the {\em suite share directory}, the location of which is -passed to all tasks as \lstinline=$CYLC_SUITE_SHARE_PATH=. - -The \lstinline@rose task-env@ utility can provide additional environment -variables that refer to static and cyclepoint-specific locations under the -suite share directory. - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[my-task]] - env-script = $(eval rose task-env -T P1D -T P2D) -\end{lstlisting} - -For a current cycle point of \lstinline=20170105= this will make the following -variables available to tasks: - -\lstset{language=sh} -\begin{lstlisting} -ROSE_DATA=$CYLC_SUITE_SHARE_PATH/data -ROSE_DATAC=$CYLC_SUITE_SHARE_PATH/cycle/20170105 -ROSE_DATACP1D=$CYLC_SUITE_SHARE_PATH/cycle/20170104 -ROSE_DATACP2D=$CYLC_SUITE_SHARE_PATH/cycle/20170103 -\end{lstlisting} - -Subdirectories of \lstinline@$ROSE_DATAC@ etc.\ should be agreed between -different sub-systems of the suite; typically they are named for the -file-generating tasks, and the file-consuming tasks should know to look there. - -The share-not-duplicate rule can be relaxed for shared files whose names are -agreed by convention, so long as their locations under the share directory are -proper shared suite variables. For instance the Unified Model uses a large -number of files whose conventional names (\lstinline=glu_snow=, for example) -can reasonably be expected not to change, so they are typically hardwired into -app configurations (as \lstinline=$ROSE_DATA/glu_snow=, for example) to avoid -cluttering the suite definition. - -Here two tasks share a workspace under the suite share directory by inheritance: - -\lstset{language=suiterc} -\begin{lstlisting} -# Sharing an I/O location via inheritance. 
-[scheduling] - [[dependencies]] - graph = write_data => read_data -[runtime] - [[root]] - env-script = $(eval rose task-env) - [[WORKSPACE]] - [[[environment]]] - DATA_DIR = ${ROSE_DATA}/png - [[write_data]] - inherit = WORKSPACE - script = """ -mkdir -p $DATA_DIR -write-data.exe -o ${DATA_DIR}""" - [[read_data]] - inherit = WORKSPACE - script = read-data.exe -i ${DATA_DIR} -\end{lstlisting} - -In simple cases where an appropriate family does not already exist paths can -be shared via Jinja variables: - -\lstset{language=suiterc} -\begin{lstlisting} -# Sharing an I/O location with Jinja2. -{% set DATA_DIR = '$ROSE_DATA/stuff' %} -[scheduling] - [[dependencies]] - graph = write_data => read_data -[runtime] - [[write_data]] - script = """ -mkdir -p {{DATA_DIR}} -write-data.exe -o {{DATA_DIR}}""" - [[read_data]] - script = read-data.exe -i {{DATA_DIR}} -\end{lstlisting} - -For completeness we note that it is also possible to configure multiple tasks -to use the same work directory so they can all share files in \lstinline@$PWD@. -(Cylc executes task jobs in special work directories that by default are unique -to each task). This may simplify the suite slightly, and it may be useful if -you are unfortunate enough to have executables that are designed for IO in -\lstinline@$PWD@, {\em but it is not recommended.} There is a higher risk -of interference between tasks; it will break \lstinline=rose task-run= -incremental file creation mode; and \lstinline=rose task-run --new= will in -effect delete the work directories of tasks other than its intended target. - -\lstset{language=suiterc} -\begin{lstlisting} -# Shared work directory: tasks can read and write in $PWD - use with caution! 
-[scheduling] - initial cycle point = 2018 - [[dependencies]] - [[[P1Y]]] - graph = write_data => read_data -[runtime] - [[WORKSPACE]] - work sub-directory = $CYLC_TASK_CYCLE_POINT/datadir - [[write_data]] - inherit = WORKSPACE - script = write-data.exe - [[read_data]] - inherit = WORKSPACE - script = read-data.exe -\end{lstlisting} - -\subsection{Varying Behaviour By Cycle Point} - -To make a cycling job behave differently at different cycle points you {\em -could} use a single task with scripting that reacts to the cycle point it finds -itself running at, but it is better to use different tasks (in different -cycling sections) that inherit the same base job configuration. This results -in a more transparent suite that can be understood just by inspecting the -graph: - -\lstset{language=suiterc} -\begin{lstlisting} -# Run the same job differently at different cycle points. -[scheduling] - initial cycle point = 2020-01-01T00 - [[dependencies]] - [[[T00]]] - graph = pre => long_fc => post - [[[T12]]] - graph = pre => short_fc => post -[runtime] - [[MODEL]] - script = run-model.sh - [[long_fc]] - inherit = MODEL - [[[job]]] - execution time limit = PT30M - [[[environment]]] - RUN_LEN = PT48H - [[short_fc]] - inherit = MODEL - [[[job]]] - execution time limit = PT10M - [[[environment]]] - RUN_LEN = PT12H -\end{lstlisting} - -The few differences between \lstinline=short_fc= and \lstinline=long_fc=, -including batch scheduler resource requests, can be configured after common -settings are inherited. - -\subsubsection{At Start-Up} - -Similarly, if a cycling job needs special behaviour at the initial (or any other) -cycle point, just use a different logical task in an \lstinline=R1= graph and -have it inherit the same job as the general cycling task, not a single task -with scripting that behaves differently if it finds itself running at the -initial cycle point. 
- -\subsection{Automating Failure Recovery} - -\subsubsection{Job Submission Retries} - -When submitting jobs to a remote host, use job submission retries to -automatically resubmit tasks in the event of network outages. Note this is -distinct from job retries for job execution failure (just below). - -Job submission retries should normally be host (or host-group for -\lstinline=rose host-select=) specific, not task-specific, so configure them in -a host (or host-group) specific family. The following suite.rc fragment -configures all HPC jobs to retry on job submission failure up to 10 -times at 1 minute intervals, then another 5 times at 1 hour intervals: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[HPC]] # Inherited by all jobs submitted to HPC. - [[[job]]] - submission retry delays = 10*PT1M, 5*PT1H -\end{lstlisting} - -\subsubsection{Job Execution Retries} - -Automatic retry on job execution failure is useful if you have good reason to -believe that a simple retry will usually succeed. This may be the case if the -job host is known to be flaky, or if the job only ever fails for one known -reason that can be fixed on a retry. For example, if a model fails occasionally -with a numerical instability that can be remedied with a short timestep rerun, -then an automatic retry may be appropriate: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[model]] - script = """ -if [[ $CYLC_TASK_TRY_NUMBER > 1 ]]; then - SHORT_TIMESTEP=true -else - SHORT_TIMESTEP=false -fi -model.exe""" - [[[job]]] - execution retry delays = 1*PT0M -\end{lstlisting} - -\subsubsection{Failure Recovery Workflows} - -For recovery from failures that require explicit diagnosis you can configure -alternate routes through the workflow, together with {\em suicide triggers} -that remove the unused route. 
In the following example, if the model fails a -diagnosis task will trigger; if it determines the cause of the failure is a -known numerical instability (e.g.\ by parsing model job logs) it will succeed, -triggering a short timestep run. Postprocessing can proceed from either the -original or the short-step model run, and suicide triggers remove the unused -path from the workflow: - -%\begin{figure}[H] -%\noindent\begin{minipage}[b]{0.65\textwidth} % -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - model | model_short => postproc - model:fail => diagnose => model_short - # Clean up with suicide triggers: - model => ! diagnose & ! model_short - model_short => ! model""" -\end{lstlisting} -%\end{minipage}\hfill -%\begin{minipage}[b]{0.15\textwidth} - \includegraphics[width=0.18\textwidth]{resources/png/failure-recovery.png} -%\end{minipage} -%\end{figure} - -\subsection{Include Files} - -Include-files should not be overused, but they can sometimes be useful -(e.g.\ see Portable Suites~\ref{Portable Suites}): - -\begin{lstlisting} -#... -{% include 'inc/foo.rc' %} -\end{lstlisting} - -(Technically this inserts a Jinja2-rendered file template). Cylc also has a -native include mechanism that pre-dates Jinja2 support and literally inlines -the include-file: - -\begin{lstlisting} -#... -%include 'inc/foo.rc' -\end{lstlisting} - -The two methods normally produce the same result, but use the Jinja2 version if -you need to construct an include-file name from a variable (because Cylc -include-files get inlined before Jinja2 processing is done): - -\begin{lstlisting} -#... -{% include 'inc/' ~ SITE ~ '.rc' %} -\end{lstlisting} - - diff --git a/doc/src/suite-design-guide/portable-suites.rst b/doc/src/suite-design-guide/portable-suites.rst new file mode 100644 index 00000000000..cbe6f983f4b --- /dev/null +++ b/doc/src/suite-design-guide/portable-suites.rst @@ -0,0 +1,514 @@ +.. 
_Portable Suites Label: + +Portable Suites +=============== + +A *portable* or *interoperable* suite can run "out of the box" at +different sites, or in different environments such as research and operations +within a site. For convenience we just use the term *site portability*. + +Lack of portability is a major barrier to collaborative development when +sites need to run more or less the same workflow, because it is very +difficult to translate changes manually between large, complicated suites. + +Most suites are riddled with site-specific details such as local build +configurations, file paths, host names, and batch scheduler directives, etc.; +but it is possible to cleanly factor all this out to make a portable suite. +Significant variations in workflow structure can even be accommodated quite +easily. If the site workflows are *too different*, however, you may decide +that it is appropriate for each site to maintain separate suites. + +The recommended way to do this, which we expand on below, is: + +- Put all site-specific settings in include-files loaded at the end + of a generic "core" suite definition. +- Use "optional" app config files for site-specific variations + in the core suite's Rose apps. +- (Make minimal use of inlined site switches too, if necessary). +- When referencing files, reference them within the suite structure and + use an install task to link external files in. + +The result should actually be tidier than the original in one respect: all +the messy platform-specific resource directives etc., will be hidden away in +the site include-files. + + +The Jinja2 SITE Variable +------------------------ + +First a suite Jinja2 variable called ``SITE`` should be set to the site +name, either in ``rose-suite.conf``, or in the suite definition itself +(perhaps automatically, by querying the local environment in some way). + +.. code-block:: cylc + + #!Jinja2 + {% set SITE = "niwa" %} + #... 
+ +This will be used to select site-specific configuration, as described below. + + +Site Include-Files +------------------ + +If a section heading in a suite.rc file is repeated the items under it simply +add to or override those defined under the same section earlier in the file +(but note :ref:`List Item Override In Site Include-Files`). +For example, this task definition: + +.. code-block:: cylc + + [runtime] + [[foo]] + script = run-foo.sh + [[[remote]]] + host = hpc1.niwa.co.nz + +can equally be written like this: + +.. code-block:: cylc + + [runtime] # Part 1 (site-agnostic). + [[foo]] + script = run-foo.sh + [runtime] # Part 2 (site-specific). + [[foo]] + [[[remote]]] + host = hpc1.niwa.co.nz + +.. note:: + + If Part 2 had also defined ``script`` the new value would + override the original. It can sometimes be useful to set a widely used + default and override it in a few cases, but be aware that this can + make it more difficult to determine the origin of affected values. + +In this way all site-specific ``[runtime]`` settings, with their +respective sub-section headings, can be moved to the end of the file, and then +out into an include-file (file inclusion is essentially just literal inlining): + +.. code-block:: cylc + + #... + {% set SITE = "niwa" %} + + # Core site-agnostic settings: + #... + [runtime] + [[foo]] + script = run-foo.sh + #... + + # Site-specific settings: + {% include 'site/' ~ SITE ~ '.rc' %} + +where the site include-file ``site/niwa.rc`` contains: + +.. code-block:: cylc + + # site/niwa.rc + [runtime] + [[foo]] + [[[remote]]] + host = hpc1.niwa.co.nz + + +Site-Specific Graphs +-------------------- + +Repeated ``graph`` strings under the same graph section headings are +always additive (graph strings are the only exception to the normal repeat item +override semantics). So, for instance, this graph: + +.. 
code-block:: cylc + + [scheduling] + initial cycle point = 2025 + [[dependencies]] + [[[P1Y]]] + graph = "pre => model => post => niwa_archive" + +can be written like this: + +.. code-block:: cylc + + [scheduling] + initial cycle point = 2025 + [[dependencies]] + [[[P1Y]]] + graph = "pre => model => post" + [[[P1Y]]] + graph = "post => niwa_archive" + +and again, the site-specific part can be taken out to a site include-file: + +.. code-block:: cylc + + #... + {% set SITE = "niwa" %} + + # Core site-agnostic settings. + #... + [scheduling] + initial cycle point = 2025 + [[dependencies]] + [[[P1Y]]] + graph = "pre => model => post" + #... + # Site-specific settings: + {% include 'site/' ~ SITE ~ '.rc' %} + +where the site include-file ``site/niwa.rc`` contains: + +.. code-block:: cylc + + # site/niwa.rc + [scheduling] + [[dependencies]] + [[[P1Y]]] + graph = "post => niwa_archive" + +Note that the site-file graph needs to define the dependencies of the +site-specific tasks, and thus their points of connection to the core +suite - which is why the core task ``post`` appears in the graph here (if +``post`` had any site-specific runtime settings, to get it to run at +this site, they would also be in the site-file). + + +.. _Inlined Site-Switching: + +Inlined Site-Switching +---------------------- + +It may be tempting to use inlined switch blocks throughout the suite instead of +site include-files, but *this is not recommended* - it is verbose and +untidy (the greater the number of supported sites, the bigger the +mess) and it exposes all site configuration to all users: + +.. code-block:: cylc + + #... + [runtime] + [[model]] + script = run-model.sh + {# Site switch blocks not recommended:#} + {% if SITE == 'niwa' %} + [[[job]]] + batch system = loadleveler + [[[directives]]] + # NIWA Loadleveler directives... + {% elif SITE == 'metoffice' %} + [[[job]]] + batch system = pbs + [[[directives]]] + # Met Office PBS directives... + {% elif SITE == ... %} + #... 
+ {% else %} + {{raise('Unsupported site: ' ~ SITE)}} + {% endif %} + #... + +Inlined switches can be used, however, to configure exceptional behaviour at +one site without requiring the other sites to duplicate the default behaviour. +But be wary of accumulating too many of these switches: + +.. code-block:: cylc + + # (core suite.rc file) + #... + {% if SITE == 'small' %} + {# We can't run 100 members... #} + {% set ENSEMBLE_SIZE = 25 %} + {% else %} + {# ...but everyone else can! #} + {% set ENSEMBLE_SIZE = 100 %} + {% endif %} + #... + +Inlined switches can also be used to temporarily isolate a site-specific +change to a hitherto non site-specific part of the suite, thereby avoiding the +need to update all site include-files before getting agreement from the suite +owner and collaborators. + + +Site-Specific Suite Variables +----------------------------- + +It can sometimes be useful to set site-specific values of suite variables that +aren't exposed to users via ``rose-suite.conf``. For example, consider +a suite that can run a special post-processing workflow of some kind at sites +where IDL is available. The IDL-dependence switch can be set per site like this: + +.. code-block:: cylc + + #... + {% from SITE ~ '-vars.rc' import HAVE_IDL, OTHER_VAR %} + graph = """ + pre => model => post + {% if HAVE_IDL %} + post => idl-1 => idl-2 => idl-3 + {% endif %} + """ + +where for ``SITE = niwa`` the file ``niwa-vars.rc`` contains: + +.. code-block:: cylc + + {# niwa-vars.rc #} + {% set HAVE_IDL = True %} + {% set OTHER_VAR = "the quick brown fox" %} + +Note we are assuming there are significantly fewer options (IDL or not, in this +case) than sites, otherwise the IDL workflow should just go in the site +include-files of the sites that need it. 
+ + +Site-Specific Optional Suite Configs +------------------------------------ + +During development and testing of a portable suite you can use an optional Rose +suite config file to automatically set site-specific suite inputs and thereby +avoid the need to make manual changes every time you check out and run a new +version. The site switch itself has to be set of course, but there may be other +settings too such as model parameters for a standard local test domain. Just +put these settings in ``opt/rose-suite-niwa.conf`` (for site "niwa") +and run the suite with ``rose suite-run -O niwa``. + + +Site-Agnostic File Paths in App Configs +--------------------------------------- + +Where possible apps should be configured to reference files within the suite +structure itself rather than outside of it. This makes the apps themselves +portable and it becomes the job of the install task to ensure all required +source files are available within the suite structure e.g. via symlink into +the share directory. Additionally, by moving the responsibility of linking +files into the suite to an install task you gain the added benefit of knowing +if a file is missing at the start of a suite rather than part way into a run. + + +Site-Specific Optional App Configs +---------------------------------- + +Typically a few but not all apps will need some site customization, e.g. for +local archive configuration, local science options, or whatever. To avoid +explicit site-customization of individual task-run command lines use Rose's +built-in *optional app config* capability: + +.. code-block:: cylc + + [runtime] + [[root]] + script = rose task-run -v -O '({{SITE}})' + +Normally a missing optional app config is considered to be an error, but the +round parentheses here mean the named optional config is optional - i.e. +use it if it exists, otherwise ignore. 
+ +With this setting in place we can simply add an ``opt/rose-app-niwa.conf`` to +any app that needs customization at ``SITE = niwa``. + + +An Example +---------- + +The following small suite is not portable because all of its tasks are +submitted to a NIWA HPC host; two tasks are entirely NIWA-specific in that they +respectively install files from a local database and upload products to a local +distribution system; and one task runs a somewhat NIWA-specific configuration +of a model. The remaining tasks are site-agnostic apart from local job host +and batch scheduler directives. + +.. code-block:: cylc + + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 2017-01-01 + [[dependencies]] + [[[R1]]] + graph = install_niwa => preproc + [[[P1D]]] + graph = """ + preproc & model[-P1D] => model => postproc => upload_niwa + postproc => idl-1 => idl-2 => idl-3""" + [runtime] + [[root]] + script = rose task-run -v + [[HPC]] # NIWA job host and batch scheduler settings. + [[[remote]]] + host = hpc1.niwa.co.nz + [[[job]]] + batch system = loadleveler + [[[directives]]] + account_no = NWP1623 + class = General + job_type = serial # (most jobs in this suite are serial) + [[install_niwa]] # NIWA-specific file installation task. + inherit = HPC + [[preproc]] + inherit = HPC + [[model]] # Run the model on a local test domain. + inherit = HPC + [[[directives]]] # Override the serial job_type setting. + job_type = parallel + [[[environment]]] + SPEED = fast + [[postproc]] + inherit = HPC + [[upload_niwa]] # NIWA-specific product upload. + inherit = HPC + +To make this portable, refactor it into a core suite.rc file that contains the +clean site-independent workflow configuration and loads all site-specific +settings from an include-file at the end: + +.. code-block:: cylc + + # suite.rc: CORE SITE-INDEPENDENT CONFIGURATION.
+ {% set SITE = 'niwa' %} + {% from 'site/' ~ SITE ~ '-vars.rc' import HAVE_IDL %} + [cylc] + UTC mode = True + [scheduling] + initial cycle point = 2017-01-01 + [[dependencies]] + [[[P1D]]] + graph = """ + preproc & model[-P1D] => model => postproc + {% if HAVE_IDL %} + postproc => idl-1 => idl-2 => idl-3 + {% endif %} + """ + [runtime] + [[root]] + script = rose task-run -v -O '({{SITE}})' + [[preproc]] + inherit = HPC + [[postproc]] + inherit = HPC + [[model]] + inherit = HPC + [[[environment]]] + SPEED = fast + {% include 'site/' ~ SITE ~ '.rc' %} + +plus site files ``site/niwa-vars.rc``: + +.. code-block:: cylc + + # site/niwa-vars.rc: NIWA SITE SETTINGS FOR THE EXAMPLE SUITE. + {% set HAVE_IDL = True %} + +and ``site/niwa.rc``: + +.. code-block:: cylc + + # site/niwa.rc: NIWA SITE SETTINGS FOR THE EXAMPLE SUITE. + [scheduling] + [[dependencies]] + [[[R1]]] + graph = install_niwa => preproc + [[[P1D]]] + graph = postproc => upload_niwa + [runtime] + [[HPC]] + [[[remote]]] + host = hpc1.niwa.co.nz + [[[job]]] + batch system = loadleveler + [[[directives]]] + account_no = NWP1623 + class = General + job_type = serial # (most jobs in this suite are serial) + [[install_niwa]] # NIWA-specific file installation. + [[model]] + [[[directives]]] # Override the serial job_type setting. + job_type = parallel + [[upload_niwa]] # NIWA-specific product upload. + +and finally, an optional app config file for the local model domain: + +.. code-block:: bash + + app/model/rose-app.conf # Main app config. + app/model/opt/rose-app-niwa.conf # NIWA site settings. + +Some points to note: + +- It is straightforward to extend support to a new site by copying an + existing site file(s) and adapting it to the new job host and batch + scheduler etc. +- Batch system directives should be considered site-specific unless + all supported sites have the same batch system and the same host + architecture (including CPU clock speed and memory size etc.).
+- We've assumed that all tasks run on a single HPC host at both + sites. If that's not a valid assumption the ``HPC`` family + inheritance relationships would have to become site-specific. +- Core task runtime configurations aren't needed in site files at all + if their job host and batch system settings can be defined in common + families that are defined there (``HPC`` in this case). + + +.. _Collaborative Development Model: + +Collaborative Development Model +------------------------------- + +Official releases of a portable suite should be made from the suite trunk. + +Changes should be developed on feature branches so as not to affect other users +of the suite. + +Site-specific changes shouldn't touch the core suite.rc file, just the relevant +site include-file, and therefore should not need close scrutiny from other +sites. + +Changes to the core suite.rc file should be agreed by all stakeholders, and +should be carefully checked for effects on site include-files: + +- Changing the name of tasks or families in the core suite may break + sites that add configuration to the original runtime namespace. +- Adding new tasks or families to the core suite may require + corresponding additions to the site files. +- Deleting tasks or families from the core suite may require + corresponding parts of the site files to be removed. And also, check for + site-specific triggering off of deleted tasks or families. + +However, if the owner site has to get some changes into the trunk before all +collaborating sites have time to test them, version control will of course +protect those lagging behind from any immediate ill effects. + +When a new feature is complete and tested at the developer's site, the suite +owner should check out the branch, review and test it, and if necessary request +that other sites do the same and report back. The owner can then merge the +new feature to the trunk once satisfied.
+ +All planning and discussion associated with the change should be documented on +MOSRS Trac tickets associated with the suite. + + +Research-To-Operations Transition +--------------------------------- + +Under this collaborative development model it is *possible* to use the +same suite in research and operations, largely eliminating the difficult +translation between the two environments. Where appropriate, this can save +a lot of work. + +Operations-specific parts of the suite should be factored out (as for site +portability) into include-files that are only loaded in the operational +environment. Improvements and upgrades can be developed on feature branches in +the research environment. Operations staff can check out completed feature +branches for testing in the operational environment before merging to trunk or +referring back to research if problems are found. After sufficient testing the +new suite version can be deployed into operations. + +.. note:: + + This obviously glosses over the myriad complexities of the technical + and scientific testing and validation of suite upgrades; it merely describes + what is possible from a suite design and collaborative development + perspective. diff --git a/doc/src/suite-design-guide/portable-suites.tex b/doc/src/suite-design-guide/portable-suites.tex deleted file mode 100644 index 1a5f8da1513..00000000000 --- a/doc/src/suite-design-guide/portable-suites.tex +++ /dev/null @@ -1,505 +0,0 @@ -\section{Portable Suites} -\label{Portable Suites} - -A {\em portable} or {\em interoperable} suite can run ``out of the box'' at -different sites, or in different environments such as research and operations -within a site. For convenience we just use the term {\em site portability}. - -Lack of portability is a major barrier to collaborative development when -sites need to run more or less the same workflow, because it is very -difficult to translate changes manually between large, complicated suites. 
- -Most suites are riddled with site-specific details such as local build -configurations, file paths, host names, and batch scheduler directives, etc.; -but it is possible to cleanly factor all this out to make a portable suite. -Significant variations in workflow structure can even be accommodated quite -easily. If the site workflows are {\em too different}, however, you may decide -that it is appropriate for each site to maintain separate suites. - -The recommended way to do this, which we expand on below, is: - -\begin{itemize} - \item Put all site-specific settings in include-files loaded at the end - of a generic ``core'' suite definition. - \item Use ``optional'' app config files for site-specific variations - in the core suite's Rose apps. - \item (Make minimal use of inlined site switches too, if necessary). - \item When referencing files, reference them within the suite structure and - use an install task to link external files in. -\end{itemize} - -The result should actually be tidier than the original in one respect: all -the messy platform-specific resource directives etc., will be hidden away in -the site include-files. - -\subsection{The Jinja2 SITE Variable} - -First a suite Jinja2 variable called \lstinline=SITE= should be set to the site -name, either in \lstinline=rose-suite.conf=, or in the suite definition itself -(perhaps automatically, by querying the local environment in some way). - -\lstset{language=suiterc} -\begin{lstlisting} -#!Jinja2 -{% set SITE = "niwa" %} -#... -\end{lstlisting} - -This will be used to select site-specific configuration, as described below. - -\subsection{Site Include-Files} - -If a section heading in a suite.rc file is repeated the items under it simply -add to or override those defined under the same section earlier in the file -(but note Section~\ref{List Item Override In Site Include-Files}). 
-For example, this task definition: - -\begin{lstlisting} -[runtime] - [[foo]] - script = run-foo.sh - [[[remote]]] - host = hpc1.niwa.co.nz -\end{lstlisting} - -can equally be written like this: - -\begin{lstlisting} -[runtime] # Part 1 (site-agnostic). - [[foo]] - script = run-foo.sh -[runtime] # Part 2 (site-specific). - [[foo]] - [[[remote]]] - host = hpc1.niwa.co.nz -\end{lstlisting} - -(Note that if Part 2 had also defined \lstinline=script= the new value would -override the original. It can sometimes be useful to set a widely used -default and override it in a few cases, but be aware that this can make it more -difficult to determine the origin of affected values.) - -In this way all site-specific \lstinline=[runtime]= settings, with their -respective sub-section headings, can be moved to the end of the file, and then -out into an include-file (file inclusion is essentially just literal inlining): - -\begin{lstlisting} -#... -{% set SITE = "niwa" %} - -# Core site-agnostic settings: -#... -[runtime] - [[foo]] - script = run-foo.sh -#... - -# Site-specific settings: -{% include 'site/' ~ SITE ~ '.rc' %} -\end{lstlisting} - -where the site include-file \lstinline=site/niwa.rc= contains: - -\begin{lstlisting} -# site/niwa.rc -[runtime] - [[foo]] - [[[remote]]] - host = hpc1.niwa.co.nz -\end{lstlisting} - -\subsection{Site-Specific Graphs} - -Repeated \lstinline=graph= strings under the same graph section headings are -always additive (graph strings are the only exception to the normal repeat item -override semantics). 
So, for instance, this graph: - -\begin{lstlisting} -[scheduling] - initial cycle point = 2025 - [[dependencies]] - [[[P1Y]]] - graph = "pre => model => post => niwa_archive" -\end{lstlisting} - -can be written like this: - -\begin{lstlisting} -[scheduling] - initial cycle point = 2025 - [[dependencies]] - [[[P1Y]]] - graph = "pre => model => post" - [[[P1Y]]] - graph = "post => niwa_archive" -\end{lstlisting} - -and again, the site-specific part can be taken out to a site include-file: - -\begin{lstlisting} -#... -{% set SITE = "niwa" %} - -# Core site-agnostic settings. -#... -[scheduling] - initial cycle point = 2025 - [[dependencies]] - [[[P1Y]]] - graph = "pre => model => post" -#... -# Site-specific settings: -{% include 'site/' ~ SITE ~ '.rc' %} -\end{lstlisting} - -where the site include-file \lstinline=site/niwa.rc= contains: - -\begin{lstlisting} -# site/niwa.rc -[scheduling] - [[dependencies]] - [[[P1Y]]] - graph = "post => niwa_archive" -\end{lstlisting} - -Note that the site-file graph needs to define the dependencies of the -site-specific tasks, and thus their points of connection to the core suite -- which is why the core task \lstinline=post= appears in the graph here (if -\lstinline=post= had any site-specific runtime settings, to get it to run at -this site, they would also be in the site-file). - -\subsection{Inlined Site-Switching} -\label{Inlined Site-Switching} - -It may be tempting to use inlined switch blocks throughout the suite instead of -site include-files, but {\em this is not recommended} - it is verbose and -untidy (the greater the number of supported sites, the bigger the -mess) and it exposes all site configuration to all users: - -\lstset{language=suiterc} -\begin{lstlisting} -#... -[runtime] - [[model]] - script = run-model.sh -{# Site switch blocks not recommended:#} -{% if SITE == 'niwa' %} - [[[job]]] - batch system = loadleveler - [[[directives]]] - # NIWA Loadleveler directives... 
-{% elif SITE == 'metoffice' %} - [[[job]]] - batch system = pbs - [[[directives]]] - # Met Office PBS directives... -{% elif SITE == ... %} - #... -{% else %} - {{raise('Unsupported site: ' ~ SITE)}} -{% endif %} - #... -\end{lstlisting} - -Inlined switches can be used, however, to configure exceptional behaviour at -one site without requiring the other sites to duplicate the default behaviour. -But be wary of accumulating too many of these switches: - -\lstset{language=suiterc} -\begin{lstlisting} -# (core suite.rc file) -#... -{% if SITE == 'small' %} - {# We can't run 100 members... #} - {% set ENSEMBLE_SIZE = 25 %} -{% else %} - {# ...but everyone else can! #} - {% set ENSEMBLE_SIZE = 100 %} -{% endif %} -#... -\end{lstlisting} - -Inlined switches can also be used to temporarily isolate a site-specific -change to a hitherto non site-specific part of the suite, thereby avoiding the -need to update all site include-files before getting agreement from the suite -owner and collaborators. - -\subsection{Site-Specific Suite Variables} - -It can sometimes be useful to set site-specific values of suite variables that -aren't exposed to users via \lstinline=rose-suite.conf=. For example, consider -a suite that can run a special post-processing workflow of some kind at sites -where IDL is available. The IDL-dependence switch can be set per site like this: - -\lstset{language=suiterc} -\begin{lstlisting} -#... 
-{% from SITE ~ '-vars.rc' import HAVE_IDL, OTHER_VAR %} -graph = """ - pre => model => post -{% if HAVE_IDL %} - post => idl-1 => idl-2 => idl-3 -{% endif %} - """ -\end{lstlisting} - -where for \lstinline@SITE = niwa@ the file \lstinline=niwa-vars.rc= contains: -\lstset{language=suiterc} -\begin{lstlisting} -{# niwa-vars.rc #} -{% set HAVE_IDL = True %} -{% set OTHER_VAR = "the quick brown fox" %} -\end{lstlisting} - -Note we are assuming there are significantly fewer options (IDL or not, in this -case) than sites, otherwise the IDL workflow should just go in the site -include-files of the sites that need it. - -\subsection{Site-Specific Optional Suite Configs} - -During development and testing of a portable suite you can use an optional Rose -suite config file to automatically set site-specific suite inputs and thereby -avoid the need to make manual changes every time you check out and run a new -version. The site switch itself has to be set of course, but there may be other -settings too such as model parameters for a standard local test domain. Just -put these settings in \lstinline=opt/rose-suite-niwa.conf= (for site ``niwa'') -and run the suite with \lstinline=rose suite-run -O niwa=. - -\subsection{Site-Agnostic File Paths in App Configs} - -Where possible apps should be configured to reference files within the suite -structure itself rather than outside of it. This makes the apps themselves -portable and it becomes the job of the install task to ensure all required -source files are available within the suite structure e.g. via symlink into -the share directory. Additionally, by moving the responsibility of linking -files into the suite to an install task you gain the added benefit of knowing -if a file is missing at the start of a suite rather than part way into a run. 
- -\subsection{Site-Specific Optional App Configs} - -Typically a few but not all apps will need some site customization, e.g.\ for -local archive configuration, local science options, or whatever. To avoid -explicit site-customization of individual task-run command lines use Rose's -built-in {\em optional app config} capability: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[root]] - script = rose task-run -v -O '({{SITE}})' -\end{lstlisting} - -Normally a missing optional app config is considered to be an error, but the -round parentheses here mean the named optional config is optional - i.e.\ -use it if it exists, otherwise ignore. - -With this setting in place we can simply add a -\lstinline=opt/rose-app-niwa.conf= to any app that needs customization at -\lstinline@SITE = niwa@. - -\subsection{An Example} - -The following small suite is not portable because all of its tasks are -submitted to a NIWA HPC host; two task are entirely NIWA-specific in that they -respectively install files from a local database and upload products to a local -distribution system; and one task runs a somewhat NIWA-specific configuration -of a model. The remaining tasks are site-agnostic apart from local job host -and batch scheduler directives. - -\lstset{language=suiterc} -\begin{lstlisting} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 2017-01-01 - [[dependencies]] - [[[R1]]] - graph = install_niwa => preproc - [[[P1D]]] - graph = """ - preproc & model[-P1D] => model => postproc => upload_niwa - postproc => idl-1 => idl-2 => idl-3""" -[runtime] - [[root]] - script = rose task-run -v - [[HPC]] # NIWA job host and batch scheduler settings. - [[[remote]]] - host = hpc1.niwa.co.nz - [[[job]]] - batch system = loadleveler - [[[directives]]] - account_no = NWP1623 - class = General - job_type = serial # (most jobs in this suite are serial) - [[install_niwa]] # NIWA-specific file installation task. 
- inherit = HPC - [[preproc]] - inherit = HPC - [[model]] # Run the model on a local test domain. - inherit = HPC - [[[directives]]] # Override the serial job_type setting. - job_type = parallel - [[[environment]]] - SPEED = fast - [[postproc]] - inherit = HPC - [[upload_niwa]] # NIWA-specific product upload. - inherit = HPC -\end{lstlisting} - -To make this portable, refactor it into a core suite.rc file that contains the -clean site-independent workflow configuration and loads all site-specific -settings from an include-file at the end: - -\lstset{language=suiterc} -\begin{lstlisting} -# suite.rc: CORE SITE-INDEPENDENT CONFIGURATION. -{% set SITE = 'niwa' %} -{% from 'site/' ~ SITE ~ '-vars.rc' import HAVE_IDL %} -[cylc] - UTC mode = True -[scheduling] - initial cycle point = 2017-01-01 - [[dependencies]] - [[[P1D]]] - graph = """ -preproc & model[-P1D] => model => postproc -{% if HAVE_IDL %} - postproc => idl-1 => idl-2 => idl-3 -{% endif %} - """ -[runtime] - [[root]] - script = rose task-run -v -O '({{SITE}})' - [[preproc]] - inherit = HPC - [[preproc]] - inherit = HPC - [[model]] - inherit = HPC - [[[environment]]] - SPEED = fast -{% include 'site/' ~ SITE ~ '.rc' %} -\end{lstlisting} - -plus site files \lstinline=site/niwa-vars.rc=: - -\lstset{language=suiterc} -\begin{lstlisting} -# site/niwa-vars.rc: NIWA SITE SETTINGS FOR THE EXAMPLE SUITE. -{% set HAVE_IDL = True %} -\end{lstlisting} - -and \lstinline=site/niwa.rc=: - -\lstset{language=suiterc} -\begin{lstlisting} -# site/niwa.rc: NIWA SITE SETTINGS FOR THE EXAMPLE SUITE. -[scheduling] - [[dependencies]] - [[[R1]]] - graph = install_niwa => preproc - [[[P1D]]] - graph = postproc => upload_niwa -[runtime] - [[HPC]] - [[[remote]]] - host = hpc1.niwa.co.nz - [[[job]]] - batch system = loadleveler - [[[directives]]] - account_no = NWP1623 - class = General - job_type = serial # (most jobs in this suite are serial) - [[install_niwa]] # NIWA-specific file installation. 
- [[model]] - [[[directives]]] # Override the serial job_type setting. - job_type = parallel - [[upload_niwa]] # NIWA-specific product upload. -\end{lstlisting} - -and finally, an optional app config file for the local model domain: - -\lstset{language=bash} -\begin{lstlisting} -app/model/rose-app.conf # Main app config. -app/model/opt/rose-app-niwa.conf # NIWA site settings. -\end{lstlisting} - -Some points to note: - -\begin{itemize} - \item It is straightforward to extend support to a new site by copying an - existing site file(s) and adapting it to the new job host and batch - scheduler etc. - - \item Batch system directives should be considered site-specific unless - all supported sites have the same batch system and the same host - architecture (including CPU clock speed and memory size etc.). - - \item We've assumed that all tasks run on a single HPC host at both - sites. If that's not a valid assumption the \lstinline=HPC= family - inheritance relationships would have to become site-specific. - - \item Core task runtime configuration aren't needed in site files at all - if their job host and batch system settings can be defined in common - families that are (\lstinline=HPC= in this case). -\end{itemize} - - -\subsection{Collaborative Development Model} -\label{Collaborative Development Model} - -Official releases of a portable suite should be made from the suite trunk. - -Changes should be developed on feature branches so as not to affect other users -of the suite. - -Site-specific changes shouldn't touch the core suite.rc file, just the relevant -site include-file, and therefore should not need close scrutiny from other -sites. - -Changes to the core suite.rc file should be agreed by all stakeholders, and -should be carefully checked for effects on site include-files: - -\begin{itemize} - \item Changing the name of tasks or families in the core suite may break - sites that add configuration to the original runtime namespace. 
- \item Adding new tasks or families to the core suite may require - corresponding additions to the site files. - \item Deleting tasks or families from the core suite may require - corresponding parts of the site files to be removed. And also, check for - site-specific triggering off of deleted tasks or families. -\end{itemize} - -However, if the owner site has to get some changes into the trunk before all -collaborating sites have time to test them, version control will of course -protect those lagging behind from any immediate ill effects. - -When a new feature is complete and tested at the developer's site, the suite -owner should check out the branch, review and test it, and if necessary request -that other sites do the same and report back. The owner can then merge the -new feature to the trunk once satisfied. - -All planning and discussion associated with the change should be documented on -MOSRS Trac tickets associated with the suite. - -\subsection{Research-To-Operations Transition} - -Under this collaborative development model it is {\em possible} to use the -same suite in research and operations, largely eliminating the difficult -translation between the two environments. Where appropriate, this can save -a lot of work. - -Operations-specific parts of the suite should be factored out (as for site -portability) into include-files that are only loaded in the operational -environment. Improvements and upgrades can be developed on feature branches in -the research environment. Operations staff can check out completed feature -branches for testing in the operational environment before merging to trunk or -referring back to research if problems are found. After sufficient testing the -new suite version can be deployed into operations. 
- -\note{This obviously glosses over the myriad complexities of the technical - and scientific testing and validation of suite upgrades; it merely describes - what is possible from a suite design and collaborative development -perspective.} diff --git a/doc/src/suite-design-guide/preamble.tex b/doc/src/suite-design-guide/preamble.tex deleted file mode 100644 index e734eef16dc..00000000000 --- a/doc/src/suite-design-guide/preamble.tex +++ /dev/null @@ -1,87 +0,0 @@ -% ---- PREAMBLE ---- -% layout -\documentclass{article} -\usepackage[margin=3cm, headheight=1cm]{geometry} - -% Font stuff. -\renewcommand{\familydefault}{\sfdefault} % sanz-serif -\usepackage{parskip} % regular paragraph separation - -% Imports. -\usepackage[usenames]{color} -\usepackage{graphicx} -\usepackage{listings} - \usepackage{courier} -\usepackage{hyperref} -\usepackage{textcomp} - -% Hyperlinks. -\definecolor{links}{rgb}{0.1,0.1,0.6} -\hypersetup{colorlinks=true, linkcolor=links, urlcolor=links} -\urlstyle{same} - -% Headers / Footers. -\usepackage{fancyhdr} -\lhead{Rose+Cylc Suite Design Best Practice Guide} % TODO: hardcoded !!! -\rhead{ - \includegraphics[width=0.15\textwidth]{resources/png/rose-logo} - \includegraphics[width=0.15\textwidth]{resources/tex/cylc-logo}} -\pagestyle{fancy} - -% Code listings: default style. 
-\definecolor{keywords}{rgb}{0.8,0.4,0.0} -\definecolor{comments}{rgb}{1.0,0.3,0.5} -\definecolor{identifiers}{rgb}{0.3,0.4,0.5} -\definecolor{strings}{rgb}{0.2,0.5,0.3} -\definecolor{basic}{rgb}{0.2,0.3,0.4} -\definecolor{command}{rgb}{0.0,0.2,0.1} -\definecolor{transcr}{rgb}{0.0,0.2,0.4} -\newcommand\mysmall{\fontsize{8}{9.2}\selectfont} - -% bold for courier font: -\renewcommand{\ttdefault}{pcr} - -\lstset{ -basicstyle=\color{basic}\mysmall\ttfamily\bfseries, -identifierstyle=\color{identifiers}, -keywordstyle=\color{keywords}, -commentstyle=\color{comments}, -stringstyle=\color{strings}, -showstringspaces=false, -upquote=true, -} - -% Code listings: suite.rc language support. -\definecolor{level1}{rgb}{0.0,0.2,0.6} -\definecolor{level2}{rgb}{0.0,0.3,0.7} -\definecolor{level3}{rgb}{0.0,0.4,0.8} -\definecolor{jinja2}{rgb}{0.7,0.5,0.3} -\lstdefinelanguage{suiterc} -{ -string=[b]{"}, -sensitive=true, -comment=[l]{\#}, -morecomment=[s][\color{level1}]{[}{]}, -morecomment=[s][\color{level2}]{[[}{]]}, -morecomment=[s][\color{level3}]{[[[}{]]]}, -morecomment=[s][\color{jinja2}]{\{\%}{\%\}}, -morecomment=[s][\color{jinja2}]{\{\{}{\}\}}, -morecomment=[s][\color{jinja2}]{\{\#}{\#\}}, -} - -\definecolor{note}{rgb}{0.6,0.6,0.6} -\newcommand{\note}[1]{{\color{note}\textbf{Note:} \textit{#1}}} - -\definecolor{terminology}{rgb}{0.7,0.5,0.2} -\newcommand{\terminology}[1]{{\color{terminology} \textit{#1}}} - -\definecolor{todo}{rgb}{1.0,1.0,0.0} -\newcommand{\TODO}[1]{{\colorbox{todo}{\textbf{TODO:} \textit{#1}}}} - -\usepackage{framed} -\definecolor{shadecolor}{rgb}{0.95,0.95,0.95} - -%\usepackage{draftwatermark} -%\SetWatermarkText{Draft 15/03/17} -%\SetWatermarkScale{5} -%\SetWatermarkColor[rgb]{0.95,0.90,0.90} diff --git a/doc/src/suite-design-guide/resources/tex/cylc-logo.pdf b/doc/src/suite-design-guide/resources/tex/cylc-logo.pdf deleted file mode 100644 index 502327c8aeb..00000000000 Binary files a/doc/src/suite-design-guide/resources/tex/cylc-logo.pdf and /dev/null 
differ diff --git a/doc/src/suite-design-guide/roadmap.rst b/doc/src/suite-design-guide/roadmap.rst new file mode 100644 index 00000000000..ceabe806c69 --- /dev/null +++ b/doc/src/suite-design-guide/roadmap.rst @@ -0,0 +1,85 @@ +Roadmap +======= + +Several planned future developments in Rose and Cylc may have an impact on +suite design. + + +.. _List Item Override In Site Include-Files: + +List Item Override In Site Include-Files +---------------------------------------- + + +A few Cylc config items hold lists of task (or family) names, e.g.: + +.. code-block:: cylc + + [scheduling] + [[special tasks]] + clock-trigger = get-data-a, get-data-b + #... + #... + +Currently a repeated config item completely overrides a previously set value +(apart from graph strings which are always additive). This means a site +include-file (for example) can't add a new site-specific clock-triggered task +without writing out the complete list of all clock-triggered tasks in the +suite, which breaks the otherwise clean separation into core and site files. + +.. note:: + + In the future we `plan to <https://github.com/cylc/cylc/issues/1363>`_ + support add, subtract, unset, and override semantics for all items. + + +.. _UM STASH in Optional App Configs: + +UM STASH in Optional App Configs +-------------------------------- + + +A caveat to the advice on use of optional app configs in +:ref:`Optional App Config Files`: in general you might need the ability +to turn off or modify some STASH requests in the main +app, not just add additional site-specific +STASH. But overriding STASH in optional configs is fragile because STASH +namelist names are automatically generated from a *hash* of the precise +content of the namelist. This makes it possible to uniquely identify the same +STASH requests in different apps, but if any detail of a STASH request changes +in a main app its namelist name will change and any optional configs that refer +to it will become divorced from their intended target.
+ +Until this problem is solved we recommend that: + +- All STASH in main UM apps should be grouped into sensible + *packages* that can be turned on and off in optional configs without + referencing the individual STASH request namelists. +- Or all STASH should be held in optional site configs and none in the + main app. Note however that STASH is difficult to configure outside of + ``rose edit``, and the editor `does not yet allow you to edit optional + configs <https://github.com/metomi/rose/issues/1685>`_. + + +Modular Suite Design +-------------------- + +The `modular suite design concept <https://github.com/cylc/cylc/issues/1829>`_ +is that we should be able to import common workflow segments at install time +rather than duplicating them in each suite. The content of a suite module +will be encapsulated in a protected namespace to avoid clashing with the +importing suite, and selected inputs and outputs exposed via a proper interface. + +This should aid portable suite design too by enabling site-specific parts of a +workflow (local product generation for example) to be stored and imported +on-site rather than polluting the source and revision control record of +the core suite that everyone sees. + +We note that this can already be done to a limited extent by using +``rose suite-run`` to install suite.rc fragments from an external +location. However, as a literal inlining mechanism with no encapsulation or +interface, the internals of the "imported" fragments would have to be +compatible with the suite definition in every respect. + +See also :ref:`Monolithic Or Interdependent Suites` on modular *systems of +suites* connected by inter-suite triggering. diff --git a/doc/src/suite-design-guide/roadmap.tex b/doc/src/suite-design-guide/roadmap.tex deleted file mode 100644 index 4b23c4805dd..00000000000 --- a/doc/src/suite-design-guide/roadmap.tex +++ /dev/null @@ -1,75 +0,0 @@ -\section{Roadmap} - -Several planned future developments in Rose and Cylc may have an impact on -suite design.
- -\subsection{List Item Override In Site Include-Files} -\label{List Item Override In Site Include-Files} - -A few Cylc config items hold lists of task (or family) names, e.g.: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[special tasks]] - clock-trigger = get-data-a, get-data-b - #... -#... -\end{lstlisting} - -Currently a repeated config item completely overrides a previously set value -(apart from graph strings which are always additive). This means a site -include-file (for example) can't add a new site-specific clock-triggered task -without writing out the complete list of all clock-triggered tasks in the -suite, which breaks the otherwise clean separation into core and site files. - -{\em In the future we plan to support add, subtract, unset, and override -semantics for all items - see \url{https://github.com/cylc/cylc/issues/1363}}. - -\subsection{UM STASH in Optional App Configs} -\label{UM STASH in Optional App Configs} - -A caveat to the advice on use of option app configs in Section~\ref{Optional -App Config Files}: in general you might need the ability to turn off or modify -some STASH requests in the main app, not just add additional site-specific -STASH. But overriding STASH in optional configs is fragile because STASH -namelists names are automatically generated from a {\em hash} of the precise -content of the namelist. This makes it possible to uniquely identify the same -STASH requests in different apps, but if any detail of a STASH request changes -in a main app its namelist name will change and any optional configs that refer -to it will become divorced from their intended target. - -Until this problem is solved we recommend that: - -\begin{itemize} - \item All STASH in main UM apps should be grouped into sensible {\em - packages} that can be turned on and off in optional configs without - referencing the individual STASH request namelists. 
- \item Or all STASH should be held in optional site configs and none in the - main app. Note however that STASH is difficult to configure outside of - \lstinline=rose edit=, and the editor does not yet allow you to edit - optional configs - see \url{https://github.com/metomi/rose/issues/1685}. -\end{itemize} - -\subsection{Modular Suite Design} - -The modular suite design concept is that we should be able to import common -workflow segments at install time rather than duplicating them in each suite: -\url{https://github.com/cylc/cylc/issues/1829}. The content of a suite module -will be encapsulated in a protected namespace to avoid clashing with the -importing suite, and selected inputs and outputs exposed via a proper -interface. - -This should aid portable suite design too by enabling site-specific parts of a -workflow (local product generation for example) to be stored and imported -on-site rather than polluting the source and revision control record of -the core suite that everyone sees. - -We note that this can already be done to a limited extent by using -\lstinline=rose suite-run= to install suite.rc fragments from an external -location. However, as a literal inlining mechanism with no encapsulation or -interface, the internals of the ``imported'' fragments would have to be -compatible with the suite definition in every respect. - -See also~\ref{Monolithic Or Interdependent Suites} on modular {\em systems of -suites} connected by inter-suite triggering. diff --git a/doc/src/suite-design-guide/introduction.tex b/doc/src/suite-design-guide/sdg-introduction.rst similarity index 53% rename from doc/src/suite-design-guide/introduction.tex rename to doc/src/suite-design-guide/sdg-introduction.rst index 762d7932651..f1199d6eaa8 100644 --- a/doc/src/suite-design-guide/introduction.tex +++ b/doc/src/suite-design-guide/sdg-introduction.rst @@ -1,5 +1,7 @@ -\section{Introduction} -\label{Introduction} +.. 
_Introduction Label: + +Introduction +============ This document provides guidance on making complex Cylc + Rose workflows that are clear, maintainable, and portable. Note that best practice advice may @@ -14,15 +16,18 @@ \section{Introduction} move on to more advanced topics (efficiency and maintainability, portable suites), and end with some pointers to future developments. -{\em A good working knowledge of Cylc and Rose is assumed}. +.. note:: + + A good working knowledge of Cylc and Rose is assumed. For further details, + please consult the: + + - `Cylc documentation <http://cylc.github.io/cylc/documentation.html>`_ + - `Rose documentation <http://metomi.github.io/rose/doc/rose.html>`_ -\begin{itemize} - \item Cylc: \url{http://cylc.github.io/cylc/documentation.html} - \item Rose: \url{http://metomi.github.io/rose/doc/rose.html} -\end{itemize} +.. note:: -\note{for non-Rose users: this document comes out of the Unified Model - Consortium wherein Cylc is used within the Rose {\em suite management - framework}. However, the bulk of the information in this guide is about -Cylc suite design; which parts are Rose-specific should be clear from -context.} + For non-Rose users: this document comes out of the Unified Model + Consortium wherein Cylc is used within the Rose *suite management + framework*. However, the bulk of the information in this guide is about + Cylc suite design; which parts are Rose-specific should be clear from + context. diff --git a/doc/src/suite-design-guide/style-guide.rst b/doc/src/suite-design-guide/style-guide.rst new file mode 100644 index 00000000000..549abdadb29 --- /dev/null +++ b/doc/src/suite-design-guide/style-guide.rst @@ -0,0 +1,315 @@ +Style Guidelines +================ + +Coding style is largely subjective, but for collaborative development of +complex systems it is important to settle on a clear and consistent style to +avoid getting into a mess. The following style rules are recommended. + + +Tab Characters +-------------- + +Do not use tab characters.
Tab width depends on editor settings, so a mixture +of tabs and spaces in the same file can render to a mess. + +Use ``grep -InPr "\t" *`` to find tabs recursively in files in +a directory. + +In *vim* use ``%retab`` to convert existing tabs to spaces, +and set ``expandtab`` to automatically convert new tabs. + +In *emacs* use *whitespace-cleanup*. + +In *gedit*, use the *Draw Spaces* plugin to display tabs and spaces. + + +Trailing Whitespace +------------------- + +Trailing whitespace is untidy, it makes quick reformatting of paragraphs +difficult, and it can result in hard-to-find bugs (space after intended +line continuation markers). + +To remove existing trailing whitespace in a file use a ``sed`` or +``perl`` one-liner: + +.. code-block:: bash + + $ perl -pi -e "s/ +$//g" /path/to/file + # or: + $ sed --in-place 's/[[:space:]]\+$//' path/to/file + +Or do a similar search-and-replace operation in your editor. Editors like +*vim* and *emacs* can also be configured to highlight or automatically +remove trailing whitespace on the fly. + + +Indentation +----------- + +Consistent indentation makes a suite definition more readable, it shows section +nesting clearly, and it makes block re-indentation operations easier in text +editors. Indent suite.rc syntax four spaces per nesting level: + + +Config Items +^^^^^^^^^^^^ + +.. code-block:: cylc + + [SECTION] + # A comment. + title = the quick brown fox + [[SUBSECTION]] + # Another comment. + a short item = value1 + a very very long item = value2 + +Don't align ``item = value`` pairs on the ``=`` character +like this: + +.. code-block:: cylc + + [SECTION] # Avoid this. + a short item = value1 + a very very long item = value2 + +or like this: + +.. code-block:: cylc + + [SECTION] # Avoid this. + a short item = value1 + a very very long item = value2 + +because the whole block may need re-indenting after a single change, which will +pollute your revision history with spurious changes. 
+ +Comments should be indented to the same level as the section or item they refer +to, and trailing comments should be preceded by two spaces, as shown above. + + +Script String Lines +^^^^^^^^^^^^^^^^^^^ + +Script strings are written verbatim to task job scripts so they should really +be indented from the left margin: + +.. code-block:: cylc + + [runtime] + [[foo]] + # Recommended. + post-script = """ + if [[ $RESULT == "bad" ]]; then + echo Goodbye World! + exit 1 + fi""" + +Indentation is *mostly* ignored by the bash interpreter, but is useful for +readability. It is *mostly* harmless to indent internal script lines as if +part of the Cylc syntax, or even out to the triple quotes: + +.. code-block:: cylc + + [runtime] + [[foo]] + # OK, but... + post-script = """ + if [[ $RESULT == "bad" ]]; then + echo Goodbye World! + exit 1 + fi""" + +On parsing the triple quoted value, Cylc will remove any common leading +whitespace from each line using the logic of +`Python's textwrap.dedent <https://docs.python.org/2/library/textwrap.html#textwrap.dedent>`_ +so the script block would end up being the same as the previous example. +However, you should watch your line length (see :ref:`Line Length`) when you +have many levels of indentation. + +.. note:: + + Take care when indenting here documents: + + .. code-block:: cylc + + [runtime] + [[foo]] + script = """ + cat >> log.txt <<_EOF_ + The quick brown fox jumped + over the lazy dog. + _EOF_ + """ + +In the above, each line in ``log.txt`` would end up with 4 leading +white spaces. The following will give you lines with no white spaces. + +.. code-block:: cylc + + [runtime] + [[foo]] + script = """ + cat >> log.txt <<_EOF_ + The quick brown fox jumped + over the lazy dog. + _EOF_ + """ + + +Graph String Lines +^^^^^^^^^^^^^^^^^^ + +Multiline ``graph`` strings can be entirely free-form: + +..
code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + # Main workflow: + FAMILY:succeed-all => bar & baz => qux + + # Housekeeping: + qux => rose_arch => rose_prune""" + +Whitespace is ignored in graph string parsing, however, so internal graph lines +can be indented as if part of the suite.rc syntax, or even out to the triple +quotes, if you feel it aids readability (but watch line length with large +indents; see :ref:`Line Length`): + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """ + # Main workflow: + FAMILY:succeed-all => bar & baz => qux + + # Housekeeping: + qux => rose_arch => rose_prune""" + +Both styles are acceptable; choose one and use it consistently. + + +Jinja2 Code +^^^^^^^^^^^ + +A suite.rc file with embedded Jinja2 code is essentially a Jinja2 program to +generate a Cylc suite definition. It is not possible to consistently indent the +Jinja2 as if it were part of the suite.rc syntax (which to the Jinja2 processor +is just arbitrary text), so it should be indented from the left margin on +its own terms: + +.. code-block:: cylc + + [runtime] + [[OPS]] + {% for T in OPS_TASKS %} + {% for M in range(M_MAX) %} + [[ops_{{T}}_{{M}}]] + inherit = OPS + {% endfor %} + {% endfor %} + + +Comments +-------- + +Comments should be minimal, but not too minimal. If context and clear +task and variable names will do, leave it at that. Extremely verbose comments +tend to get out of sync with the code they describe, which can be worse +than having no comments. + +Avoid long lists of numbered comments - future changes may require mass +renumbering. + +Avoid page-width "section divider" comments, especially if they are not +strictly limited to the standard line length (see :ref:`Line Length`). + +Indent comments to the same level as the config items they describe. 
+ + +Titles, Descriptions, And URLs +------------------------------ + +Document the suite and its tasks with ``title``, +``description``, and ``url`` items instead of comments. These +can be displayed, or linked to, by the GUI at runtime. + + +.. _Line Length: + +Line Length And Continuation +---------------------------- + +Keep to the standard maximum line length of 79 characters where possible. Very +long lines affect readability and make side-by-side diffs hard to view. + +Backslash line continuation markers can be used anywhere in the suite.rc file +but should be avoided if possible because they are easily broken by invisible +trailing whitespace. + +Continuation markers are not needed in graph strings where trailing +trigger arrows imply line continuation: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + # No line continuation marker is needed here. + graph = """prep => one => two => three => + four => five six => seven => eight""" + [runtime] + [[MY_TASKS]] + # A line continuation marker *is* needed here: + [[one, two, three, four, five, six, seven, eight, nine, ten, \ + eleven, twelve, thirteen ]] + inherit = MY_TASKS + + +Task Naming Conventions +----------------------- + +Use ``UPPERCASE`` for family names and ``lowercase`` +for tasks, so you can distinguish them at a glance. + +Choose a convention for multi-component names and use it consistently. Put the +most general name components first for natural grouping in the GUI, e.g. +``obs_sonde``, ``obs_radar`` (not ``sonde_obs`` etc.) + +Within your convention keep names as short as possible. + + +UM System Task Names +^^^^^^^^^^^^^^^^^^^^ + +For UM System suites we recommend the following full task naming convention: + +.. code-block:: none + + model_system_function[_member] + +For example, ``glu_ops_process_scatwind`` where ``glu`` refers +to the global (deterministic model) update run, ``ops`` is the system +that owns the task, and ``process_scatwind`` is the function it +performs. 
The optional ``member`` suffix is intended for use with +ensembles as needed. + +Within this convention keep names as short as possible, e.g. use +``fcst`` instead of ``forecast``. + +UM forecast apps should be given names that reflect their general science +configuration rather than geographic domain, to allow use on other model +domains without causing confusion. + + +Rose Config Files +----------------- + +Use ``rose config-dump`` to load and re-save new Rose .conf files. This +puts the files in a standard format (ordering of lines etc.) to ensure that +spurious changes aren't generated when you next use ``rose edit``. + +See also :ref:`Optional App Config Files` on optional app config files. diff --git a/doc/src/suite-design-guide/style-guide.tex b/doc/src/suite-design-guide/style-guide.tex deleted file mode 100644 index 1d498401abe..00000000000 --- a/doc/src/suite-design-guide/style-guide.tex +++ /dev/null @@ -1,298 +0,0 @@ -\section{Style Guidelines} - -Coding style is largely subjective, but for collaborative development of -complex systems it is important to settle on a clear and consistent style to -avoid getting into a mess. The following style rules are recommended. - -\subsection{Tab Characters} - -Do not use tab characters. Tab width depends on editor settings, so a mixture -of tabs and spaces in the same file can render to a mess. - -Use \lstinline=grep -InPr "\t" *= to find tabs recursively in files in -a directory. - -In {\em vim} use \lstinline=%retab= to convert existing tabs to spaces, -and set \lstinline=expandtab= to automatically convert new tabs. - -In {\em emacs} use {\em whitespace-cleanup}. - -In {\em gedit}, use the {\em Draw Spaces} plugin to display tabs and spaces. - -\subsection{Trailing Whitespace} - -Trailing whitespace is untidy, it makes quick reformatting of paragraphs -difficult, and it can result in hard-to-find bugs (space after intended -line continuation markers). 
- -To remove existing trailing whitespace in a file use a \lstinline=sed= or -\lstinline=perl= one-liner: - -\lstset{language=sh} -\begin{lstlisting} -$ perl -pi -e "s/ +$//g" /path/to/file -# or: -$ sed --in-place 's/[[:space:]]\+$//' path/to/file -\end{lstlisting} - -Or do a similar search-and-replace operation in your editor. Editors like {\em -vim} and {\em emacs} can also be configured to highlight or automatically -remove trailing whitespace on the fly. - -\subsection{Indentation} - -Consistent indentation makes a suite definition more readable, it shows section -nesting clearly, and it makes block re-indentation operations easier in text -editors. Indent suite.rc syntax four spaces per nesting level: - -\subsubsection{Config Items} - -\lstset{language=suiterc} -\begin{lstlisting} -[SECTION] - # A comment. - title = the quick brown fox - [[SUBSECTION]] - # Another comment. - a short item = value1 - a very very long item = value2 -\end{lstlisting} - -Don't align \lstinline@item = value@ pairs on the \lstinline@=@ character -like this: - -\lstset{language=suiterc} -\begin{lstlisting} -[SECTION] # Avoid this. - a short item = value1 - a very very long item = value2 -\end{lstlisting} - -or like this: - -\lstset{language=suiterc} -\begin{lstlisting} -[SECTION] # Avoid this. - a short item = value1 - a very very long item = value2 -\end{lstlisting} - -because the whole block may need re-indenting after a single change, which will -pollute your revision history with spurious changes. - -Comments should be indented to the same level as the section or item they refer -to, and trailing comments should be preceded by two spaces, as shown above. - -\subsubsection{Script String Lines} - -Script strings are written verbatim to task job scripts so they should really -be indented from the left margin: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - # Recommended. - post-script = """ -if [[ $RESULT == "bad" ]]; then - echo Goodbye World! 
- exit 1 -fi""" -\end{lstlisting} - -Indentation is {\em mostly} ignored by the bash interpreter, but is useful for -readability. It is {\em mostly} harmless to indent internal script lines as if -part of the Cylc syntax, or even out to the triple quotes: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - # OK, but... - post-script = """ - if [[ $RESULT == "bad" ]]; then - echo Goodbye World! - exit 1 - fi""" -\end{lstlisting} - -On parsing the triple quoted value, Cylc will remove any common leading -whitespace from each line using the logic of -\href{https://docs.python.org/2/library/textwrap.html#textwrap.dedent}{Python's textwrap.dedent} -so the script block would end up being the same as the previous example. -However, you should watch your line length (see~\ref{Line Length}) when you -have many levels of indentations. - -{\em Take care when indenting here documents}: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - script = """ - cat >> log.txt <<_EOF_ - The quick brown fox jumped - over the lazy dog. - _EOF_ - """ -\end{lstlisting} - -In the above, each line in \lstinline=log.txt= would end up with 4 leading -white spaces. The following will give you lines with no white spaces. - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[foo]] - script = """ - cat >> log.txt <<_EOF_ - The quick brown fox jumped - over the lazy dog. 
- _EOF_ - """ -\end{lstlisting} - -\subsubsection{Graph String Lines} - -Multiline \lstinline@graph@ strings can be entirely free-form: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - # Main workflow: - FAMILY:succeed-all => bar & baz => qux - - # Housekeeping: - qux => rose_arch => rose_prune""" -\end{lstlisting} - -Whitespace is ignored in graph string parsing, however, so internal graph lines -can be indented as if part of the suite.rc syntax, or even out to the triple -quotes, if you feel it aids readability (but watch line length with large -indents; see~\ref{Line Length}): - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - graph = """ - # Main workflow: - FAMILY:succeed-all => bar & baz => qux - - # Housekeeping: - qux => rose_arch => rose_prune""" -\end{lstlisting} - -Both styles are acceptable; choose one and use it consistently. - -\subsubsection{Jinja2 Code} - -A suite.rc file with embedded Jinja2 code is essentially a Jinja2 program to -generate a Cylc suite definition. It is not possible to consistently indent the -Jinja2 as if it were part of the suite.rc syntax (which to the Jinja2 processor -is just arbitrary text), so it should be indented from the left margin on -its own terms: - -\lstset{language=suiterc} -\begin{lstlisting} -[runtime] - [[OPS]] -{% for T in OPS_TASKS %} - {% for M in range(M_MAX) %} - [[ops_{{T}}_{{M}}]] - inherit = OPS - {% endfor %} -{% endfor %} -\end{lstlisting} - -\subsection{Comments} - -Comments should be minimal, but not too minimal. If context and clear -task and variable names will do, leave it at that. Extremely verbose comments -tend to get out of sync with the code they describe, which can be worse -than having no comments. - -Avoid long lists of numbered comments - future changes may require mass -renumbering. 
- -Avoid page-width ``section divider'' comments, especially if they are not -strictly limited to the standard line length (see~\ref{Line Length}). - -Indent comments to the same level as the config items they describe. - -\subsection{Titles, Descriptions, And URLs} - -Document the suite and its tasks with \lstinline=title=, -\lstinline=description=, and \lstinline=url= items instead of comments. These -can be displayed, or linked to, by the GUI at runtime. - -\subsection{Line Length And Continuation} -\label{Line Length} - -Keep to the standard maximum line length of 79 characters where possible. Very -long lines affect readability and make side-by-side diffs hard to view. - -Backslash line continuation markers can be used anywhere in the suite.rc file -but should be avoided if possible because they are easily broken by invisible -trailing whitespace. - -Continuation markers are not needed in graph strings where trailing -trigger arrows imply line continuation: - -\lstset{language=suiterc} -\begin{lstlisting} -[scheduling] - [[dependencies]] - # No line continuation marker is needed here. - graph = """prep => one => two => three => - four => five six => seven => eight""" -[runtime] - [[MY_TASKS]] - # A line continuation marker *is* needed here: - [[one, two, three, four, five, six, seven, eight, nine, ten, \ - eleven, twelve, thirteen ]] - inherit = MY_TASKS -\end{lstlisting} - -\subsection{Task Naming Conventions} - -Use \lstinline=UPPERCASE= for family names and \lstinline=lowercase= -for tasks, so you can distinguish them at a glance. - -Choose a convention for multi-component names and use it consistently. Put the -most general name components first for natural grouping in the GUI, e.g.\ -\lstinline=obs_sonde=, \lstinline=obs_radar= (not \lstinline=sonde_obs= etc.) - -Within your convention keep names as short as possible. 
- -\subsubsection{UM System Task Names} - -For UM System suites we recommend the following full task naming convention: - -\lstset{language=suiterc} -\begin{lstlisting} -model_system_function[_member] -\end{lstlisting} - -For example, \lstinline=glu_ops_process_scatwind= where \lstinline=glu= refers -to the global (deterministic model) update run, \lstinline=ops= is the system -that owns the task, and \lstinline=process_scatwind= is the function it -performs. The optional \lstinline=member= suffix is intended for use with -ensembles as needed. - -Within this convention keep names as short as possible, e.g.\ use -\lstinline=fcst= instead of \lstinline=forecast=. - -UM forecast apps should be given names that reflect their general science -configuration rather than geographic domain, to allow use on other model -domains without causing confusion. - -\subsection{Rose Config Files} - -Use \lstinline=rose config-dump= to load and re-save new Rose .conf files. This -puts the files in a standard format (ordering of lines etc.) to ensure that -spurious changes aren't generated when you next use \lstinline=rose edit=. - -See also~\ref{Optional App Config Files} on optional app config files. diff --git a/doc/src/suite-design-guide/suite-design-guide-master.rst b/doc/src/suite-design-guide/suite-design-guide-master.rst new file mode 100644 index 00000000000..73910197222 --- /dev/null +++ b/doc/src/suite-design-guide/suite-design-guide-master.rst @@ -0,0 +1,47 @@ +.. SDG: + +****************** +Suite Design Guide +****************** + +**Cylc Rose Suite Design Best Practice Guide** + +Version 1.0 - 23 March 2017 + +*Last updated for: Cylc-7.2.0 and Rose-2017.02.0* + +*Hilary Oliver, Dave Matthews, Andy Clark, and Contributors* + +----------- + +.. toctree:: + :maxdepth: 2 + + sdg-introduction + style-guide + general-principles + efficiency + portable-suites + roadmap + +----------- + + +.. 
insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/suite-design-guide/title-page.tex b/doc/src/suite-design-guide/title-page.tex deleted file mode 100644 index 70921b68899..00000000000 --- a/doc/src/suite-design-guide/title-page.tex +++ /dev/null @@ -1,16 +0,0 @@ -\thispagestyle{empty} - -\begin{titlepage} - \begin{center} - \includegraphics[width=0.3\textwidth]{resources/png/rose-logo} - \includegraphics[width=0.3\textwidth]{resources/tex/cylc-logo} - - \Huge{Cylc Rose Suite Design\\Best Practice Guide} - - \small{Version 1.0 - 23 March 2017} - - \small{Last updated for: Cylc-7.2.0 and Rose-2017.02.0} - - \large{Hilary Oliver, Dave Matthews, Andy Clark, and Contributors} - \end{center} -\end{titlepage} diff --git a/doc/src/suite-name-reg.rst b/doc/src/suite-name-reg.rst new file mode 100644 index 00000000000..3d1e62a8281 --- /dev/null +++ b/doc/src/suite-name-reg.rst @@ -0,0 +1,42 @@ +.. _SuiteRegistration: + +Suite Name Registration +======================= + +Cylc commands target suites via their names, which are relative path names +under the suite run directory (``~/cylc-run/`` by default). Suites can +be grouped together under sub-directories. E.g.: + +.. code-block:: bash + + $ cylc print -t nwp + nwp + |-oper + | |-region1 Local Model Region1 /home/oliverh/cylc-run/nwp/oper/region1 + | `-region2 Local Model Region2 /home/oliverh/cylc-run/nwp/oper/region2 + `-test + `-region1 Local Model TEST Region1 /home/oliverh/cylc-run/nwp/test/region1 + +Suite names can be pre-registered with the ``cylc register`` command, +which creates the suite run directory structure and some service files +underneath it. Otherwise, ``cylc run`` will do this at suite start up. + + +.. 
insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/suite-storage-etc.rst b/doc/src/suite-storage-etc.rst new file mode 100644 index 00000000000..5d6bd3b6ab8 --- /dev/null +++ b/doc/src/suite-storage-etc.rst @@ -0,0 +1,46 @@ +.. _SuiteStorageEtc: + +Suite Storage, Discovery, Revision Control, and Deployment +========================================================== + +Small groups of cylc users can of course share suites by manual copying, +and generic revision control tools can be used on cylc suites as for any +collection of files. Beyond this cylc does not have a built-in solution +for suite storage and discovery, revision control, and deployment, on a +network. That is not cylc's core purpose, and large sites may have +preferred revision control systems and suite meta-data requirements that +are difficult to anticipate. We can, however, recommend the use of +*Rose* to do all of this very easily and elegantly with cylc suites. + + +.. _Rose: + +Rose +---- + +**Rose** is *a framework for managing and running suites of +scientific applications*, developed at the Met Office for use with +cylc. It is available under the open source GPL license. + +- `Rose documentation `_ +- `Rose source repository `_ + + +.. insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/task-implementation.rst b/doc/src/task-implementation.rst new file mode 100644 index 00000000000..b9bbe0743af --- /dev/null +++ b/doc/src/task-implementation.rst @@ -0,0 +1,176 @@ +.. _TaskImplementation: + +Task Implementation +=================== + +Existing scripts and executables can be used as cylc tasks without modification +so long as they return standard exit status - zero on success, non-zero +for failure - and do not spawn detaching processes internally +(see :ref:`DetachingJobs`). + + +.. 
_JobScripts: + +Task Job Scripts +---------------- + +When the suite daemon determines that a task is ready to run it generates a +*job script* that embodies the task runtime configuration in the suite.rc +file, and submits it to the configured job host and batch system +(see :ref:`TaskJobSubmission`). + +Task job scripts are written to the suite's job log directory. They can be +printed with ``cylc cat-log`` or generated and printed with +``cylc jobscript``. + + +Inlined Tasks +------------- + +Task *script* items can be multi-line strings of ``bash`` code, so +many tasks can be entirely inlined in the suite.rc file. For anything more than +a few lines of code, however, we recommend using external shell scripts to allow +independent testing, re-use, and shell mode editing. + + +Task Messages +------------- + +Task messages can be sent back to the suite server program to report completed +outputs and arbitrary messages of different severity levels. + +Some types of message - in addition to events like task failure - can +optionally trigger execution of event handlers in the suite server program +(see :ref:`EventHandling`). + +Normal severity messages are printed to ``job.out`` and logged by the +suite server program: + +.. code-block:: bash + + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ + "Hello from ${CYLC_TASK_ID}" + +CUSTOM severity messages are printed to ``job.out``, logged by the +suite server program, and can be used to trigger *custom* +event handlers: + +.. code-block:: bash + + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ + "CUSTOM:data available for ${CYLC_TASK_CYCLE_POINT}" + +Custom severity messages and event handlers can be used to signal special +events that are neither routine information nor an error condition, such as +production of a particular data file. Task output messages, used for triggering +other tasks, can also be sent with custom severity if need be.
+ +WARNING severity messages are printed to ``job.err``, logged by the +suite server program, and can be passed to *warning* event handlers: + +.. code-block:: bash + + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ + "WARNING:Uh-oh, something's not right here." + +CRITICAL severity messages are printed to ``job.err``, logged by the +suite server program, and can be passed to *critical* event handlers: + +.. code-block:: bash + + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ + "CRITICAL:ERROR occurred in process X!" + + +Aborting Job Scripts on Error +----------------------------- + +Task job scripts use ``set -e`` to abort on any error, and +trap ERR, EXIT, and SIGTERM to send task failed messages back to the +suite server program before aborting. Other scripts called from job scripts +should therefore abort with standard non-zero exit status on error, to trigger +the job script error trap. + +To prevent a command that is expected to generate a non-zero exit status from +triggering the exit trap, protect it with a control statement such as: + +.. code-block:: bash + + if cmp FILE1 FILE2; then + : # success: do stuff + else + : # failure: do other stuff + fi + +Task job scripts also use ``set -u`` to abort on referencing any +undefined variable (useful for picking up typos); and ``set -o pipefail`` +to abort if any part of a pipe fails (by default the shell only returns the +exit status of the final command in a pipeline). + + +Custom Failure Messages +^^^^^^^^^^^^^^^^^^^^^^^ + +Critical events normally warrant aborting a job script rather than just sending +a message. As described just above, ``exit 1`` or any failing command +not protected by the surrounding scripting will cause a job script to abort and +report failure to the suite server program, potentially triggering a +*failed* task event handler.
+ +For failures detected by the scripting you could send a critical message back +before aborting, potentially triggering a *critical* task event handler: + +.. code-block:: bash + + if ! /bin/false; then + cylc message -- "${CYLC_SUITE_NAME}" "${CYLC_TASK_JOB}" \ + "CRITICAL:ERROR: /bin/false failed!" + exit 1 + fi + +To abort a job script with a custom message that can be passed to a +*failed* task event handler, use the built-in ``cylc__job_abort`` shell +function: + +.. code-block:: bash + + if ! /bin/false; then + cylc__job_abort "ERROR: /bin/false failed!" + fi + + +.. _DetachingJobs: + +Avoid Detaching Processes +------------------------- + +If a task script starts background sub-processes and does not wait on them, or +internally submits jobs to a batch scheduler and then exits immediately, the +detached processes will not be visible to cylc and the task will appear to +finish when the top-level script finishes. You will need to modify scripts +like this to make them execute all sub-processes in the foreground (or use the +shell ``wait`` command to wait on them before exiting) and to prevent +job submission commands from returning before the job completes (e.g. +``llsubmit -s`` for Loadleveler, +``qsub -sync yes`` for Sun Grid Engine, and +``qsub -W block=true`` for PBS). + +If this is not possible - perhaps you don't have control over the script +or can't work out how to fix it - one alternative approach is to use another +task to repeatedly poll for the results of the detached processes: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "model => checker => post-proc" + [runtime] + [[model]] + # Uh-oh, this script does an internal job submission to run model.exe: + script = "run-model.sh" + [[checker]] + # Fail and retry every minute (for 10 tries at the most) if model's + # job.done indicator file does not exist yet. + script = "[[ ! 
-f $RUN_DIR/job.done ]] && exit 1" + [[[job]]] + execution retry delays = 10 * PT1M diff --git a/doc/src/task-job-submission.rst b/doc/src/task-job-submission.rst new file mode 100644 index 00000000000..00e1a735b70 --- /dev/null +++ b/doc/src/task-job-submission.rst @@ -0,0 +1,618 @@ +.. _TaskJobSubmission: + +Task Job Submission and Management +================================== + +For the requirements a command, script, or program, must fulfill in order +to function as a cylc task, see :ref:`TaskImplementation`. +This section explains how tasks are submitted by the suite server program when +they are ready to run, and how to define new batch system handlers. + +When a task is ready cylc generates a job script (see :ref:`JobScripts`). The +job script is submitted to run by the *batch system* chosen for +the task. Different tasks can use different batch systems. Like +other runtime properties, you can set a suite default batch system and +override it for specific tasks or families: + +.. code-block:: cylc + + [runtime] + [[root]] # suite defaults + [[[job]]] + batch system = loadleveler + [[foo]] # just task foo + [[[job]]] + batch system = at + + +.. _AvailableMethods: + +Supported Job Submission Methods +-------------------------------- + +Cylc supports a number of commonly used batch systems. +See :ref:`CustomJobSubmissionMethods` for how to add new job +submission methods. + + +background +^^^^^^^^^^ + +Runs task job scripts as Unix background processes. + +If an execution time limit is specified for a task, its job will be wrapped +by the ``timeout`` command. + + +at +^^ + +Submits task job scripts to the rudimentary Unix ``at`` scheduler. The +``atd`` daemon must be running. + +If an execution time limit is specified for a task, its job will be wrapped +by the ``timeout`` command. + + +loadleveler +^^^^^^^^^^^ + +Submits task job scripts to loadleveler by the ``llsubmit`` command. +Loadleveler directives can be provided in the suite.rc file: + +.. 
code-block:: cylc + + [runtime] + [[my_task]] + [[[job]]] + batch system = loadleveler + execution time limit = PT10M + [[[directives]]] + foo = bar + baz = qux + +These are written to the top of the task job script like this: + +.. code-block:: bash + + #!/bin/bash + # DIRECTIVES + # @ foo = bar + # @ baz = qux + # @ wall_clock_limit = 660,600 + # @ queue + +If ``restart=yes`` is specified as a directive for loadleveler, the job will +automatically trap SIGUSR1, which loadleveler may use to preempt the job. On +trapping SIGUSR1, the job will inform the suite that it has been vacated by +loadleveler. This will put it back to the submitted state, until it starts +running again. + +If ``execution time limit`` is specified, it is used to generate the +``wall_clock_limit`` directive. The setting is assumed to be the soft +limit. The hard limit will be set by adding an extra minute to the soft limit. +Do not specify the ``wall_clock_limit`` directive explicitly if +``execution time limit`` is specified. Otherwise, the execution time +limit known by the suite may be out of sync with what is submitted to the batch +system. + + +lsf +^^^ + +Submits task job scripts to IBM Platform LSF by the ``bsub`` command. +LSF directives can be provided in the suite.rc file: + +.. code-block:: cylc + + [runtime] + [[my_task]] + [[[job]]] + batch system = lsf + execution time limit = PT10M + [[[directives]]] + -q = foo + +These are written to the top of the task job script like this: + +.. code-block:: bash + + #!/bin/bash + # DIRECTIVES + #BSUB -q = foo + #BSUB -W = 10 + +If ``execution time limit`` is specified, it is used to generate the +``-W`` directive. Do not specify the ``-W`` directive +explicitly if ``execution time limit`` is specified. Otherwise, the +execution time limit known by the suite may be out of sync with what is +submitted to the batch system. + + +pbs +^^^ + +Submits task job scripts to PBS (or Torque) by the ``qsub`` command. 
+PBS directives can be provided in the suite.rc file: + +.. code-block:: cylc + + [runtime] + [[my_task]] + [[[job]]] + batch system = pbs + execution time limit = PT1M + [[[directives]]] + -V = + -q = foo + -l nodes = 1 + +These are written to the top of the task job script like this: + +.. code-block:: bash + + #!/bin/bash + # DIRECTIVES + #PBS -V + #PBS -q foo + #PBS -l nodes=1 + #PBS -l walltime=60 + +If ``execution time limit`` is specified, it is used to generate the +``-l walltime`` directive. Do not specify the ``-l walltime`` +directive explicitly if ``execution time limit`` is specified. +Otherwise, the execution time limit known by the suite may be out of sync with +what is submitted to the batch system. + + +moab +^^^^ + +Submits task job scripts to the Moab workload manager by the ``msub`` +command. Moab directives can be provided in the suite.rc file; the syntax is +very similar to PBS: + +.. code-block:: cylc + + [runtime] + [[my_task]] + [[[job]]] + batch system = moab + execution time limit = PT1M + [[[directives]]] + -V = + -q = foo + -l nodes = 1 + +These are written to the top of the task job script like this: + +.. code-block:: bash + + #!/bin/bash + # DIRECTIVES + #PBS -V + #PBS -q foo + #PBS -l nodes=1 + #PBS -l walltime=60 + +(Moab understands ``#PBS`` directives). + +If ``execution time limit`` is specified, it is used to generate the +``-l walltime`` directive. Do not specify the ``-l walltime`` +directive explicitly if ``execution time limit`` is specified. +Otherwise, the execution time limit known by the suite may be out of sync with +what is submitted to the batch system. + + +sge +^^^ + +Submits task job scripts to Sun/Oracle Grid Engine by the ``qsub`` +command. SGE directives can be provided in the suite.rc file: + +.. 
code-block:: cylc + + [runtime] + [[my_task]] + [[[job]]] + batch system = sge + execution time limit = P1D + [[[directives]]] + -cwd = + -q = foo + -l h_data = 1024M + -l h_rt = 24:00:00 + +These are written to the top of the task job script like this: + +.. code-block:: bash + + #!/bin/bash + # DIRECTIVES + #$ -cwd + #$ -q foo + #$ -l h_data=1024M + #$ -l h_rt=24:00:00 + +If ``execution time limit`` is specified, it is used to generate the +``-l h_rt`` directive. Do not specify the ``-l h_rt`` +directive explicitly if ``execution time limit`` is specified. +Otherwise, the execution time limit known by the suite may be out of sync with +what is submitted to the batch system. + + +slurm +^^^^^ + +Submits task job scripts to Simple Linux Utility for Resource Management by the +``sbatch`` command. SLURM directives can be provided in the suite.rc file: + +.. code-block:: cylc + + [runtime] + [[my_task]] + [[[job]]] + batch system = slurm + execution time limit = PT1H + [[[directives]]] + --nodes = 5 + --account = QXZ5W2 + +.. note:: + + Since not all SLURM commands have a short form, cylc requires + the long form directives. + +These are written to the top of the task job script like this: + +.. code-block:: bash + + #!/bin/bash + #SBATCH --nodes=5 + #SBATCH --time=60:00 + #SBATCH --account=QXZ5W2 + +If ``execution time limit`` is specified, it is used to generate the +``--time`` directive. Do not specify the ``--time`` +directive explicitly if ``execution time limit`` is specified. +Otherwise, the execution time limit known by the suite may be out of sync with +what is submitted to the batch system. + + +Default Directives Provided +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For batch systems that use job file directives (PBS, Loadleveler, +etc.) default directives are provided to set the job name, stdout and stderr +file paths, and the execution time limit (if specified). + +Cylc constructs the job name string using a combination of the task ID and the +suite name. 
PBS fails a job submission if the job name in ``-N name`` is +too long. For version 12 or below, this is 15 characters. For version 13, this +is 236 characters. The default setting will truncate the job name string to 15 +characters. If you have PBS 13 at your site, you should modify your site's +global configuration file to allow the job name to be longer. (See also +:ref:`JobNameLengthMaximum`.) For example: + +.. code-block:: cylc + + [hosts] + [[myhpc*]] + [[[batch systems]]] + [[[[pbs]]]] + # PBS 13 + job name length maximum = 236 + + +Directives Section Quirks (PBS, SGE, ...) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To specify an option with no argument, such as ``-V`` in PBS or +``-cwd`` in SGE you must give a null string as the directive value in +the suite.rc file. + +The left hand side of a setting (i.e. the string before the first equal sign) +must be unique. To specify multiple values using an option such as +``-l`` in PBS, SGE, etc., either specify all items in a single +line: + +.. code-block:: none + + -l=select=28:ncpus=36:mpiprocs=18:ompthreads=2:walltime=12:00:00 + +(Left hand side is ``-l``. A second ``-l=...`` line will +override the first.) + +Or separate the items: + +.. code-block:: none + + -l select=28 + -l ncpus=36 + -l mpiprocs=18 + -l ompthreads=2 + -l walltime=12:00:00 + +.. note:: + + There is no equal sign after ``-l``. + +(Left hand sides are now ``-l select``, ``-l ncpus``, etc.) + + +.. _WhitherStdoutAndStderr: + +Task stdout And stderr Logs +--------------------------- + +When a task is ready to run cylc generates a filename root to be used +for the task job script and log files. The filename contains the task +name, cycle point, and a submit number that increments if the same task is +re-triggered multiple times: + +..
code-block:: bash + + # task job script: + ~/cylc-run/tut/oneoff/basic/log/job/1/hello/01/job + # task stdout: + ~/cylc-run/tut/oneoff/basic/log/job/1/hello/01/job.out + # task stderr: + ~/cylc-run/tut/oneoff/basic/log/job/1/hello/01/job.err + +How the stdout and stderr streams are directed into these files depends +on the batch system. The ``background`` method just uses +appropriate output redirection on the command line, as shown above. The +``loadleveler`` method writes appropriate directives to the job +script that is submitted to loadleveler. + +Cylc obviously has no control over the stdout and stderr output from +tasks that do their own internal output management (e.g. tasks +that submit internal jobs and direct the associated output to other +files). For less internally complex tasks, however, the files referred +to here will be complete task job logs. + +Some batch systems, such as ``pbs``, redirect a job's stdout +and stderr streams to a separate cache area while the job is running. The +contents are only copied to the normal locations when the job completes. This +means that ``cylc cat-log`` or the gcylc GUI will be unable to find the +job's stdout and stderr streams while the job is running. Some sites with these +batch systems are known to provide commands for viewing and/or +tail-follow a job's stdout and stderr streams that are redirected to these +cache areas. If this is the case at your site, you can configure cylc to make +use of the provided commands by adding some settings to the global site/user +config. E.g.: + +.. code-block:: cylc + + [hosts] + [[HOST]] # <= replace this with a real host name + [[[batch systems]]] + [[[[pbs]]]] + err tailer = qcat -f -e \%(job_id)s + out tailer = qcat -f -o \%(job_id)s + err viewer = qcat -e \%(job_id)s + out viewer = qcat -o \%(job_id)s + + +.. 
_CommandTemplate: + +Overriding The Job Submission Command +------------------------------------- + +To change the form of the actual command used to submit a job you do not +need to define a new batch system handler; just override the +``batch submit command template`` in the relevant job submission sections of +your suite.rc file: + +.. code-block:: cylc + + [runtime] + [[root]] + [[[job]]] + batch system = loadleveler + # Use '-s' to stop llsubmit returning + # until all job steps have completed: + batch submit command template = llsubmit -s %(job)s + +As explained in :ref:`SuiteRCReference` +the template's ``%(job)s`` will be substituted by the job file path. + + +Job Polling +----------- + +For supported batch systems, one-way polling can be used to determine actual +job status: the suite server program executes a process on the task host, by +non-interactive ssh, to interrogate the batch queueing system there, and to +read a *status file* that is automatically generated by the task job script +as it runs. + +Polling may be required to update the suite state correctly after unusual +events such as a machine being rebooted with tasks running on it, or network +problems that prevent task messages from getting back to the suite host. + +Tasks can be polled on demand by right-clicking on them in gcylc or using the +``cylc poll`` command. + +Tasks are polled automatically, once, if they time out while queueing in a +batch scheduler and submission timeout is set. (See :ref:`TaskEventHandling` +for how to configure timeouts). + +Tasks are polled multiple times, where necessary, when they exceed their +execution time limits. These are normally set with some initial delays to allow +the batch systems to kill the jobs. +(See :ref:`ExecutionTimeLimitPollingIntervals` for how to configure the polling +intervals). + +Any tasks recorded in the *submitted* or *running* states at suite +restart are automatically polled to determine what happened to them while the +suite was down. 
+ +Regular polling can also be configured as a health check on tasks submitted to +hosts that are known to be flaky, or as the sole method of determining task +status on hosts that do not allow task messages to be routed back to the suite +host. + +To use polling instead of task-to-suite messaging set +``task communication method = poll`` +in cylc site and user global config (see :ref:`task_comms_method`). +The default polling intervals can be overridden for all suites there too +(see :ref:`submission_polling` and :ref:`execution_polling`), or in specific +suite configurations (in which case polling will be done regardless of the +task communication method configured for the host; +see :ref:`SubmissionPollingIntervals` and :ref:`ExecutionPollingIntervals`). + +Note that regular polling is not as efficient as task messaging in updating +task status, and it should be used sparingly in large suites. + +.. note:: + + For polling to work correctly, the batch queueing system must have a + command for listing your jobs, and the job listing must + display job IDs as they are returned by the batch queueing system submit + command. For example, for pbs, moab and sge, the ``qstat`` command + should list jobs with their IDs displayed in exactly the same format as they + are returned by the ``qsub`` command. + + +Job Killing +----------- + +For supported batch systems, the suite server program can execute a process on +the task host, by non-interactive ssh, to kill a submitted or running job +according to its batch system. + +Tasks can be killed on demand by right-clicking on them in gcylc or using the +``cylc kill`` command. + + +Execution Time Limit +-------------------- + +You can specify an ``execution time limit`` for all supported job +submission methods. E.g.: + +.. 
code-block:: cylc + + [runtime] + [[task-x]] + [[[job]]] + execution time limit = PT1H + +For tasks running with ``background`` or ``at``, their jobs +will be wrapped using the ``timeout`` command. For all other methods, +the relevant time limit directive will be added to their job files. + +The ``execution time limit`` setting will also inform the suite when a +task job should complete by. If a task job has not reported completing within +the specified time, the suite will poll the task job. (The default +polling intervals are PT1M, PT2M, PT7M. The accumulated times for these intervals will be +roughly 1 minute, 1 + 2 = 3 minutes and 1 + 2 + 7 = 10 minutes after a task job +exceeds its execution time limit.) + + +Execution Time Limit and Execution Timeout +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you specify an ``execution time limit`` the +``execution timeout event handler`` will only be called if the job has +not completed after the final poll (by default, 10 min after the time limit). +This should only happen if the submission method you are using is not enforcing +wallclock limits (unlikely) or you are unable to contact the machine to confirm +the job status. + +If you specify an ``execution timeout`` and not an +``execution time limit`` then the +``execution timeout event handler`` will be called as soon as the +specified time is reached. The job will also be polled to check its latest +status (possibly resulting in an update in its status and the calling of the +relevant event handler). This behaviour is deprecated, and users should avoid +relying on it. + +If you specify an ``execution timeout`` and an +``execution time limit`` then the execution timeout setting will be +ignored. + + +.. _CustomJobSubmissionMethods: + +Custom Job Submission Methods +----------------------------- + +Defining a new batch system handler requires a little Python programming. Use +the built-in handlers as examples, and read the documentation in +``lib/cylc/batch_sys_manager.py``. 
+ + +An Example +^^^^^^^^^^ + +The following ``qsub.py`` module overrides the built-in *pbs* +batch system handler to change the directive prefix from ``#PBS`` to +``#QSUB``: + +.. code-block:: python + + #!/usr/bin/env python2 + + from cylc.batch_sys_handlers.pbs import PBSHandler + + class QSUBHandler(PBSHandler): + DIRECTIVE_PREFIX = "#QSUB " + + BATCH_SYSTEM_HANDLER = QSUBHandler() + +If this is in the Python search path (see +:ref:`Where To Put Batch System Handler Modules` below) you can use it by +name in suite configurations: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "a" + [runtime] + [[root]] + [[[job]]] + batch system = qsub # <---! + execution time limit = PT1M + [[[directives]]] + -l nodes = 1 + -q = long + -V = + +Generate a job script to see the resulting directives: + +.. code-block:: bash + + $ cylc register test $HOME/test + $ cylc jobscript test a.1 | grep QSUB + #QSUB -e /home/oliverh/cylc-run/my.suite/log/job/1/a/01/job.err + #QSUB -l nodes=1 + #QSUB -l walltime=60 + #QSUB -o /home/oliverh/cylc-run/my.suite/log/job/1/a/01/job.out + #QSUB -N a.1 + #QSUB -q long + #QSUB -V + +(Of course this suite will fail at run time because we only changed the +directive format, and PBS does not accept ``#QSUB`` directives in +reality.) + + +.. _Where To Put Batch System Handler Modules: + +Where To Put Batch System Handler Modules +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +*Custom batch system handlers must be installed on suite and job +hosts* in one of these locations: + +- under ``SUITE-DEF-PATH/lib/python/`` +- under ``CYLC-PATH/lib/cylc/batch_sys_handlers/`` +- or anywhere in ``$PYTHONPATH`` + +.. note:: + + For Rose users: ``rose suite-run`` automatically installs + ``SUITE-DEF-PATH/lib/python/`` to job hosts. 
diff --git a/doc/src/terminology.rst b/doc/src/terminology.rst new file mode 100644 index 00000000000..a7c094d21b8 --- /dev/null +++ b/doc/src/terminology.rst @@ -0,0 +1,43 @@ +Cylc Terminology +================ + + +Jobs and Tasks +-------------- + +A *job* is a program or script that runs on a computer, and a *task* is +a workflow abstraction - a node in the suite dependency graph - that represents +a job. + + +Cycle Points +------------ + +A *cycle point* is a particular date-time (or integer) point in a sequence +of date-time (or integer) points. Each cylc task has a private cycle point and +can advance independently to subsequent cycle points. It may sometimes be +convenient, however, to refer to the "current cycle point" of a suite (or the +previous or next one, etc.) with reference to a particular task, or in the +sense of all task instances that "belong to" a particular cycle point. But +keep in mind that different tasks may pass through the "current cycle point" +(etc.) at different times as the suite evolves. + + +.. insert vertical whitespace else sidebar menu overhangs short page (ugly) + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/doc/src/tutorial.rst b/doc/src/tutorial.rst new file mode 100644 index 00000000000..2237771c802 --- /dev/null +++ b/doc/src/tutorial.rst @@ -0,0 +1,1394 @@ +.. _Tutorial: + +Tutorial +======== + +This section provides a hands-on tutorial introduction to basic cylc +functionality. + +User Config File +---------------- + +Some settings affecting cylc's behaviour can be defined in site and user +*global config files*. For example, to choose the text editor invoked by +cylc on suite configurations: + +.. code-block:: cylc + + # $HOME/.cylc/$(cylc --version)/global.rc + [editors] + terminal = vim + gui = gvim -f + +- For more on site and user global config files + see :ref:`SiteAndUserConfiguration` and :ref:`SiteRCReference`. + + +.. 
_Configure Environment on Job Hosts: + +Configure Environment on Job Hosts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +See :ref:`Configure Site Environment on Job Hosts` for information. + + +.. _CUI: + +User Interfaces +--------------- + +You should have access to the cylc command line (CLI) and graphical (GUI) user +interfaces once cylc has been installed as described in +Section :ref:`InstallCylc`. + +Command Line Interface (CLI) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The command line interface is unified under a single top level +``cylc`` command that provides access to many sub-commands +and their help documentation. + +.. code-block:: bash + + $ cylc help # Top level command help. + $ cylc run --help # Example command-specific help. + +Command help transcripts are printed in :ref:`CommandReference` and are +available from the GUI Help menu. + +Cylc is *scriptable* - the error status returned by commands can be +relied on. + +Graphical User Interface (GUI) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The cylc GUI covers the same functionality as the CLI, but it has more +sophisticated suite monitoring capability. It can start and stop suites, or +connect to suites that are already running; in either case, shutting down the +GUI does not affect the suite itself. + +.. code-block:: bash + + $ gcylc & # or: + $ cylc gui & # Single suite control GUI. + $ cylc gscan & # Multi-suite monitor GUI. + +Clicking on a suite in gscan, shown in :numref:`fig-gscan`, opens a +gcylc instance for it. + +Suite Configuration +------------------- + +Cylc suites are defined by extended-INI format ``suite.rc`` +files (the main file format extension is section nesting). These reside +in *suite configuration directories* that may also contain a +``bin`` directory and any other suite-related files. + +- For more on the suite configuration file format, see :ref:`SuiteDefinition` + and :ref:`SuiteRCReference`. 
+ +Suite Registration +------------------ + +Suite registration creates a run directory (under ``~/cylc-run/`` by +default) and populates it with authentication files and a symbolic link to a +suite configuration directory. Cylc commands that parse suites can take +the file path or the suite name as input. Commands that interact with running +suites have to target the suite by name. + +.. code-block:: bash + + # Target a suite by file path: + $ cylc validate /path/to/my/suite/suite.rc + $ cylc graph /path/to/my/suite/suite.rc + + # Register a suite: + $ cylc register my.suite /path/to/my/suite/ + + # Target a suite by name: + $ cylc graph my.suite + $ cylc validate my.suite + $ cylc run my.suite + $ cylc stop my.suite + # etc. + + +.. _tutPassphrases: + +Suite Passphrases +----------------- + +Registration (above) also generates a suite-specific passphrase file under +``.service/`` in the suite run directory. It is loaded by the suite +server program at start-up and used to authenticate connections from client +programs. + +Possession of a suite's passphrase file gives full control over it. +Without it, the information available to a client is determined by the suite's +public access privilege level. + +For more on connection authentication, suite passphrases, and public access, +see :ref:`ConnectionAuthentication`. + + +.. _ImportTheExampleSuites: + +Import The Example Suites +------------------------- + +Run the following command to copy cylc's example suites and register them for +your own use: + +.. code-block:: bash + + $ cylc import-examples /tmp + + +Rename The Imported Tutorial Suites +----------------------------------- + +Suites can be renamed by simply renaming (i.e. moving) their run directories. +Make the tutorial suite names shorter, and print their locations with +``cylc print``: + +.. 
code-block:: bash + + $ mv ~/cylc-run/examples/$(cylc --version)/tutorial ~/cylc-run/tut + $ cylc print -ya tut + tut/oneoff/jinja2 | /tmp/cylc-examples/7.0.0/tutorial/oneoff/jinja2 + tut/cycling/two | /tmp/cylc-examples/7.0.0/tutorial/cycling/two + tut/cycling/three | /tmp/cylc-examples/7.0.0/tutorial/cycling/three + # ... + +See ``cylc print --help`` for other display options. + +Suite Validation +---------------- + +Suite configurations can be validated to detect syntax (and other) errors: + +.. code-block:: bash + + # pass: + $ cylc validate tut/oneoff/basic + Valid for cylc-6.0.0 + $ echo $? + 0 + # fail: + $ cylc validate my/bad/suite + Illegal item: [scheduling]special tusks + $ echo $? + 1 + + +Hello World in Cylc +------------------- + +**suite**: ``tut/oneoff/basic`` + +Here's the traditional *Hello World* program rendered as a cylc +suite: + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/basic/suite.rc + :language: cylc + +Cylc suites feature a clean separation of scheduling configuration, +which determines *when* tasks are ready to run; and runtime +configuration, which determines *what* to run (and *where* and +*how* to run it) when a task is ready. In this example the +``[scheduling]`` section defines a single task called +``hello`` that triggers immediately when the suite starts +up. When the task finishes the suite shuts down. That this is a +*dependency graph* will be more obvious when more tasks are added. +Under the ``[runtime]`` section the +``script`` item defines a simple inlined +implementation for ``hello``: it sleeps for ten seconds, +then prints ``Hello World!``, and exits. This ends up in a *job script* +generated by cylc to encapsulate the task (below) and, +thanks to some defaults designed to allow quick +prototyping of new suites, it is submitted to run as a background job on +the suite host. In fact cylc even provides a default task implementation +that makes the entire ``[runtime]`` section technically optional: + +.. 
literalinclude:: ../../etc/examples/tutorial/oneoff/minimal/suite.rc + :language: cylc + +(the resulting *dummy task* just prints out some identifying +information and exits). + +Editing Suites +-------------- + +The text editor invoked by Cylc on suite configurations is determined +by cylc site and user global config files, as shown above in :ref:`CUI`. +Check that you have renamed the tutorial examples suites as described +just above and open the *Hello World* suite in your text editor: + +.. code-block:: bash + + $ cylc edit tut/oneoff/basic # in-terminal + $ cylc edit -g tut/oneoff/basic & # or GUI + +Alternatively, start gcylc on the suite: + +.. code-block:: bash + + $ gcylc tut/oneoff/basic & + +and choose *Suite* ``->`` *Edit* from the menu. + +The editor will be invoked from within the suite configuration directory +for easy access to other suite files (in this case there are none). There are +syntax highlighting control files for several text editors under +``/etc/syntax/``; see in-file comments for installation +instructions. + + +.. _RunningSuitesCLI: + +Running Suites +-------------- + +CLI +^^^ + +Run ``tut/oneoff/basic`` using the ``cylc run`` command. +As a suite runs detailed timestamped information is written to a *suite log* +and progress can be followed with cylc's suite monitoring tools (below). +By default a suite server program daemonizes after printing a short message so +that you can exit the terminal or even log out without killing the suite: + +.. code-block:: bash + + $ cylc run tut/oneoff/basic + ._. + | | The Cylc Suite Engine [7.0.0] + ._____._. ._| |_____. Copyright (C) 2008-2018 NIWA & British Crown (Met Office) & Contributors. + | .___| | | | | .___| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + | !___| !_! | | !___. This program comes with ABSOLUTELY NO WARRANTY; + !_____!___. |_!_____! see `cylc warranty`. It is free software, you + .___! | are welcome to redistribute it under certain + !_____! 
conditions; see `cylc conditions`. + + *** listening on https://nwp-1:43027/ *** + + To view suite server program contact information: + $ cylc get-suite-contact tut/oneoff/basic + + Other ways to see if the suite is still running: + $ cylc scan -n '\btut/oneoff/basic\b' nwp-1 + $ cylc ping -v --host=nwp-1 tut/oneoff/basic + $ ps h -opid,args 123456 # on nwp-1 + + +If you're quick enough (this example only takes 10-15 seconds to run) the +``cylc scan`` command will detect the running suite: + +.. code-block:: bash + + $ cylc scan + tut/oneoff/basic oliverh@nwp-1:43027 + +.. note:: + + You can use the ``--no-detach`` and ``--debug`` options + to ``cylc run`` to prevent the suite from daemonizing (i.e. to make + it stay attached to your terminal until it exits). + +When a task is ready cylc generates a *job script* to run it, by +default as a background job on the suite host. The job process ID is +captured, and job output is directed to log files in standard +locations under the suite run directory. + +Log file locations relative to the suite run directory look like +``log/job/1/hello/01/`` where the first digit is the *cycle point* of +the task ``hello`` (for non-cycling tasks this is just ``1``); and the +final ``01`` is the *submit number* (so that job logs do not get +overwritten if a job is resubmitted for any reason). + +The suite shuts down automatically once all tasks have succeeded. + +GUI +^^^ + +The cylc GUI can start and stop suites, or (re)connect to suites that +are already running: + +.. code-block:: bash + + $ cylc gui tut/oneoff/basic & + +Use the tool bar *Play* button, or the *Control* ``->`` *Run* menu item, to +run the suite again. You may want to alter the suite configuration slightly +to make the task take longer to run. Try right-clicking on the +``hello`` task to view its output logs. The relative merits of the three +*suite views* - dot, text, and graph - will be more apparent later when we +have more tasks. 
Closing the GUI does not affect the suite itself. + + +.. _RemoteSuites: + +Remote Suites +------------- + +Suites can run on *localhost* or on a *remote* host. + +To start up a suite on a given host, specify it explicitly via the +``--host=`` option to a ``run`` or ``restart`` command. + +Otherwise, Cylc selects the best host to start up on from allowed +``run hosts`` as specified in the global config under +``[suite servers]``, which defaults to localhost. Should there be +more than one allowed host set, the *most suitable* is determined +according to the settings specified under ``[[run host select]]``, +namely exclusion of hosts not meeting suitability *thresholds*, if +provided, then ranking according to the given *rank* method. + +Discovering Running Suites +-------------------------- + +Suites that are currently running can be detected with command line or +GUI tools: + +.. code-block:: bash + + # list currently running suites and their port numbers: + $ cylc scan + tut/oneoff/basic oliverh@nwp-1:43001 + + # GUI summary view of running suites: + $ cylc gscan & + +The scan GUI is shown in :numref:`fig-gscan`; clicking on a suite in +it opens gcylc. + + +Task Identifiers +---------------- + +At run time, task instances are identified by *name*, which is +determined entirely by the suite configuration, and a *cycle point* which is +usually a date-time or an integer: + +.. code-block:: bash + + foo.20100808T00Z # a task with a date-time cycle point + bar.1 # a task with an integer cycle point (could be non-cycling) + +Non-cycling tasks usually just have the cycle point ``1``, but this +still has to be used to target the task instance with cylc commands. + +Job Submission: How Tasks Are Executed +-------------------------------------- + +**suite**: ``tut/oneoff/jobsub`` + +Task *job scripts* are generated by cylc to wrap the task implementation +specified in the suite configuration (environment, script, etc.) 
in +error trapping code, messaging calls to report task progress back to the suite +server program, and so forth. Job scripts are written to the *suite job log +directory* where they can be viewed alongside the job output logs. They +can be accessed at run time by right-clicking on the task in the cylc GUI, or +printed to the terminal: + +.. code-block:: bash + + $ cylc cat-log tut/oneoff/basic hello.1 + + +This command can also print the suite log (and stdout and stderr for suites +in daemon mode) and task stdout and stderr logs (see +``cylc cat-log --help``). + +A new job script can also be generated on the fly for inspection: + +.. code-block:: bash + + $ cylc jobscript tut/oneoff/basic hello.1 + +Take a look at the job script generated for ``hello.1`` during +the suite run above. The custom scripting should be clearly visible +toward the bottom of the file. + +The ``hello`` task in the first tutorial suite defaults to +running as a background job on the suite host. To submit it to the Unix +``at`` scheduler instead, configure its job submission settings +as in ``tut/oneoff/jobsub``: + +.. code-block:: cylc + + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + [[[job]]] + batch system = at + +Run the suite again after checking that ``at`` is running on your +system. + +Cylc supports a number of different batch systems. Tasks +submitted to external batch queuing systems like ``at``, +``PBS``, ``SLURM``, ``Moab``, or ``LoadLeveler``, are displayed as +*submitted* in the cylc GUI until they start executing. + +- For more on task job scripts, see :ref:`JobScripts`. +- For more on batch systems, see :ref:`AvailableMethods`. + + +Locating Suite And Task Output +------------------------------ + +If the ``--no-detach`` option is not used, suite stdout and +stderr will be directed to the suite run directory along with the +time-stamped suite log file, and task job scripts and job logs +(task stdout and stderr). 
The default suite run directory location is +``$HOME/cylc-run``: + +.. code-block:: bash + + $ tree $HOME/cylc-run/tut/oneoff/basic/ + |-- .service # location of run time service files + | |-- contact # detail on how to contact the running suite + | |-- db # private suite run database + | |-- passphrase # passphrase for client authentication + | |-- source # symbolic link to source directory + | |-- ssl.cert # SSL certificate for the suite server + | `-- ssl.pem # SSL private key + |-- cylc-suite.db # back compat symlink to public suite run database + |-- share # suite share directory (not used in this example) + |-- work # task work space (sub-dirs are deleted if not used) + | `-- 1 # task cycle point directory (or 1) + | `-- hello # task work directory (deleted if not used) + |-- log # suite log directory + | |-- db # public suite run database + | |-- job # task job log directory + | | `-- 1 # task cycle point directory (or 1) + | | `-- hello # task name + | | |-- 01 # task submission number + | | | |-- job # task job script + | | | `-- job-activity.log # task job activity log + | | | |-- job.err # task stderr log + | | | |-- job.out # task stdout log + | | | `-- job.status # task status file + | | `-- NN -> 01 # symlink to latest submission number + | `-- suite # suite server log directory + | |-- err # suite server stderr log (daemon mode only) + | |-- out # suite server stdout log (daemon mode only) + | `-- log # suite server event log (timestamped info) + +The suite run database files, suite environment file, +and task status files are used internally by cylc. Tasks execute in +private ``work/`` directories that are deleted automatically +if empty when the task finishes. The suite +``share/`` directory is made available to all tasks (by +``$CYLC_SUITE_SHARE_DIR``) as a common share space. 
The task submission +number increments from ``1`` if a task retries; this is used as a +sub-directory of the log tree to avoid overwriting log files from earlier +job submissions. + +The top level run directory location can be changed in site and user +config files if necessary, and the suite share and work locations can be +configured separately because of the potentially larger disk space +requirement. + +Task job logs can be viewed by right-clicking on tasks in the gcylc +GUI (so long as the task proxy is live in the suite), manually +accessed from the log directory (of course), or printed to the terminal +with the ``cylc cat-log`` command: + +.. code-block:: bash + + # suite logs: + $ cylc cat-log tut/oneoff/basic # suite event log + $ cylc cat-log -o tut/oneoff/basic # suite stdout log + $ cylc cat-log -e tut/oneoff/basic # suite stderr log + # task logs: + $ cylc cat-log tut/oneoff/basic hello.1 # task job script + $ cylc cat-log -o tut/oneoff/basic hello.1 # task stdout log + $ cylc cat-log -e tut/oneoff/basic hello.1 # task stderr log + +- For a web-based interface to suite and task logs (and much more), + see *Rose* in :ref:`SuiteStorageEtc`. +- For more on environment variables supplied to tasks, such as + ``$CYLC_SUITE_SHARE_DIR``, see :ref:`TaskExecutionEnvironment`. + + +.. _ViewingSuiteLogsCylcReview: + +Viewing Suite Logs in a Web Browser: Cylc Review +------------------------------------------------ + +The Cylc Review web service displays suite job logs and other information in +web pages, as shown in :numref:`fig-review-screenshot`. It can run under a +WSGI server (e.g. Apache with ``mod_wsgi``) as a service for all +users, or as an ad hoc service under your own user account. + +If a central Cylc Review service has been set up at your site (e.g. as +described in :ref:`ConfiguringCylcReviewApache`) the URL will typically be +something like ``http://<server>/cylc-review/``. + +.. _fig-review-screenshot: + +.. 
figure:: graphics/png/orig/cylc-review-screenshot.png + :align: center + + Screenshot of a Cylc Review web page + +Otherwise, to start an ad hoc Cylc Review service to view your own suite logs +(or those of others, if you have read access to them), run: + +.. code-block:: none + + setsid cylc review start 0</dev/null 1>/dev/null 2>&1 & + +The service should start at ``http://<hostname>:8080`` (the port number +can optionally be set on the command line). Service logs are written to +``~/.cylc/cylc-review*``. Run ``cylc review`` to view +status information, and ``cylc review stop`` to stop the service. + + +.. _RemoteTasks: + +Remote Tasks +------------ + +**suite**: ``tut/oneoff/remote`` + +The ``hello`` task in the first two tutorial suites defaults to +running on the suite host (see :ref:`RemoteSuites`). To make it run on a different +host instead change its runtime configuration as in ``tut/oneoff/remote``: + +.. code-block:: cylc + + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + [[[remote]]] + host = server1.niwa.co.nz + +In general, a *task remote* is a user account, other than the account +running the suite server program, where a task job is submitted to run. It can +be on the same machine running the suite or on another machine. + +A task remote account must satisfy several requirements: + +- Non-interactive ssh must be enabled from the account running the suite + server program to the account for submitting (and managing) the remote + task job. +- Network settings must allow communication *back* from the remote task + job to the suite, either by network ports or ssh, unless the last-resort + one-way *task polling* communication method is used. +- Cylc must be installed and runnable on the task remote account. Other + software dependencies like graphviz are not required there. +- Any files needed by a remote task must be installed on the task + host. 
In this example there is nothing to install because the + implementation of ``hello`` is inlined in the suite configuration + and thus ends up entirely contained within the task job script. + +If your username is different on the task host, you can add a ``User`` +setting for the relevant host in your ``~/.ssh/config``. +If you are unable to do so, the ``[[[remote]]]`` section also supports an +``owner=username`` item. + +If you configure a task account according to the requirements cylc will invoke +itself on the remote account (with a login shell by default) to create log +directories, transfer any essential service files, send the task job script +over, and submit it to run there by the configured batch system. + +Remote task job logs are saved to the suite run directory on the task remote, +not on the account running the suite. They can be retrieved by right-clicking +on the task in the GUI, or to have cylc pull them back to the suite account +automatically do this: + +.. code-block:: cylc + + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + [[[remote]]] + host = server1.niwa.co.nz + retrieve job logs = True + +This suite will attempt to ``rsync`` job logs from the remote +host each time a task job completes. + +Some batch systems have considerable delays between the time when the job +completes and when it writes the job logs in its normal location. If this is +the case, you can configure an initial delay and retry delays for job log +retrieval by setting some delays. E.g.: + +.. code-block:: cylc + + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + [[[remote]]] + host = server1.niwa.co.nz + retrieve job logs = True + # Retry after 10 seconds, 1 minute and 3 minutes + retrieve job logs retry delays = PT10S, PT1M, PT3M + + +Finally, if the disk space of the suite host is limited, you may want to set +``[[[remote]]]retrieve job logs max size=SIZE``. 
The value of SIZE can +be anything that is accepted by the ``--max-size=SIZE`` option of the +``rsync`` command. E.g.: + +.. code-block:: cylc + + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + [[[remote]]] + host = server1.niwa.co.nz + retrieve job logs = True + # Don't get anything bigger than 10MB + retrieve job logs max size = 10M + +It is worth noting that cylc uses the existence of a job's ``job.out`` +or ``job.err`` in the local file system to indicate a successful job +log retrieval. If ``retrieve job logs max size=SIZE`` is set and both +``job.out`` and ``job.err`` are bigger than ``SIZE`` +then cylc will consider the retrieval as failed. If retry delays are specified, +this will trigger some useless (but harmless) retries. If this occurs +regularly, you should try the following: + +- Reduce the verbosity of STDOUT or STDERR from the task. +- Redirect the verbosity from STDOUT or STDERR to an alternate log file. +- Adjust the size limit with tolerance to the expected size of STDOUT or + STDERR. + +- For more on remote tasks see :ref:`RunningTasksOnARemoteHost` +- For more on task communications, see :ref:`TaskComms`. +- For more on suite passphrases and authentication, + see :ref:`tutPassphrases` and :ref:`ConnectionAuthentication`. + + +Task Triggering +--------------- + +**suite**: ``tut/oneoff/goodbye`` + +To make a second task called ``goodbye`` trigger after +``hello`` finishes successfully, return to the original +example, ``tut/oneoff/basic``, and change the suite graph +as in ``tut/oneoff/goodbye``: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "hello => goodbye" + +or to trigger it at the same time as ``hello``, + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "hello & goodbye" + +and configure the new task's behaviour under ``[runtime]``: + +.. code-block:: cylc + + [runtime] + [[goodbye]] + script = "sleep 10; echo Goodbye World!" 
+ +Run ``tut/oneoff/goodbye`` and check the output from the new task: + +.. code-block:: bash + + $ cat ~/cylc-run/tut/oneoff/goodbye/log/job/1/goodbye/01/job.out + # or + $ cylc cat-log -o tut/oneoff/goodbye goodbye.1 + JOB SCRIPT STARTING + cylc (scheduler - 2014-08-14T15:09:30+12): goodbye.1 started at 2014-08-14T15:09:30+12 + cylc Suite and Task Identity: + Suite Name : tut/oneoff/goodbye + Suite Host : oliverh-34403dl.niwa.local + Suite Port : 43001 + Suite Owner : oliverh + Task ID : goodbye.1 + Task Host : nwp-1 + Task Owner : oliverh + Task Try No.: 1 + + Goodbye World! + cylc (scheduler - 2014-08-14T15:09:40+12): goodbye.1 succeeded at 2014-08-14T15:09:40+12 + JOB SCRIPT EXITING (TASK SUCCEEDED) + + +Task Failure And Suicide Triggering +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**suite**: ``tut/oneoff/suicide`` + +Task names in the graph string can be qualified with a state indicator +to trigger off task states other than success: + +.. code-block:: cylc + + graph = """ + a => b # trigger b if a succeeds + c:submit => d # trigger d if c submits + e:finish => f # trigger f if e succeeds or fails + g:start => h # trigger h if g starts executing + i:fail => j # trigger j if i fails + """ + +A common use of this is to automate recovery from known modes of failure: + +.. code-block:: cylc + + graph = "goodbye:fail => really_goodbye" + +i.e. if task ``goodbye`` fails, trigger another task that +(presumably) really says goodbye. + +Failure triggering generally requires use of *suicide triggers* as +well, to remove the recovery task if it isn't required (otherwise it +would hang about indefinitely in the waiting state): + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """hello => goodbye + goodbye:fail => really_goodbye + goodbye => !really_goodbye # suicide""" + +This means if ``goodbye`` fails, trigger +``really_goodbye``; and otherwise, if ``goodbye`` +succeeds, remove ``really_goodbye`` from the suite. 
+ +Try running ``tut/oneoff/suicide``, which also configures +the ``hello`` task's runtime to make it fail, to see how this works. + +- For more on suite dependency graphs see :ref:`ConfiguringScheduling`. +- For more on task triggering see :ref:`TriggerTypes`. + + +Runtime Inheritance +------------------- + +**suite**: ``tut/oneoff/inherit`` + +The ``[runtime]`` section is actually a *multiple inheritance* hierarchy. +Each subsection is a *namespace* that represents a task, or if it is +inherited by other namespaces, a *family*. This allows common configuration +to be factored out of related tasks very efficiently. + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/inherit/suite.rc + :language: cylc + +The ``[root]`` namespace provides defaults for all tasks in the suite. +Here both tasks inherit ``script`` from ``root``, which they +customize with different values of the environment variable +``$GREETING``. + +.. note:: + + Inheritance from ``root`` is + implicit; from other parents an explicit ``inherit = PARENT`` + is required, as shown below. + +- For more on runtime inheritance, see :ref:`NIORP`. + +Triggering Families +------------------- + +**suite**: ``tut/oneoff/ftrigger1`` + +Task families defined by runtime inheritance can also be used as +shorthand in graph trigger expressions. To see this, consider two +"greeter" tasks that trigger off another task ``foo``: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "foo => greeter_1 & greeter_2" + +If we put the common greeting functionality of ``greeter_1`` +and ``greeter_2`` into a special ``GREETERS`` family, +the graph can be expressed more efficiently like this: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "foo => GREETERS" + +i.e. if ``foo`` succeeds, trigger all members of +``GREETERS`` at once. Here's the full suite with runtime +hierarchy shown: + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/ftrigger1/suite.rc + :language: cylc + +.. 
note:: + + We recommend given ALL-CAPS names to task families to help + distinguish them from task names. However, this is just a convention. + +Experiment with the ``tut/oneoff/ftrigger1`` suite to see +how this works. + +Triggering Off Of Families +-------------------------- + +**suite**: ``tut/oneoff/ftrigger2`` + +Tasks (or families) can also trigger *off* other families, but +in this case we need to specify what the trigger means in terms of +the upstream family members. Here's how to trigger another task +``bar`` if all members of ``GREETERS`` succeed: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """foo => GREETERS + GREETERS:succeed-all => bar""" + +Verbose validation in this case reports: + +.. code-block:: bash + + $ cylc val -v tut/oneoff/ftrigger2 + ... + Graph line substitutions occurred: + IN: GREETERS:succeed-all => bar + OUT: greeter_1:succeed & greeter_2:succeed => bar + ... + +Cylc ignores family member qualifiers like ``succeed-all`` on +the right side of a trigger arrow, where they don't make sense, to +allow the two graph lines above to be combined in simple cases: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = "foo => GREETERS:succeed-all => bar" + +Any task triggering status qualified by ``-all`` or +``-any``, for the members, can be used with a family trigger. +For example, here's how to trigger ``bar`` if all members +of ``GREETERS`` finish (succeed or fail) and any of them succeed: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + graph = """foo => GREETERS + GREETERS:finish-all & GREETERS:succeed-any => bar""" + +(use of ``GREETERS:succeed-any`` by itself here would trigger +``bar`` as soon as any one member of ``GREETERS`` +completed successfully). Verbose validation now begins to show how +family triggers can simplify complex graphs, even for this tiny +two-member family: + +.. code-block:: bash + + $ cylc val -v tut/oneoff/ftrigger2 + ... 
+ Graph line substitutions occurred: + IN: GREETERS:finish-all & GREETERS:succeed-any => bar + OUT: ( greeter_1:succeed | greeter_1:fail ) & \ + ( greeter_2:succeed | greeter_2:fail ) & \ + ( greeter_1:succeed | greeter_2:succeed ) => bar + ... + +Experiment with ``tut/oneoff/ftrigger2`` to see how this works. + +- For more on family triggering, see :ref:`FamilyTriggers`. + + +Suite Visualization +------------------- + +You can style dependency graphs with an optional +``[visualization]`` section, as shown in ``tut/oneoff/ftrigger2``: + +.. code-block:: cylc + + [visualization] + default node attributes = "style=filled" + [[node attributes]] + foo = "fillcolor=#6789ab", "color=magenta" + GREETERS = "fillcolor=#ba9876" + bar = "fillcolor=#89ab67" + +To display the graph in an interactive viewer: + +.. code-block:: bash + + $ cylc graph tut/oneoff/ftrigger2 & # dependency graph + $ cylc graph -n tut/oneoff/ftrigger2 & # runtime inheritance graph + +It should look like :numref:`fig-tut-hello-multi` (with the +``GREETERS`` family node expanded on the right). + +.. todo:: + Create sub-figures if possible: for now hacked as separate figures with + link to first, and caption on final, displayed figure. + +.. _fig-tut-hello-multi: + +.. figure:: graphics/png/orig/tut-hello-multi-1.png + :align: center + +.. figure:: graphics/png/orig/tut-hello-multi-2.png + :align: center + +.. figure:: graphics/png/orig/tut-hello-multi-3.png + :align: center + + The ``tut/oneoff/ftrigger2`` dependency and runtime inheritance graphs + + +Graph styling can be applied to entire families at once, and custom +"node groups" can also be defined for non-family groups. + + +External Task Scripts +--------------------- + +**suite**: ``tut/oneoff/external`` + +The tasks in our examples so far have all had inlined implementation, in +the suite configuration, but real tasks often need to call external +commands, scripts, or executables. 
To try this, let's return to the +basic Hello World suite and cut the implementation of the task +``hello`` out to a file ``hello.sh`` in the suite bin directory: + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/external/bin/hello.sh + :language: bash + +Make the task script executable, and change the ``hello`` task +runtime section to invoke it: + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/external/suite.rc + :language: cylc + +If you run the suite now the new greeting from the external task script +should appear in the ``hello`` task stdout log. This works +because cylc automatically adds the suite bin directory to +``$PATH`` in the environment passed to tasks via their job +scripts. To execute scripts (etc.) located elsewhere you can +refer to the file by its full file path, or set ``$PATH`` +appropriately yourself (this could be done via +``$HOME/.profile``, which is sourced at the top of the task job +script, or in the suite configuration itself). + +.. note:: + + The use of ``set -e`` above to make the script abort on + error. This allows the error trapping code in the task job script to + automatically detect unforeseen errors. + +Cycling Tasks +------------- + +**suite**: ``tut/cycling/one`` + +So far we've considered non-cycling tasks, which finish without spawning +a successor. + +Cycling is based around iterating through date-time or integer sequences. A +cycling task may run at each cycle point in a given sequence (cycle). For +example, a sequence might be a set of date-times every 6 hours starting from a +particular date-time. A cycling task may run for each date-time item (cycle +point) in that sequence. + +There may be multiple instances of this type of task running in parallel, if +the opportunity arises and their dependencies allow it. Alternatively, a +sequence can be defined with only one valid cycle point - in that case, a task +belonging to that sequence may only run once. + +Open the ``tut/cycling/one`` suite: + +.. 
literalinclude:: ../../etc/examples/tutorial/cycling/one/suite.rc + :language: cylc + +The difference between cycling and non-cycling suites is all in the +``[scheduling]`` section, so we will leave the +``[runtime]`` section alone for now (this will result in +cycling dummy tasks). + +.. note:: + + The graph is now defined under a new section heading that makes each + task under it have a succession of cycle points ending in ``00`` or + ``12`` hours, between specified initial and final cycle + points (or indefinitely if no final cycle point is given), as shown in + :numref:`fig-tut-one`. + +.. todo:: + Image out of date now. + +.. _fig-tut-one: + +.. figure:: graphics/png/orig/tut-one.png + :align: center + + The ``tut/cycling/one`` suite + +If you run this suite instances of ``foo`` will spawn in parallel out +to the *runahead limit*, and each ``bar`` will trigger off the +corresponding instance of ``foo`` at the same cycle point. The +runahead limit, which defaults to a few cycles but is configurable, prevents +uncontrolled spawning of cycling tasks in suites that are not constrained by +clock triggers in real time operation. + +Experiment with ``tut/cycling/one`` to see how cycling tasks work. + +ISO 8601 Date-Time Syntax +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The suite above is a very simple example of a cycling date-time workflow. More +generally, cylc comprehensively supports the ISO 8601 standard for date-time +instants, intervals, and sequences. Cycling graph sections can be specified +using full ISO 8601 recurrence expressions, but these may be simplified +by assuming context information from the suite - namely initial and final cycle +points. One form of the recurrence syntax looks like +``Rn/start-date-time/period`` (``Rn`` means run ``n`` times). In the example +above, if the initial cycle point +is always at ``00`` or ``12`` hours then ``[[[T00,T12]]]`` could be +written as ``[[[PT12H]]]``, which is short for +``[[[R/initial-cycle-point/PT12H/]]]`` - i.e. 
run every 12 hours +indefinitely starting at the initial cycle point. It is possible to add +constraints to the suite to only allow initial cycle points at ``00`` or +``12`` hours e.g. + +.. code-block:: cylc + + [scheduling] + initial cycle point = 20130808T00 + initial cycle point constraints = T00, T12 + +.. todo:: + Runahead factor now. + +- For a comprehensive description of ISO 8601 based date-time cycling, + see :ref:`AdvancedCycling` +- For more on runahead limiting in cycling suites, + see :ref:`RunaheadLimit`. + + +.. _TutInterCyclePointTriggers: + +Inter-Cycle Triggers +^^^^^^^^^^^^^^^^^^^^ + +**suite**: ``tut/cycling/two`` + +The ``tut/cycling/two`` suite adds inter-cycle dependence +to the previous example: + +.. code-block:: cylc + + [scheduling] + [[dependencies]] + # Repeat with cycle points of 00 and 12 hours every day: + [[[T00,T12]]] + graph = "foo[-PT12H] => foo => bar" + +For any given cycle point in the sequence defined by the +cycling graph section heading, ``bar`` triggers off +``foo`` as before, but now ``foo`` triggers off its own +previous instance ``foo[-PT12H]``. Date-time offsets in +inter-cycle triggers are expressed as ISO 8601 intervals (12 hours +in this case). :numref:`fig-tut-two` shows how this connects the +cycling graph sections together. + +.. _fig-tut-two: + +.. figure:: graphics/png/orig/tut-two.png + :align: center + + The ``tut/cycling/two`` suite + +Experiment with this suite to see how inter-cycle triggers work. + +.. note:: + + The first instance of ``foo``, at suite start-up, will + trigger immediately in spite of its inter-cycle trigger, because cylc + ignores dependence on points earlier than the initial cycle point. + However, the presence of an inter-cycle trigger usually implies something + special has to happen at start-up. 
If a model depends on its own previous + instance for restart files, for example, then some special process has to + generate the initial set of restart files when there is no previous cycle + point to do it. The following section shows one way to handle this + in cylc suites. + + +.. _initial-non-repeating-r1-tasks: + +Initial Non-Repeating (R1) Tasks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**suite**: ``tut/cycling/three`` + +Sometimes we want to be able to run a task at the initial cycle point, but +refrain from running it in subsequent cycles. We can do this by writing an +extra set of dependencies that are only valid at a single date-time cycle +point. If we choose this to be the initial cycle point, these will only apply +at the very start of the suite. + +The cylc syntax for writing this single date-time cycle point occurrence is +``R1``, which stands for ``R1/no-specified-date-time/no-specified-period``. +This is an adaptation of part of the ISO 8601 date-time standard's recurrence +syntax (``Rn/date-time/period``) with some special context information +supplied by cylc for the ``no-specified-*`` data. + +The ``1`` in the ``R1`` means run once. As we've specified +no date-time, Cylc will use the initial cycle point date-time by default, +which is what we want. We've also missed out specifying the period - this is +set by cylc to a zero amount of time in this case (as it never +repeats, this is not significant). + +For example, in ``tut/cycling/three``: + +.. code-block:: cylc + + [cylc] + cycle point time zone = +13 + [scheduling] + initial cycle point = 20130808T00 + final cycle point = 20130812T00 + [[dependencies]] + [[[R1]]] + graph = "prep => foo" + [[[T00,T12]]] + graph = "foo[-PT12H] => foo => bar" + +This is shown in :numref:`fig-tut-three`. + +.. note:: + + The time zone has been set to ``+1300`` in this case, + instead of UTC (``Z``) as before. If no time zone or UTC mode was set, + the local time zone of your machine will be used in the cycle points. 
+ +At the initial cycle point, ``foo`` will depend on ``foo[-PT12H]`` and also +on ``prep``: + +.. code-block:: cylc + + prep.20130808T0000+13 & foo.20130807T1200+13 => foo.20130808T0000+13 + +Thereafter, it will just look like e.g.: + +.. code-block:: cylc + + foo.20130808T0000+13 => foo.20130808T1200+13 + +However, in our initial cycle point example, the dependence on +``foo.20130807T1200+13`` will be ignored, because that task's cycle +point is earlier than the suite's initial cycle point and so it cannot run. +This means that the initial cycle point dependencies for ``foo`` +actually look like: + +.. code-block:: cylc + + prep.20130808T0000+13 => foo.20130808T0000+13 + + +.. _fig-tut-three: + +.. figure:: graphics/png/orig/tut-three.png + :align: center + + The ``tut/cycling/three`` suite + +- ``R1`` tasks can also be used to make something special + happen at suite shutdown, or at any single cycle point throughout the + suite run. For a full primer on cycling syntax, see :ref:`AdvancedCycling`. + + +.. _TutInteger: + +Integer Cycling +^^^^^^^^^^^^^^^ + +**suite**: ``tut/cycling/integer`` + +Cylc can do also do integer cycling for repeating workflows that are not +date-time based. + +Open the ``tut/cycling/integer`` suite, which is plotted in +:numref:`fig-tut-int`. + +.. literalinclude:: ../../etc/examples/tutorial/cycling/integer/suite.rc + :language: cylc + +.. _fig-tut-int: + +.. figure:: graphics/png/orig/tut-cyc-int.png + :align: center + + The ``tut/cycling/integer`` suite + +The integer cycling notation is intended to look similar to the ISO 8601 +date-time notation, but it is simpler for obvious reasons. The example suite +illustrates two recurrence forms, +``Rn/start-point/period`` and +``Rn/period/stop-point``, simplified somewhat using suite context +information (namely the initial and final cycle points). 
The first form is +used to run one special task called ``start`` at start-up, and for the +main cycling body of the suite; and the second form to run another special task +called ``stop`` in the final two cycles. The ``P`` character +denotes period (interval) just like in the date-time notation. +``R/1/P2`` would generate the sequence of points ``1,3,5,...``. + +- For more on integer cycling, including a more realistic usage example + see :ref:`IntegerCycling`. + + +Jinja2 +------ + +**suite**: ``tut/oneoff/jinja2`` + +Cylc has built in support for the Jinja2 template processor, which +allows us to embed code in suite configurations to generate the +final result seen by cylc. + +The ``tut/oneoff/jinja2`` suite illustrates two common +uses of Jinja2: changing suite content or structure based on the value +of a logical switch; and iteratively generating dependencies and runtime +configuration for groups of related tasks: + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/jinja2/suite.rc + :language: cylc + +To view the result of Jinja2 processing with the Jinja2 flag +``MULTI`` set to ``False``: + +.. code-block:: bash + + $ cylc view --jinja2 --stdout tut/oneoff/jinja2 + +.. code-block:: cylc + + [meta] + title = "A Jinja2 Hello World! suite" + [scheduling] + [[dependencies]] + graph = "hello" + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + +And with ``MULTI`` set to ``True``: + +.. code-block:: bash + + $ cylc view --jinja2 --stdout tut/oneoff/jinja2 + +.. code-block:: cylc + + [meta] + title = "A Jinja2 Hello World! suite" + [scheduling] + [[dependencies]] + graph = "hello => BYE" + [runtime] + [[hello]] + script = "sleep 10; echo Hello World!" + [[BYE]] + script = "sleep 10; echo Goodbye World!" 
+ [[ goodbye_0 ]] + inherit = BYE + [[ goodbye_1 ]] + inherit = BYE + [[ goodbye_2 ]] + inherit = BYE + + +Task Retry On Failure +--------------------- + +**suite**: ``tut/oneoff/retry`` + +Tasks can be configured to retry a number of times if they fail. +An environment variable ``$CYLC_TASK_TRY_NUMBER`` increments +from ``1`` on each successive try, and is passed to the task to allow +different behaviour on the retry: + +.. literalinclude:: ../../etc/examples/tutorial/oneoff/retry/suite.rc + :language: cylc + +If a task with configured retries fails, it goes into the *retrying* state +until the next retry delay is up, then it resubmits. It only enters the +*failed* state on a final definitive failure. + +If a task with configured retries is *killed* (by ``cylc kill`` or +via the GUI) it goes to the *held* state so that the operator can decide +whether to release it and continue the retry sequence or to abort the retry +sequence by manually resetting it to the *failed* state. + +Experiment with ``tut/oneoff/retry`` to see how this works. + +Other Users' Suites +------------------- + +If you have read access to another user's account (even on another host) +it is possible to use ``cylc monitor`` to look at their suite's +progress without full shell access to their account. To do this, you +will need to copy their suite passphrase to + +.. code-block:: bash + + $HOME/.cylc/SUITE_OWNER@SUITE_HOST/SUITE_NAME/passphrase + +(use of the host and owner names is optional here - see :ref:`passphrases`) +*and* also retrieve the port number of the running suite from: + +.. code-block:: bash + + ~SUITE_OWNER/cylc-run/SUITE_NAME/.service/contact + +Once you have this information, you can run + +.. code-block:: bash + + $ cylc monitor --user=SUITE_OWNER --port=SUITE_PORT SUITE_NAME + +to view the progress of their suite. + +Other suite-connecting commands work in the same way; see +:ref:`RemoteControl`. 
+ +Other Things To Try +------------------- + +Almost every feature of cylc can be tested quickly and easily with a +simple dummy suite. You can write your own, or start from one of the +example suites in ``/path/to/cylc/examples`` (see use of +``cylc import-examples`` above) - they all run "out the box" +and can be copied and modified at will. + +- Change the suite runahead limit in a cycling suite. +- Stop a suite mid-run with ``cylc stop``, and restart + it again with ``cylc restart``. +- Hold (pause) a suite mid-run with ``cylc hold``, + then modify the suite configuration and ``cylc reload`` it + before using ``cylc release`` to continue (you can also + reload without holding). +- Use the gcylc View menu to show the task state color key and + watch tasks in the ``task-states`` example evolve + as the suite runs. +- Manually re-run a task that has already completed or failed, + with ``cylc trigger``. +- Use an *internal queue* to prevent more than an alotted number + of tasks from running at once even though they are ready - + see :ref:`InternalQueues`. +- Configure task event hooks to send an email, or shut the suite down, + on task failure. diff --git a/doc/src/workflows.rst b/doc/src/workflows.rst new file mode 100644 index 00000000000..ff2ba1eba99 --- /dev/null +++ b/doc/src/workflows.rst @@ -0,0 +1,78 @@ +.. _Workflows For Cycling Systems: + +Workflows For Cycling Systems +============================= + +A model run and associated processing may need to be cycled for the following +reasons: + +- In real time forecasting systems, a new forecast may be initiated + at regular intervals when new real time data comes in. +- It may be convenient (or necessary, e.g. due to batch scheduler + queue limits) to split single long model runs into many smaller chunks, + each with associated pre- and post-processing workflows. + +Cylc provides two ways of constructing workflows for cycling systems: +*cycling workflows* and *parameterized tasks*. + + +.. 
_Cycling Workflows: + +Cycling Workflows +----------------- + +This is cylc's classic cycling mode as described in the Introduction. Each +instance of a cycling job is represented by a new instance of *the same task*, +with a new cycle point. The suite configuration defines patterns for +extending the workflow on the fly, so it can keep running indefinitely if +necessary. For example, to cycle ``model.exe`` on a monthly sequence we +could define a single task ``model``, an initial cycle point, and a +monthly sequence. Cylc then generates the date-time sequence and creates a new +task instance for each cycle point as it comes up. Workflow dependencies are +defined generically with respect to the "current cycle point" of the tasks +involved. + +This is the only sensible way to run very large suites or operational suites +that need to continue cycling indefinitely. The cycling is configured with +standards-based ISO 8601 date-time *recurrence expressions*. Multiple +cycling sequences can be used at once in the same suite. See +:ref:`ConfiguringScheduling`. + + +.. _Parameterized-Tasks-as-a-Proxy-for-Cycling: + +Parameterized Tasks as a Proxy for Cycling +------------------------------------------ + +It is also possible to run cycling jobs with a pre-defined static workflow in +which each instance of a cycling job is represented by *a different task*: +as far as the abstract workflow is concerned there is no cycling. The sequence +of tasks can be constructed efficiently, however, using cylc's built-in suite +parameters (:ref:`Parameterized Cycling`) or explicit Jinja2 loops +(:ref:`Jinja`). + +For example, to run ``model.exe`` 12 times on a monthly cycle we could +loop over an integer parameter ``R = 0, 1, 2, ..., 11`` to define tasks +``model-R0, model-R1, model-R2, ...model-R11``, and the parameter +values could be multiplied by the interval ``P1M`` (one month) to get +the start point for the corresponding model run. 
+ +This method is only good for smaller workflows of finite duration because every +single task has to be mapped out in advance, and cylc has to be aware of all of +them throughout the entire run. Additionally Cylc's *cycling workflow* +capabilities (above) are more powerful, more flexible, and generally easier to +use (Cylc will generate the cycle point date-times for you, for instance), so +that is the recommended way to drive most cycling systems. + +The primary use for parameterized tasks in cylc is to generate ensembles and +other groups of related tasks at the same cycle point, not as a proxy for +cycling. + + +Mixed Cycling Workflows +----------------------- + +For completeness we note that parameterized cycling can be used within a +cycling workflow. For example, in a daily cycling workflow long (daily) +model runs could be split into four shorter runs by parameterized cycling. +A simpler six-hourly cycling workflow should be considered first, however. diff --git a/lib/cylc/cfgspec/globalcfg.py b/lib/cylc/cfgspec/globalcfg.py index f09ffc78508..8efbd2e6b49 100644 --- a/lib/cylc/cfgspec/globalcfg.py +++ b/lib/cylc/cfgspec/globalcfg.py @@ -89,16 +89,8 @@ 'documentation': { 'files': { - 'html index': [ - VDR.V_STRING, '$CYLC_DIR/doc/install/index.html'], - 'pdf user guide': [ - VDR.V_STRING, '$CYLC_DIR/doc/install/cylc-user-guide.pdf'], - 'multi-page html user guide': [ - VDR.V_STRING, - '$CYLC_DIR/doc/install/html/multi/cug-html.html'], - 'single-page html user guide': [ - VDR.V_STRING, - '$CYLC_DIR/doc/install/html/single/cug-html.html'], + 'html user guides': [ + VDR.V_STRING, '$CYLC_DIR/doc/built-sphinx/index.html'], }, 'urls': { 'internet homepage': [VDR.V_STRING, 'http://cylc.github.io/cylc/'], diff --git a/tests/cylc-doc/00-cylc-doc.t b/tests/cylc-doc/00-cylc-doc.t index 4df1f585763..bd9ec163f20 100644 --- a/tests/cylc-doc/00-cylc-doc.t +++ b/tests/cylc-doc/00-cylc-doc.t @@ -23,19 +23,16 @@ set_test_number 2 create_test_globalrc "" " [documentation] 
[[files]] - pdf user guide = ${PWD}/doc/pdf/cug-pdf.pdf - multi-page html user guide = /home/bob/cylc/cylc.git/doc/html/multi/cug-html.html - html index = /home/bob/cylc/cylc.git/doc/index.html - single-page html user guide = /home/bob/cylc/cylc.git/doc/html/single/cug-html.html + html user guides = ${PWD}/doc/built-sphinx/index.html [[urls]] internet homepage = http://cylc.github.com/cylc/ local index = http://localhost/cylc/index.html" #------------------------------------------------------------------------------- -mkdir -p doc/pdf -touch doc/pdf/cug-pdf.pdf -cylc doc -s -p > stdout1.txt +mkdir -p doc/built-sphinx +touch doc/built-sphinx/index.html +cylc doc -s -g > stdout1.txt cmp_ok stdout1.txt <<__END__ -$PWD/doc/pdf/cug-pdf.pdf +${PWD}/doc/built-sphinx/index.html __END__ #------------------------------------------------------------------------------- cylc doc -s > stdout2.txt diff --git a/tests/cylc-doc/01-suite-urls.t b/tests/cylc-doc/01-suite-urls.t index 85c00903337..c378087b732 100644 --- a/tests/cylc-doc/01-suite-urls.t +++ b/tests/cylc-doc/01-suite-urls.t @@ -26,10 +26,7 @@ set_test_number 3 create_test_globalrc "" " [documentation] [[files]] - pdf user guide = ${PWD}/doc/pdf/cug-pdf.pdf - multi-page html user guide = /home/bob/cylc/cylc.git/doc/html/multi/cug-html.html - html index = /home/bob/cylc/cylc.git/doc/index.html - single-page html user guide = /home/bob/cylc/cylc.git/doc/html/single/cug-html.html + html user guides = ${PWD}/doc/built-sphinx/index.html [[urls]] internet homepage = http://cylc.github.com/cylc/ local index = http://localhost/cylc/index.html" diff --git a/tests/cylc-get-site-config/00-basic.t b/tests/cylc-get-site-config/00-basic.t index 3a676830feb..5373a2971de 100644 --- a/tests/cylc-get-site-config/00-basic.t +++ b/tests/cylc-get-site-config/00-basic.t @@ -32,7 +32,7 @@ run_ok $TEST_NAME.doc-section-python \ run_ok $TEST_NAME.multiple-secs \ cylc get-site-config --item='[documentation]' --item='[hosts]' run_ok 
$TEST_NAME.doc-entry \ - cylc get-site-config --item='[documentation][files]html index' + cylc get-site-config --item='[documentation][files]html user guides' run_fail $TEST_NAME.non-existent \ cylc get-site-config --item='[this][doesnt]exist' #------------------------------------------------------------------------------- diff --git a/tests/documentation/00-make.t b/tests/documentation/00-make.t index 27974ac9ce9..2405731b75b 100644 --- a/tests/documentation/00-make.t +++ b/tests/documentation/00-make.t @@ -25,6 +25,6 @@ fi set_test_number 1 #------------------------------------------------------------------------------- TEST_NAME=$TEST_NAME_BASE-make-docs -run_ok $TEST_NAME make -C $CYLC_DIR/doc <'/dev/null' +run_ok $TEST_NAME cylc make-docs <'/dev/null' #------------------------------------------------------------------------------- exit