diff --git a/lib/galaxy/tool_util/linters/_util.py b/lib/galaxy/tool_util/linters/_util.py index 1e12c82faa1d..3b899c43c427 100644 --- a/lib/galaxy/tool_util/linters/_util.py +++ b/lib/galaxy/tool_util/linters/_util.py @@ -1,5 +1,56 @@ import re +from Cheetah.Compiler import Compiler +from Cheetah.Template import Template + +from galaxy.util import unicodify + + +def get_code(tool_xml): + """get code used in the Galaxy tool""" + # get code from command and configfiles + code = "" + for tag in [".//command", ".//configfile"]: + for cn in tool_xml.findall(tag): + code += cn.text + # TODO not sure about this line, I get UnicodeError for some tools otherwise (trinity) + code = "#encoding utf-8\n" + code + code = Template.compile(source=code, compilerClass=Compiler, returnAClass=False) + code = unicodify(code) + # print(code) + + # template macros + # note: not necessary (& possible) for macro tokens which are expanded + # during loading the xml (and removed from the macros tag) + templatecode = "" + for cn in tool_xml.findall('.//macros/macro[@type="template"]'): + templatecode += cn.text + templatecode = "#encoding utf-8\n" + templatecode + templatecode = Template.compile(source=templatecode, compilerClass=Compiler, returnAClass=False) + templatecode = unicodify(templatecode) + + # get code from output filters (which use variables wo $) + filtercode = "" + for cn in tool_xml.findall("./outputs/*/filter"): + filtercode += cn.text + "\n" + + # get output labels which might contain code (which use variables like ${var}) + labelcode = "" + for cn in tool_xml.findall("./outputs/*[@label]"): + labelcode += cn.attrib["label"] + "\n" + + actioncode = "" + for cn in tool_xml.findall("./outputs//conditional[@name]"): + actioncode += cn.attrib["name"] + "\n" + for cn in tool_xml.findall('./outputs//action/option[@type="from_param"]'): + actioncode += cn.attrib.get("name", "") + "\n" + for cn in tool_xml.findall("./outputs//action/option/filter[@ref]"): + actioncode += cn.attrib["ref"] + "\n" + for cn in tool_xml.findall("./outputs//action[@default]"): + actioncode += cn.attrib["default"] + "\n" + + return code, templatecode, filtercode, labelcode, actioncode + def is_datasource(tool_xml): """Returns true if the tool is a datasource tool""" diff --git a/lib/galaxy/tool_util/linters/configfiles.py b/lib/galaxy/tool_util/linters/configfiles.py new file mode 100644 index 000000000000..f88e020ec6f0 --- /dev/null +++ b/lib/galaxy/tool_util/linters/configfiles.py @@ -0,0 +1,22 @@ +"""This module contains a linting function for a tool's configfiles section. +""" + + +def lint_configfiles(tool_xml, lint_ctx): + """""" + root = tool_xml.getroot() + configfiles = root.findall("configfiles") + if len(configfiles) > 1: + lint_ctx.error("More than one configfiles tag found, behavior undefined.") + return + elif len(configfiles) == 0: + return + configfiles = configfiles[0] + + configfile = configfiles.findall("configfile|inputs") + for cf in configfile: + if not ("name" in cf.attrib or "filename" in cf.attrib): + lint_ctx.error("Configfile needs to define name or filename.") + if cf.tag == "inputs": + if "data_style" in cf.attribs and cf.attribs["data_style"] in ["paths", "staging_path_and_source_path"]: + lint_ctx.error(f"Unknown data_style {cf.attribs['data_style']}for inputs configfile.") diff --git a/lib/galaxy/tool_util/linters/inputs.py b/lib/galaxy/tool_util/linters/inputs.py index e86372e2bdd7..e1f8a532f7fd 100644 --- a/lib/galaxy/tool_util/linters/inputs.py +++ b/lib/galaxy/tool_util/linters/inputs.py @@ -1,12 +1,18 @@ """This module contains a linting functions for tool inputs.""" import re -from galaxy.util import string_as_bool -from ._util import ( +from Cheetah.Parser import ParseError + +from galaxy.tool_util.linters._util import ( + get_code, is_datasource, is_valid_cheetah_placeholder, ) -from ..parser.util import _parse_name +from galaxy.tool_util.parser.util import ( + _parse_name, + _prepare_argument, +) +from galaxy.util import string_as_bool FILTER_TYPES = [ "data_meta", @@ -115,6 +121,77 @@ ] +def _param_in_compiled_cheetah(param, param_name, code): + # # accession $PATH.param_name.ATTRIBUTES in cheetah gives + # # VFFSL(SL,"PATH.param_name.ATTRIBUTES",True) + # # for PATH and ATTRIBUTES we assume simply ([\w.]+)? + # # since if wrong it will be discovered in a test + # if re.search(r'VFFSL\(SL,[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'],True\)', code): + # return True + + # # print("checking path") + # # accessing $PATH.param_name['ATTRIBUTE'] (ATTRIBUTE eg reverse) + # # or $PATH.param_name['ATTRIBUTE'].FURTHER_ATTRIBUTE gives + # # VFN(VFN(VFFSL(SL,"PATH",True),"param_name",True)['ATTRIBUTE'],"FURTHER_ATTRIBUTE",True) + # # we simply search VFN(VFFSL(SL,"PATH",True),"param_name",True) + # # ie ignore the ATTRIBUTES part and for PATH we assume again ([\w.]+)? + # if re.search(r'VFN\(VFFSL\(SL,[\"\'][\w.]+[\"\'],True\),[\"\']' + param_name + r'[\"\'],True\)', code): + # return True + + # # all these are covered by the rawExpr regular expression below + # # - $getVar("param_name") + # # gets + # # _v = VFFSL(SL,"getVar",False)("param_name") + # # if _v is not None: write(_filter(_v, rawExpr='$getVar("param_name")')) + # # - $getVar("getvar_default", "default") + # # gets + # # _v = VFFSL(SL,"getVar",False)("getvar_default", "default") + # # if _v is not None: write(_filter(_v, rawExpr='$getVar("getvar_default", "default")')) + # if re.search(r'VFFSL\(SL,[\"\']getVar[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'](, [^()]+)?\)', code): + # return True + + # # - $varExists("varexists") + # # gets + # # _v = VFFSL(SL,"varExists",False)("varexists") + # # if _v is not None: write(_filter(_v, rawExpr='$varExists("varexists")')) + # # - $hasVar("hasvar") + # # gets + # # _v = VFFSL(SL,"hasVar",False)("hasvar") + # # if _v is not None: write(_filter(_v, rawExpr='$hasVar("hasvar")')) + # if re.search(r'VFFSL\(SL,[\"\'](varExists|hasvar)[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\'\"]\)', code): + # return True + + # # Also the following is possible (TODO but we could forbid it) + # # $PATH["param_name"].ATTRIBUTES + # # VFFSL(SL,"PATH",True)["param_name"].ATTRIBUTES + # if re.search(r'VFFSL\(SL,[\"\'][\w.]+[\"\'],True\)\[[\"\']' + param_name + r'[\"\']\]', code): + # return True + # gets + # set $rg_id = str($rg_param('read_group_id_conditional').ID) + # rg_id = str(VFN(VFFSL(SL,"rg_param",False)('read_group_id_conditional'),"ID",True)) + if re.search(r"(VFN|VFFSL)\(.*[\"\']([\w.]+\.)?" + param_name + r"(\.[\w.]+)?[\"\']", code): + return True + + # #for i, r in enumerate($repeat_name) + # #set x = $str(r[ 'param_name' ]) + # #end for + # the loop content gets + # x = VFFSL(SL,"str",False)(r[ 'var_in_repeat' ]) + if re.search(r"(VFFSL|VFN)\(.*\[\s*[\"\']" + param_name + r"[\"\']\s*\]", code): + # print(f" G") + return True + + # "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }" + # gets + # _v = ",".join( [ str( r.var_in_repeat3 ) for r in VFFSL(SL,"repeat_name",True) ] ) + # if _v is not None: write(_filter(_v, rawExpr='${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }')) + if re.search(r"rawExpr=([\"\'])(.*[^\w])?" + param_name + r"([^\w].*)?(\1)", code): + return True + + # print("FALSE") + return False + + def lint_inputs(tool_xml, lint_ctx): """Lint parameters in a tool's inputs block.""" datasource = is_datasource(tool_xml) @@ -133,7 +210,7 @@ def lint_inputs(tool_xml, lint_ctx): continue param_name = _parse_name(param_attrib.get("name"), param_attrib.get("argument")) if "name" in param_attrib and "argument" in param_attrib: - if param_attrib.get("name") == _parse_name(None, param_attrib.get("argument")): + if param_attrib.get("name") == _prepare_argument(param_attrib.get("argument")): lint_ctx.warn( f"Param input [{param_name}] 'name' attribute is redundant if argument implies the same name.", node=param, @@ -171,6 +248,7 @@ def lint_inputs(tool_xml, lint_ctx): param_type = param_attrib["type"] # TODO lint for valid param type - attribute combinations + # TODO lint required attributes for valid each param type # lint for valid param type - child node combinations for ptcc in PARAM_TYPE_CHILD_COMBINATIONS: @@ -484,6 +562,99 @@ def lint_inputs(tool_xml, lint_ctx): ) +def lint_inputs_used(tool_xml, lint_ctx): + """ + check if the parameter is used somewhere, the following cases are considered: + - in the command or a configfile + - in an output filter + - if it is the select of conditional + - if there is an inputs configfile that is used + - for data parameters data_style needs to be set for the config file + - for other parameters the name of the config file must be used in the code + + a warning is shown if the parameter is used only in + - template macro, + - output action, or + - label of an output + + otherwise + - if the parameter only appears in change_format + - or if none of the previous cases apply + """ + tool_node = tool_xml.getroot() + try: + code, template_code, filter_code, label_code, action_code = get_code(tool_xml) + except ParseError as pe: + lint_ctx.error(f"Invalid cheetah found{pe}", node=tool_node) + return + + inputs = tool_xml.findall("./inputs//param") + for param in inputs: + try: + param_name = _parse_name(param.attrib.get("name"), param.attrib.get("argument")) + except ValueError: + continue + if _param_in_compiled_cheetah(param, param_name, code): + continue + if param_name in filter_code: + continue + if ( + tool_xml.find("./inputs//param/options[@from_dataset]") is not None + or tool_xml.find("./inputs//param/options/filter[@ref]") is not None + ): + continue + if param.getparent().tag == "conditional": + continue + + conf_inputs = tool_xml.find("./configfiles/inputs") + if conf_inputs is not None: # in this it's really hard to know + param_type = param.attrib.get("type") + if param_type in ["data", "collection"]: + if not conf_inputs.attrib.get("data_style"): + lint_ctx.error( + f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?", + node=param, + ) + inputs_conf_name = conf_inputs.attrib.get("name", conf_inputs.attrib.get("filename", None)) + if inputs_conf_name: + if not re.search(r"(^|[^\w])" + inputs_conf_name + r"([^\w]|$)", code): + lint_ctx.error( + f"Param input [{param_name}] only used inputs configfile {inputs_conf_name}, but this is not used in the command", + node=param, + ) + continue + + change_format = tool_xml.find(f"./outputs//change_format/when[@input='{param_name}']") is not None + template = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", template_code) is not None + action = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", action_code) is not None + label = re.search(r"[^\w]" + param_name + r"([^\w]|$)", label_code) is not None + if template or action or label: + if template + action + label == 1: + only = "only " + else: + only = "" + # TODO check that template is used?? + if template: + lint_ctx.warn( + f"Param input [{param_name}] {only}used in a template macro, use a macro token instead.", node=param + ) + if action: + lint_ctx.warn( + f"Param input [{param_name}] {only}found in output actions, better use extended metadata.", + node=param, + ) + if label: + lint_ctx.warn( + f"Param input [{param_name}] {only}found in a label attribute, this is discouraged.", node=param + ) + continue + + if change_format: + lint_ctx.error(f"Param input [{param_name}] is only used in a change_format tag", node=param) + else: + lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles.", node=param) + + def lint_repeats(tool_xml, lint_ctx): """Lint repeat blocks in tool inputs.""" repeats = tool_xml.findall("./inputs//repeat") diff --git a/lib/galaxy/tool_util/parser/util.py b/lib/galaxy/tool_util/parser/util.py index 9ecec25559f6..159eed4e5c74 100644 --- a/lib/galaxy/tool_util/parser/util.py +++ b/lib/galaxy/tool_util/parser/util.py @@ -8,6 +8,12 @@ def is_dict(item): return isinstance(item, dict) or isinstance(item, OrderedDict) +def _prepare_argument(argument): + if argument is None: + return "" + return argument.lstrip("-").replace("-", "_") + + def _parse_name(name, argument): """Determine name of an input source from name and argument returns the name or if absent the argument property @@ -17,5 +23,5 @@ def _parse_name(name, argument): if name is None: if argument is None: raise ValueError("parameter must specify a 'name' or 'argument'.") - name = argument.lstrip("-").replace("-", "_") + name = _prepare_argument(argument) return name diff --git a/packages/tool_util/setup.cfg b/packages/tool_util/setup.cfg index 7f2db40b020b..b75155dfa579 100644 --- a/packages/tool_util/setup.cfg +++ b/packages/tool_util/setup.cfg @@ -32,6 +32,7 @@ version = 22.5.0.dev0 include_package_data = True install_requires = galaxy-util>=22.1 + Cheetah3 lxml MarkupSafe packaging diff --git a/test/unit/tool_util/test_tool_linters.py b/test/unit/tool_util/test_tool_linters.py index 34617ed048ea..6c38cff68ffb 100644 --- a/test/unit/tool_util/test_tool_linters.py +++ b/test/unit/tool_util/test_tool_linters.py @@ -278,7 +278,7 @@ """ -SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED = """ +INPUTS_SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED = """ @@ -449,6 +449,201 @@ """ +# tests for many different cheetah placeholder syntax possibilities +# that should pass and two that should give an error +INPUTS_USED_PARAMETER_IN_COMMAND = """ + + + $simple1 + ${ simple2 } + ## Variable in repeat + #for i, r in enumerate($repeat_name) + #set x = $str(r[ 'var_in_repeat' ]) + ${r.var_in_repeat2} + #end for + "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }" + $sec.paired_collection1.forward.ext + $sec["paired_collection3"].forward.ext + $sec.sec2.paired_collection2['reverse'].ext + + $getVar("getvar") + $getVar("getvar_default", "default") + $hasVar("hasvar") + $varExists("varexists") + + ## check that comments are considered + ## $hey_a_missing_parameter + also comments at the end of lines are ignored ## $hey_a_missing_parameter + #* + $hey_a_missing_parameter + *# + #* $hey_a_missing_parameter *# + $same_param_in_both_whens + + + $param_in_config_file + ]]> + + + + + + + + + +
+ + +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + +
+ + + data_param_filter + + + + + + + + + + + + collection_param_filter + + + + + + + +
+""" + +INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS = """ + + + do something with $inputs ## but note, the linter does not check if inputs is really used + + + + + + + + + +""" + +INPUTS_USED_OTHER = """ + + + + + + #include source=$template_token# + + + $param_in_config_file + ]]> + + + + + + + + + + + + + data_param_filter + + + + + + + + + + + + +""" + +INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS2 = """ + + + do nothing with the configfile + + + + + + + + +""" + +# test case checking if syntax errors are reported +INPUTS_USED_PARAMETER_SYNTAX_ERROR = """ + + + #if $param + this is unfinished if + + + + +""" + +# tool xml for repeats linter +REPEATS = """ + + + + + + + +""" + # test tool xml for outputs linter OUTPUTS_MISSING = """ @@ -469,8 +664,8 @@ OUTPUTS_UNNAMED_INVALID_NAME = """ - - + + """ @@ -533,17 +728,6 @@ """ -# tool xml for repeats linter -REPEATS = """ - - - - - - - -""" - # tool xml for stdio linter STDIO_DEFAULT_FOR_DEFAULT_PROFILE = """ @@ -587,15 +771,6 @@ TESTS_ABSENT_DATA_SOURCE = """ """ -TESTS_WO_EXPECTATIONS = """ - - - - - - -""" - TESTS_PARAM_OUTPUT_NAMES = """ @@ -641,6 +816,27 @@ """ +TESTS_WO_EXPECTATIONS = """ + + + + + + +""" + +TESTS_VALID = """ + + + + + + + + + + +""" ASSERTS = """ @@ -678,18 +874,6 @@ """ -TESTS_VALID = """ - - - - - - - - - - -""" TESTS_OUTPUT_TYPE_MISMATCH = """ @@ -1137,7 +1321,7 @@ def test_inputs_duplicated_options(lint_ctx): def test_inputs_duplicated_options_with_different_select(lint_ctx): - tool_source = get_xml_tool_source(SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED) + tool_source = get_xml_tool_source(INPUTS_SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED) run_lint(lint_ctx, inputs.lint_inputs, tool_source) assert not lint_ctx.warn_messages assert not lint_ctx.error_messages @@ -1289,6 +1473,95 @@ def test_inputs_duplicate_names(lint_ctx): assert len(lint_ctx.error_messages) == 2 +def test_inputs_used_parameter_in_command(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_IN_COMMAND) + run_lint(lint_ctx, inputs.lint_inputs_used, tool_source) + assert not lint_ctx.info_messages + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert "Param input [hey_a_missing_parameter] not found in command or configfiles." in lint_ctx.error_messages + assert "Param input [change_format_parameter] is only used in a change_format tag" in lint_ctx.error_messages + assert len(lint_ctx.error_messages) == 2 + + +def test_inputs_used_other(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_OTHER) + run_lint(lint_ctx, inputs.lint_inputs_used, tool_source) + assert not lint_ctx.info_messages + assert not lint_ctx.valid_messages + assert ( + "Param input [variable_in_template_token] only used in a template macro, use a macro token instead." + in lint_ctx.warn_messages + ) + assert ( + "Param input [action_option_reference] only found in output actions, better use extended metadata." + in lint_ctx.warn_messages + ) + assert ( + "Param input [action_filter_reference] only found in output actions, better use extended metadata." + in lint_ctx.warn_messages + ) + assert ( + "Param input [action_conditional_reference] only found in output actions, better use extended metadata." + in lint_ctx.warn_messages + ) + assert ( + "Param input [param_that_is_used_only_in_output_label] only found in a label attribute, this is discouraged." + in lint_ctx.warn_messages + ) + assert len(lint_ctx.warn_messages) == 5 + assert len(lint_ctx.error_messages) == 0 + + +def test_inputs_used_parameter_in_command_with_inputs(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS) + run_lint(lint_ctx, inputs.lint_inputs_used, tool_source) + assert not lint_ctx.info_messages + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert ( + "Param input [param_that_is_not_in_inputs_configfile] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?" + in lint_ctx.error_messages + ) + assert len(lint_ctx.error_messages) == 1 + + +def test_inputs_used_parameter_in_command_with_inputs2(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS2) + run_lint(lint_ctx, inputs.lint_inputs_used, tool_source) + assert not lint_ctx.info_messages + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert ( + "Param input [param_that_is_not_in_inputs_configfile] only used inputs configfile inputs, but this is not used in the command" + in lint_ctx.error_messages + ) + assert len(lint_ctx.error_messages) == 1 + + +def test_inputs_used_parameter_syntax_error(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_SYNTAX_ERROR) + run_lint(lint_ctx, inputs.lint_inputs_used, tool_source) + assert not lint_ctx.info_messages + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert ( + """Invalid cheetah found + +Some #directives are missing their corresponding #end ___ tag: if +Line 4, column 33 + +Line|Cheetah Code +----|------------------------------------------------------------- +2 | +3 | #if $param +4 | this is unfinished if + ^ +""" + in lint_ctx.error_messages + ) + + def test_inputs_repeats(lint_ctx): tool_source = get_xml_tool_source(REPEATS) run_lint(lint_ctx, inputs.lint_repeats, tool_source) @@ -1341,10 +1614,11 @@ def test_outputs_unnamed_invalid_name(lint_ctx): assert "Tool output name [2output] is not a valid Cheetah placeholder." in lint_ctx.warn_messages assert "Collection output with undefined 'type' found." in lint_ctx.warn_messages assert "Tool collection output 2output doesn't define an output format." in lint_ctx.warn_messages + assert "Tool output [2output] uses duplicated label '${tool.name} on ${on_string}'" in lint_ctx.error_messages assert len(lint_ctx.info_messages) == 1 assert not lint_ctx.valid_messages assert len(lint_ctx.warn_messages) == 5 - assert not lint_ctx.error_messages + assert len(lint_ctx.error_messages) == 1 def test_outputs_format_input(lint_ctx):