diff --git a/lib/galaxy/tool_util/linters/_util.py b/lib/galaxy/tool_util/linters/_util.py index 165a61e8ec33..09988e80f1ff 100644 --- a/lib/galaxy/tool_util/linters/_util.py +++ b/lib/galaxy/tool_util/linters/_util.py @@ -1,45 +1,55 @@ import re +from Cheetah.Compiler import Compiler +from Cheetah.Template import Template + +from galaxy.util import unicodify + def get_code(tool_xml): """get code used in the Galaxy tool""" - - # get code from - # - command, - # - configfiles, and - # - template macros - # note: not necessary (& possible) for macro tokens which are expanded - # during loading the xml (and removed from the macros tag) + # get code from command and configfiles code = "" - for tag in [".//command", ".//configfile", './/macros/macro[@type="template"]']: - print(f"tag {tag}") + for tag in [".//command", ".//configfile"]: for cn in tool_xml.findall(tag): - print(f"node {cn}") code += cn.text + # TODO not sure about this line, I get UnicodeError for some tools otherwise (trinity) + code = "#encoding utf-8\n" + code + code = Template.compile(source=code, compilerClass=Compiler, returnAClass=False) + code = unicodify(code) + # print(code) - # remove comments, - # TODO this is not optimal, but strings containing "##"" complicate this quite a bit - # TODO also this is not complete, e.g. multiline comments are missing - code = "\n".join([_ for _ in code.splitlines() if not _.lstrip().startswith("##")]) - - # get code from output filters + # template macros + # note: not necessary (& possible) for macro tokens which are expanded + # during loading the xml (and removed from the macros tag) + templatecode = "" + for cn in tool_xml.findall('.//macros/macro[@type="template"]'): + templatecode += cn.text + templatecode = "#encoding utf-8\n" + templatecode + templatecode = Template.compile(source=templatecode, compilerClass=Compiler, returnAClass=False) + templatecode = unicodify(templatecode) + + # get code from output filters (which use variables wo $) filtercode = "" for cn in tool_xml.findall("./outputs/*/filter"): filtercode += cn.text + "\n" - # get output labels which might contain code + # get output labels which might contain code (which use variables like ${var}) labelcode = "" for cn in tool_xml.findall("./outputs/*[@label]"): - labelcode + cn.attrib["label"] + "\n" - - # TODO not optimal to mix filter code and the following, since they use cheetah synax, i.e. $param - for cn in tool_xml.findall("./outputs/*/actions/action[@default]"): - labelcode += cn.attrib["default"] + "\n" - - for cn in tool_xml.findall("./outputs/*/actions/conditional[@name]"): - labelcode += cn.attrib["name"] + "\n" - - return code, filtercode, labelcode + labelcode += cn.attrib["label"] + "\n" + + actioncode = "" + for cn in tool_xml.findall('./outputs//conditional[@name]'): + actioncode += cn.attrib["name"] + "\n" + for cn in tool_xml.findall('./outputs//action/option[@type="from_param"]'): + actioncode += cn.attrib.get("name", "") + "\n" + for cn in tool_xml.findall("./outputs//action/option/filter[@ref]"): + actioncode += cn.attrib["ref"] + "\n" + for cn in tool_xml.findall("./outputs//action[@default]"): + actioncode += cn.attrib["default"] + "\n" + + return code, templatecode, filtercode, labelcode, actioncode def is_datasource(tool_xml): diff --git a/lib/galaxy/tool_util/linters/inputs.py b/lib/galaxy/tool_util/linters/inputs.py index aa35273668bf..5a27b8d333bc 100644 --- a/lib/galaxy/tool_util/linters/inputs.py +++ b/lib/galaxy/tool_util/linters/inputs.py @@ -8,7 +8,7 @@ is_datasource, is_valid_cheetah_placeholder ) -from ..parser.util import _parse_name +from ..parser.util import _parse_name, _prepare_argument log = logging.getLogger(__name__) FILTER_TYPES = [ @@ -52,6 +52,77 @@ } +def _param_in_compiled_cheetah(param, param_name, code): + # # accession $PATH.param_name.ATTRIBUTES in cheetah gives + # # VFFSL(SL,"PATH.param_name.ATTRIBUTES",True) + # # for PATH and ATTRIBUTES we assume simply ([\w.]+)? + # # since if wrong it will be discovered in a test + # if re.search(r'VFFSL\(SL,[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'],True\)', code): + # return True + + # # print("checking path") + # # accessing $PATH.param_name['ATTRIBUTE'] (ATTRIBUTE eg reverse) + # # or $PATH.param_name['ATTRIBUTE'].FURTHER_ATTRIBUTE gives + # # VFN(VFN(VFFSL(SL,"PATH",True),"param_name",True)['ATTRIBUTE'],"FURTHER_ATTRIBUTE",True) + # # we simply search VFN(VFFSL(SL,"PATH",True),"param_name",True) + # # ie ignore the ATTRIBUTES part and for PATH we assume again ([\w.]+)? + # if re.search(r'VFN\(VFFSL\(SL,[\"\'][\w.]+[\"\'],True\),[\"\']' + param_name + r'[\"\'],True\)', code): + # return True + + # # all these are covered by the rawExpr regular expression below + # # - $getVar("param_name") + # # gets + # # _v = VFFSL(SL,"getVar",False)("param_name") + # # if _v is not None: write(_filter(_v, rawExpr='$getVar("param_name")')) + # # - $getVar("getvar_default", "default") + # # gets + # # _v = VFFSL(SL,"getVar",False)("getvar_default", "default") + # # if _v is not None: write(_filter(_v, rawExpr='$getVar("getvar_default", "default")')) + # if re.search(r'VFFSL\(SL,[\"\']getVar[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'](, [^()]+)?\)', code): + # return True + + # # - $varExists("varexists") + # # gets + # # _v = VFFSL(SL,"varExists",False)("varexists") + # # if _v is not None: write(_filter(_v, rawExpr='$varExists("varexists")')) + # # - $hasVar("hasvar") + # # gets + # # _v = VFFSL(SL,"hasVar",False)("hasvar") + # # if _v is not None: write(_filter(_v, rawExpr='$hasVar("hasvar")')) + # if re.search(r'VFFSL\(SL,[\"\'](varExists|hasvar)[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\'\"]\)', code): + # return True + + # # Also the following is possible (TODO but we could forbid it) + # # $PATH["param_name"].ATTRIBUTES + # # VFFSL(SL,"PATH",True)["param_name"].ATTRIBUTES + # if re.search(r'VFFSL\(SL,[\"\'][\w.]+[\"\'],True\)\[[\"\']' + param_name + r'[\"\']\]', code): + # return True + # gets + # set $rg_id = str($rg_param('read_group_id_conditional').ID) + # rg_id = str(VFN(VFFSL(SL,"rg_param",False)('read_group_id_conditional'),"ID",True)) + if re.search(r'(VFN|VFFSL)\(.*[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\']', code): + return True + + # #for i, r in enumerate($repeat_name) + # #set x = $str(r[ 'param_name' ]) + # #end for + # the loop content gets + # x = VFFSL(SL,"str",False)(r[ 'var_in_repeat' ]) + if re.search(r'(VFFSL|VFN)\(.*\[\s*[\"\']' + param_name + r'[\"\']\s*\]', code): + # print(f" G") + return True + + # "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }" + # gets + # _v = ",".join( [ str( r.var_in_repeat3 ) for r in VFFSL(SL,"repeat_name",True) ] ) + # if _v is not None: write(_filter(_v, rawExpr='${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }')) + if re.search(r"rawExpr=([\"\'])(.*[^\w])?" + param_name + r"([^\w].*)?(\1)", code): + return True + + # print("FALSE") + return False + + def lint_inputs(tool_xml, lint_ctx): """Lint parameters in a tool's inputs block.""" # determine line to report for general problems with outputs @@ -63,7 +134,6 @@ def lint_inputs(tool_xml, lint_ctx): tool_path = None datasource = is_datasource(tool_xml) inputs = tool_xml.findall("./inputs//param") - code, filter_code, label_code = get_code(tool_xml) num_inputs = 0 for param in inputs: num_inputs += 1 @@ -73,7 +143,7 @@ def lint_inputs(tool_xml, lint_ctx): continue param_name = _parse_name(param_attrib.get("name"), param_attrib.get("argument")) if "name" in param_attrib and "argument" in param_attrib: - if param_attrib.get("name") == _parse_name(None, param_attrib.get("argument")): + if param_attrib.get("name") == _prepare_argument(param_attrib.get("argument")): lint_ctx.warn(f"Param input [{param_name}] 'name' attribute is redundant if argument implies the same name.") if param_name.strip() == "": lint_ctx.error("Param input with empty name.", line=param.sourceline, xpath=tool_xml.getpath(param)) @@ -92,28 +162,6 @@ def lint_inputs(tool_xml, lint_ctx): # TODO lint for valid param type - attribute combinations # TODO lint required attributes for valid each param type - # check if the parameter is used somewhere - conf_inputs = tool_xml.find("./configfiles/inputs") - if re.search(r"[^\w_]" + param_name + r"([^\w_]|$)", code) is not None: - pass - elif param_name in filter_code: - pass - elif re.search(r"[^\w_]" + param_name + r"[^\w_]", label_code): - lint_ctx.info(f"Param input [{param_name}] only found in an attribute.") - elif len(tool_xml.findall(f"./outputs//change_format/when[@input='{param_name}']")) > 0: - pass - elif conf_inputs is not None: # in this it's really hard to know - if param_type in ["data", "collection"] and not conf_inputs.attrib.get("data_style"): - lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?") - else: - if conf_inputs.attrib.get('name', None): - lint_ctx.info(f"Param input [{param_name}] may be unused. Double check that is used in the inputs configfile [{conf_inputs.attrib['name']}] and that this file is used properly.") - elif conf_inputs.attrib.get('filename', None): - lint_ctx.info(f"Param input [{param_name}] may be unused. Double check that is used in the inputs configfile [{conf_inputs.attrib['filename']}] and that this file is used properly.") - elif param.getparent().tag == "conditional": - lint_ctx.info(f"Param input [{param_name}] only used in the select of a conditional, which should be OK.") - else: - lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles.") if param_type == "data": if "format" not in param_attrib: @@ -307,6 +355,77 @@ def lint_inputs(tool_xml, lint_ctx): lint_ctx.warn("Found no input parameters.", line=tool_line, xpath=tool_path) +def lint_inputs_used(tool_xml, lint_ctx): + """ + check if the parameter is used somewhere, the following cases are considered: + - in the command or a configfile + - in an output filter + - if it is the select of conditional + - if there is an inputs configfile that is used + - for data parameters data_style needs to be set for the config file + - for other parameters the name of the config file must be used in the code + + a warning is shown if the parameter is used only in + - template macro, + - output action, or + - label of an output + + otherwise + - if the parameter only appears in change_format + - or if none of the previous cases apply + """ + code, template_code, filter_code, label_code, action_code = get_code(tool_xml) + inputs = tool_xml.findall("./inputs//param") + for param in inputs: + try: + param_name = _parse_name(param.attrib.get("name"), param.attrib.get("argument")) + except ValueError: + continue + if _param_in_compiled_cheetah(param, param_name, code): + continue + if param_name in filter_code: + continue + if tool_xml.find("./inputs//param/options[@from_dataset]") is not None or tool_xml.find("./inputs//param/options/filter[@ref]") is not None: + continue + if param.getparent().tag == "conditional": + continue + + conf_inputs = tool_xml.find("./configfiles/inputs") + if conf_inputs is not None: # in this it's really hard to know + param_type = param.attrib.get("type") + if param_type in ["data", "collection"]: + if not conf_inputs.attrib.get("data_style"): + lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?", line=param.sourceline, xpath=tool_xml.getpath(param)) + inputs_conf_name = conf_inputs.attrib.get('name', conf_inputs.attrib.get('filename', None)) + if inputs_conf_name: + if not re.search(r"(^|[^\w])" + inputs_conf_name + r"([^\w]|$)", code): + lint_ctx.error(f"Param input [{param_name}] only used inputs configfile {inputs_conf_name}, but this is not used in the command", line=param.sourceline, xpath=tool_xml.getpath(param)) + continue + + change_format = tool_xml.find(f"./outputs//change_format/when[@input='{param_name}']") is not None + template = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", template_code) is not None + action = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", action_code) is not None + label = re.search(r"[^\w]" + param_name + r"([^\w]|$)", label_code) is not None + if template or action or label: + if template + action + label == 1: + only = "only " + else: + only = "" + # TODO check that template is used?? + if template: + lint_ctx.warn(f"Param input [{param_name}] {only}used in a template macro, use a macro token instead.", line=param.sourceline, xpath=tool_xml.getpath(param)) + if action: + lint_ctx.warn(f"Param input [{param_name}] {only}found in output actions, better use extended metadata.", line=param.sourceline, xpath=tool_xml.getpath(param)) + if label: + lint_ctx.warn(f"Param input [{param_name}] {only}found in a label attribute, this is discouraged.", line=param.sourceline, xpath=tool_xml.getpath(param)) + continue + + if change_format: + lint_ctx.error(f"Param input [{param_name}] is only used in a change_format tag", line=param.sourceline, xpath=tool_xml.getpath(param)) + else: + lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles.", line=param.sourceline, xpath=tool_xml.getpath(param)) + + def lint_repeats(tool_xml, lint_ctx): """Lint repeat blocks in tool inputs.""" repeats = tool_xml.findall("./inputs//repeat") diff --git a/lib/galaxy/tool_util/parser/util.py b/lib/galaxy/tool_util/parser/util.py index 74676e4bfba4..61ff94fe8e3c 100644 --- a/lib/galaxy/tool_util/parser/util.py +++ b/lib/galaxy/tool_util/parser/util.py @@ -8,6 +8,12 @@ def is_dict(item): return isinstance(item, dict) or isinstance(item, OrderedDict) +def _prepare_argument(argument): + if argument is None: + return "" + return argument.lstrip('-').replace("-", "_") + + def _parse_name(name, argument): """Determine name of an input source from name and argument returns the name or if absent the argument property @@ -15,7 +21,7 @@ def _parse_name(name, argument): all remaining dashes are replaced by underscores. """ if name is None: - if not argument: + if argument is None: raise ValueError("parameter must specify a 'name' or 'argument'.") - name = argument.lstrip('-').replace("-", "_") + name = _prepare_argument(argument) return name diff --git a/test/unit/tool_util/test_tool_linters.py b/test/unit/tool_util/test_tool_linters.py index d487998bccf0..f02ee3cd3c69 100644 --- a/test/unit/tool_util/test_tool_linters.py +++ b/test/unit/tool_util/test_tool_linters.py @@ -149,7 +149,6 @@ INPUTS_VALID = """ - $txt_param $int_param @@ -159,7 +158,6 @@ INPUTS_PARAM_NAME = """ - $2 $valid $param_name @@ -172,7 +170,6 @@ INPUTS_PARAM_TYPE = """ - $valid_name $another_valid_name @@ -182,7 +179,6 @@ INPUTS_DATA_PARAM = """ - $valid_name @@ -191,7 +187,6 @@ INPUTS_CONDITIONAL = """ - $select $select3 $select4 $bool $text $optionalselect $multipleselect $select3 $label_select $missing_option @@ -247,9 +242,6 @@ INPUTS_SELECT_INCOMPATIBLE_DISPLAY = """ - - $radio_select $checkboxes_select $checkboxes_select_correct - @@ -270,9 +262,6 @@ INPUTS_SELECT_DUPLICATED_OPTIONS = """ - - $select - @@ -282,9 +271,8 @@ """ -SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED = """ +INPUTS_SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED = """ - $select @@ -296,7 +284,6 @@ INPUTS_SELECT_DEPRECATIONS = """ - $select_do$select_ff$select_fp @@ -311,9 +298,6 @@ INPUTS_SELECT_OPTION_DEFINITIONS = """ - - $select_noopt $select_noopts $select_fd_op $select_fd_fdt $select_noval_notext $select_meta_file_key_incomp - @@ -340,7 +324,6 @@ INPUTS_SELECT_FILTER = """ - $select_filter_types @@ -354,7 +337,6 @@ INPUTS_VALIDATOR_INCOMPATIBILITIES = """ - $param_name$another_param_name TEXT @@ -371,9 +353,6 @@ INPUTS_VALIDATOR_CORRECT = """ - - $data_param $collection_param $text_param $select_param $int_param - @@ -415,21 +394,35 @@ """ -INPUTS_PARAMETER_IN_COMMAND = """ +# tests for many different cheetah placeholder syntax possibilities +# that should pass and two that should give an error +INPUTS_USED_PARAMETER_IN_COMMAND = """ - - - - #include source=$template_token# $simple1 ${ simple2 } - $sec.paired_collection1.forward - $sec.paired_collection2['reverse'] - $getVar("hard") - ## skipping $cond_select, note that this comment is ignored as well + ## Variable in repeat + #for i, r in enumerate($repeat_name) + #set x = $str(r[ 'var_in_repeat' ]) + ${r.var_in_repeat2} + #end for + "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }" + $sec.paired_collection1.forward.ext + $sec["paired_collection3"].forward.ext + $sec.sec2.paired_collection2['reverse'].ext + + $getVar("getvar") + $getVar("getvar_default", "default") + $hasVar("hasvar") + $varExists("varexists") + + ## check that comments are considered + ## $hey_a_missing_parameter + also comments at the end of lines are ignored ## $hey_a_missing_parameter + #* + $hey_a_missing_parameter + *# + #* $hey_a_missing_parameter *# $same_param_in_both_whens @@ -438,19 +431,27 @@ ]]> + + - - + + + -
+
- + +
+ +
- + + + + - @@ -463,12 +464,23 @@ + - - + data_param_filter + + + + + + + + + collection_param_filter @@ -482,7 +494,51 @@ """ -INPUTS_PARAMETER_IN_COMMAND_WITH_INPUTS = """ +INPUTS_USED_OTHER = """ + + + + + + #include source=$template_token# + + + $param_in_config_file + ]]> + + + + + + + + + + + + + data_param_filter + + + + + + + + + + + + + +""" + +INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS = """ do something with $inputs ## but note, the linter does not check if inputs is really used @@ -496,6 +552,21 @@ """ + +INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS2 = """ + + + do nothing with the configfile + + + + + + + + +""" + # test tool xml for outputs linter OUTPUTS_MISSING = """ @@ -870,7 +941,7 @@ and len(x.info_messages) == 1 and len(x.valid_messages) == 0 and len(x.warn_messages) == 0 and len(x.error_messages) == 2 ), ( - SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED, inputs.lint_inputs, + INPUTS_SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED, inputs.lint_inputs, lambda x: len(x.warn_messages) == 0 and len(x.error_messages) == 0 ), @@ -927,18 +998,33 @@ and len(x.info_messages) == 1 and len(x.valid_messages) == 0 and len(x.warn_messages) == 0 and len(x.error_messages) == 0 ), ( - INPUTS_PARAMETER_IN_COMMAND, inputs.lint_inputs, + INPUTS_USED_PARAMETER_IN_COMMAND, inputs.lint_inputs_used, + lambda x: + "Param input [hey_a_missing_parameter] not found in command or configfiles." in x.error_messages + and 'Param input [change_format_parameter] is only used in a change_format tag' in x.error_messages + and len(x.info_messages) == 0 and len(x.valid_messages) == 0 and len(x.warn_messages) == 0 and len(x.error_messages) == 2 + ), + ( + INPUTS_USED_OTHER, inputs.lint_inputs_used, + lambda x: + 'Param input [variable_in_template_token] only used in a template macro, use a macro token instead.' in x.warn_messages + and 'Param input [action_option_reference] only found in output actions.' in x.warn_messages + and 'Param input [action_filter_reference] only found in output actions.' in x.warn_messages + and 'Param input [action_conditional_reference] only found in output actions.' in x.warn_messages + and "Param input [param_that_is_used_only_in_output_label] only found in a label attribute." in x.warn_messages + and len(x.info_messages) == 0 and len(x.valid_messages) == 0 and len(x.warn_messages) == 5 and len(x.error_messages) == 0 + ), + ( + INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS, inputs.lint_inputs_used, lambda x: - "Param input [cond_select] only used in the select of a conditional, which should be OK." in x.info_messages - and "Param input [hey_a_missing_parameter] not found in command or configfiles." in x.error_messages - and len(x.info_messages) == 2 and len(x.warn_messages) == 0 and len(x.error_messages) == 1 + "Param input [param_that_is_not_in_inputs_configfile] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?" in x.error_messages + and len(x.info_messages) == 0 and len(x.valid_messages) == 0 and len(x.warn_messages) == 0 and len(x.error_messages) == 1 ), ( - INPUTS_PARAMETER_IN_COMMAND_WITH_INPUTS, inputs.lint_inputs, + INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS2, inputs.lint_inputs_used, lambda x: - "Param input [param_that_may_be_in_inputs_configfile] may be unused. Double check that is used in the inputs configfile [inputs] and that this file is used properly." in x.info_messages - and "Param input [param_that_is_not_in_inputs_configfile] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?" in x.error_messages - and len(x.info_messages) == 2 and len(x.warn_messages) == 0 and len(x.error_messages) == 1 + 'Param input [param_that_is_not_in_inputs_configfile] only used inputs configfile inputs, but this is not used in the command' in x.error_messages + and len(x.info_messages) == 0 and len(x.valid_messages) == 0 and len(x.warn_messages) == 0 and len(x.error_messages) == 1 ), ( OUTPUTS_MISSING, outputs.lint_output, @@ -1106,8 +1192,10 @@ 'inputs: select filter', 'inputs: validator incompatibilities', 'inputs: validator all correct', - 'inputs: parameter in command', - 'inputs: parameter in command with inputs configfile', + 'inputs used: parameter in command', + 'inputs used: parameter in command with inputs configfile', + 'inputs used: parameter in command unused inputs configfile', + 'inputs used: other warnings', 'outputs: missing outputs tag', 'outputs: multiple outputs tags', 'outputs: unknow tag in outputs',