Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lint for unused input parameters #12724

Open
wants to merge 11 commits into
base: dev
Choose a base branch
from
51 changes: 51 additions & 0 deletions lib/galaxy/tool_util/linters/_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,56 @@
import re

from Cheetah.Compiler import Compiler
from Cheetah.Template import Template
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/galaxyproject/galaxy/blob/dev/packages/tool_util/setup.cfg#L34 this needs to be updated to require Cheetah3 or galaxy-util[template].

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the hint @jmchilton added Cheetah3


from galaxy.util import unicodify


def get_code(tool_xml):
"""get code used in the Galaxy tool"""
# get code from command and configfiles
code = ""
for tag in [".//command", ".//configfile"]:
for cn in tool_xml.findall(tag):
code += cn.text
# TODO not sure about this line, I get UnicodeError for some tools otherwise (trinity)
code = "#encoding utf-8\n" + code
code = Template.compile(source=code, compilerClass=Compiler, returnAClass=False)
code = unicodify(code)
# print(code)

# template macros
# note: not necessary (& possible) for macro tokens which are expanded
# during loading the xml (and removed from the macros tag)
templatecode = ""
for cn in tool_xml.findall('.//macros/macro[@type="template"]'):
templatecode += cn.text
templatecode = "#encoding utf-8\n" + templatecode
templatecode = Template.compile(source=templatecode, compilerClass=Compiler, returnAClass=False)
templatecode = unicodify(templatecode)

# get code from output filters (which use variables wo $)
filtercode = ""
for cn in tool_xml.findall("./outputs/*/filter"):
filtercode += cn.text + "\n"

# get output labels which might contain code (which use variables like ${var})
labelcode = ""
for cn in tool_xml.findall("./outputs/*[@label]"):
labelcode += cn.attrib["label"] + "\n"

actioncode = ""
for cn in tool_xml.findall("./outputs//conditional[@name]"):
actioncode += cn.attrib["name"] + "\n"
for cn in tool_xml.findall('./outputs//action/option[@type="from_param"]'):
actioncode += cn.attrib.get("name", "") + "\n"
for cn in tool_xml.findall("./outputs//action/option/filter[@ref]"):
actioncode += cn.attrib["ref"] + "\n"
for cn in tool_xml.findall("./outputs//action[@default]"):
actioncode += cn.attrib["default"] + "\n"

return code, templatecode, filtercode, labelcode, actioncode


def is_datasource(tool_xml):
"""Returns true if the tool is a datasource tool"""
Expand Down
22 changes: 22 additions & 0 deletions lib/galaxy/tool_util/linters/configfiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""This module contains a linting function for a tool's configfiles section.
"""


def lint_configfiles(tool_xml, lint_ctx):
""""""
root = tool_xml.getroot()
configfiles = root.findall("configfiles")
if len(configfiles) > 1:
lint_ctx.error("More than one configfiles tag found, behavior undefined.")
return
elif len(configfiles) == 0:
return
configfiles = configfiles[0]

configfile = configfiles.findall("configfile|inputs")
for cf in configfile:
if not ("name" in cf.attrib or "filename" in cf.attrib):
lint_ctx.error("Configfile needs to define name or filename.")
if cf.tag == "inputs":
if "data_style" in cf.attribs and cf.attribs["data_style"] in ["paths", "staging_path_and_source_path"]:
lint_ctx.error(f"Unknown data_style {cf.attribs['data_style']}for inputs configfile.")
179 changes: 175 additions & 4 deletions lib/galaxy/tool_util/linters/inputs.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
"""This module contains a linting functions for tool inputs."""
import re

from galaxy.util import string_as_bool
from ._util import (
from Cheetah.Parser import ParseError

from galaxy.tool_util.linters._util import (
get_code,
is_datasource,
is_valid_cheetah_placeholder,
)
from ..parser.util import _parse_name
from galaxy.tool_util.parser.util import (
_parse_name,
_prepare_argument,
)
from galaxy.util import string_as_bool

FILTER_TYPES = [
"data_meta",
Expand Down Expand Up @@ -115,6 +121,77 @@
]


def _param_in_compiled_cheetah(param, param_name, code):
# # accession $PATH.param_name.ATTRIBUTES in cheetah gives
# # VFFSL(SL,"PATH.param_name.ATTRIBUTES",True)
# # for PATH and ATTRIBUTES we assume simply ([\w.]+)?
# # since if wrong it will be discovered in a test
# if re.search(r'VFFSL\(SL,[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'],True\)', code):
# return True

# # print("checking path")
# # accessing $PATH.param_name['ATTRIBUTE'] (ATTRIBUTE eg reverse)
# # or $PATH.param_name['ATTRIBUTE'].FURTHER_ATTRIBUTE gives
# # VFN(VFN(VFFSL(SL,"PATH",True),"param_name",True)['ATTRIBUTE'],"FURTHER_ATTRIBUTE",True)
# # we simply search VFN(VFFSL(SL,"PATH",True),"param_name",True)
# # ie ignore the ATTRIBUTES part and for PATH we assume again ([\w.]+)?
# if re.search(r'VFN\(VFFSL\(SL,[\"\'][\w.]+[\"\'],True\),[\"\']' + param_name + r'[\"\'],True\)', code):
# return True

# # all these are covered by the rawExpr regular expression below
# # - $getVar("param_name")
# # gets
# # _v = VFFSL(SL,"getVar",False)("param_name")
# # if _v is not None: write(_filter(_v, rawExpr='$getVar("param_name")'))
# # - $getVar("getvar_default", "default")
# # gets
# # _v = VFFSL(SL,"getVar",False)("getvar_default", "default")
# # if _v is not None: write(_filter(_v, rawExpr='$getVar("getvar_default", "default")'))
# if re.search(r'VFFSL\(SL,[\"\']getVar[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'](, [^()]+)?\)', code):
# return True

# # - $varExists("varexists")
# # gets
# # _v = VFFSL(SL,"varExists",False)("varexists")
# # if _v is not None: write(_filter(_v, rawExpr='$varExists("varexists")'))
# # - $hasVar("hasvar")
# # gets
# # _v = VFFSL(SL,"hasVar",False)("hasvar")
# # if _v is not None: write(_filter(_v, rawExpr='$hasVar("hasvar")'))
# if re.search(r'VFFSL\(SL,[\"\'](varExists|hasvar)[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\'\"]\)', code):
# return True

# # Also the following is possible (TODO but we could forbid it)
# # $PATH["param_name"].ATTRIBUTES
# # VFFSL(SL,"PATH",True)["param_name"].ATTRIBUTES
# if re.search(r'VFFSL\(SL,[\"\'][\w.]+[\"\'],True\)\[[\"\']' + param_name + r'[\"\']\]', code):
# return True
# gets
# set $rg_id = str($rg_param('read_group_id_conditional').ID)
# rg_id = str(VFN(VFFSL(SL,"rg_param",False)('read_group_id_conditional'),"ID",True))
if re.search(r"(VFN|VFFSL)\(.*[\"\']([\w.]+\.)?" + param_name + r"(\.[\w.]+)?[\"\']", code):
return True

# #for i, r in enumerate($repeat_name)
# #set x = $str(r[ 'param_name' ])
# #end for
# the loop content gets
# x = VFFSL(SL,"str",False)(r[ 'var_in_repeat' ])
if re.search(r"(VFFSL|VFN)\(.*\[\s*[\"\']" + param_name + r"[\"\']\s*\]", code):
# print(f" G")
return True

# "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }"
# gets
# _v = ",".join( [ str( r.var_in_repeat3 ) for r in VFFSL(SL,"repeat_name",True) ] )
# if _v is not None: write(_filter(_v, rawExpr='${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }'))
if re.search(r"rawExpr=([\"\'])(.*[^\w])?" + param_name + r"([^\w].*)?(\1)", code):
return True

# print("FALSE")
return False


def lint_inputs(tool_xml, lint_ctx):
"""Lint parameters in a tool's inputs block."""
datasource = is_datasource(tool_xml)
Expand All @@ -133,7 +210,7 @@ def lint_inputs(tool_xml, lint_ctx):
continue
param_name = _parse_name(param_attrib.get("name"), param_attrib.get("argument"))
if "name" in param_attrib and "argument" in param_attrib:
if param_attrib.get("name") == _parse_name(None, param_attrib.get("argument")):
if param_attrib.get("name") == _prepare_argument(param_attrib.get("argument")):
lint_ctx.warn(
f"Param input [{param_name}] 'name' attribute is redundant if argument implies the same name.",
node=param,
Expand Down Expand Up @@ -171,6 +248,7 @@ def lint_inputs(tool_xml, lint_ctx):
param_type = param_attrib["type"]

# TODO lint for valid param type - attribute combinations
# TODO lint required attributes for valid each param type

# lint for valid param type - child node combinations
for ptcc in PARAM_TYPE_CHILD_COMBINATIONS:
Expand Down Expand Up @@ -484,6 +562,99 @@ def lint_inputs(tool_xml, lint_ctx):
)


def lint_inputs_used(tool_xml, lint_ctx):
"""
check if the parameter is used somewhere, the following cases are considered:
- in the command or a configfile
- in an output filter
- if it is the select of conditional
- if there is an inputs configfile that is used
- for data parameters data_style needs to be set for the config file
- for other parameters the name of the config file must be used in the code

a warning is shown if the parameter is used only in
- template macro,
- output action, or
- label of an output

otherwise
- if the parameter only appears in change_format
- or if none of the previous cases apply
"""
tool_node = tool_xml.getroot()
try:
code, template_code, filter_code, label_code, action_code = get_code(tool_xml)
except ParseError as pe:
lint_ctx.error(f"Invalid cheetah found{pe}", node=tool_node)
return

inputs = tool_xml.findall("./inputs//param")
for param in inputs:
try:
param_name = _parse_name(param.attrib.get("name"), param.attrib.get("argument"))
except ValueError:
continue
if _param_in_compiled_cheetah(param, param_name, code):
continue
if param_name in filter_code:
continue
if (
tool_xml.find("./inputs//param/options[@from_dataset]") is not None
or tool_xml.find("./inputs//param/options/filter[@ref]") is not None
):
continue
if param.getparent().tag == "conditional":
continue

conf_inputs = tool_xml.find("./configfiles/inputs")
if conf_inputs is not None: # in this it's really hard to know
param_type = param.attrib.get("type")
if param_type in ["data", "collection"]:
if not conf_inputs.attrib.get("data_style"):
lint_ctx.error(
f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?",
node=param,
)
inputs_conf_name = conf_inputs.attrib.get("name", conf_inputs.attrib.get("filename", None))
if inputs_conf_name:
if not re.search(r"(^|[^\w])" + inputs_conf_name + r"([^\w]|$)", code):
lint_ctx.error(
f"Param input [{param_name}] only used inputs configfile {inputs_conf_name}, but this is not used in the command",
node=param,
)
continue

change_format = tool_xml.find(f"./outputs//change_format/when[@input='{param_name}']") is not None
template = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", template_code) is not None
action = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", action_code) is not None
label = re.search(r"[^\w]" + param_name + r"([^\w]|$)", label_code) is not None
if template or action or label:
if template + action + label == 1:
only = "only "
else:
only = ""
# TODO check that template is used??
if template:
lint_ctx.warn(
f"Param input [{param_name}] {only}used in a template macro, use a macro token instead.", node=param
)
if action:
lint_ctx.warn(
f"Param input [{param_name}] {only}found in output actions, better use extended metadata.",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was expecting that you "complain" about this one .. :)

node=param,
)
if label:
lint_ctx.warn(
f"Param input [{param_name}] {only}found in a label attribute, this is discouraged.", node=param
)
continue

if change_format:
lint_ctx.error(f"Param input [{param_name}] is only used in a change_format tag", node=param)
else:
lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles.", node=param)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

error should only be used if the tool is broken with that change IMO. I don't think unused variables really indicate a tool is broken. I would make these warns or infos.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular the last case (the else branch) is clearly an error in my opinion. The parameter is used nowhere in the tool, i.e. the functionality of this parameter is broken (even if the tool still runs...). Just assume it would be the light switch in a car .. if it does not switch the lights on/off it's broken even if the car is still driving, or?

For the if branch I just can not imagine how the tool may produce a different format without using the parameter anywhere else except for the change_format tag.

But I'm fine with downgrading errors and warnings to warnings and infos (resp) if its for the possibility of FP due to getVar and varExists...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For getVar and varExists we could just bail out like for the case when a param file is present



def lint_repeats(tool_xml, lint_ctx):
"""Lint repeat blocks in tool inputs."""
repeats = tool_xml.findall("./inputs//repeat")
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tool_util/parser/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ def is_dict(item):
return isinstance(item, dict) or isinstance(item, OrderedDict)


def _prepare_argument(argument):
if argument is None:
return ""
return argument.lstrip("-").replace("-", "_")


def _parse_name(name, argument):
"""Determine name of an input source from name and argument
returns the name or if absent the argument property
Expand All @@ -17,5 +23,5 @@ def _parse_name(name, argument):
if name is None:
if argument is None:
raise ValueError("parameter must specify a 'name' or 'argument'.")
name = argument.lstrip("-").replace("-", "_")
name = _prepare_argument(argument)
return name
1 change: 1 addition & 0 deletions packages/tool_util/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ version = 22.5.0.dev0
include_package_data = True
install_requires =
galaxy-util>=22.1
Cheetah3
lxml
MarkupSafe
packaging
Expand Down
Loading