Skip to content

Commit

Permalink
linting of unused parameters
Browse files Browse the repository at this point in the history
by searching in compiled cheetah templates
  • Loading branch information
bernt-matthias committed Jan 24, 2022
1 parent 49e0bd9 commit ee0e5c4
Show file tree
Hide file tree
Showing 4 changed files with 330 additions and 107 deletions.
64 changes: 37 additions & 27 deletions lib/galaxy/tool_util/linters/_util.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,55 @@
import re

from Cheetah.Compiler import Compiler
from Cheetah.Template import Template

from galaxy.util import unicodify


def get_code(tool_xml):
"""get code used in the Galaxy tool"""

# get code from
# - command,
# - configfiles, and
# - template macros
# note: not necessary (& possible) for macro tokens which are expanded
# during loading the xml (and removed from the macros tag)
# get code from command and configfiles
code = ""
for tag in [".//command", ".//configfile", './/macros/macro[@type="template"]']:
print(f"tag {tag}")
for tag in [".//command", ".//configfile"]:
for cn in tool_xml.findall(tag):
print(f"node {cn}")
code += cn.text
# TODO not sure about this line, I get UnicodeError for some tools otherwise (trinity)
code = "#encoding utf-8\n" + code
code = Template.compile(source=code, compilerClass=Compiler, returnAClass=False)
code = unicodify(code)
# print(code)

# remove comments,
# TODO this is not optimal, but strings containing "##"" complicate this quite a bit
# TODO also this is not complete, e.g. multiline comments are missing
code = "\n".join([_ for _ in code.splitlines() if not _.lstrip().startswith("##")])

# get code from output filters
# template macros
# note: not necessary (& possible) for macro tokens which are expanded
# during loading the xml (and removed from the macros tag)
templatecode = ""
for cn in tool_xml.findall('.//macros/macro[@type="template"]'):
templatecode += cn.text
templatecode = "#encoding utf-8\n" + templatecode
templatecode = Template.compile(source=templatecode, compilerClass=Compiler, returnAClass=False)
templatecode = unicodify(templatecode)

# get code from output filters (which use variables wo $)
filtercode = ""
for cn in tool_xml.findall("./outputs/*/filter"):
filtercode += cn.text + "\n"

# get output labels which might contain code
# get output labels which might contain code (which use variables like ${var})
labelcode = ""
for cn in tool_xml.findall("./outputs/*[@label]"):
labelcode + cn.attrib["label"] + "\n"

# TODO not optimal to mix filter code and the following, since they use cheetah synax, i.e. $param
for cn in tool_xml.findall("./outputs/*/actions/action[@default]"):
labelcode += cn.attrib["default"] + "\n"

for cn in tool_xml.findall("./outputs/*/actions/conditional[@name]"):
labelcode += cn.attrib["name"] + "\n"

return code, filtercode, labelcode
labelcode += cn.attrib["label"] + "\n"

actioncode = ""
for cn in tool_xml.findall('./outputs//conditional[@name]'):
actioncode += cn.attrib["name"] + "\n"
for cn in tool_xml.findall('./outputs//action/option[@type="from_param"]'):
actioncode += cn.attrib.get("name", "") + "\n"
for cn in tool_xml.findall("./outputs//action/option/filter[@ref]"):
actioncode += cn.attrib["ref"] + "\n"
for cn in tool_xml.findall("./outputs//action[@default]"):
actioncode += cn.attrib["default"] + "\n"

return code, templatecode, filtercode, labelcode, actioncode


def is_datasource(tool_xml):
Expand Down
169 changes: 144 additions & 25 deletions lib/galaxy/tool_util/linters/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
is_datasource,
is_valid_cheetah_placeholder
)
from ..parser.util import _parse_name
from ..parser.util import _parse_name, _prepare_argument
log = logging.getLogger(__name__)

FILTER_TYPES = [
Expand Down Expand Up @@ -52,6 +52,77 @@
}


def _param_in_compiled_cheetah(param, param_name, code):
# # accession $PATH.param_name.ATTRIBUTES in cheetah gives
# # VFFSL(SL,"PATH.param_name.ATTRIBUTES",True)
# # for PATH and ATTRIBUTES we assume simply ([\w.]+)?
# # since if wrong it will be discovered in a test
# if re.search(r'VFFSL\(SL,[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'],True\)', code):
# return True

# # print("checking path")
# # accessing $PATH.param_name['ATTRIBUTE'] (ATTRIBUTE eg reverse)
# # or $PATH.param_name['ATTRIBUTE'].FURTHER_ATTRIBUTE gives
# # VFN(VFN(VFFSL(SL,"PATH",True),"param_name",True)['ATTRIBUTE'],"FURTHER_ATTRIBUTE",True)
# # we simply search VFN(VFFSL(SL,"PATH",True),"param_name",True)
# # ie ignore the ATTRIBUTES part and for PATH we assume again ([\w.]+)?
# if re.search(r'VFN\(VFFSL\(SL,[\"\'][\w.]+[\"\'],True\),[\"\']' + param_name + r'[\"\'],True\)', code):
# return True

# # all these are covered by the rawExpr regular expression below
# # - $getVar("param_name")
# # gets
# # _v = VFFSL(SL,"getVar",False)("param_name")
# # if _v is not None: write(_filter(_v, rawExpr='$getVar("param_name")'))
# # - $getVar("getvar_default", "default")
# # gets
# # _v = VFFSL(SL,"getVar",False)("getvar_default", "default")
# # if _v is not None: write(_filter(_v, rawExpr='$getVar("getvar_default", "default")'))
# if re.search(r'VFFSL\(SL,[\"\']getVar[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'](, [^()]+)?\)', code):
# return True

# # - $varExists("varexists")
# # gets
# # _v = VFFSL(SL,"varExists",False)("varexists")
# # if _v is not None: write(_filter(_v, rawExpr='$varExists("varexists")'))
# # - $hasVar("hasvar")
# # gets
# # _v = VFFSL(SL,"hasVar",False)("hasvar")
# # if _v is not None: write(_filter(_v, rawExpr='$hasVar("hasvar")'))
# if re.search(r'VFFSL\(SL,[\"\'](varExists|hasvar)[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\'\"]\)', code):
# return True

# # Also the following is possible (TODO but we could forbid it)
# # $PATH["param_name"].ATTRIBUTES
# # VFFSL(SL,"PATH",True)["param_name"].ATTRIBUTES
# if re.search(r'VFFSL\(SL,[\"\'][\w.]+[\"\'],True\)\[[\"\']' + param_name + r'[\"\']\]', code):
# return True
# gets
# set $rg_id = str($rg_param('read_group_id_conditional').ID)
# rg_id = str(VFN(VFFSL(SL,"rg_param",False)('read_group_id_conditional'),"ID",True))
if re.search(r'(VFN|VFFSL)\(.*[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\']', code):
return True

# #for i, r in enumerate($repeat_name)
# #set x = $str(r[ 'param_name' ])
# #end for
# the loop content gets
# x = VFFSL(SL,"str",False)(r[ 'var_in_repeat' ])
if re.search(r'(VFFSL|VFN)\(.*\[\s*[\"\']' + param_name + r'[\"\']\s*\]', code):
# print(f" G")
return True

# "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }"
# gets
# _v = ",".join( [ str( r.var_in_repeat3 ) for r in VFFSL(SL,"repeat_name",True) ] )
# if _v is not None: write(_filter(_v, rawExpr='${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }'))
if re.search(r"rawExpr=([\"\'])(.*[^\w])?" + param_name + r"([^\w].*)?(\1)", code):
return True

# print("FALSE")
return False


def lint_inputs(tool_xml, lint_ctx):
"""Lint parameters in a tool's inputs block."""
# determine line to report for general problems with outputs
Expand All @@ -63,7 +134,6 @@ def lint_inputs(tool_xml, lint_ctx):
tool_path = None
datasource = is_datasource(tool_xml)
inputs = tool_xml.findall("./inputs//param")
code, filter_code, label_code = get_code(tool_xml)
num_inputs = 0
for param in inputs:
num_inputs += 1
Expand All @@ -73,7 +143,7 @@ def lint_inputs(tool_xml, lint_ctx):
continue
param_name = _parse_name(param_attrib.get("name"), param_attrib.get("argument"))
if "name" in param_attrib and "argument" in param_attrib:
if param_attrib.get("name") == _parse_name(None, param_attrib.get("argument")):
if param_attrib.get("name") == _prepare_argument(param_attrib.get("argument")):
lint_ctx.warn(f"Param input [{param_name}] 'name' attribute is redundant if argument implies the same name.")
if param_name.strip() == "":
lint_ctx.error("Param input with empty name.", line=param.sourceline, xpath=tool_xml.getpath(param))
Expand All @@ -92,28 +162,6 @@ def lint_inputs(tool_xml, lint_ctx):

# TODO lint for valid param type - attribute combinations
# TODO lint required attributes for valid each param type
# check if the parameter is used somewhere
conf_inputs = tool_xml.find("./configfiles/inputs")
if re.search(r"[^\w_]" + param_name + r"([^\w_]|$)", code) is not None:
pass
elif param_name in filter_code:
pass
elif re.search(r"[^\w_]" + param_name + r"[^\w_]", label_code):
lint_ctx.info(f"Param input [{param_name}] only found in an attribute.")
elif len(tool_xml.findall(f"./outputs//change_format/when[@input='{param_name}']")) > 0:
pass
elif conf_inputs is not None: # in this it's really hard to know
if param_type in ["data", "collection"] and not conf_inputs.attrib.get("data_style"):
lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?")
else:
if conf_inputs.attrib.get('name', None):
lint_ctx.info(f"Param input [{param_name}] may be unused. Double check that is used in the inputs configfile [{conf_inputs.attrib['name']}] and that this file is used properly.")
elif conf_inputs.attrib.get('filename', None):
lint_ctx.info(f"Param input [{param_name}] may be unused. Double check that is used in the inputs configfile [{conf_inputs.attrib['filename']}] and that this file is used properly.")
elif param.getparent().tag == "conditional":
lint_ctx.info(f"Param input [{param_name}] only used in the select of a conditional, which should be OK.")
else:
lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles.")

if param_type == "data":
if "format" not in param_attrib:
Expand Down Expand Up @@ -307,6 +355,77 @@ def lint_inputs(tool_xml, lint_ctx):
lint_ctx.warn("Found no input parameters.", line=tool_line, xpath=tool_path)


def lint_inputs_used(tool_xml, lint_ctx):
"""
check if the parameter is used somewhere, the following cases are considered:
- in the command or a configfile
- in an output filter
- if it is the select of conditional
- if there is an inputs configfile that is used
- for data parameters data_style needs to be set for the config file
- for other parameters the name of the config file must be used in the code
a warning is shown if the parameter is used only in
- template macro,
- output action, or
- label of an output
otherwise
- if the parameter only appears in change_format
- or if none of the previous cases apply
"""
code, template_code, filter_code, label_code, action_code = get_code(tool_xml)
inputs = tool_xml.findall("./inputs//param")
for param in inputs:
try:
param_name = _parse_name(param.attrib.get("name"), param.attrib.get("argument"))
except ValueError:
continue
if _param_in_compiled_cheetah(param, param_name, code):
continue
if param_name in filter_code:
continue
if tool_xml.find("./inputs//param/options[@from_dataset]") is not None or tool_xml.find("./inputs//param/options/filter[@ref]") is not None:
continue
if param.getparent().tag == "conditional":
continue

conf_inputs = tool_xml.find("./configfiles/inputs")
if conf_inputs is not None: # in this it's really hard to know
param_type = param.attrib.get("type")
if param_type in ["data", "collection"]:
if not conf_inputs.attrib.get("data_style"):
lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?", line=param.sourceline, xpath=tool_xml.getpath(param))
inputs_conf_name = conf_inputs.attrib.get('name', conf_inputs.attrib.get('filename', None))
if inputs_conf_name:
if not re.search(r"(^|[^\w])" + inputs_conf_name + r"([^\w]|$)", code):
lint_ctx.error(f"Param input [{param_name}] only used inputs configfile {inputs_conf_name}, but this is not used in the command", line=param.sourceline, xpath=tool_xml.getpath(param))
continue

change_format = tool_xml.find(f"./outputs//change_format/when[@input='{param_name}']") is not None
template = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", template_code) is not None
action = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", action_code) is not None
label = re.search(r"[^\w]" + param_name + r"([^\w]|$)", label_code) is not None
if template or action or label:
if template + action + label == 1:
only = "only "
else:
only = ""
# TODO check that template is used??
if template:
lint_ctx.warn(f"Param input [{param_name}] {only}used in a template macro, use a macro token instead.", line=param.sourceline, xpath=tool_xml.getpath(param))
if action:
lint_ctx.warn(f"Param input [{param_name}] {only}found in output actions, better use extended metadata.", line=param.sourceline, xpath=tool_xml.getpath(param))
if label:
lint_ctx.warn(f"Param input [{param_name}] {only}found in a label attribute, this is discouraged.", line=param.sourceline, xpath=tool_xml.getpath(param))
continue

if change_format:
lint_ctx.error(f"Param input [{param_name}] is only used in a change_format tag", line=param.sourceline, xpath=tool_xml.getpath(param))
else:
lint_ctx.error(f"Param input [{param_name}] not found in command or configfiles.", line=param.sourceline, xpath=tool_xml.getpath(param))


def lint_repeats(tool_xml, lint_ctx):
"""Lint repeat blocks in tool inputs."""
repeats = tool_xml.findall("./inputs//repeat")
Expand Down
10 changes: 8 additions & 2 deletions lib/galaxy/tool_util/parser/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,20 @@ def is_dict(item):
return isinstance(item, dict) or isinstance(item, OrderedDict)


def _prepare_argument(argument):
if argument is None:
return ""
return argument.lstrip('-').replace("-", "_")


def _parse_name(name, argument):
"""Determine name of an input source from name and argument
returns the name or if absent the argument property
In the latter case, leading dashes are stripped and
all remaining dashes are replaced by underscores.
"""
if name is None:
if not argument:
if argument is None:
raise ValueError("parameter must specify a 'name' or 'argument'.")
name = argument.lstrip('-').replace("-", "_")
name = _prepare_argument(argument)
return name
Loading

0 comments on commit ee0e5c4

Please sign in to comment.