Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add filter null collection operation tool #18928

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/galaxy/config/sample/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<tool file="${model_tools_path}/zip_collection.xml" />
<tool file="${model_tools_path}/filter_failed_collection.xml" />
<tool file="${model_tools_path}/filter_empty_collection.xml" />
<tool file="${model_tools_path}/filter_null.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/merge_collection.xml" />
<tool file="${model_tools_path}/relabel_from_file.xml" />
Expand Down
4 changes: 3 additions & 1 deletion lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4723,8 +4723,10 @@ def set_skipped(self, object_store_populator: "ObjectStorePopulator") -> None:
self.state = self.states.OK
self.blurb = "skipped"
self.visible = False
null = json.dumps(None)
with open(self.dataset.get_file_name(), "w") as out:
out.write(json.dumps(None))
out.write(null)
self.peek = null
self.set_total_size()

def get_file_name(self, sync_cache: bool = True) -> str:
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/tool_util/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ class TestCollectionOutputAssertions(StrictModel):
class_: Optional[Literal["Collection"]] = Field("Collection", alias="class")
elements: Optional[Dict[str, TestCollectionElementAssertion]] = None
element_tests: Optional[Dict[str, "TestCollectionElementAssertion"]] = None
element_count: Optional[int] = None
attributes: Optional[CollectionAttributes] = None
collection_type: CollectionType = None

Expand Down
12 changes: 8 additions & 4 deletions lib/galaxy/tool_util/parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,10 +868,13 @@ def matches(ie_list: List, rel_path: str):
class TestCollectionOutputDef:
    """Expected state of a single output collection in a tool or workflow test.

    Holds the collection name, its expected collection type, an optional
    expected element count and the per-element assertions.
    """

    __test__ = False  # Prevent pytest from discovering this class (issue #12071)

    def __init__(self, name, attrib, element_tests, element_count: Optional[int] = None):
        """Build the expectation from parsed test attributes.

        :param name: name of the output collection under test.
        :param attrib: mapping of test attributes; ``type`` and ``count`` are read from it.
        :param element_tests: per-element assertions, may be ``None`` when only
            the collection shape is being checked.
        :param element_count: explicit expected number of elements; when given
            it takes precedence over ``attrib["count"]``.
        """
        self.name = name
        self.collection_type = attrib.get("type", None)
        # ``is not None`` rather than truthiness: an expected count of 0 is a
        # legitimate assertion (e.g. "everything was filtered out").
        count = element_count if element_count is not None else attrib.get("count")
        # XML attributes arrive as strings, so normalise to int.
        self.count = int(count) if count is not None else None
        self.attrib = attrib
        self.element_tests = element_tests
Expand All @@ -881,7 +884,8 @@ def from_dict(as_dict):
return TestCollectionOutputDef(
name=as_dict["name"],
attrib=as_dict.get("attributes", {}),
element_tests=as_dict["element_tests"],
element_tests=as_dict.get("element_tests"),
element_count=as_dict.get("element_count"),
)

@staticmethod
Expand All @@ -898,7 +902,7 @@ def from_yaml_test_format(as_dict):
return TestCollectionOutputDef.from_dict(as_dict)

def to_dict(self):
    """Serialize this definition to a plain dict.

    ``element_count`` is emitted alongside ``attributes`` so that an expected
    count (including 0) is not lost when the definition is serialized.
    """
    return dict(
        name=self.name,
        attributes=self.attrib,
        element_tests=self.element_tests,
        element_count=self.count,
    )


class DrillDownOptionsDict(TypedDict):
Expand Down
5 changes: 3 additions & 2 deletions lib/galaxy/tool_util/verify/interactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ def verify_collection(output_collection_def, data_collection, verify_dataset):
raise AssertionError(message)

expected_element_count = output_collection_def.count
if expected_element_count:
if expected_element_count is not None:
actual_element_count = len(data_collection["elements"])
if expected_element_count != actual_element_count:
message = f"Output collection '{name}': expected to have {expected_element_count} elements, but it had {actual_element_count}."
Expand Down Expand Up @@ -1185,7 +1185,8 @@ def verify_elements(element_objects, element_tests):
message = f"Output collection '{name}': identifier '{identifier}' found out of order, expected order of {expected_sort_order} for the tool generated collection elements {eo_ids}"
raise AssertionError(message)

verify_elements(data_collection["elements"], output_collection_def.element_tests)
if output_collection_def.element_tests:
verify_elements(data_collection["elements"], output_collection_def.element_tests)


def _verify_composite_datatype_file_content(
Expand Down
10 changes: 9 additions & 1 deletion lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3798,7 +3798,15 @@ class FilterNullTool(FilterDatasetsTool):
def element_is_valid(element: model.DatasetCollectionElement):
    """Decide whether a collection element survives the null filter.

    :param element: collection element whose ``element_object`` must be a
        dataset instance.
    :returns: ``False`` when the dataset is an ``expression.json`` whose
        content is the JSON literal ``null``; ``True`` for everything else.
    """
    element_object = element.element_object
    assert isinstance(element_object, model.DatasetInstance)
    # Only expression.json datasets can represent a null value.
    if element_object.extension != "expression.json":
        return True
    if element_object.peek == "null":
        # Shortcut: the peek already says null, so the file need not be opened.
        return False
    # The peek did not match, so inspect the file itself. Reading five
    # characters (one more than len("null")) means only a file whose entire
    # content is exactly "null" matches.
    with open(element_object.get_file_name()) as fh:
        return fh.read(5) != "null"


class FlattenTool(DatabaseOperationTool):
Expand Down
46 changes: 46 additions & 0 deletions lib/galaxy/tools/filter_null.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<!-- Collection operation tool that removes null elements from a collection. -->
<tool id="__FILTER_NULL__" name="Filter null elements" version="1.0.0" tool_type="filter_null">
    <description/>
    <!-- No <command> section: the behaviour comes from the FilterNullTool class
         and execution is delegated to ModelOperationToolAction. -->
    <type class="FilterNullTool" module="galaxy.tools"/>
    <action module="galaxy.tools.actions.model_operations" class="ModelOperationToolAction"/>
    <edam_operations>
        <edam_operation>operation_3695</edam_operation>
    </edam_operations>
    <inputs>
        <!-- Accepts flat lists and lists of pairs. -->
        <param type="data_collection" collection_type="list,list:paired" name="input" label="Input Collection"/>
    </inputs>
    <outputs>
        <!-- Output format and collection type are both taken from the input. -->
        <collection name="output" format_source="input" type_source="input" label="${on_string} (without null datasets)">
        </collection>
    </outputs>
    <tests>
        <!-- A single non-null element must be kept with its content intact. -->
        <test>
            <param name="input">
                <collection type="list">
                    <element name="e1" value="simple_line.txt"/>
                </collection>
            </param>
            <output_collection name="output" type="list" count="1">
                <element name="e1">
                    <assert_contents>
                        <has_text_matching expression="^This is a line of text.\n$"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

========
Synopsis
========

Removes null elements from a collection.

This tool takes a dataset collection and filters out nulls. This is useful for removing elements that resulted from conditional execution of jobs.

.. class:: infomark

This tool will create new history datasets from your collection but your quota usage will not increase.

]]></help>
</tool>
32 changes: 32 additions & 0 deletions lib/galaxy_test/workflow/filter_null.gxwf-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
- doc: |
    Test to verify filter null tool keeps non-null datasets.
  job:
    input_collection:
      collection_type: list
      elements:
        - identifier: first
          content: "abc"
    # when=true: the conditional step runs, so its output is a real dataset.
    when:
      value: true
      type: raw
  outputs:
    out:
      class: Collection
      collection_type: list
      element_count: 1
- doc: |
    Test to verify filter null tool discards null datasets.
  job:
    input_collection:
      collection_type: list
      elements:
        - identifier: first
          content: "abc"
    # when=false: the conditional step is skipped, so the filter should leave nothing.
    when:
      value: false
      type: raw
  outputs:
    out:
      class: Collection
      collection_type: list
      element_count: 0
22 changes: 22 additions & 0 deletions lib/galaxy_test/workflow/filter_null.gxwf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class: GalaxyWorkflow
inputs:
  input_collection:
    type: data_collection
  when:
    type: boolean
outputs:
  out:
    outputSource: filter_null/output
steps:
  # Conditional step: only executed when the boolean workflow input is true.
  cat:
    tool_id: cat
    in:
      input1:
        source: input_collection
      when:
        source: when
    when: $(inputs.when)
  # Filters the output collection of the conditional step.
  filter_null:
    tool_id: '__FILTER_NULL__'
    in:
      input: cat/out_file1
4 changes: 3 additions & 1 deletion lib/galaxy_test/workflow/test_framework_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def _verify(self, run_summary: RunJobsSummary, output_definitions: OutputsDict):
self._verify_output(run_summary, output_name, output_definition)

def _verify_output(self, run_summary: RunJobsSummary, output_name, test_properties: OutputChecks):
is_collection_test = isinstance(test_properties, dict) and "elements" in test_properties
is_collection_test = isinstance(test_properties, dict) and (
"elements" in test_properties or test_properties.get("class") == "Collection"
)
item_label = f"Output named {output_name}"

def get_filename(name):
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@
<tool file="${model_tools_path}/filter_failed_collection.xml" />
<tool file="${model_tools_path}/keep_success_collection.xml" />
<tool file="${model_tools_path}/filter_empty_collection.xml" />
<tool file="${model_tools_path}/filter_null.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/harmonize_two_collections_list.xml" />
Expand Down
Loading