Skip to content

Commit

Permalink
Merge branch 'release_23.1' into release_23.2
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Jan 15, 2024
2 parents 8ae1db1 + 5ba634b commit a2cb914
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 6 deletions.
3 changes: 3 additions & 0 deletions lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@
<display file="igv/interval_as_bed.xml" inherit="true"/>
</datatype>
<datatype extension="jellyfish" type="galaxy.datatypes.binary:Binary" subclass="true" display_in_upload="true" description="Jellyfish database files are k-mer counts in binary format with a readable head. They are operated on and converted to human-readable text through jellyfish commands." />
<datatype extension="ktab" type="galaxy.datatypes.binary:Binary" subclass="true" description="A table of canonical k‑mers and their counts for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
<datatype extension="hist" type="galaxy.datatypes.binary:Binary" subclass="true" description="A binary histogram file of kmers and frequencies for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
<datatype extension="prof" type="galaxy.datatypes.binary:Binary" subclass="true" description="Read profile file for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>

<!-- ISA data types -->
<datatype extension="isa-tab" type="galaxy.datatypes.isa:IsaTab" mimetype="application/isa-tools" display_in_upload="true" description="ISA-Tab data type." description_url="https://isa-tools.org"/>
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/config/sample/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
<tool file="filters/bed_to_bigbed.xml" />
</section>
<section id="filter" name="Filter and Sort">
<tool file="stats/filtering_1_1_0.xml" />
<tool file="stats/filtering.xml" />
<tool file="filters/sorter.xml" />
<tool file="filters/grep.xml" />
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tool_util/toolbox/views/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def definition_with_items_to_panel(definition, allow_sections: bool = True, item
f"Failed to find matching section for (id, name) = ({section_def.id}, {section_def.name})"
)
continue
section = closest_section.copy()
section = closest_section.copy(merge_tools=True)
if section_def.id is not None:
section.id = section_def.id
if section_def.name is not None:
Expand Down
11 changes: 6 additions & 5 deletions lib/galaxy/workflow/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class ConditionalStepWhen(BooleanToolParameter):

def to_cwl(value, hda_references, step):
element_identifier = None
if isinstance(value, model.HistoryDatasetCollectionAssociation):
value = value.collection
if isinstance(value, model.DatasetCollectionElement) and value.hda:
element_identifier = value.element_identifier
value = value.hda
Expand Down Expand Up @@ -155,14 +157,13 @@ def to_cwl(value, hda_references, step):
properties, value.dataset.created_from_basename or element_identifier or value.name
)
return properties
elif hasattr(value, "collection"):
collection = value.collection
if collection.collection_type == "list":
return [to_cwl(dce, hda_references=hda_references, step=step) for dce in collection.dataset_elements]
elif isinstance(value, model.DatasetCollection):
if value.collection_type == "list":
return [to_cwl(dce, hda_references=hda_references, step=step) for dce in value.dataset_elements]
else:
# Could be record or nested lists
rval = {}
for element in collection.elements:
for element in value.elements:
rval[element.element_identifier] = to_cwl(
element.element_object, hda_references=hda_references, step=step
)
Expand Down
13 changes: 13 additions & 0 deletions test/unit/workflows/test_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,19 @@ def test_to_cwl():
assert hda_references == hdas


def test_to_cwl_nested_collection():
    """Check that ``to_cwl`` recurses through a nested ``list:list`` collection.

    A single dataset is placed in an inner ``list`` collection, which is in
    turn the only element of an outer ``list:list`` collection wrapped in a
    history dataset collection association. The converted value is expected
    to be keyed by the outer element identifier and to contain a list whose
    first entry describes the dataset as a CWL ``File``.
    """
    dataset_instance = model.HistoryDatasetAssociation(create_dataset=True, flush=False)
    dataset_instance.dataset.state = model.Dataset.states.OK

    # Inner flat list holding the single dataset.
    inner_collection = model.DatasetCollection(collection_type="list")
    model.DatasetCollectionElement(
        collection=inner_collection,
        element_identifier="inner",
        element=dataset_instance,
    )

    # Outer collection whose only element is the inner list.
    outer_collection = model.DatasetCollection(collection_type="list:list")
    model.DatasetCollectionElement(
        collection=outer_collection,
        element_identifier="outer",
        element=inner_collection,
    )

    collection_association = model.HistoryDatasetCollectionAssociation(
        name="the collection",
        collection=outer_collection,
    )

    converted = modules.to_cwl(collection_association, [], model.WorkflowStep())

    first_file = converted["outer"][0]
    assert first_file["class"] == "File"
    assert first_file["basename"] == "inner"


class MapOverTestCase(NamedTuple):
data_input: str
step_input_def: Union[str, List[str]]
Expand Down
103 changes: 103 additions & 0 deletions tools/stats/filtering_1_1_0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
<tool id="Filter1" name="Filter" version="1.1.0">
<description>data on any column using simple expressions</description>
<edam_operations>
<edam_operation>operation_0335</edam_operation>
</edam_operations>
<command>
python '$__tool_directory__/filtering.py' '$input' '$out_file1' '$inputs' ${input.metadata.columns} "${input.metadata.column_types}" $header_lines
</command>
<configfiles>
<inputs name="inputs" />
</configfiles>
<inputs>
<param format="tabular" name="input" type="data" label="Filter" help="Dataset missing? See TIP below."/>
<param name="cond" type="text" value="c1=='chr22'" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool.">
<validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/>
<sanitizer>
<valid initial="string.printable"/>
</sanitizer>
</param>
<param name="header_lines" type="integer" value="0" label="Number of header lines to skip"/>
</inputs>
<outputs>
<data format_source="input" name="out_file1" metadata_source="input"/>
</outputs>
<tests>
<test>
<param name="input" value="1.bed"/>
<param name="cond" value="c1=='chr22'"/>
<param name="header_lines" value="0"/>
<output name="out_file1" file="filter1_test1.bed"/>
</test>
<test>
<param name="input" value="7.bed"/>
<param name="cond" value="c1=='chr1' and c3-c2>=2000 and c6=='+'"/>
<param name="header_lines" value="0"/>
<output name="out_file1" file="filter1_test2.bed"/>
</test>
<!-- Test filtering of file with a variable number of columns. -->
<test>
<param name="input" value="filter1_in3.sam"/>
<param name="cond" value="c3=='chr1' and c5>5"/>
<param name="header_lines" value="0"/>
<output name="out_file1" file="filter1_test3.sam"/>
</test>
<test>
<param name="input" value="filter1_inbad.bed"/>
<param name="cond" value="c1=='chr22'"/>
<param name="header_lines" value="0"/>
<output name="out_file1" file="filter1_test4.bed"/>
</test>
<test>
<param name="input" value="filter1_in5.tab"/>
<param name="cond" value="c8>500"/>
<param name="header_lines" value="1"/>
<output name="out_file1" file="filter1_test5.tab"/>
</test>
<test>
<param name="input" value="filter1_in6.bed"/>
<param name="cond" value="c2=='100%'"/>
<param name="header_lines" value="0"/>
<output name="out_file1" file="filter1_test6.bed"/>
</test>
</tests>
<help>

.. class:: warningmark

Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**)

.. class:: infomark

**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the columns being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings). If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition. The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue".

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**Syntax**

The filter tool allows you to restrict the dataset using simple conditional statements.

- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
- Make sure that multi-character operators contain no white space ( e.g., **&lt;=** is valid while **&lt; =** is not valid )
- When using the 'equal-to' operator, a **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
- Non-numerical values must be included in single or double quotes ( e.g., **c6=='+'** )
- Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or not c6=='+'** )

-----

**Example**

- **c1=='chr1'** selects lines in which the first column is chr1
- **c3-c2&lt;100*c4** selects lines where subtracting column 2 from column 3 gives a value less than the value of column 4 times 100
- **len(c2.split(',')) &lt; 4** will select lines where the second column has fewer than four comma-separated elements
- **c2>=1** selects lines in which the value of column 2 is greater than or equal to 1
- Numbers should not contain commas - **c2&lt;=44,554,350** will not work, but **c2&lt;=44554350** will
- Some words in the data can be used, but must be single or double quoted ( e.g., **c3=='exon'** )

</help>
<citations/>
</tool>

0 comments on commit a2cb914

Please sign in to comment.