galaxyproteomics · GraceAHall · Sep 27, 2021 · Sep 27, 2021 · Sep 27, 2021 · Sep 27, 2021
diff --git a/tools/hitmap/.shed.yml b/tools/hitmap/.shed.yml
@@ -0,0 +1,11 @@
+categories:
+- Proteomics
+description: 'High-resolution MALDI imaging Proteomics Analysis'
+homepage_url: https://github.com/MASHUOA/HiTMaP
+long_description: |
+  Hitmap uses peptide mass fingerprint analysis and a dual scoring system to computationally assign peptide and protein annotations to high mass resolution MALDI-MSI datasets and generate customisable spatial distribution maps.
+
+  Note: consumes large amounts of memory during runtime. Run on a node with at least 64 GB of memory.
+name: hitmap
+owner: galaxyp
+remote_repository_url: https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/hitmap  # galaxyp-owned wrappers live in tools-galaxyp, not tools-iuc
diff --git a/tools/hitmap/hitmap.xml b/tools/hitmap/hitmap.xml
@@ -0,0 +1,297 @@
+
+<tool id="hitmap" name="HiT-MaP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
+    <description>High-resolution Maldi-imaging Proteomics Analysis</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros> <!-- NOTE(review): macros.xml is not shown here; presumably it defines the @TOOL_VERSION@ and @VERSION_SUFFIX@ tokens and the hitmap_config_script macro used by this tool (confirm) -->
+
+    <edam_topics>
+        <edam_topic>topic_0121</edam_topic>
+    </edam_topics> <!-- NOTE(review): topic_0121 is presumably the EDAM "Proteomics" topic; confirm against the ontology -->
+
+    <edam_operations>
+        <edam_operation>operation_3630</edam_operation>
+    </edam_operations> <!-- NOTE(review): confirm operation_3630 is the intended EDAM operation for this analysis -->
+
+    <requirements>
+        <container type="docker">mashuoa/hitmap@sha256:97f2f853362b56be6c2b0fb1507e0288667e7fb6ad0eda3ff1d761dd72dfaad0</container>   
+    </requirements> <!-- the only requirement is a Docker image referenced by sha256 digest rather than by tag -->
+
+    <command detect_errors="exit_code"><![CDATA[
+        mkdir expdata &&
+        cp '${file_inputs.imzml_file}' expdata/sample.imzML &&
+        cp '${file_inputs.ibd_file}' expdata/sample.ibd &&
+        cp '${file_inputs.fasta_db}' expdata/database.fasta &&
+        Rscript '${hitmap_script}' &&
+        python3 '$__tool_directory__/hitmap_gen_html.py' expdata &&
+        zip -r hitmap_output.zip expdata
+    ]]></command> <!-- Stages the three inputs under expdata/ with fixed names, runs the generated R script, builds the HTML report, then zips expdata/. The script path is quoted so a tool directory containing spaces cannot split the argument. -->
+
+    <configfiles>   
+        <expand macro="hitmap_config_script" />
+    </configfiles> <!-- NOTE(review): the expanded macro is defined outside this file; presumably it emits the configfile named hitmap_script that the command passes to Rscript (confirm in macros.xml) -->
+
+    <inputs>
+        <section name="file_inputs" title="File Inputs" expanded="True">
+            <param name="imzml_file"
+                   label="MALDI-imaging imzML file"
+                   type="data"
+                   format="mzml"
+                   help="Input *.imzML file. Must have same basename as counterpart *.ibd. "/> <!-- copied by the command to expdata/sample.imzML -->
+
+            <param name="ibd_file"
+                   label="MALDI-imaging binary ibd file"
+                   type="data"
+                   format="binary"
+                   help="Input *.ibd file. Must have same basename as *.imzML. "/> <!-- copied by the command to expdata/sample.ibd -->
+
+            <param name="fasta_db"
+                   label="Proteome"
+                   type="data"
+                   format="fasta"
+                   help="Proteome of organism in fasta format. The proteome for a particular organism can be downloaded from https://www.uniprot.org/proteomes/. file must be fasta format. "/> <!-- copied by the command to expdata/database.fasta -->
+        </section>
+
+
+        <section name="peptide_identification" title="Peptide Identification" expanded="True">
+            <param  name="digestion_site"
+                    type="select"
+                    label="Digestion site"
+                    help="Digestion enzyme used in sample preparation">
+                        <option value="arg-c proteinase">arg-c proteinase</option>
+                        <option value="asp-n endopeptidase">asp-n endopeptidase</option>
+                        <option value="bnps-skatole">bnps-skatole</option>
+                        <option value="caspase1">caspase1</option>
+                        <option value="caspase2">caspase2</option>
+                        <option value="caspase3">caspase3</option>
+                        <option value="caspase4">caspase4</option>
+                        <option value="caspase5">caspase5</option>
+                        <option value="caspase6">caspase6</option>
+                        <option value="caspase7">caspase7</option>
+                        <option value="caspase8">caspase8</option>
+                        <option value="caspase9">caspase9</option>
+                        <option value="caspase10">caspase10</option>
+                        <option value="chymotrypsin-high">chymotrypsin-high</option>
+                        <option value="chymotrypsin-low">chymotrypsin-low</option>
+                        <option value="clostripain">clostripain</option>
+                        <option value="cnbr">cnbr</option>
+                        <option value="enterokinase">enterokinase</option>
+                        <option value="factor xa">factor xa</option>
+                        <option value="formic acid">formic acid</option>
+                        <option value="glutamyl endopeptidase">glutamyl endopeptidase</option>
+                        <option value="granzyme-b">granzyme-b</option>
+                        <option value="hydroxylamine">hydroxylamine</option>
+                        <option value="iodosobenzoic acid">iodosobenzoic acid</option>
+                        <option value="lysc">lysc</option>
+                        <option value="lysn">lysn</option>
+                        <option value="neutrophil elastase">neutrophil elastase</option>
+                        <option value="nctb">nctb</option>
+                        <option value="pepsin">pepsin</option>
+                        <option value="pepsin1.3">pepsin1.3</option>
+                        <option value="proline endopeptidase">proline endopeptidase</option>
+                        <option value="proteinase k">proteinase k</option>
+                        <option value="staphylococcal peptidase i">staphylococcal peptidase i</option>
+                        <option value="thermolysin">thermolysin</option>
+                        <option value="thrombin">thrombin</option>
+                        <option value="trypsin" selected="True">trypsin</option>
+            </param> <!-- trypsin is the preselected enzyme -->
+
+            <param  name="missed_cleavages"
+                    type="integer"
+                    value="1"
+                    default_value="1"
+                    label="missed cleavages"
+                    help="max expected number of missed peptide cleavages during digestion" />
+
+            <param  name="ppm"
+                    type="integer"
+                    value="5"
+                    default_value="5"
+                    label="precursor ion m/z tolerance"
+                    help="Error in parts per million (ppm) for matching the monoisotopic peak of a species with the observed peak in the experimental spectrum"/>
+
+            <param  name="spectra_segments_per_file"
+                    type="integer"
+                    value="4"
+                    default_value="4"
+                    label="Spectra segments"
+                    help="(Integer) Expected number of different spectra segments. Will usually be equal to the number of different tissue types in the sample." /> <!-- non-optional integer: "value" must hold a real number, so the previously empty value now carries the declared default of 4 -->
+
+            <param  name="fdr_cutoff"
+                    type="float"
+                    min="0"
+                    max="1"
+                    value="0.05"
+                    default_value="0.05"
+                    label="FDR cutoff"
+                    help="FDR threshold for peptide matching and protein annotation. FDR of 0.05 promises no greater than 5% of identified peptides are false positives. Uses ratio of peptide score in real vs decoy database."/>
+        </section>
+
+        <section name="modifications" title="Modifications" expanded="False">
+            <param name="fixed"
+                   label="Fixed modification type"
+                   type="text"
+                   value=""
+                   default_value=""
+                   help='Comma-separated list of fixed modifications using unimod standard. Either the Interim name or Accession # can be used.  List of modifications can be found at http://www.unimod.org/.  example: "Phospho" or "21" will both add the Phospho- modification'/>
+
+            <param name="fixmod_position"
+                   label="Fixed modification position"
+                   type="select"
+                   help="Position of the fixed modification">
+                <option value="None" selected="true">None</option>
+                <option value="Anywhere">Anywhere</option>
+                <option value="Any N-term">Any N-term</option>
+                <option value="Any C-term">Any C-term</option>
+                <option value="Protein N-term">Protein N-term</option>
+                <option value="Protein C-term">Protein C-term</option>
+            </param> <!-- "None" is preselected for the fixed modification -->
+
+            <param name="variable"
+                   label="Variable modification type"
+                   type="text"
+                   value=""
+                   default_value=""
+                   help='Comma-separated list of variable modifications using unimod standard. Either the Interim name or Accession # can be used.  List of modifications can be found at http://www.unimod.org/.  example: "Phospho" or "21" will both add the Phospho- modification'/>
+
+            <param name="varmod_position"
+                   label="Variable modification position"
+                   type="select"
+                   help="Position of the variable modification">
+                <option value="None" selected="true">None</option>
+                <option value="Anywhere">Anywhere</option>
+                <option value="Any N-term">Any N-term</option>
+                <option value="Any C-term">Any C-term</option>
+                <option value="Protein N-term">Protein N-term</option>
+                <option value="Protein C-term">Protein C-term</option>
+            </param> <!-- "None" is preselected for the variable modification -->
+        </section>
+
+        <section name="cluster_ions" title="Proteins of interest" expanded="false">
+            <param name="proteins_of_interest"
+                   label="Cluster ion imaging"
+                   type="text"
+                   value=""
+                   default_value=""
+                   help="Comma separated list of proteins of interest. For each protein, an image map will be created for the protein and its individual peptides. This image shows the distribution and concentration of the protein and its peptides across the sample. List is case-insensitive. Example: Crystallin, phakinin, filensin, Actin"/>
+        </section> <!-- single free-text field; empty by default -->
+
+        <section name="advanced" title="Advanced" expanded="False">
+            <param  name="threshold"
+                    type="float"
+                    min="0"
+                    max="1"
+                    value="0.005"
+                    default_value="0.005"
+                    label="Ion spectra noise threshold"
+                    help="Baseline relative m/z intensity to consider peak. Used to exclude noise. Higher values ignore more noise, but may miss true peaks. "/>
+
+            <param  name="decoy_mode"
+                    type="select"
+                    label="Decoy database mode"
+                    default_value="isotope"
+                    help="Strategy for building decoy database. Isotope is recommended.">
+                        <option value="isotope" selected="true">isotope</option>
+                        <option value="elements">elements</option>
+                        <option value="adducts">adducts</option>
+            </param> <!-- isotope is the preselected decoy strategy -->
+
+            <param  name="adducts"
+                    type="text"
+                    value="M+H"
+                    default_value="M+H"
+                    label="adducts"
+                    help='Quoted, comma-separated list of naturally occurring or sample prep-induced adducts.  H+ adducts are usually the most abundant, but Na+ adducts are also common depending on tissue and sample processing. Each adduct pattern added to the list greatly increases runtime and memory usage. example1: "M+H"  example2: "M+H","M+Na".' /> <!-- NOTE(review): help asks for quoted entries while the default M+H is unquoted; confirm how the config script consumes this value -->
+        </section>
+    </inputs>
+
+    <outputs>
+        <data name="html_report"
+              label="${tool.name} on ${on_string}: HTML report"
+              format="html"
+              from_work_dir="hitmap_report.html"/> <!-- NOTE(review): presumably written by hitmap_gen_html.py; confirm -->
+
+        <data name="out_data"
+              label="${tool.name} on ${on_string}: data package"
+              format="zip"
+              from_work_dir="hitmap_output.zip"/> <!-- archive created by the final zip step of the command -->
+    </outputs>
+
+    <tests>
+        <test> <!-- single end-to-end run; only the HTML report is asserted, the zip output is not checked -->
+            <param name="imzml_file" value="Bovin_lens.imzML" />
+            <param name="ibd_file" value="Bovin_lens.ibd" />
+            <param name="fasta_db" value="uniprot-bovin.fasta" /> <!-- the three file inputs are given without their enclosing file_inputs section -->
+            <section name="peptide_identification">
+                <param name="digestion_site" value="trypsin" />
+                <param name="missed_cleavages" value="2" /> <!-- differs from the tool default of 1 -->
+                <param name="ppm" value="5" />
+                <param name="spectra_segments_per_file" value="4" />
+                <param name="fdr_cutoff" value="0.05" />
+            </section>
+            <section name="modifications">
+                <param name="fixed" value="" />
+                <param name="fixmod_position" value="None" />
+                <param name="variable" value="" />
+                <param name="varmod_position" value="None" />
+            </section>
+            <section name="cluster_ions">
+                <param name="proteins_of_interest" value="Crystallin, phakinin, filensin, Actin" />
+            </section>
+
+            <output name="html_report" ftype="html">
+                <assert_contents> <!-- each assertion looks for the opening div of one report section -->
+                    <has_text text='div id="segmentation" class="row"' />
+                    <has_text text='div id="pca" class="row"' />
+                    <has_text text='div id="scree" class="row"' />
+                    <has_text text='div id="segment-mean-spectra" class="row"' />
+                    <has_text text='div id="outliers" class="row"' />
+                </assert_contents>
+            </output>
+        </test> 
+    </tests>
+
+    <help><![CDATA[
+
+    HiT-MaP
+    .......
+
+    **What it does**
+
+    HiT-MaP is an R package providing a High-resolution Informatics Toolbox for Maldi-imaging Proteomics.
+
+    This tool is still in development and has been provided to the community for early access. 
+
+    GitHub: https://github.com/MASHUOA/HiTMaP
+
+    Publication: https://doi.org/10.1038/s41467-021-23461-w
+
+    **Issues**
+
+    | Known issues include parallel::mccollect errors and "subscript out of bounds" errors.
+    | Rerunning the job sometimes solves the issue. 
+    | Please report errors using the bug icon and they will be raised with HiT-MaP developers. 
+
+    **Input**
+
+    - MALDI imaging dataset (imzML + ibd)
+    - Proteome database (fasta)
+
+    (Examples of MALDI imaging datasets can be found at 
+    https://www.ebi.ac.uk/pride/archive?keyword=imzml&sortDirection=DESC&page=0&pageSize=20)
+
+    **Output**
+
+    - HTML report (html)
+    - Zipped directory containing all program output files (zip)
+
+    |
+
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1038/s41467-021-23461-w</citation>
+    </citations> <!-- same DOI as the Publication link in the help text -->
+
+</tool>