diff --git a/CHANGES.md b/CHANGES.md
index a01eb20..e6baaea 100755
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,13 @@
 # CHANGES
 
+## 2.5.0
+
+* Added run-cgprna subcommands `tophat-fusion`, `star-fusion` and `defuse` to run the Infuse pipeline.
+* Re-organized expected file structure in Star reference bundle. Previous Star reference bundle on the FTP server will not work for this version.
+* Added CWL files to run Infuse pipeline in cgpRna and example JSON files for using them with Dockstore.
+* Uploaded new set of reference bundle files for GRCh38 and GRCh37d5 to the FTP server.
+* Updated `setup.sh` to install Python3 RSeQC and HTSeq.
+
 ## 2.4.1
 
 * added a patched topaht-fusion-post script, in which the contig name bug #37 is fixed
diff --git a/Dockerfile b/Dockerfile
index d1bfd16..f6b2772 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -91,7 +91,7 @@ FROM ubuntu:16.04
 
 LABEL maintainer="cgphelp@sanger.ac.uk" \
       uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" \
-      version="2.4.1" \
+      version="2.5.0" \
       description="cgpRna docker"
 
 # Version of tools that are installed in both stages, make sure they are consistent.
@@ -139,7 +139,7 @@ RUN locale-gen en_US.UTF-8
 RUN update-locale LANG=en_US.UTF-8
 
 ENV OPT /opt/wtsi-cgp
-ENV PATH $OPT/bin:$OPT/biobambam2/bin:$OPT/python-lib/bin/:$PATH
+ENV PATH $OPT/bin:$OPT/biobambam2/bin:$OPT/python-lib/bin:$PATH
 ENV PERL5LIB $OPT/lib/perl5
 ENV R_LIBS $OPT/R-lib
 ENV R_LIBS_USER $R_LIBS
diff --git a/README.md b/README.md
index 5aff890..98ffc59 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,5 @@
 # cgpRna
 
-[![Quay Badge][quay-status]][quay-repo]
-
 | Master                                        | Develop                                         |
 | --------------------------------------------- | ----------------------------------------------- |
 | [![Master Badge][travis-master]][travis-base] | [![Develop Badge][travis-develop]][travis-base] |
@@ -9,29 +7,43 @@
 cgpRna provides pipelines, for RNA-Seq data, that implement commonly used mapping
 and analysis programs, such as TopHat and rna-star.
 At the present time (May 2016), only pipelines for mapping (with STAR), lane QC
-and fusion gene detection are included in this codebase but this will be added
-to over time with; differential expression, gene/transcript quantification, splice
-variant analysis and allele specific expression.
+and fusion gene detection are included in this codebase.
+
+## Docker container
+
+cgpRna is available as a Docker container on [Quay.io][quay-repo].
+
+[![Quay Badge][quay-status]][quay-repo]
+
+## Workflows on Dockstore
+
+Due to an [issue](https://github.com/dockstore/dockstore/issues/2923) in Dockstore, we have not yet registered any of the workflows in the `cwls` folder, because the inputs of two of them use two-dimensional arrays. Once the issue is resolved, we'll test our workflows with the newer release of Dockstore, register them and list their registry entries here.
 
 ## Dependencies and Installation
 
-Please install Perl packages:
+If you want to install cgpRna locally, follow the instructions below. However, we recommend using the Docker container.
+
+### Dependencies
+
+cgpRna depends on these Perl packages, so they need to be installed first:
 
 * [PCAP-core](https://github.com/ICGC-TCGA-PanCancer/PCAP-core/releases)
 * [VAGrENT](https://github.com/cancerit/VAGrENT/releases)
 * [cgpVcf](https://github.com/cancerit/cgpVcf/releases)
-* [Grass](https://github.com/cancerit/grass/releases) first.
+* [Grass](https://github.com/cancerit/grass/releases)
+
+Note: samtools is also a dependency but this is installed by PCAP-Core.
 
-Prerequisites for the [RSeQC](http://rseqc.sourceforge.net/#installation) software are:
+cgpRna uses [RSeQC](http://rseqc.sourceforge.net/#installation) and its prerequisites are:
 
 * gcc
-* [python2.7](https://www.python.org/downloads/)
-  * The minimum version the pipeline has been tested with is python-2.7.6
+* [python3](https://www.python.org/downloads/) and pip3 executable.
 * [R](https://www.r-project.org/)
 * [numpy](http://www.numpy.org/)
 
-Once that is done and your $PATH environment variable has been updated so that newly installed
-software can be found, run the following to install cgpRna:
+### Installation
+
+Once dependencies mentioned above are installed, run the following to install cgpRna:
 
 ```
 ./setup.sh path_to_install_to
@@ -53,7 +65,6 @@ N.B. the path_to_install_to should be the same as the install location used for
 * [bedtools](https://github.com/arq5x/bedtools2/) Unless already in the install location bin directory
 * [blastn](http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download) Used by tophat-fusion post
 * [HTSeq](https://pypi.python.org/packages/3c/6e/f8dc3500933e036993645c3f854c4351c9028b180c6dcececde944022992/HTSeq-0.6.1p1.tar.gz) used for read counting
-N.B. samtools is also a dependency but this is installed by PCAP-Core which should have already been installed (see above).
 
 If you are planning to use the fusion pipeline, specifically defuse_fusion.pl, the deFuse config.txt
 file will need to be updated with the installed locations of a number of tools.
@@ -63,7 +74,7 @@ the locations and update the file as instructed.
 ## LICENCE
 
 ```
-Copyright (c) 2014-2017 Genome Research Ltd.
+Copyright (c) 2014-2019 Genome Research Ltd.
 
 Author: Cancer Genome Project <cgpit@sanger.ac.uk>
 
diff --git a/cwls/cgpRna_with_infuse.cwl b/cwls/cgpRna_with_infuse.cwl
new file mode 100644
index 0000000..86dcde4
--- /dev/null
+++ b/cwls/cgpRna_with_infuse.cwl
@@ -0,0 +1,303 @@
+#!/usr/bin/env cwl-runner
+
+class: Workflow
+
+id: "multi-lane-cgprna-with-infuse-workflow"
+
+label: "workflow to generate mapping stats, counts and fusion events"
+
+cwlVersion: v1.0
+
+requirements:
+  - class: ScatterFeatureRequirement
+  - class: SubworkflowFeatureRequirement
+  - class: InlineJavascriptRequirement
+  - class: StepInputExpressionRequirement
+
+inputs:
+  raw_reads:
+    doc: "RAW read input, can be BAM files or pairs of FastQ files (optionally gzip compressed). Each element of this array will be treated as one read group in the aligned BAM file. Within each element, only either BAM files or FastQ files are allowed."
+    type:
+      type: array
+      items:
+        type: array
+        items: File
+
+  map_reference:
+    type: File
+    doc: "The core STAR reference and a GTF file bundled in a tar.gz."
+
+  sample_name:
+    type: string
+    doc: "Sample name, which will be used to prefix output file names and SM tag in the BAM file header."
+    default: ''
+
+  stats_reference:
+    type: File
+    doc: "The reference files bundled in a tar.gz."
+
+  count_reference:
+    type: File
+    doc: "A reference GTF file."
+
+  bigwig_reference:
+    type: File
+    doc: "FASTA file of a reference file, which the input BAM file was mapped to."
+    secondaryFiles:
+    - .fai
+
+  bigwig_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for generating bigwig."
+
+  tophat_fusion_reference:
+    type: File
+    doc: "Tophat fusion reference bundle tar file."
+
+  defuse_reference:
+    type: File
+    doc: "Defuse reference bundle tar file."
+
+  vagrent_cache:
+    type: File
+    doc: "VAGrENT cache file that should be the same reference and gene build as the GTF file."
+    secondaryFiles:
+      - ".tbi"
+      - $(self.basename.replace("cache.gz", "fa"))
+      - $(self.basename.replace("cache.gz", "fa.fai"))
+
+  map_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for each mapping process."
+
+  merge_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for merging step."
+
+  tophat_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for tophat fusion process."
+
+  star_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for star fusion process."
+
+  defuse_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for defuse process."
+
+  rg_id_tags:
+    type:
+      type: array
+      items: ["null", string]
+    doc: "Readgroup ID tag values. It should have one value for each group of input raw files. Use empty string to use defaults or existing RG ID in the input BAM. It only uses the RG ID value in the first BAM file of a group."
+
+  lb_tags:
+    type:
+      type: array
+      items: ["null", string]
+    doc: "Sequencing library tag values in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing LB tag in the input BAM. It only uses the LB tag value in the first BAM file of a group."
+
+  ds_tags:
+    type:
+      type: array
+      items: ["null", string]
+    doc: "Description tag value in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing DS tag in the input BAM. It only uses the DS tag value in the first BAM file of a group."
+
+  pl_tags:
+    type:
+      type: array
+      items: ["null", string]
+    doc: "Platform tag value in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing PL tag in the input BAM. It only uses the PL tag value in the first BAM file of a group."
+
+  pu_tags:
+    type:
+      type: array
+      items: ["null", string]
+    doc: "Platform unit tag value in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing PU tag in the input BAM. It only uses the PU tag value in the first BAM file of a group."
+
+outputs:
+  dup_marked_bam:
+    type: File
+    outputSource: merge/dup_marked_merged_bam
+
+  dup_marked_bam_md5:
+    type: File
+    outputSource: merge/dup_marked_bam_md5
+
+  dup_marked_bam_dup_met:
+    type: File
+    outputSource: merge/dup_marked_bam_dup_met
+
+  transcriptome_lane_bams:
+    type: File[]
+    outputSource: map_and_stats/transcriptome_bam
+
+  dup_marked_lane_bam_dup_mets:
+    type: File[]
+    outputSource: map_and_stats/dup_marked_bam_dup_met
+
+  rna_bas_files:
+    type: File[]
+    outputSource: map_and_stats/rna_bas
+
+  gene_cover_pngs:
+    type: File[]
+    outputSource: map_and_stats/gene_cover_png
+
+  gene_body_coverage_rscripts:
+    type: File[]
+    outputSource: map_and_stats/gene_body_coverage_rscript
+
+  gene_body_coverage_txts:
+    type: File[]
+    outputSource: map_and_stats/gene_body_coverage_txt
+
+  gene_body_coverage_updated_rscripts:
+    type: File[]
+    outputSource: map_and_stats/gene_body_coverage_updated_rscript
+
+  read_dists:
+    type: File[]
+    outputSource: map_and_stats/read_dist
+
+  out_bw:
+    type: File
+    outputSource: bigwig/out_bw
+
+  out_count:
+    type: File
+    outputSource: count/out_count
+
+  tophat_fusions:
+    type: File
+    outputSource: infuse/tophat_fusions
+
+  star_fusions:
+    type: File
+    outputSource: infuse/star_fusions
+
+  defuse_fusions:
+    type: File
+    outputSource: infuse/defuse_fusions
+
+  combined_fusions:
+    type: File
+    outputSource: infuse/combined_fusions
+
+steps:
+  map_and_stats:
+    in:
+      raw_reads:
+        source: raw_reads
+      map_reference:
+        source: map_reference
+      sample_name:
+        source: sample_name
+      stats_reference:
+        source: stats_reference
+      map_threads:
+        source: map_threads
+      rg_id_tag:
+        source: rg_id_tags
+      lb_tag:
+        source: lb_tags
+      ds_tag:
+        source: ds_tags
+      pl_tag:
+        source: pl_tags
+      pu_tag:
+        source: pu_tags
+    out: [dup_marked_bam, dup_marked_bam_dup_met, transcriptome_bam, rna_bas, gene_cover_png, gene_body_coverage_rscript, gene_body_coverage_txt, gene_body_coverage_updated_rscript, read_dist]
+    scatter: [raw_reads, rg_id_tag, lb_tag, ds_tag, pl_tag, pu_tag]
+    scatterMethod: dotproduct
+    run: tools/lane_map_and_stats.cwl
+
+  merge:
+    in:
+      sorted_bams:
+        source: map_and_stats/dup_marked_bam
+      threads:
+        source: merge_threads
+      out_bam_name:
+        source: sample_name
+        valueFrom: $(self).bam
+      out_bam_index_name:
+        source: sample_name
+        valueFrom: $(self).bam.bai
+      md5_file_name:
+        source: sample_name
+        valueFrom: $(self).bam.md5
+      dup_met_file_name:
+        source: sample_name
+        valueFrom: $(self).bam.met
+    out: [dup_marked_merged_bam, dup_marked_bam_dup_met, dup_marked_bam_md5]
+    run: tools/merge_and_mark_dups.cwl
+
+  bigwig:
+    in:
+      sample_bam:
+        source: merge/dup_marked_merged_bam
+      reference:
+        source: bigwig_reference
+      threads:
+        source: bigwig_threads
+    out: [out_bw]
+    run: tools/run-cgprna_bigwig.cwl
+
+  count:
+    in:
+      sample_bam:
+        source: merge/dup_marked_merged_bam
+      reference:
+        source: count_reference
+    out: [out_count]
+    run: tools/run-cgprna_htseq-count.cwl
+
+  infuse:
+    in:
+      in_bam:
+        source: merge/dup_marked_merged_bam
+      sample_name:
+        source: sample_name
+      tophat_fusion_reference:
+        source: tophat_fusion_reference
+      star_reference:
+        source: map_reference
+      defuse_reference:
+        source: defuse_reference
+      gtf:
+        source: count_reference
+      vagrent_cache:
+        source: vagrent_cache
+      tophat_threads:
+        source: tophat_threads
+      star_threads:
+        source: star_threads
+      defuse_threads:
+        source: defuse_threads
+    out: [tophat_fusions, star_fusions, defuse_fusions, combined_fusions]
+    run: infuse_pipeline.cwl
+
+doc: |
+  A workflow to generate mapping stats, gene counts and fusion events from RNA-seq data using cgpRna container. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0
+
+s:author:
+  - class: s:Person
+    s:email: mailto:yx2@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/cwls/cgpRna_workflow.cwl b/cwls/cgpRna_workflow.cwl
index bfd8d33..702f366 100644
--- a/cwls/cgpRna_workflow.cwl
+++ b/cwls/cgpRna_workflow.cwl
@@ -4,7 +4,7 @@ class: Workflow
 
 id: "multi-lane-sample-workflow"
 
-label: "A CGP workflow to generate mapping stats and gene counts from RNA-seq data using tools in cgpRna"
+label: "workflow to generate mapping stats and gene counts"
 
 cwlVersion: v1.0
 
@@ -16,7 +16,7 @@ requirements:
 
 inputs:
   raw_reads:
-    doc: "RAW read input, can be multiple bam files, or several pairs of FastQ files (optionally gzip compressed), but not a mixture of BAM and FastQs. They'll be treated as if they were from the same lane of a sample, i.e. all reads will have the same read group ID in the mapped BAM."
+    doc: "RAW read input, can be BAM files or pairs of FastQ files (optionally gzip compressed). Each element of this array will be treated as one read group in the aligned BAM file. Within each element, only either BAM files or FastQ files are allowed."
     type:
       type: array
       items:
diff --git a/cwls/infuse_pipeline.cwl b/cwls/infuse_pipeline.cwl
new file mode 100644
index 0000000..84a731e
--- /dev/null
+++ b/cwls/infuse_pipeline.cwl
@@ -0,0 +1,158 @@
+#!/usr/bin/env cwl-runner
+
+class: Workflow
+
+id: "infuse-workflow"
+
+label: "workflow to detect fusion events"
+
+cwlVersion: v1.0
+
+requirements:
+  - class: InlineJavascriptRequirement
+
+inputs:
+  in_bam:
+    type: File
+    doc: "input BAM file."
+
+  sample_name:
+    type: string
+    doc: "Sample name, which will be used to prefix output file names."
+
+  tophat_fusion_reference:
+    type: File
+    doc: "Tophat fusion reference bundle tar file."
+
+  star_reference:
+    type: File
+    doc: "STAR reference bundle tar file."
+
+  defuse_reference:
+    type: File
+    doc: "Defuse reference bundle tar file."
+
+  gtf:
+    type: File
+    doc: "GTF file which is used to annotate break points."
+
+  vagrent_cache:
+    type: File
+    doc: "VAGrENT cache file that should be the same reference and gene build as the GTF file."
+    secondaryFiles:
+      - ".tbi"
+      - $(self.basename.replace("cache.gz", "fa"))
+      - $(self.basename.replace("cache.gz", "fa.fai"))
+
+  tophat_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for tophat fusion process."
+
+  star_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for star fusion process."
+
+  defuse_threads:
+    type: int?
+    default: 1
+    doc: "Number of threads to use for defuse process."
+
+outputs:
+  tophat_fusions:
+    type: File
+    outputSource: tophat_fusion/output
+
+  star_fusions:
+    type: File
+    outputSource: star_fusion/output
+
+  defuse_fusions:
+    type: File
+    outputSource: defuse/output
+
+  combined_fusions:
+    type: File
+    outputSource: combine_fusions/output
+
+steps:
+  bam_to_fq:
+    in:
+      in_bam:
+        source: in_bam
+    out: [output_fqs]
+    run: tools/bam_to_fq.cwl
+
+  tophat_fusion:
+    in:
+      reads:
+        source: bam_to_fq/output_fqs
+      reference:
+        source: tophat_fusion_reference
+      sample_name:
+        source: sample_name
+      threads:
+        source: tophat_threads
+    out: [output]
+    run: tools/run-cgprna_tophat-fusion.cwl
+
+  star_fusion:
+    in:
+      reads:
+        source: bam_to_fq/output_fqs
+      reference:
+        source: star_reference
+      sample_name:
+        source: sample_name
+      threads:
+        source: star_threads
+    out: [output]
+    run: tools/run-cgprna_star-fusion.cwl
+
+  defuse:
+    in:
+      reads:
+        source: bam_to_fq/output_fqs
+      reference:
+        source: defuse_reference
+      sample_name:
+        source: sample_name
+      threads:
+        source: defuse_threads
+    out: [output]
+    run: tools/run-cgprna_defuse.cwl
+
+  combine_fusions:
+    in:
+      sample_name:
+        source: sample_name
+      gtf:
+        source: gtf
+      vagrent_cache:
+        source: vagrent_cache
+      tophat_fusion_output:
+        source: tophat_fusion/output
+      star_fusion_output:
+        source: star_fusion/output
+      defuse_output:
+        source: defuse/output
+    out: [output]
+    run: tools/compare_overlapping_fusions.cwl
+
+doc: |
+  A workflow to run cgpRna Infuse pipeline from a single input BAM file (can be aligned or unaligned) of RNA-seq reads using cgpRna container. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0
+
+s:author:
+  - class: s:Person
+    s:email: mailto:yx2@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/cwls/tools/bam_to_fq.cwl b/cwls/tools/bam_to_fq.cwl
new file mode 100644
index 0000000..5775ad6
--- /dev/null
+++ b/cwls/tools/bam_to_fq.cwl
@@ -0,0 +1,160 @@
+#!/usr/bin/env cwl-runner
+
+class: CommandLineTool
+
+id: "bam-to-fq"
+
+label: "convert bam to unzipped fq"
+
+cwlVersion: v1.0
+
+doc: |
+  ![build_status](https://quay.io/repository/wtsicgp/cgprna/status)
+  A Docker container for the cgpRna mapping flow. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+  Please read the relevant [changes](https://github.com/cancerit/cgpRna/blob/dev/CHANGES.md) when upgrading.
+
+  Parameters for a CWL definition are generally described in a json file, but parameters can be provided on the command line.
+
+  To see the parameters descriptions please run: cwltool --tool-help path_to.cwl
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
+  - class: InlineJavascriptRequirement
+
+hints:
+  - class: ResourceRequirement
+    coresMin: 1
+    ramMin: 8000
+
+inputs:
+  in_bam:
+    doc: "input BAM."
+    type: File
+    inputBinding:
+      prefix: filename=
+      separate: false
+
+  output_prefix:
+    type: string?
+    default: 'bamtofastq_converted'
+
+  matched_out_name:
+    type: string
+    doc: "output name of the fastq file containing first mate of matched pairs in the BAM."
+    default: ""
+    inputBinding:
+      valueFrom: |
+        ${
+          var extension = "matched.1_1.fq";
+          var eles;
+          if (self) {
+            eles = [inputs.output_prefix, self, extension]
+          } else {
+            eles = [inputs.output_prefix, extension]
+          }
+          return eles.join(".")
+        }
+      prefix: F=
+      separate: false
+
+  matched_2_out_name:
+    type: string
+    doc: "output name of the fastq file containing second mate of matched pairs in the BAM."
+    default: ""
+    inputBinding:
+      valueFrom: |
+        ${
+          var extension = "matched.1_2.fq";
+          var eles;
+          if (self) {
+            eles = [inputs.output_prefix, self, extension]
+          } else {
+            eles = [inputs.output_prefix, extension]
+          }
+          return eles.join(".")
+        }
+      prefix: F2=
+      separate: false
+
+  single_end_out_name:
+    type: string
+    doc: "output name of the fastq file containing single-ended reads in the BAM."
+    default: ""
+    inputBinding:
+      valueFrom: |
+        ${
+          var extension = "single_ended.fq";
+          var eles;
+          if (self) {
+            eles = [inputs.output_prefix, self, extension]
+          } else {
+            eles = [inputs.output_prefix, extension]
+          }
+          return eles.join(".")
+        }
+      prefix: S=
+      separate: false
+
+  unmatcted_out_name:
+    type: string
+    doc: "output name of the fastq file containing first mate of unmatched pairs in the BAM."
+    default: ""
+    inputBinding:
+      valueFrom: |
+        ${
+          var extension = "unmatched.1_1.fq";
+          var eles;
+          if (self) {
+            eles = [inputs.output_prefix, self, extension]
+          } else {
+            eles = [inputs.output_prefix, extension]
+          }
+          return eles.join(".")
+        }
+      prefix: O=
+      separate: false
+
+  unmatcted_2_out_name:
+    type: string
+    doc: "output name of the fastq file containing second mate of unmatched pairs in the BAM."
+    default: ""
+    inputBinding:
+      valueFrom: |
+        ${
+          var extension = "unmatched.1_2.fq";
+          var eles;
+          if (self) {
+            eles = [inputs.output_prefix, self, extension]
+          } else {
+            eles = [inputs.output_prefix, extension]
+          }
+          return eles.join(".")
+        }
+      prefix: O2=
+      separate: false
+
+outputs:
+  output_fqs:
+    type:
+      type: array
+      items: File
+    outputBinding:
+      glob: "*.matched.1_[12].fq"
+
+baseCommand: ["bamtofastq", "exclude=SECONDARY,SUPPLEMENTARY"]
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0-only
+
+s:author:
+  - class: s:Person
+    s:email: mailto:cgphelp@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/cwls/tools/compare_overlapping_fusions.cwl b/cwls/tools/compare_overlapping_fusions.cwl
new file mode 100644
index 0000000..3f99089
--- /dev/null
+++ b/cwls/tools/compare_overlapping_fusions.cwl
@@ -0,0 +1,103 @@
+#!/usr/bin/env cwl-runner
+
+class: CommandLineTool
+
+id: "run-cgprna_comp-fusions"
+
+label: "compare fusions"
+
+cwlVersion: v1.0
+
+doc: |
+  ![build_status](https://quay.io/repository/wtsicgp/cgprna/status)
+  A Docker container for the cgpRna mapping flow. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+  Please read the relevant [changes](https://github.com/cancerit/cgpRna/blob/dev/CHANGES.md) when upgrading.
+
+  Parameters for a CWL definition are generally described in a json file, but parameters can be provided on the command line.
+
+  To see the parameters descriptions please run: cwltool --tool-help path_to.cwl
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
+  - class: InlineJavascriptRequirement
+
+hints:
+  - class: ResourceRequirement
+    ramMin: 8000
+
+inputs:
+  sample_name:
+    type: string
+    doc: "sample name which is used to name the output file."
+    inputBinding:
+      prefix: -s
+      separate: true
+      shellQuote: true
+      position: 1
+
+  gtf:
+    type: File
+    doc: "GTF file which is used to annotate break points."
+    inputBinding:
+      prefix: -g
+      separate: true
+      shellQuote: true
+      position: 2
+
+  vagrent_cache:
+    type: File
+    doc: "VAGrENT cache file that should be the same reference and gene build as the GTF file."
+    inputBinding:
+      prefix: -c
+      separate: true
+      shellQuote: true
+      position: 3
+    secondaryFiles:
+      - ".tbi"
+      - $(self.basename.replace("cache.gz", "fa"))
+      - $(self.basename.replace("cache.gz", "fa.fai"))
+
+  tophat_fusion_output:
+    type: File
+    doc: "Output file of Tophat fusion, usually with suffix: '.tophat-fusion.normals.filtered.strand.txt'."
+    inputBinding:
+      shellQuote: true
+      position: 4
+
+  star_fusion_output:
+    type: File
+    doc: "Output file of Star fusion, usually with suffix: '.star-fusion.normals.filtered.txt'."
+    inputBinding:
+      shellQuote: true
+      position: 5
+
+  defuse_output:
+    type: File
+    doc: "Output file of deFuse, usually with suffix: '.defuse-fusion.normals.ext.filtered.txt'."
+    inputBinding:
+      shellQuote: true
+      position: 6
+
+outputs:
+  output:
+    type: File
+    outputBinding:
+      glob: $(inputs.sample_name).infuse.detected.fusions.grass.txt
+
+baseCommand: ["compare_overlapping_fusions.pl", "-o", "."]
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0-only
+
+s:author:
+  - class: s:Person
+    s:email: mailto:cgphelp@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/cwls/tools/lane_map_and_stats.cwl b/cwls/tools/lane_map_and_stats.cwl
index b941a02..1b0b378 100644
--- a/cwls/tools/lane_map_and_stats.cwl
+++ b/cwls/tools/lane_map_and_stats.cwl
@@ -1,6 +1,10 @@
 class: Workflow
 cwlVersion: v1.0
 
+id: "lane-map-stats"
+
+label: "map raw reads to reference and generate QC stats"
+
 requirements:
   - class: InlineJavascriptRequirement
 
diff --git a/cwls/tools/merge_and_mark_dups.cwl b/cwls/tools/merge_and_mark_dups.cwl
index 7abdd90..e445ff6 100644
--- a/cwls/tools/merge_and_mark_dups.cwl
+++ b/cwls/tools/merge_and_mark_dups.cwl
@@ -2,9 +2,9 @@
 
 class: CommandLineTool
 
-id: "run-cgprna_merge-mark-dups"
+id: "merge-mark-dups"
 
-label: "cgpRna merge mark dups"
+label: "merge BAMs and mark duplicates"
 
 cwlVersion: v1.0
 
@@ -20,7 +20,7 @@ doc: |
 
 requirements:
   - class: DockerRequirement
-    dockerPull: "quay.io/wtsicgp/cgprna:2.4.1"
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
 
 hints:
   - class: ResourceRequirement
diff --git a/cwls/tools/run-cgprna_bigwig.cwl b/cwls/tools/run-cgprna_bigwig.cwl
index bd88f1f..afb1226 100644
--- a/cwls/tools/run-cgprna_bigwig.cwl
+++ b/cwls/tools/run-cgprna_bigwig.cwl
@@ -20,7 +20,7 @@ doc: |
 
 requirements:
   - class: DockerRequirement
-    dockerPull: "quay.io/wtsicgp/cgprna:2.4.1"
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
 
 hints:
   - class: ResourceRequirement
diff --git a/cwls/tools/run-cgprna_defuse.cwl b/cwls/tools/run-cgprna_defuse.cwl
new file mode 100644
index 0000000..9639efb
--- /dev/null
+++ b/cwls/tools/run-cgprna_defuse.cwl
@@ -0,0 +1,92 @@
+#!/usr/bin/env cwl-runner
+
+class: CommandLineTool
+
+id: "run-cgprna_defuse"
+
+label: "cgpRna defuse"
+
+cwlVersion: v1.0
+
+doc: |
+  ![build_status](https://quay.io/repository/wtsicgp/cgprna/status)
+  A Docker container for the cgpRna mapping flow. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+  Please read the relevant [changes](https://github.com/cancerit/cgpRna/blob/dev/CHANGES.md) when upgrading.
+
+  Parameters for a CWL definition are generally described in a json file, but parameters can be provided on the command line.
+
+  To see the parameters descriptions please run: cwltool --tool-help path_to.cwl
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
+  - class: InlineJavascriptRequirement
+
+hints:
+  - class: ResourceRequirement
+    coresMin: 4
+    ramMin: 10000
+    outdirMin: 10000
+
+inputs:
+  reads:
+    doc: "RAW read input, can be bam files and FastQ files (optionally gzip compressed)."
+    type: 
+      type: array
+      items: File
+      inputBinding:
+        itemSeparator: ' '
+    inputBinding:
+      prefix: --input
+      separate: true
+
+  reference:
+    type: File
+    doc: "The deFuse reference files bundled in a tar.gz."
+    inputBinding:
+      prefix: --reference
+      separate: true
+
+  sample_name:
+    type: string
+    doc: "Sample name, which will be used to prefix output file names and SM tag in the BAM file header."
+    inputBinding:
+      prefix: --sample-name
+      separate: true
+
+  gene_build:
+    type: string?
+    doc: "gene build folder name, if specified it'll be used to locate the folder within the reference bundle."
+    inputBinding:
+      prefix: --gene-build
+      separate: true
+
+  threads:
+    type: int?
+    doc: "Number of threads to use."
+    inputBinding:
+      prefix: --threads
+      separate: true
+
+outputs:
+  output:
+    type: File
+    outputBinding:
+      glob: $(inputs.sample_name).defuse-fusion.normals.ext.filtered.txt
+
+baseCommand: ["run-cgprna", "defuse"]
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0-only
+
+s:author:
+  - class: s:Person
+    s:email: mailto:cgphelp@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/cwls/tools/run-cgprna_htseq-count.cwl b/cwls/tools/run-cgprna_htseq-count.cwl
index 3addd9b..f882a92 100644
--- a/cwls/tools/run-cgprna_htseq-count.cwl
+++ b/cwls/tools/run-cgprna_htseq-count.cwl
@@ -20,7 +20,7 @@ doc: |
 
 requirements:
   - class: DockerRequirement
-    dockerPull: "quay.io/wtsicgp/cgprna:2.4.1"
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
 
 hints:
   - class: ResourceRequirement
diff --git a/cwls/tools/run-cgprna_star-fusion.cwl b/cwls/tools/run-cgprna_star-fusion.cwl
new file mode 100644
index 0000000..679d8e4
--- /dev/null
+++ b/cwls/tools/run-cgprna_star-fusion.cwl
@@ -0,0 +1,92 @@
+#!/usr/bin/env cwl-runner
+
+class: CommandLineTool
+
+id: "run-cgprna_star-fusion"
+
+label: "cgpRna star fusion"
+
+cwlVersion: v1.0
+
+doc: |
+  ![build_status](https://quay.io/repository/wtsicgp/cgprna/status)
+  A Docker container for the cgpRna mapping flow. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+  Please read the relevant [changes](https://github.com/cancerit/cgpRna/blob/dev/CHANGES.md) when upgrading.
+
+  Parameters for a CWL definition are generally described in a json file, but parameters can be provided on the command line.
+
+  To see the parameters descriptions please run: cwltool --tool-help path_to.cwl
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
+  - class: InlineJavascriptRequirement
+
+hints:
+  - class: ResourceRequirement
+    coresMin: 4
+    ramMin: 10000
+    outdirMin: 10000
+
+inputs:
+  reads:
+    doc: "RAW read input, can be bam files and FastQ files (optionally gzip compressed)."
+    type: 
+      type: array
+      items: File
+      inputBinding:
+        itemSeparator: ' '
+    inputBinding:
+      prefix: --input
+      separate: true
+
+  reference:
+    type: File
+    doc: "The Star fusion reference files bundled in a tar.gz."
+    inputBinding:
+      prefix: --reference
+      separate: true
+
+  sample_name:
+    type: string
+    doc: "Sample name, which will be used to prefix output file names and SM tag in the BAM file header."
+    inputBinding:
+      prefix: --sample-name
+      separate: true
+
+  gene_build:
+    type: string?
+    doc: "gene build folder name, if specified it'll be used to locate the folder within the reference bundle."
+    inputBinding:
+      prefix: --gene-build
+      separate: true
+
+  threads:
+    type: int?
+    doc: "Number of threads to use."
+    inputBinding:
+      prefix: --threads
+      separate: true
+
+outputs:
+  output:
+    type: File
+    outputBinding:
+      glob: $(inputs.sample_name).star-fusion.normals.filtered.txt
+
+baseCommand: ["run-cgprna", "star-fusion"]
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0-only
+
+s:author:
+  - class: s:Person
+    s:email: mailto:cgphelp@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/cwls/tools/run-cgprna_star-map.cwl b/cwls/tools/run-cgprna_star-map.cwl
index 26f56cb..58738ab 100644
--- a/cwls/tools/run-cgprna_star-map.cwl
+++ b/cwls/tools/run-cgprna_star-map.cwl
@@ -20,7 +20,7 @@ doc: |
 
 requirements:
   - class: DockerRequirement
-    dockerPull: "quay.io/wtsicgp/cgprna:2.4.1"
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
   - class: InlineJavascriptRequirement
 
 hints:
@@ -35,10 +35,12 @@ inputs:
     type:
       type: array
       items: File
+      inputBinding:
+        itemSeparator: ' '
     inputBinding:
       prefix: --input
       separate: true
-      itemSeparator: ' '
+
 
   reference:
     type: File
diff --git a/cwls/tools/run-cgprna_stats.cwl b/cwls/tools/run-cgprna_stats.cwl
index a9d2a06..a5125f3 100644
--- a/cwls/tools/run-cgprna_stats.cwl
+++ b/cwls/tools/run-cgprna_stats.cwl
@@ -20,7 +20,7 @@ doc: |
 
 requirements:
   - class: DockerRequirement
-    dockerPull: "quay.io/wtsicgp/cgprna:2.4.1"
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
 
 hints:
   - class: ResourceRequirement
diff --git a/cwls/tools/run-cgprna_tophat-fusion.cwl b/cwls/tools/run-cgprna_tophat-fusion.cwl
new file mode 100644
index 0000000..f3d2e11
--- /dev/null
+++ b/cwls/tools/run-cgprna_tophat-fusion.cwl
@@ -0,0 +1,91 @@
+#!/usr/bin/env cwl-runner
+
+class: CommandLineTool
+
+id: "run-cgprna_tophat-fusion"
+
+label: "cgpRna tophat fusion"
+
+cwlVersion: v1.0
+
+doc: |
+  ![build_status](https://quay.io/repository/wtsicgp/cgprna/status)
+  A Docker container for the cgpRna mapping flow. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information.
+
+  Please read the relevant [changes](https://github.com/cancerit/cgpRna/blob/dev/CHANGES.md) when upgrading.
+
+  Parameters for a CWL definition are generally described in a json file, but parameters can be provided on the command line.
+
+  To see the parameters descriptions please run: cwltool --tool-help path_to.cwl
+
+requirements:
+  - class: DockerRequirement
+    dockerPull: "quay.io/wtsicgp/cgprna:2.5.0"
+  - class: InlineJavascriptRequirement
+
+hints:
+  - class: ResourceRequirement
+    coresMin: 4
+    ramMin: 10000
+    outdirMin: 10000
+
+inputs:
+  reads:
+    doc: "RAW read input, can be bam files and FastQ files (optionally gzip compressed)."
+    type: 
+      type: array
+      items: File
+      inputBinding:
+        itemSeparator: ' '
+    inputBinding:
+      prefix: --input
+      separate: true
+
+  reference:
+    type: File
+    doc: "The Tophat fusion reference files bundled in a tar.gz."
+    inputBinding:
+      prefix: --reference
+
+  sample_name:
+    type: string
+    doc: "Sample name, which will be used to prefix output file names and SM tag in the BAM file header."
+    inputBinding:
+      prefix: --sample-name
+      separate: true
+
+  gene_build:
+    type: string?
+    doc: "gene build folder name, if specified it'll be used to locate the folder within the reference bundle."
+    inputBinding:
+      prefix: --gene-build
+      separate: true
+
+  threads:
+    type: int?
+    doc: "Number of threads to use."
+    inputBinding:
+      prefix: --threads
+      separate: true
+
+outputs:
+  output:
+    type: File
+    outputBinding:
+      glob: $(inputs.sample_name).tophat-fusion.normals.filtered.strand.txt
+
+baseCommand: ["run-cgprna", "tophat-fusion"]
+
+$schemas:
+  - http://schema.org/docs/schema_org_rdfa.html
+
+$namespaces:
+  s: http://schema.org/
+
+s:codeRepository: https://github.com/cancerit/cgpRna
+s:license: https://spdx.org/licenses/AGPL-3.0-only
+
+s:author:
+  - class: s:Person
+    s:email: mailto:cgphelp@sanger.ac.uk
+    s:name: Yaobo Xu
diff --git a/examples/json/bamtofastq.json b/examples/json/bamtofastq.json
new file mode 100644
index 0000000..0b2737f
--- /dev/null
+++ b/examples/json/bamtofastq.json
@@ -0,0 +1,10 @@
+{
+    "in_bam": {
+        "class":"File",
+        "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/small_RNAseq/small_RNAseq.bam"
+    },
+    "output_fqs": {
+        "class":"Directory",
+        "path":"/tmp/bamtofastq_matched_output/"
+    }
+}
diff --git a/examples/json/cgpRna_with_infuse.json b/examples/json/cgpRna_with_infuse.json
new file mode 100644
index 0000000..e9ce6f1
--- /dev/null
+++ b/examples/json/cgpRna_with_infuse.json
@@ -0,0 +1,120 @@
+{
+    "raw_reads": [
+        [{
+            "class":"File",
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/small_RNAseq/small_RNAseq.bam"
+        }],
+        [{
+            "class":"File",
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/part_of_EM-2_0.5.bam"
+        }]
+    ],
+    "rg_id_tags": ["1", "B"],
+    "lb_tags": ["libA", ""],
+    "ds_tags": ["a small test sample", "a sample that have a bad header"],
+    "pl_tags": ["Illumina", "Illumina"],
+    "pu_tags": ["", "PlatFormUnitB"],
+    "map_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/star_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "tophat_fusion_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/tophat_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "defuse_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/defuse_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "vagrent_cache": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/vagrent_GRCh38_ensembl77.cache.gz"
+    },
+    "sample_name": "cgpRna_test_RNAseq",
+    "stats_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_RSeQC_ref.tar.gz"
+    },
+    "count_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_ensembl77.gtf"
+    },
+    "bigwig_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_full_analysis_set_plus_decoy_hla.fa"
+    },
+    "map_threads": 4,
+    "bigwig_threads": 4,
+    "merge_threads": 4,
+    "tophat_threads": 4,
+    "star_threads": 4,
+    "defuse_threads": 8,
+    "dup_marked_bam": {
+        "class":"File",
+        "path": "/tmp/cgpRna_test_RNAseq.bam"
+    },
+    "dup_marked_bam_md5": {
+        "class":"File",
+        "path": "/tmp/cgpRna_test_RNAseq.bam.md5"
+    },
+    "dup_marked_bam_dup_met": {
+        "class":"File",
+        "path": "/tmp/cgpRna_test_RNAseq.bam.met"
+    },
+    "transcriptome_lane_bams": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "dup_marked_lane_bam_dup_mets": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "rna_bas_files": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "gene_cover_pngs": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "gene_body_coverage_rscripts": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "gene_body_coverage_txts": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "gene_body_coverage_updated_rscripts": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "read_dists": {
+        "class":"Directory",
+        "path":"/tmp/cgpRna_test_RNAseq_lane_stats/"
+    },
+    "out_bw": {
+        "class":"File",
+        "path": "/tmp/cgpRna_test_RNAseq.bw"
+    },
+    "out_count": {
+        "class":"File",
+        "path": "/tmp/cgpRna_test_RNAseq.htseq_count.txt.gz"
+    },
+    "tophat_fusions": {
+        "class":"File",
+        "path": "/tmp/tophat_fusions.filtered.txt"
+    },
+    "star_fusions": {
+        "class":"File",
+        "path": "/tmp/star_fusions.filtered.txt"
+    },
+    "defuse_fusions": {
+        "class":"File",
+        "path": "/tmp/defuse_fusions.filtered.txt"
+    },
+    "combined_fusions": {
+        "class":"File",
+        "path": "/tmp/infuse_combined_fusions.annotated.txt"
+    }
+}
diff --git a/examples/json/cgpRna_workflow.json b/examples/json/cgpRna_workflow.json
index 5f772a8..bc6c140 100644
--- a/examples/json/cgpRna_workflow.json
+++ b/examples/json/cgpRna_workflow.json
@@ -16,20 +16,20 @@
     "pu_tags": ["", "PlatFormUnitB"],
     "map_reference": {
         "class":"File",
-        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/cgpRna-mapRefBundle-star2.4.1c-GRCh38_full_analysis_set_plus_decoy_hla-ensembl77.tar.gz"
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/star_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
     },
     "sample_name": "cgpRna_test_RNAseq",
     "stats_reference": {
         "class":"File",
-        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_rseqc_ref.tar.gz"
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_RSeQC_ref.tar.gz"
     },
     "count_reference": {
         "class":"File",
-        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/ensembl.gtf"
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_ensembl77.gtf"
     },
     "bigwig_reference": {
         "class":"File",
-        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/genome.fa"
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_full_analysis_set_plus_decoy_hla.fa"
     },
     "map_threads": 8,
     "bigwig_threads": 8,
diff --git a/examples/json/defuse.json b/examples/json/defuse.json
new file mode 100644
index 0000000..3188441
--- /dev/null
+++ b/examples/json/defuse.json
@@ -0,0 +1,18 @@
+{
+    "reads": [
+        {
+            "class":"File",
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/small_RNAseq/small_RNAseq.bam"
+        }
+    ],
+    "reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/defuse_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "sample_name": "a_small_sample",
+    "threads": 8,
+    "output": {
+        "class":"File",
+        "path": "/tmp/defuse_fusions.filtered.txt"
+    }
+}
diff --git a/examples/json/infuse_pipeline.json b/examples/json/infuse_pipeline.json
new file mode 100644
index 0000000..bf2880f
--- /dev/null
+++ b/examples/json/infuse_pipeline.json
@@ -0,0 +1,46 @@
+{
+    "in_bam": {
+        "class":"File",
+        "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/part_of_EM-2_0.5.bam"
+    },
+    "star_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/star_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "tophat_fusion_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/tophat_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "defuse_reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/defuse_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "vagrent_cache": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/vagrent_GRCh38_ensembl77.cache.gz"
+    },
+    "sample_name": "cgpRna_test_RNAseq",
+    "gtf": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/GRCh38_ensembl77.gtf"
+    },
+    "tophat_threads": 4,
+    "star_threads": 4,
+    "defuse_threads": 8,
+    "tophat_fusions": {
+        "class":"File",
+        "path": "/tmp/tophat_fusions.filtered.txt"
+    },
+    "star_fusions": {
+        "class":"File",
+        "path": "/tmp/star_fusions.filtered.txt"
+    },
+    "defuse_fusions": {
+        "class":"File",
+        "path": "/tmp/defuse_fusions.filtered.txt"
+    },
+    "combined_fusions": {
+        "class":"File",
+        "path": "/tmp/infuse_combined_fusions.annotated.txt"
+    }
+}
diff --git a/examples/json/merge_and_mark_dups.json b/examples/json/merge_and_mark_dups.json
index edf1a54..94062c9 100644
--- a/examples/json/merge_and_mark_dups.json
+++ b/examples/json/merge_and_mark_dups.json
@@ -2,11 +2,11 @@
     "sorted_bams": [
         {
             "class":"File",
-            "path": "/home/ubuntu/data/test_cgpRna_cwls/test_bam_map/test_small_bam.bam"
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/part_of_EM-2_0.5.bam"
         },
         {
             "class":"File",
-            "path": "/home/ubuntu/data/test_cgpRna_cwls/test_fq_map/test_small_fq.bam"
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/small_RNAseq/small_RNAseq_mapped_dupMarked.bam"
         }
     ],
     "threads": 8,
diff --git a/examples/json/star_fusion.json b/examples/json/star_fusion.json
new file mode 100644
index 0000000..c3e685b
--- /dev/null
+++ b/examples/json/star_fusion.json
@@ -0,0 +1,18 @@
+{
+    "reads": [
+        {
+            "class":"File",
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/small_RNAseq/small_RNAseq.bam"
+        }
+    ],
+    "reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/star_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "sample_name": "a_small_sample",
+    "threads": 8,
+    "output": {
+        "class":"File",
+        "path": "/tmp/star_fusions.filtered.txt"
+    }
+}
diff --git a/examples/json/star_map_bam.json b/examples/json/star_map_bam.json
index c7b05ee..6c75a4c 100644
--- a/examples/json/star_map_bam.json
+++ b/examples/json/star_map_bam.json
@@ -7,7 +7,7 @@
     ],
     "reference": {
         "class":"File",
-        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/cgpRna-mapRefBundle-star2.4.1c-GRCh38_full_analysis_set_plus_decoy_hla-ensembl77.tar.gz"
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/star_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
     },
     "sample_name": "a_small_sample",
     "star_transcriptome_bam":{
diff --git a/examples/json/star_map_fastqs.json b/examples/json/star_map_fastqs.json
index b69a96d..24e282c 100644
--- a/examples/json/star_map_fastqs.json
+++ b/examples/json/star_map_fastqs.json
@@ -11,7 +11,7 @@
     ],
     "reference": {
         "class":"File",
-        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/cgpRna-mapRefBundle-star2.4.1c-GRCh38_full_analysis_set_plus_decoy_hla-ensembl77.tar.gz"
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/star_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
     },
     "sample_name": "a_small_sample",
     "star_sample_bam": {
diff --git a/examples/json/tophat_fusion.json b/examples/json/tophat_fusion.json
new file mode 100644
index 0000000..86da8b0
--- /dev/null
+++ b/examples/json/tophat_fusion.json
@@ -0,0 +1,18 @@
+{
+    "reads": [
+        {
+            "class":"File",
+            "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgprna/small_RNAseq/small_RNAseq.bam"
+        }
+    ],
+    "reference": {
+        "class":"File",
+        "path": "ftp://ftp.sanger.ac.uk/pub/cancer/support-files/cgpRna_container/GRCh38_full_analysis_set_plus_decoy_hla-ensembl77/tophat_GRCh38_full_analysis_set_plus_decoy_hla_ensembl77_ref_bundle.tar.gz"
+    },
+    "sample_name": "a_small_sample",
+    "threads": 8,
+    "output": {
+        "class":"File",
+        "path": "/tmp/tophat_fusions.filtered.txt"
+    }
+}
diff --git a/perl/bin/defuse_fusion.pl b/perl/bin/defuse_fusion.pl
index 3a7da94..b808d7c 100755
--- a/perl/bin/defuse_fusion.pl
+++ b/perl/bin/defuse_fusion.pl
@@ -107,7 +107,7 @@ sub setup {
 			's|sample=s' => \$opts{'sample'},
 			'sp|species=s' => \$opts{'species'},
 			'rb|refbuild=s' => \$opts{'referencebuild'},
-			'gb|genebuild=i' => \$opts{'genebuild'},
+			'gb|genebuild=s' => \$opts{'genebuild'},
 			'r|refdataloc=s' => \$opts{'refdataloc'},
 			'n|normals=s' => \$opts{'normalfusionslist'},
 			'd|defuseconfig=s' => \$opts{'defuseconfig'},
diff --git a/perl/bin/tophat_fusion.pl b/perl/bin/tophat_fusion.pl
index a1cade3..1f9a32c 100755
--- a/perl/bin/tophat_fusion.pl
+++ b/perl/bin/tophat_fusion.pl
@@ -114,7 +114,7 @@ sub setup {
 			'sp|species=s' => \$opts{'species'},
 			'l|librarytype=s' => \$opts{'librarytype'},
 			'rb|refbuild=s' => \$opts{'referencebuild'},
-			'gb|genebuild=i' => \$opts{'genebuild'},
+			'gb|genebuild=s' => \$opts{'genebuild'},
 			'b|bowtie=i' => \$opts{'bowtieversion'},
 			'r|refdataloc=s' => \$opts{'refdataloc'},
 			'ri|refindex=s' => \$opts{'referenceindex'},
diff --git a/perl/lib/Sanger/CGP/CgpRna.pm b/perl/lib/Sanger/CGP/CgpRna.pm
index 2218fda..88a83cd 100755
--- a/perl/lib/Sanger/CGP/CgpRna.pm
+++ b/perl/lib/Sanger/CGP/CgpRna.pm
@@ -36,7 +36,7 @@ use strict;
 use Const::Fast qw(const);
 use base 'Exporter';
 
-our $VERSION = '2.4.1';
+our $VERSION = '2.5.0';
 our @EXPORT = qw($VERSION);
 
 1;
diff --git a/run-cgprna/run_cgprna/command_line.py b/run-cgprna/run_cgprna/command_line.py
index 73f0693..c41aca5 100644
--- a/run-cgprna/run_cgprna/command_line.py
+++ b/run-cgprna/run_cgprna/command_line.py
@@ -10,6 +10,7 @@
 from .mapping_stats import generate_stats
 from .htseq_count import count
 from .bigwig import generate_bigwig
+from .fusion_tools import tophat_fusion, star_fusion, defuse
 
 version = pkg_resources.require("run_cgprna")[0].version
 
@@ -52,22 +53,22 @@ def main():
     parser_a.add_argument(
         '-sp', '--species', dest='species',
         metavar='STR',
-        help='Species name. No need to set if using a pre-built reference bundle. If using a folder as the reference, it\' be used to locate reference files.',
+        help='Species name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it\'ll be used to locate reference files.',
         required=False)
     parser_a.add_argument(
         '-rb', '--reference-build', dest='ref_build',
         metavar='STR',
-        help='Reference build name. No need to set if using a pre-built reference bundle. If using a folder as the reference, it\' be used to locate reference files.',
+        help='Reference build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it\'ll be used to locate reference files.',
         required=False)
     parser_a.add_argument(
         '-gb', '--gene-build', dest='gene_build',
         metavar='STR',
-        help='Gene build name. No need to set if using a pre-built reference bundle. If using a folder as the reference, it\' be used to locate for GTF file.',
+        help='Gene build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it\'ll be used to locate the GTF file.',
         required=False)
     parser_a.add_argument(
         '-gtf', '--gene-build-gtf-name', dest='gene_build_gtf_name',
         metavar='STR',
-        help='File name of the gene build file. No need to set if using a pre-built reference bundle. If using a folder as the reference, it\' be used to locate the GTF file.',
+        help='File name of the gene build annotation file. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it\'ll be used to locate the GTF file.',
         required=False)
@@ -111,7 +112,7 @@ def main():
         required=False)
     parser_a.set_defaults(func=map_seq_files)
 
-    # create the parser for the "stats" command
+    # create the parser for "stats" command
     parser_b = subparsers.add_parser(
         'stats',
         parents=[common_parser],
@@ -138,7 +139,7 @@ def main():
         required=False)
     parser_b.set_defaults(func=generate_stats)
 
-    # create the parser for the "bigwig" command
+    # create the parser for "bigwig" command
     parser_c = subparsers.add_parser(
         'bigwig',
         parents=[common_parser],
@@ -151,7 +152,7 @@ def main():
     parser_c.add_argument(
         '-r', '--reference', dest='ref',
         metavar='FASTA_FILE',
-        help='FASTA file of a reference file, which the input BAM file was mapped to.',
+        help='FASTA file of a reference, which the input BAM file was mapped to.',
         required=True)
     parser_c.add_argument(
         '-od', '--output-directory', dest='out_dir',
@@ -165,7 +166,7 @@ def main():
         required=False)
     parser_c.set_defaults(func=generate_bigwig)
 
-    # create the parser for the "count" command
+    # create the parser for "count" command
     parser_d = subparsers.add_parser(
         'count',
         parents=[common_parser],
@@ -187,6 +188,151 @@ def main():
         required=False)
     parser_d.set_defaults(func=count)
 
+    # create the parser for the "tophat-fusion" command (dispatches to fusion_tools.tophat_fusion)
+    parser_e = subparsers.add_parser(
+        'tophat-fusion',
+        parents=[common_parser],
+        description='Use Tophat2 to identify gene fusion events.')
+    parser_e.add_argument(
+        '-i', '--input', dest='input',
+        metavar='FILE',
+        nargs='+',
+        help='Input files, can be BAM files or FastQ files or a mixture of both. File names of FastQ files must have a suffix of "_1" or "_2" immediately prior to ".f[ast]q".',
+        required=True)
+    parser_e.add_argument(
+        '-s', '--sample-name', dest='sample_name',
+        metavar='STR',
+        help='Sample name, which will be used to prefix output file names and SM tag in the BAM file header.',
+        required=True)
+    parser_e.add_argument(
+        '-r', '--reference', dest='ref',
+        metavar='TAR|PATH',
+        help='A reference bundle tar file or the path to reference root directory.',
+        required=True)
+    parser_e.add_argument(
+        '-od', '--output-directory', dest='out_dir',
+        metavar='DIR', default='.',
+        help='Output directory. Default: current directory.',
+        required=False)
+    parser_e.add_argument(
+        '-sp', '--species', dest='species',
+        metavar='STR',
+        help='Species name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing one or more folders named with different reference builds in \'--reference\' folder.',
+        required=False)
+    parser_e.add_argument(
+        '-rb', '--reference-build', dest='ref_build',
+        metavar='STR',
+        help='Reference build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing \'tophat\' folder for this reference build in \'--species\' folder.',
+        required=False)
+    parser_e.add_argument(
+        '-gb', '--gene-build', dest='gene_build',
+        metavar='STR',
+        help='Gene build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing transcriptome index files in \'tophat\' folder.',
+        required=False)
+    parser_e.add_argument(
+        '-t', '--threads', dest='threads',
+        metavar='INT', type=int, default=1,
+        help='Number of threads to use.',
+        required=False)
+    parser_e.set_defaults(func=tophat_fusion)
+
+    # create the parser for the "star-fusion" command (dispatches to fusion_tools.star_fusion)
+    parser_f = subparsers.add_parser(
+        'star-fusion',
+        parents=[common_parser],
+        description='Use STAR to identify gene fusion events.')
+    parser_f.add_argument(
+        '-i', '--input', dest='input',
+        metavar='FILE',
+        nargs='+',
+        help='Input files, can be BAM files or FastQ files or a mixture of both. File names of FastQ files must have a suffix of "_1" or "_2" immediately prior to ".f[ast]q".',
+        required=True)
+    parser_f.add_argument(
+        '-s', '--sample-name', dest='sample_name',
+        metavar='STR',
+        help='Sample name, which will be used to prefix output file names and SM tag in the BAM file header.',
+        required=True)
+    parser_f.add_argument(
+        '-r', '--reference', dest='ref',
+        metavar='TAR|PATH',
+        help='A reference bundle tar file or the path to reference root directory.',
+        required=True)
+    parser_f.add_argument(
+        '-od', '--output-directory', dest='out_dir',
+        metavar='DIR', default='.',
+        help='Output directory. Default: current directory.',
+        required=False)
+    parser_f.add_argument(
+        '-sp', '--species', dest='species',
+        metavar='STR',
+        help='Species name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing one or more folders named with different reference builds in \'--reference\' folder.',
+        required=False)
+    parser_f.add_argument(
+        '-rb', '--reference-build', dest='ref_build',
+        metavar='STR',
+        help='Reference build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing \'star\' folder for this reference build in \'--species\' folder.',
+        required=False)
+    parser_f.add_argument(
+        '-gb', '--gene-build', dest='gene_build',
+        metavar='STR',
+        help='Gene build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing a GTF file in \'star\' folder.',
+        required=False)
+    parser_f.add_argument(
+        '-t', '--threads', dest='threads',
+        metavar='INT', type=int, default=1,
+        help='Number of threads to use.',
+        required=False)
+    parser_f.set_defaults(func=star_fusion)
+
+
+    # create the parser for the "defuse" command (dispatches to fusion_tools.defuse)
+    parser_g = subparsers.add_parser(
+        'defuse',
+        parents=[common_parser],
+        description='Use Defuse to identify gene fusion events.')
+    parser_g.add_argument(
+        '-i', '--input', dest='input',
+        metavar='FILE',
+        nargs='+',
+        help='Input files, can be BAM files or FastQ files or a mixture of both. File names of FastQ files must have a suffix of "_1" or "_2" immediately prior to ".f[ast]q".',
+        required=True)
+    parser_g.add_argument(
+        '-s', '--sample-name', dest='sample_name',
+        metavar='STR',
+        help='Sample name, which will be used to prefix output file names and SM tag in the BAM file header.',
+        required=True)
+    parser_g.add_argument(
+        '-r', '--reference', dest='ref',
+        metavar='TAR|PATH',
+        help='A reference bundle tar file or the path to reference root directory.',
+        required=True)
+    parser_g.add_argument(
+        '-od', '--output-directory', dest='out_dir',
+        metavar='DIR', default='.',
+        help='Output directory. Default: current directory.',
+        required=False)
+    parser_g.add_argument(
+        '-sp', '--species', dest='species',
+        metavar='STR',
+        help='Species name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing one or more folders named with different reference builds in \'--reference\' folder.',
+        required=False)
+    parser_g.add_argument(
+        '-rb', '--reference-build', dest='ref_build',
+        metavar='STR',
+        help='Reference build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing \'defuse\' folder for this reference build in \'--species\' folder.',
+        required=False)
+    parser_g.add_argument(
+        '-gb', '--gene-build', dest='gene_build',
+        metavar='STR',
+        help='Gene build name. No need to set if using a pre-built reference bundle. If using a folder path as the value of \'--reference\', it should be the name of an existing folder containing all Defuse index files in \'defuse\' folder.',
+        required=False)
+    parser_g.add_argument(
+        '-t', '--threads', dest='threads',
+        metavar='INT', type=int, default=1,
+        help='Number of threads to use.',
+        required=False)
+    parser_g.set_defaults(func=defuse)
+
     args = parser.parse_args()
     if len(sys.argv) > 1:
         args.func(args)
diff --git a/run-cgprna/run_cgprna/fusion_tools.py b/run-cgprna/run_cgprna/fusion_tools.py
new file mode 100644
index 0000000..eef3632
--- /dev/null
+++ b/run-cgprna/run_cgprna/fusion_tools.py
@@ -0,0 +1,179 @@
+import os
+import sys
+import re
+import shutil
+import copy
+from string import Template
+from . import run_templates_in_shell, untar, mkdir
+import gzip
+
+BAM_TO_FASTQ = Template('bamtofastq exclude=SECONDARY,SUPPLEMENTARY T=$bam2fq_tmp S=$bam2fq_tmp_single_end O=$bam2fq_tmp_unmatched O2=$bam2fq_tmp_unmatched2 gz=1 level=1 F=$bam2fq_tmp_matched F2=$bam2fq_tmp_matched_2 filename=$in_bam')
+# Defuse and its wrapper defuse_fusion.pl cannot handle gzipped files, so this variant omits 'gz=1 level=1'.
+BAM_TO_FASTQ_FOR_DEFUSE = Template('bamtofastq exclude=SECONDARY,SUPPLEMENTARY T=$bam2fq_tmp S=$bam2fq_tmp_single_end O=$bam2fq_tmp_unmatched O2=$bam2fq_tmp_unmatched2 F=$bam2fq_tmp_matched F2=$bam2fq_tmp_matched_2 filename=$in_bam')
+TOPHAT_FUSION = Template('tophat_fusion.pl -s $sample_name -o $out_dir -t $threads -r $reference_data_root -sp $species -rb $ref_build -gb $gene_build $input')
+STAR_FUSION = Template('star_fusion.pl -s $sample_name -o $out_dir -t $threads -r $reference_data_root -sp $species -rb $ref_build -gb $gene_build $input')
+DEFUSE_FUSION = Template('defuse_fusion.pl -s $sample_name -o $out_dir -t $threads -r $reference_data_root -sp $species -rb $ref_build -gb $gene_build $input')
+DEFUSE_FILTER = Template('defuse_filters.pl -s $sample_name -o $out_dir -i $out_dir/${sample_name}.defuse-fusion.normals.filtered.txt')
+
+NORMAL_FUSION_ARG_NAME='-normals'  # NOTE(review): not referenced anywhere else in this module - confirm it is still needed
+
+REF_RELATED_DEFAULTS = {  # values used for ref related args left unset when '--reference' is a bundle tar file
+    'species': 'unspecified_species',
+    'ref_build': 'unspecified_ref_build',
+    'gene_build': 'ensembl'
+}
+
+
+def validate_input_seq_files(file_names):
+    '''
+    Validate FastQ file names before any BAM is converted. Perl wrappers: tophat_fusion.pl, star_fusion.pl and defuse_fusion.pl all support multiple input files but are restricted to a single type, i.e. either BAM or FastQ files. As we're converting BAMs to FastQs before passing them to the wrappers, we can easily handle a mixture of BAM and FastQ files, but we need to validate the files before starting to convert BAM to FastQ, so we don't waste time and resources on splitting a BAM when some of the FastQ files are not following the Perl wrappers' file name conventions. Names ending with '.bam' are skipped; every other file must exist, be named '<prefix>_1' or '<prefix>_2' followed by '.fq' or '.fastq' (optionally '.gz'), and both mates of a prefix must be given exactly once, otherwise the program exits via sys.exit with an error message.
+    '''
+    pairs = {}
+    fq_name_pattern = re.compile(r'(.*)_([12])\.f(?:ast)?q(?:\.gz)?$')
+    for a_file in file_names:
+        if a_file.endswith('.bam'):
+            continue
+        if not os.path.exists(a_file):
+            sys.exit('Error: can not find input file: %s' % a_file)
+        a_file = os.path.abspath(a_file)
+        match = fq_name_pattern.match(os.path.basename(a_file))
+        if match:  # match is None (not a match object) when the name does not follow the conventions
+            # if same mate number been spotted before or already got both mates
+            pair_name, first_or_second_mate_in_a_pair = match.groups()
+            if pairs.get(pair_name, 0) == int(first_or_second_mate_in_a_pair) or pairs.get(pair_name, 0) == 3:
+                sys.exit('Error: Too many \'_%s\' mate files for prefix: %s. Possibly redundant file: %s' % (first_or_second_mate_in_a_pair, pair_name, a_file))
+            pairs[pair_name] = pairs.get(pair_name, 0) + int(first_or_second_mate_in_a_pair)
+        else:
+            sys.exit('Error: File name does not follow expected coventions: %s' % a_file)
+    for pair_name, sum_of_mate_numbers in pairs.items():
+        # if both _1 and _2 of a pair of fastq files are given, value should be 3
+        if sum_of_mate_numbers != 3:
+            if sum_of_mate_numbers == 1:
+                sys.exit('Error: Can not find second mate file of: %s' % pair_name)
+            if sum_of_mate_numbers == 2:
+                sys.exit('Error: Can not find first mate file of: %s' % pair_name)
+            sys.exit('Error: Internal code logic error, sum of first and second mate number in a pair of fastq files is %d, which is not expected and not handled well in code.' % sum_of_mate_numbers)
+
+
+# Decompress gzipped source_file into dest_file in 64 KiB chunks. NOTE: So far only used if Defuse is given a gzipped fastq file
+def gunzip(source_file, dest_file):
+    print('unzipping %s ...' % os.path.basename(source_file), flush=True)
+    with gzip.open(source_file, 'rb') as s_file, open(dest_file, 'wb') as d_file:
+        shutil.copyfileobj(s_file, d_file, 65536)
+    print('done.', flush=True)
+
+
+def run_fusion_wrapper(args, temp_dir_name, fusion_templates, no_gzip=False):
+    '''Validate inputs, prepare the reference, convert BAM inputs to FastQ, run fusion_templates in a shell, then remove the temp dir (temp_dir_name, created under args.out_dir). With no_gzip=True all FastQ handed to the wrapper are uncompressed.'''
+    args_dict = copy.deepcopy(vars(args))  # args to dict to allow updates later
+    # validate inputs before bamtofastq and before creating any folder, otherwise it could be too late
+    validate_input_seq_files(args.input)
+
+    # prepare the output dir
+    mkdir(args.out_dir)
+
+    # temp_dir holds the extracted reference bundle and any converted/unzipped FastQ files
+    temp_dir = os.path.join(os.path.abspath(args.out_dir), temp_dir_name)
+    mkdir(temp_dir)
+
+    reference_data_root=os.path.abspath(args.ref)
+
+    # Anything not a file will be treated as a reference root folder
+    if not os.path.isfile(reference_data_root):
+        if not os.path.exists(reference_data_root):
+            sys.exit('Error: cound not locate directory: %s' % reference_data_root)
+        if any(args_dict[ele] is None for ele in REF_RELATED_DEFAULTS.keys()):
+            sys.exit(
+                'Error: missing required input. When "--reference" is not a reference bundle tar file, you have to provide: %s' % ', '.join(
+                    [ '--' + key.replace('_', '-') for key in REF_RELATED_DEFAULTS.keys() if args_dict[key] is None])
+            )
+    elif not os.path.basename(reference_data_root).endswith('.tar.gz'):
+        # check if input ref file has valid file extensions
+        sys.exit('Error: wrong input format. "--reference" can only be a tar.gz file or a folder.')
+    else:
+        reference_data_root=os.path.join(temp_dir, 'ref')
+        # set ref related args to defaults if not given
+        for arg_name in REF_RELATED_DEFAULTS.keys():
+            if args_dict[arg_name] is None:
+                print('Set "%s" to default.' % arg_name)
+                args_dict[arg_name] = REF_RELATED_DEFAULTS[arg_name]
+            if arg_name == 'gene_build':
+                print('Make sure a folder named "%s" exists in the ref bundle.' % args_dict[arg_name])
+        untar(args.ref, os.path.join(reference_data_root, args_dict['species'], args_dict['ref_build']))
+
+    input_fastqs = []
+    fq_lane_name_count = 1
+
+    # the Defuse perl wrapper cannot read gzipped FastQ, so pick the uncompressed bamtofastq template for it:
+    fq_suffix, bam_to_fq_template = ('fq.gz', BAM_TO_FASTQ) if not no_gzip else ('fq', BAM_TO_FASTQ_FOR_DEFUSE)
+
+    for a_raw_file in args.input:
+        if a_raw_file.endswith('.cram'):
+            sys.exit('Error: CRAM file is not supported, please remove %s from input and retry.' % a_raw_file)
+        # if input is not a bam, assume it's a fastq.
+        if not a_raw_file.endswith('.bam'):
+            if no_gzip and a_raw_file.endswith('.gz'):
+                # the Defuse perl wrapper cannot read gzipped FastQ, so unzip into temp_dir:
+                unzip_to = os.path.join(
+                    temp_dir,
+                    os.path.basename(a_raw_file)[:-3]  # remove the last 3 chars from the base name, which should be '.gz'
+                )
+                gunzip(a_raw_file, unzip_to)
+                input_fastqs.append(os.path.abspath(unzip_to))
+            else:
+                input_fastqs.append(os.path.abspath(a_raw_file))
+            continue
+
+        bam2fq_params = {
+            'bam2fq_tmp': os.path.join(temp_dir, '%s.%s' % (args.sample_name, fq_lane_name_count)),
+            'bam2fq_tmp_single_end': os.path.join(temp_dir, '%s.%s.s' % (args.sample_name, fq_lane_name_count)),
+            'bam2fq_tmp_unmatched': os.path.join(temp_dir, '%s.%s.o1' % (args.sample_name, fq_lane_name_count)),
+            'bam2fq_tmp_unmatched2': os.path.join(temp_dir, '%s.%s.o2' % (args.sample_name, fq_lane_name_count)),
+            'bam2fq_tmp_matched': os.path.join(temp_dir, '%s.%s_1.%s' % (args.sample_name, fq_lane_name_count, fq_suffix)),
+            'bam2fq_tmp_matched_2': os.path.join(temp_dir, '%s.%s_2.%s' % (args.sample_name, fq_lane_name_count, fq_suffix)),
+            'in_bam': os.path.abspath(a_raw_file)
+        }
+        run_templates_in_shell([bam_to_fq_template], bam2fq_params)
+        input_fastqs += [
+            bam2fq_params['bam2fq_tmp_matched'],
+            bam2fq_params['bam2fq_tmp_matched_2']
+        ]
+        fq_lane_name_count += 1
+
+    # gathering parameters
+    params = {
+        'sample_name': args.sample_name,
+        'input': ' '.join(input_fastqs),
+        'out_dir': os.path.abspath(args.out_dir),
+        'threads': args.threads,
+        'reference_data_root': reference_data_root,
+        'species': args_dict['species'],
+        'ref_build': args_dict['ref_build'],
+        'gene_build': args_dict['gene_build']
+    }
+
+    run_templates_in_shell(fusion_templates, params)
+
+    # clean temp dir
+    shutil.rmtree(temp_dir)
+
+
+def tophat_fusion(args):
+    '''
+    Top level entry point for running tophat_fusion.pl (through run_fusion_wrapper) on RNA-Seq data.
+    '''
+    run_fusion_wrapper(args, 'cgpRna_tophat-fusion_temp', [TOPHAT_FUSION])
+
+
+def star_fusion(args):
+    '''
+    Top level entry point for running star_fusion.pl (through run_fusion_wrapper) on RNA-Seq data.
+    '''
+    run_fusion_wrapper(args, 'cgpRna_star-fusion_temp', [STAR_FUSION])
+
+
+def defuse(args):
+    '''
+    Top level entry point for running defuse_fusion.pl followed by defuse_filters.pl on RNA-Seq data; no_gzip is set so all FastQ input is uncompressed.
+    '''
+    run_fusion_wrapper(args, 'cgpRna_defuse_temp', [DEFUSE_FUSION, DEFUSE_FILTER], True)
diff --git a/run-cgprna/run_cgprna/map.py b/run-cgprna/run_cgprna/map.py
index 8a902f8..579012a 100644
--- a/run-cgprna/run_cgprna/map.py
+++ b/run-cgprna/run_cgprna/map.py
@@ -3,6 +3,7 @@
 import re
 import shutil
 import fnmatch
+import copy
 from string import Template
 from . import run_templates_in_shell, untar, mkdir
 
@@ -11,25 +12,20 @@
 BAM_INDEX_TEMPLATE = Template('bamindex < $out_dir/$sample_name.star.AlignedtoTranscriptome.out.bam > $out_dir/$sample_name.star.AlignedtoTranscriptome.out.bam.bai')
 RENAME_OUTPUT_TEMPLATE = Template('mv "$out_dir/${sample_name}.$file_ext" "$out_dir/${out_file_prefix}.$file_ext"')
 
+# defaults for ref related args left unset with a bundle tar file; needed only because star_mapping.pl will try to find files in a particular structure
+REF_RELATED_DEFAULTS = {
+    'species': 'unspecified_species',
+    'ref_build': 'unspecified_ref_build',
+    'gene_build': 'ensembl',
+    'gene_build_gtf_name': 'ensembl.gtf'
+}
 
 def map_seq_files(args):
     '''
     Top level entry point for mapping RNA-Seq sequence files.
     '''
-    # keys should be the same as what they have in command_line.py
-    ref_related_args = {
-        '--species': args.species,
-        '--reference-build': args.ref_build,
-        '--gene-build': args.gene_build,
-        '--gene-build-gtf-name': args.gene_build_gtf_name
-    }
-
-    # only because star_mapping.pl will try to find files in a particular structure
-    ref_related_defaults = {
-        '--species': 'unspecified_species',
-        '--reference-build': 'unspecified_ref_build',
-        '--gene-build': 'unspecified_gene_build'
-    }
+    # args to dict to allow updates later
+    args_dict = copy.deepcopy(vars(args))
 
     # only use temp_dir when needed to extract reference files
     temp_dir = os.path.join(os.path.abspath(args.out_dir), 'cgpRna_map_temp')
@@ -52,18 +48,19 @@ def map_seq_files(args):
 
     reference_data_root = os.path.abspath(args.ref)
     # Anything not a file will be treated as a reference root folder
-    if not os.path.isfile(reference_data_root) and any(ele is None for ele in ref_related_args.values()):
-        sys.exit(
-            'Error: missing required input. When "--reference" is not a reference bundle tar file, you have to provide: %s' % ', '.join(
-                [ key for key, value in ref_related_args.items() if value is None])
-        )
-
+    if not os.path.isfile(reference_data_root):
+        if not os.path.exists(reference_data_root):
+            sys.exit('Error: cound not locate directory: %s' % reference_data_root)
+        if any(args_dict[ele] is None for ele in REF_RELATED_DEFAULTS.keys()):
+            sys.exit(
+                'Error: missing required input. When "--reference" is not a reference bundle tar file, you have to provide: %s' % ', '.join(
+                    [ '--' + key.replace('_', '-') for key in REF_RELATED_DEFAULTS.keys() if args_dict[key] is None])
+            )
+    elif not os.path.basename(reference_data_root).endswith('.tar.gz'):
     # check if input ref file has valid file extensions
-    if not os.path.basename(reference_data_root).endswith('.tar.gz'):
         sys.exit('Error: wrong input format. "--reference" can only be a tar.gz file or a folder.')
-
-    # If a pre-built ref bundle tar file is supplied, prepare the reference
-    if re.match(r'.*\.tar\.gz$', os.path.basename(reference_data_root)):
+    else:
+        # If a pre-built ref bundle tar file is supplied, prepare the reference
         mkdir(temp_dir)
         clean_temp = 1
 
@@ -72,35 +69,20 @@ def map_seq_files(args):
         reference_data_root=os.path.join(temp_dir, ref_root_dir_name)
         
         # set ref related values for star_mapping.pl
-        for key,value in ref_related_args.items():
-            # GTF file name will be the same as in the bundle
-            if key!= '--gene-build-gtf-name':
-                if value is None:
-                    ref_related_args[key] = ref_related_defaults[key]
-            else:
-                if value is not None:
-                    print('Warning: provided "--gene-build-gtf-name" will be overwritten by the GTF file name in the reference bundle.')
+        for arg_name in REF_RELATED_DEFAULTS.keys():
+            if args_dict[arg_name] is None:
+                print('Set "%s" to default.' % arg_name)
+                args_dict[arg_name] = REF_RELATED_DEFAULTS[arg_name]
+            if arg_name == 'gene_build':
+                print('Make sure a folder named "%s" exists in the ref bundle.' % args_dict[arg_name])
+            elif arg_name == 'gene_build_gtf_name':
+                print('Make sure a file named: "%s" exists in the gene build folder in the bundle.' % args_dict[arg_name])
 
         # make the folder structure
-        bundle_decompress_path = os.path.join(temp_dir, ref_root_dir_name, ref_related_args['--species'], ref_related_args['--reference-build'], 'star')
-        final_gtf_folder = os.path.join(bundle_decompress_path, ref_related_args['--gene-build'])
-        mkdir(final_gtf_folder)
-
+        bundle_decompress_path = os.path.join(temp_dir, ref_root_dir_name, args_dict['species'], args_dict['ref_build'])
         # dump reference bundle
         untar(args.ref, bundle_decompress_path)
 
-        # find the GTF file
-        gtfs = find('*.gtf', bundle_decompress_path)
-        if len(gtfs) == 1:
-            ref_related_args['--gene-build-gtf-name'] = os.path.basename(gtfs[0])
-            # link the file to final_gtf_folder
-            os.symlink(
-                gtfs[0],
-                os.path.join(final_gtf_folder, ref_related_args['--gene-build-gtf-name'])
-            )
-        else:
-            sys.exit('Error: none or too many GTF files in refence bundle. Found GTF(s): %s' % ','.join(gtfs))
-
     # gathering parameters
     params = {
         **vars(args),
@@ -108,10 +90,10 @@ def map_seq_files(args):
         'reference_data_root': reference_data_root,
         'other_options': ' '.join(other_options),
         'out_dir': os.path.abspath(args.out_dir),  # overwrite the value in args with absolute path
-        'species': ref_related_args['--species'],  # overwrite the value in args
-        'ref_build': ref_related_args['--reference-build'],  # overwrite the value in args
-        'gene_build': ref_related_args['--gene-build'],  # overwrite the value in args
-        'gene_build_gtf_name': ref_related_args['--gene-build-gtf-name']  # overwrite the value in args
+        'species': args_dict['species'],  # overwrite the value in args
+        'ref_build': args_dict['ref_build'],  # overwrite the value in args
+        'gene_build': args_dict['gene_build'],  # overwrite the value in args
+        'gene_build_gtf_name': args_dict['gene_build_gtf_name']  # overwrite the value in args
     }
 
     run_templates_in_shell(
@@ -148,11 +130,3 @@ def map_seq_files(args):
     # clean temp dir
     if clean_temp:
         shutil.rmtree(temp_dir)
-
-
-def find(pattern, path):
-    return [
-        os.path.join(root, name)
-        for root, _, files in os.walk(path) if files
-        for name in files if fnmatch.fnmatch(name, pattern)
-    ]
diff --git a/setup.sh b/setup.sh
index 0749ab7..a4dba18 100755
--- a/setup.sh
+++ b/setup.sh
@@ -39,13 +39,13 @@ SOURCE_BOWTIE2="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.2.3/
 VERSION_BOWTIE2="2.2.3"
 SOURCE_TOPHAT="https://github.com/cancerit/tophat/archive/v2.1.0a.tar.gz"
 SOURCE_BLASTN="ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.30/ncbi-blast-2.2.30+-x64-linux.tar.gz"
-SOURCE_DEFUSE="https://bitbucket.org/dranew/defuse/get/v0.7.0.tar.gz"
-VERSION_DEFUSE="0.7.0"
+SOURCE_DEFUSE="https://bitbucket.org/dranew/defuse/get/v0.8.2.tar.gz"
+VERSION_DEFUSE="0.8.2"
 SOURCE_GMAP="http://research-pub.gene.com/gmap/src/gmap-gsnap-2015-09-10.tar.gz"
 SOURCE_BLAT="https://hgwdev.gi.ucsc.edu/~kent/src/blatSrc35.zip"
 SOURCE_FATOTWOBIT="http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/faToTwoBit"
 SOURCE_BEDTOOLS="https://github.com/arq5x/bedtools2/releases/download/v2.21.0/bedtools-2.21.0.tar.gz"
-RSEQC_VERSION=2.6.4
+RSEQC_VERSION=3.0.0
 HTSEQ_VERSION=0.7.2
 
 done_message () {
@@ -142,7 +142,7 @@ export PERL5LIB="$PERLROOT"
 
 # Set PYTHONPATH as well so that RSeQC can be installed
 unset PYTHONPATH
-PYTHONROOT=$INST_PATH/lib/python2.7/site-packages
+PYTHONROOT=$INST_PATH/lib/python3/site-packages
 mkdir -p $PYTHONROOT
 export PYTHONPATH="$PYTHONROOT"
 
@@ -417,7 +417,7 @@ if [ -e $SETUP_DIR/rseqc.success ]; then
 else
 (
   cd $SETUP_DIR
-  pip install --prefix $INST_PATH RSeQC==$RSEQC_VERSION
+  pip3 install --prefix $INST_PATH RSeQC==$RSEQC_VERSION
   touch $SETUP_DIR/rseqc.success
   )>>$INIT_DIR/setup.log 2>&1
 fi
@@ -430,7 +430,7 @@ if [ -e $SETUP_DIR/htseq.success ]; then
 else
 (
   cd $SETUP_DIR
-  pip install --prefix $INST_PATH HTSeq==$HTSEQ_VERSION
+  pip3 install --prefix $INST_PATH HTSeq==$HTSEQ_VERSION
   touch $SETUP_DIR/htseq.success
   )>>$INIT_DIR/setup.log 2>&1
 fi