-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
20 changed files
with
312 additions
and
224 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
|
||
## Pipeline configuration: storage layout, reference files and simulation /
## fusion-caller settings. Every setting is exported; directories under
## STORAGE_DIR are created as a side effect of running this file.

## SYSTEM OPTIONS
# NOTE(review): TF_BASH is presumably the launcher for pipeline scripts
# (plain bash, or sbatch for cluster submission) -- confirm against the
# scripts that read it.
export TF_BASH=bash
#export TF_BASH=sbatch

export THREADS=32

#######################################
# Create a directory, including any missing parents; succeeds without doing
# anything when the directory already exists.
# Arguments: $1 - directory path
# Returns:   exit status of mkdir
#######################################
_ensure_dir() {
  mkdir -p -- "$1"
}

## SCRATCH STORAGE DIRECTORY
export STORAGE_DIR=/home/vantwisk/vantwisk/fusions/seq_run
_ensure_dir "${STORAGE_DIR}"

## BASE STORAGE DIRECTORIES
export REF_STORAGE_DIR="${STORAGE_DIR}/ref"
_ensure_dir "${REF_STORAGE_DIR}"
export SIM_STORAGE_DIR="${STORAGE_DIR}/sim"
_ensure_dir "${SIM_STORAGE_DIR}"
export RESULTS_STORAGE_DIR="${STORAGE_DIR}/results"
_ensure_dir "${RESULTS_STORAGE_DIR}"

## ALIGNMENT STORAGE DIRECTORY
export ALIGNMENT_STORAGE_DIR="${STORAGE_DIR}/alignments"
_ensure_dir "${ALIGNMENT_STORAGE_DIR}"

## RESULTS STORAGE DIRECTORIES
# One output directory per fusion caller, all under RESULTS_STORAGE_DIR.
export JAFFAL_STORAGE_DIR="${RESULTS_STORAGE_DIR}/jaffal"
_ensure_dir "${JAFFAL_STORAGE_DIR}"
export LONGGF_STORAGE_DIR="${RESULTS_STORAGE_DIR}/longgf"
_ensure_dir "${LONGGF_STORAGE_DIR}"
export GENION_STORAGE_DIR="${RESULTS_STORAGE_DIR}/genion"
_ensure_dir "${GENION_STORAGE_DIR}"
export PBFUSION_STORAGE_DIR="${RESULTS_STORAGE_DIR}/pbfusion"
_ensure_dir "${PBFUSION_STORAGE_DIR}"
export FUSIONSEEKER_STORAGE_DIR="${RESULTS_STORAGE_DIR}/fusionseeker"
_ensure_dir "${FUSIONSEEKER_STORAGE_DIR}"

export ARRIBA_STORAGE_DIR="${RESULTS_STORAGE_DIR}/arriba"
_ensure_dir "${ARRIBA_STORAGE_DIR}"
export STARFUSION_STORAGE_DIR="${RESULTS_STORAGE_DIR}/starfusion"
_ensure_dir "${STARFUSION_STORAGE_DIR}"

## GRAPH STORAGE DIRECTORIES
export GRAPHS_STORAGE_DIR="${RESULTS_STORAGE_DIR}/graphs"
_ensure_dir "${GRAPHS_STORAGE_DIR}"

## RESOURCES
# Paths only; nothing is downloaded or built here.
export DNA_REFERENCE="${REF_STORAGE_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
export CDNA_REFERENCE="${REF_STORAGE_DIR}/Homo_sapiens.GRCh38.cdna.all.fa"
export GTF_REFERENCE="${REF_STORAGE_DIR}/Homo_sapiens.GRCh38.105.gtf"

export PBMM2_MMI="${REF_STORAGE_DIR}/hg38_gencode.mmi"

export STAR_INDEX="${REF_STORAGE_DIR}/hg38_star_index"

export GENOMIC_SUPER_DUPS="${REF_STORAGE_DIR}/genomicSuperDups.txt"

## Annotation Limit Settings
export TRANSCRIPT_LIMIT=1000
export TRANSCRIPT_LIMITED_FILE="${REF_STORAGE_DIR}/Homo_sapiens.GRCh38.cdna.limited_${TRANSCRIPT_LIMIT}.fa"

## FUSION SIMULATION SETTINGS
export NFUSIONS=100
export FUSIM_FASTA_FILE="${REF_STORAGE_DIR}/fusim_${NFUSIONS}.fasta"
# NOTE(review): the ".fxt" extension may be a typo for ".txt"; left unchanged
# because other scripts may already rely on this exact path.
export FUSIM_TXT_FILE="${REF_STORAGE_DIR}/fusim_${NFUSIONS}.fxt"
export FUSION_TRANSCRIPTOME="${REF_STORAGE_DIR}/Homo_sapiens.GRCh38.cdna.limited_${TRANSCRIPT_LIMIT}_fusions_${NFUSIONS}.fa"

## LONGREAD AND SHORTREAD READ SIMULATION SETTINGS
# NOTE: bash does not pass array variables through the environment, so the
# arrays below are visible only to shells that source this file, not to child
# processes. The trailing comments hold alternative, larger value sets.
export N_TRANSCRIPTS=('1') #('1' '2' '4' '8' '16')
export REPLICATES=1 #10
export COVERAGE=(10) #(3 5 10 30 50 100)
export QUALITY=('95,100,4') #('75,90,8' '87,97,5' '95,100,4')
export TECH=('pacbio2016' 'nanopore2020')
export READ_LENGTHS=(100 150)

## LONGGF OPTIONS
export MIN_OVERLAP_LEN=100
export BIN_SIZE=50
export MIN_MAP_LENGTH=100

## JAFFAL OPTIONS

## GENION OPTIONS
export GENION_MIN_SUPPORT=2

## PBFUSION OPTIONS
export PBFUSION_MIN_COVERAGE=2

# NOTE(review): the lines below are bare variable expansions, not assignments.
# While a variable is unset or empty its line does nothing; once set, its value
# would be executed as a command. Presumably these are placeholders for
# tool-path settings filled in elsewhere -- confirm before changing them.
$ART_P
$RUSTYREAD_P
$MINIMAP2_P
$SAMTOOLS_P
$JAFFA_P
$GENION_P
$PBFUSION_P
$STAR_P
$STAR_FUSION_P
$ARRIBA_P
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,24 @@ | ||
|
||
# Reference download/preparation steps.
# NOTE(review): this span is a rendered diff hunk (@@ -1,14 +1,24 @@) with no
# +/- markers. Lines from before and after the change appear together, so it is
# not runnable as shown (some `if` openers below have no matching `fi`).
FASTA_NAME=Homo_sapiens.cdna.gtf_limited.fa | ||
# Fetch and unpack the Ensembl release-105 GRCh38 primary-assembly FASTA unless
# a file already exists at ${DNA_REFERENCE}.
if [ ! -f ${DNA_REFERENCE} ]; then | ||
wget -O ${DNA_REFERENCE}.gz http://ftp.ensembl.org/pub/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz | ||
gunzip -d ${DNA_REFERENCE}.gz | ||
fi | ||
|
||
# Same pattern for the Ensembl release-105 cDNA FASTA at ${CDNA_REFERENCE}.
if [ ! -f ${CDNA_REFERENCE} ]; then | ||
wget -O ${CDNA_REFERENCE}.gz http://ftp.ensembl.org/pub/release-105/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz | ||
gunzip -d ${CDNA_REFERENCE}.gz | ||
fi | ||
|
||
# GTF annotation. The first three lines download into the current directory
# under a fixed file name (presumably the pre-change version -- confirm); the
# following stanza does the same download but targets ${GTF_REFERENCE}.
if [ ! -f Homo_sapiens.GRCh38.105.gtf ]; then | ||
wget http://ftp.ensembl.org/pub/release-105/gtf/homo_sapiens/Homo_sapiens.GRCh38.105.gtf.gz | ||
gunzip -d Homo_sapiens.GRCh38.105.gtf.gz | ||
if [ ! -f ${GTF_REFERENCE} ]; then | ||
wget -O ${GTF_REFERENCE}.gz http://ftp.ensembl.org/pub/release-105/gtf/homo_sapiens/Homo_sapiens.GRCh38.105.gtf.gz | ||
gunzip -d ${GTF_REFERENCE}.gz | ||
fi | ||
|
||
# The first three lines are a fixed-name cDNA download into the current
# directory (presumably pre-change -- confirm); the following stanza fetches
# the UCSC hg38 genomicSuperDups table to ${GENOMIC_SUPER_DUPS}.
if [ ! -f Homo_sapiens.GRCh38.cdna.all.fa ]; then | ||
wget http://ftp.ensembl.org/pub/release-105/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz | ||
gunzip -d Homo_sapiens.GRCh38.cdna.all.fa.gz | ||
if [ ! -f ${GENOMIC_SUPER_DUPS} ]; then | ||
wget -O ${GENOMIC_SUPER_DUPS}.gz ftp://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/genomicSuperDups.txt.gz | ||
gunzip -d ${GENOMIC_SUPER_DUPS}.gz | ||
fi | ||
|
||
# Two invocations of generate_breakpoints.R are shown: an unconditional one
# with fixed file names and ${FASTA_NAME} as the third argument, and one that
# runs only when ${TRANSCRIPT_LIMITED_FILE} is missing and passes
# ${TRANSCRIPT_LIMIT} as a fourth argument.
Rscript generate_breakpoints.R Homo_sapiens.GRCh38.105.gtf Homo_sapiens.GRCh38.cdna.all.fa ${FASTA_NAME} | ||
if [ ! -f ${TRANSCRIPT_LIMITED_FILE} ]; then | ||
Rscript generate_breakpoints.R ${GTF_REFERENCE} ${CDNA_REFERENCE} ${TRANSCRIPT_LIMITED_FILE} ${TRANSCRIPT_LIMIT} | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,28 @@ | ||
# JAFFAL job submission: local settings followed by nested loops that submit
# one `sbatch jaffal_helper.sh` job per parameter combination.
# NOTE(review): this span is a rendered diff hunk (@@ -1,23 +1,28 @@) with no
# +/- markers. Several settings and loop lines appear twice (one version from
# each side of the change), so it is not runnable as shown.
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa | ||
FUSE_TRANSCRIPTS=training16k_fusions.fa | ||
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa | ||
FUSE_META=training16k_fusions.txt | ||
NFUSIONS=100 | ||
|
||
# Sweep parameters. Each of the first four names is assigned twice below: a
# full-sweep version, then a reduced version whose trailing comment keeps the
# larger value set.
N_TRANSCRIPTS=('1') | ||
REPLICATES=10 | ||
COVERAGE=(3 5 10 30 50 100) | ||
QUALITY=('75,90,8' '87,97,5' '95,100,4') | ||
N_TRANSCRIPTS=('1') #('1' '2' '4' '8' '16') | ||
REPLICATES=1 #10 | ||
COVERAGE=(10) #(3 5 10 30 50 100) | ||
QUALITY=('95,100,4') #('75,90,8' '87,97,5' '95,100,4') | ||
TECH=('pacbio2016' 'nanopore2020') | ||
READ_LENGTHS=(100 150) | ||
|
||
## LONGGF OPTIONS | ||
MIN_OVERLAP_LEN=100 | ||
BIN_SIZE=50 | ||
MIN_MAP_LENGTH=100 | ||
|
||
## JAFFAL OPTIONS | ||
|
||
## GENION OPTIONS | ||
GENION_MIN_SUPPORT=2 | ||
|
||
# The replicate loop appears both live and commented out, and so does its
# closing `done` at the end. The inner loops iterate over array indices
# (${!ARR[@]}), which are then used to look up each value.
for i in $(seq 1 ${REPLICATES}); do | ||
#for i in $(seq 1 ${REPLICATES}); do | ||
for q in ${!COVERAGE[@]}; do | ||
for j in ${!QUALITY[@]}; do | ||
for k in ${!TECH[@]}; do | ||
for n in ${!N_TRANSCRIPTS[@]}; do | ||
# Two submit lines are shown: the first passes ${i} as the fourth argument,
# the second passes a literal 1 in that position.
sbatch jaffal_helper.sh ${COVERAGE[$q]} 1 1 ${i} ${QUALITY[$j]} ${TECH[$k]} ax sam ${N_TRANSCRIPTS[$n]} | ||
sbatch jaffal_helper.sh ${COVERAGE[$q]} 1 1 1 ${QUALITY[$j]} ${TECH[$k]} ax sam ${N_TRANSCRIPTS[$n]} | ||
done | ||
done | ||
done | ||
done | ||
done | ||
#done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.