-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated pipeline and generation files.
- Loading branch information
Daniel Joseph Van Twisk
committed
Oct 7, 2024
1 parent
09dfb4a
commit 8df3f43
Showing
20 changed files
with
448 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash
#SBATCH --job-name fusionseeker
#SBATCH --partition allnodes
#SBATCH --time UNLIMITED
#SBATCH --cpus-per-task 1
#SBATCH --mem-per-cpu 10g

# Slurm batch job: run FusionSeeker on one sorted minimap2 BAM.
#
# Positional arguments read by this script:
#   $1 $5 $6 $4  combined, in that order, into the sample name
#                fusions-<$1>-<$5>-<$6>-<$4>
#   $12          selects the longreads_training<$12>k_* directories
# All other positions are accepted but not used here.
#
# --gtf and --ref are relative paths, so they resolve against the job's
# working directory.
#
# NOTE(review): the tool is asked for 16 threads while the allocation above
# requests a single CPU per task -- confirm which of the two is intended.

if (( $# < 12 )); then
  printf 'usage: %s ARG1 ... ARG12 (uses $1 $4 $5 $6 $12)\n' "${0##*/}" >&2
  exit 2
fi

SIM_ROOT=/datastore/scratch/users/vantwisk/sim
DATADIR_MINIMAP="${SIM_ROOT}/longreads_training${12}k_minimap2"
FUSIONSEEKER_DIR="${SIM_ROOT}/longreads_training${12}k_fusionseeker"
SAMPLE="fusions-${1}-${5}-${6}-${4}"

# -p makes this idempotent: many jobs of the same sweep start concurrently
# and a separate "test, then mkdir" can lose that race.
mkdir -p -- "${FUSIONSEEKER_DIR}"

# Disabled container prefix, kept for reference:
#singularity exec --pid --bind /datastore longgf_0.1.2--h05f6578_1.sif \
fusionseeker \
  --thread 16 \
  --bam "${DATADIR_MINIMAP}/${SAMPLE}-sorted.bam" \
  --gtf Homo_sapiens.GRCh38.105.gtf \
  --ref ../hg38.fa \
  -o "${FUSIONSEEKER_DIR}/${SAMPLE}-fusionseeker" \
  -s 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
|
||
# Submit one arriba_helper.sh Slurm job per point of the sweep:
# replicate x coverage x read length x transcript setting
# (10 x 6 x 2 x 1 = 120 submissions).

# --- Simulation inputs (only referenced by the disabled commands below) ---
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# --- Sweep axes ---
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
# QUALITY and TECH are only consumed by the disabled loops below.
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

READ_LENGTHS=(100 150)

# --- Disabled stages, kept for reference ---

#module load R-4.0.3

#Rscript split_transcripts.R ${TRANSCRIPTOME} ${FUSE_TRANSCRIPTS} ${FUSE_META} ${NFUSIONS}

#cp ${TRANSCRIPTOME} ${FUSE_TRANSCRIPTOME}
#cat ${FUSE_TRANSCRIPTS} >> ${FUSE_TRANSCRIPTOME}

#for i in $(seq 1 ${REPLICATES}); do
#  for q in ${!COVERAGE[@]}; do
#    for j in ${!QUALITY[@]}; do
#      for k in ${!TECH[@]}; do
#        for n in ${!N_TRANSCRIPTS[@]}; do
#          sbatch minimap2_helper.sh ${COVERAGE[$q]} 1 1 ${i} ${QUALITY[$j]} ${TECH[$k]} ax sam ${N_TRANSCRIPTS[$n]}
#        done
#      done
#    done
#  done
#done

#for i in $(seq 1 ${REPLICATES}); do
#  for q in ${!COVERAGE[@]}; do
#    for j in ${!QUALITY[@]}; do
#      for k in ${!TECH[@]}; do
#        for n in ${!N_TRANSCRIPTS[@]}; do
#          sbatch genself_helper.sh ${COVERAGE[$q]} 1 1 ${i} ${QUALITY[$j]} ${TECH[$k]} ax sam ${N_TRANSCRIPTS[$n]}
#        done
#      done
#    done
#  done
#done

# --- Active stage ---
# Helper receives: coverage, literal "1 1", read length, replicate number,
# transcript setting. The meaning of the literals is defined by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for read_len in "${READ_LENGTHS[@]}"; do
      for ntx in "${N_TRANSCRIPTS[@]}"; do
        sbatch arriba_helper.sh "${cov}" 1 1 "${read_len}" "${rep}" "${ntx}"
      done
    done
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Submit one fusionseeker_helper.sh Slurm job per point of the sweep:
# replicate x coverage x quality x technology x transcript setting
# (10 x 6 x 3 x 2 x 1 = 360 submissions).

# Simulation inputs; not referenced by the submission loop in this script.
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# Sweep axes.
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

# Fixed values forwarded to every job as arguments 9-11.
MIN_OVERLAP_LEN=100
BIN_SIZE=50
MIN_MAP_LENGTH=100

# Twelve positional arguments per job; the literals "1 1", "ax" and "sam"
# are forwarded unchanged and interpreted by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for qual in "${QUALITY[@]}"; do
      for tech in "${TECH[@]}"; do
        for ntx in "${N_TRANSCRIPTS[@]}"; do
          sbatch fusionseeker_helper.sh \
            "${cov}" 1 1 "${rep}" "${qual}" "${tech}" ax sam \
            "${MIN_OVERLAP_LEN}" "${BIN_SIZE}" "${MIN_MAP_LENGTH}" "${ntx}"
        done
      done
    done
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Submit one genion_helper.sh Slurm job per point of the sweep:
# replicate x coverage x quality x technology x transcript setting
# (10 x 6 x 3 x 2 x 1 = 360 submissions).

# Simulation inputs; not referenced by the submission loop in this script.
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# Sweep axes.
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

# Fixed value forwarded to every job as argument 9.
MIN_SUPPORT=2

# Ten positional arguments per job; the literals "1 1", "ax" and "sam"
# are forwarded unchanged and interpreted by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for qual in "${QUALITY[@]}"; do
      for tech in "${TECH[@]}"; do
        for ntx in "${N_TRANSCRIPTS[@]}"; do
          sbatch genion_helper.sh \
            "${cov}" 1 1 "${rep}" "${qual}" "${tech}" ax sam \
            "${MIN_SUPPORT}" "${ntx}"
        done
      done
    done
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Submit one jaffal_helper.sh Slurm job per point of the sweep:
# replicate x coverage x quality x technology x transcript setting
# (10 x 6 x 3 x 2 x 1 = 360 submissions).

# Simulation inputs; not referenced by the submission loop in this script.
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# Sweep axes.
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

# Nine positional arguments per job; the literals "1 1", "ax" and "sam"
# are forwarded unchanged and interpreted by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for qual in "${QUALITY[@]}"; do
      for tech in "${TECH[@]}"; do
        for ntx in "${N_TRANSCRIPTS[@]}"; do
          sbatch jaffal_helper.sh \
            "${cov}" 1 1 "${rep}" "${qual}" "${tech}" ax sam "${ntx}"
        done
      done
    done
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Submit one longgf_helper.sh Slurm job per point of the sweep:
# replicate x coverage x quality x technology x transcript setting
# (10 x 6 x 3 x 2 x 1 = 360 submissions).

# Simulation inputs; not referenced by the submission loop in this script.
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# Sweep axes.
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

# Fixed values forwarded to every job as arguments 9-11.
MIN_OVERLAP_LEN=100
BIN_SIZE=50
MIN_MAP_LENGTH=100

# Twelve positional arguments per job; the literals "1 1", "ax" and "sam"
# are forwarded unchanged and interpreted by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for qual in "${QUALITY[@]}"; do
      for tech in "${TECH[@]}"; do
        for ntx in "${N_TRANSCRIPTS[@]}"; do
          sbatch longgf_helper.sh \
            "${cov}" 1 1 "${rep}" "${qual}" "${tech}" ax sam \
            "${MIN_OVERLAP_LEN}" "${BIN_SIZE}" "${MIN_MAP_LENGTH}" "${ntx}"
        done
      done
    done
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
|
||
# Submit one minimap2_helper.sh Slurm job per point of the sweep:
# replicate x coverage x quality x technology x transcript setting
# (10 x 6 x 3 x 2 x 1 = 360 submissions).
# The other stages in this file are disabled and kept for reference.

# --- Simulation inputs (only referenced by the disabled commands below) ---
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# --- Sweep axes ---
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

# Only consumed by the disabled star_helper2.sh loop at the end.
READ_LENGTHS=(100 150)

# --- Disabled preparation steps ---

#module load R-4.0.3

#Rscript split_transcripts.R ${TRANSCRIPTOME} ${FUSE_TRANSCRIPTS} ${FUSE_META} ${NFUSIONS}

#cp ${TRANSCRIPTOME} ${FUSE_TRANSCRIPTOME}
#cat ${FUSE_TRANSCRIPTS} >> ${FUSE_TRANSCRIPTOME}

# --- Active stage ---
# Nine positional arguments per job; the literals "1 1", "ax" and "sam"
# are forwarded unchanged and interpreted by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for qual in "${QUALITY[@]}"; do
      for tech in "${TECH[@]}"; do
        for ntx in "${N_TRANSCRIPTS[@]}"; do
          sbatch minimap2_helper.sh \
            "${cov}" 1 1 "${rep}" "${qual}" "${tech}" ax sam "${ntx}"
        done
      done
    done
  done
done

# --- Disabled stages ---

#for i in $(seq 1 ${REPLICATES}); do
#  for q in ${!COVERAGE[@]}; do
#    for j in ${!QUALITY[@]}; do
#      for k in ${!TECH[@]}; do
#        for n in ${!N_TRANSCRIPTS[@]}; do
#          sbatch genself_helper.sh ${COVERAGE[$q]} 1 1 ${i} ${QUALITY[$j]} ${TECH[$k]} ax sam ${N_TRANSCRIPTS[$n]}
#        done
#      done
#    done
#  done
#done

#for i in $(seq 1 ${REPLICATES}); do
#  for q in ${!COVERAGE[@]}; do
#    for j in ${!QUALITY[@]}; do
#      for k in ${!TECH[@]}; do
#        for n in ${!N_TRANSCRIPTS[@]}; do
#          sbatch pbmm2_helper.sh ${COVERAGE[$q]} 1 1 ${i} ${QUALITY[$j]} ${TECH[$k]} ax sam ${N_TRANSCRIPTS[$n]}
#        done
#      done
#    done
#  done
#done

#for i in $(seq 1 ${REPLICATES}); do
#  for q in ${!COVERAGE[@]}; do
#    for j in ${!READ_LENGTHS[@]}; do
#      for n in ${!N_TRANSCRIPTS[@]}; do
#        sbatch star_helper2.sh ${COVERAGE[$q]} 1 1 ${READ_LENGTHS[$j]} ${i} ${N_TRANSCRIPTS[$n]}
#      done
#    done
#  done
#done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Submit one pbfusion_helper.sh Slurm job per point of the sweep:
# replicate x coverage x quality x technology x transcript setting
# (10 x 6 x 3 x 2 x 1 = 360 submissions).

# Simulation inputs; not referenced by the submission loop in this script.
TRANSCRIPTOME=Homo_sapiens.GRCh38.105.cdna.gtf_confirmed_new_16k.fa
FUSE_TRANSCRIPTS=training16k_fusions.fa
FUSE_TRANSCRIPTOME=training16k_transcriptome.fa
FUSE_META=training16k_fusions.txt
NFUSIONS=100

# Sweep axes.
N_TRANSCRIPTS=('1')
REPLICATES=10
COVERAGE=(3 5 10 30 50 100)
QUALITY=('75,90,8' '87,97,5' '95,100,4')
TECH=('pacbio2016' 'nanopore2020')

# Nine positional arguments per job; the literals "1 1", "ax" and "sam"
# are forwarded unchanged and interpreted by the helper.
for ((rep = 1; rep <= REPLICATES; rep++)); do
  for cov in "${COVERAGE[@]}"; do
    for qual in "${QUALITY[@]}"; do
      for tech in "${TECH[@]}"; do
        for ntx in "${N_TRANSCRIPTS[@]}"; do
          sbatch pbfusion_helper.sh \
            "${cov}" 1 1 "${rep}" "${qual}" "${tech}" ax sam "${ntx}"
        done
      done
    done
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.