# Revised patch for the TEannot step scripts (Steps 5-8 enabled; Step 6 as an array job).
#
# Notes on this revision:
# - The hunks were re-flowed from a copy whose newlines and indentation had been
#   collapsed. Blank context lines are placed according to the hunk line counts;
#   the indentation of context/removed lines in the TEannot_AllSteps.sh hunk is
#   reconstructed and may not match the target file byte-for-byte. If that hunk
#   is rejected, apply with whitespace-tolerant matching.
# - The added lines of Steps 5-8 differ from the revision named by the "index"
#   post-image ids; the hunk line counts were updated to match.
# - Steps 5-8: ProjectName is now checked explicitly, because config.txt is no
#   longer sourced by the step scripts.
# - Step 6: the array index is validated before use, expansions are quoted and
#   the previously unused OUT_DIR drives the output-folder test.
# - Step 8: prints a message when it skips, as Step 6 does.
diff --git a/TEannot/TEannot_AllSteps.sh b/TEannot/TEannot_AllSteps.sh
index c838816..3bbfd7e 100644
--- a/TEannot/TEannot_AllSteps.sh
+++ b/TEannot/TEannot_AllSteps.sh
@@ -67,28 +67,29 @@ jid_step4=$(sbatch \
   TEannot_Step4.sh | \
   cut -d" " -f4)
 
-#jid_step5=$(sbatch \
-#  --kill-on-invalid-dep=yes \
-#  --dependency=afterok:$jid_step4 \
-#  TEannot_Step5.sh | \
-#  cut -d" " -f4)
-
-#jid_step6=$(sbatch \
-#  --kill-on-invalid-dep=yes \
-#  --dependency=afterok:$jid_step1 \
-#  TEannot_Step6.sh | \
-#  cut -d" " -f4)
-
-#jid_step7=$(sbatch \
-#  --kill-on-invalid-dep=yes \
-#  --dependency=afterok:$jid_step3:$jid_step5:$jid_step6 \
-#  TEannot_Step7.sh | \
-#  cut -d" " -f4)
-
-#jid_step8=$(sbatch \
-#  --kill-on-invalid-dep=yes \
-#  --dependency=afterok:$jid_step7 \
-#  TEannot_Step8.sh | \
-#  cut -d" " -f4)
+jid_step5=$(sbatch \
+  --kill-on-invalid-dep=yes \
+  --dependency=afterok:$jid_step4 \
+  TEannot_Step5.sh | \
+  cut -d" " -f4)
+
+jid_step6=$(sbatch \
+  --kill-on-invalid-dep=yes \
+  --dependency=afterok:$jid_step1 \
+  --array=0-1 \
+  TEannot_Step6.sh | \
+  cut -d" " -f4)
+
+jid_step7=$(sbatch \
+  --kill-on-invalid-dep=yes \
+  --dependency=afterok:$jid_step3:$jid_step5:$jid_step6 \
+  TEannot_Step7.sh | \
+  cut -d" " -f4)
+
+jid_step8=$(sbatch \
+  --kill-on-invalid-dep=yes \
+  --dependency=afterok:$jid_step7 \
+  TEannot_Step8.sh | \
+  cut -d" " -f4)
 
 echo "Finished submitting all jobs at $(date)"
diff --git a/TEannot/TEannot_Step5.sh b/TEannot/TEannot_Step5.sh
index a4b069e..3f6e8e1 100644
--- a/TEannot/TEannot_Step5.sh
+++ b/TEannot/TEannot_Step5.sh
@@ -1,13 +1,16 @@
 #!/bin/bash
 
 #SBATCH --nodes=1
-#SBATCH --ntasks=4
+#SBATCH --ntasks=2
 #SBATCH --mem-per-cpu=2G
 #SBATCH --time=1-00:00:00
-#SBATCH --output=TEAnnot_step5.stdout
+#SBATCH --output=TEannot-step5.stdout
 #SBATCH --job-name="S5_TEannot"
 #SBATCH -p intel
 
 module load repet/2.5
-source config.txt
+
+# config.txt is no longer sourced here, so ProjectName has to arrive via the
+# job environment; stop early with a clear message if it did not.
+: "${ProjectName:?ProjectName must be set in the environment}"
 TEannot.py -P $ProjectName -C TEannot.cfg -S 5
diff --git a/TEannot/TEannot_Step6.sh b/TEannot/TEannot_Step6.sh
index 3d75375..17637fb 100644
--- a/TEannot/TEannot_Step6.sh
+++ b/TEannot/TEannot_Step6.sh
@@ -4,11 +4,57 @@
 #SBATCH --ntasks=4
 #SBATCH --mem-per-cpu=2G
 #SBATCH --time=1-00:00:00
-#SBATCH --output=TEAnnot_step6.stdout
+#SBATCH --output=TEannot-step6-%a.stdout
 #SBATCH --job-name="S6_TEannot"
 #SBATCH -p intel
 
 module load repet/2.5
-source config.txt
-TEannot.py -P $ProjectName -C TEannot.cfg -S 6 -b tblastx
-TEannot.py -P $ProjectName -C TEannot.cfg -S 6 -b blastx
+
+# REPET TEannot - Step 6
+# Align RepBase databanks to genome
+#
+# Submitted as a two-task SLURM array (--array=0-1): task 0 runs blastx and
+# task 1 runs tblastx. A task is skipped when its output folder exists.
+
+# config.txt is no longer sourced here, so ProjectName has to arrive via the
+# job environment; stop early rather than act on an empty project prefix.
+: "${ProjectName:?ProjectName must be set in the environment}"
+
+# Select the aligner for this array task. The index is checked before it is
+# used, and an unset SLURM_ARRAY_TASK_ID takes the error branch as well.
+case "${SLURM_ARRAY_TASK_ID:-}" in
+  0)
+    LOCAL_ALIGNER="blastx"
+    LCL_ALN="x"
+    ;;
+  1)
+    LOCAL_ALIGNER="tblastx"
+    LCL_ALN="tx"
+    ;;
+  *)
+    echo "SLURM array improperly set up"
+    exit 1
+    ;;
+esac
+OUT_DIR="${ProjectName}_TEdetect/bankBLR${LCL_ALN}"
+
+if [ ! -d "$OUT_DIR" ]; then
+  # No output folder yet: drop the step's MySQL tables first (presumably
+  # leftovers of an earlier, incomplete run), then run the alignment.
+  MYSQL_HOST=$(grep "repet_host" TEannot.cfg | cut -d" " -f2)
+  MYSQL_USER=$(grep "repet_user" TEannot.cfg | cut -d" " -f2)
+  MYSQL_PASS=$(grep "repet_pw" TEannot.cfg | cut -d" " -f2)
+  MYSQL_DB=$(grep "repet_db" TEannot.cfg | cut -d" " -f2)
+
+  # NOTE(review): -p<password> on the command line is visible in the process
+  # list; consider a MySQL client option file instead.
+  echo "DROP TABLE IF EXISTS ${ProjectName}_chk_bankBLR${LCL_ALN}_path;" \
+    "DROP TABLE IF EXISTS ${ProjectName}_chr_bankBLR${LCL_ALN}_path;" \
+    "DROP TABLE IF EXISTS ${ProjectName}_bankBLR${LCL_ALN}_nt_seq;" \
+    "DROP TABLE IF EXISTS ${ProjectName}_bankBLR${LCL_ALN}_prot_seq;" | \
+    mysql -h "$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" "$MYSQL_DB"
+
+  TEannot.py -P "$ProjectName" -C TEannot.cfg -S 6 -b "$LOCAL_ALIGNER"
+else
+  echo "Step 6 output folder detected, skipping..."
+fi
diff --git a/TEannot/TEannot_Step7.sh b/TEannot/TEannot_Step7.sh
index 3fce1bb..097448a 100644
--- a/TEannot/TEannot_Step7.sh
+++ b/TEannot/TEannot_Step7.sh
@@ -4,10 +4,13 @@
 #SBATCH --ntasks=1
 #SBATCH --mem-per-cpu=2G
 #SBATCH --time=1-00:00:00
-#SBATCH --output=TEAnnot_step7.stdout
+#SBATCH --output=TEannot-step7.stdout
 #SBATCH --job-name="S7_TEannot"
 #SBATCH -p intel
 
 module load repet/2.5
-source config.txt
+
+# config.txt is no longer sourced here, so ProjectName has to arrive via the
+# job environment; stop early with a clear message if it did not.
+: "${ProjectName:?ProjectName must be set in the environment}"
 TEannot.py -P $ProjectName -C TEannot.cfg -S 7
diff --git a/TEannot/TEannot_Step8.sh b/TEannot/TEannot_Step8.sh
index 264aec9..c3a2020 100644
--- a/TEannot/TEannot_Step8.sh
+++ b/TEannot/TEannot_Step8.sh
@@ -4,10 +4,20 @@
 #SBATCH --ntasks=1
 #SBATCH --mem-per-cpu=2G
 #SBATCH --time=1-00:00:00
-#SBATCH --output=TEAnnot_step8.stdout
+#SBATCH --output=TEannot-step8.stdout
 #SBATCH --job-name="S8_TEannot"
 #SBATCH -p intel
 
 module load repet/2.5
-source config.txt
-TEannot.py -P $ProjectName -C TEannot.cfg -S 8 -o GFF3
+
+# config.txt is no longer sourced here, so ProjectName has to arrive via the
+# job environment; stop early with a clear message if it did not.
+: "${ProjectName:?ProjectName must be set in the environment}"
+
+# Run step 8 only when the ${ProjectName}_GFF3 folder does not exist yet.
+if [ ! -d "${ProjectName}_GFF3" ]; then
+  TEannot.py -P "$ProjectName" -C TEannot.cfg -S 8 -o GFF3
+else
+  echo "Step 8 output folder detected, skipping..."
+fi
+