From 8bf2106f7fc27649e3c96d1d5cb468ed80da3ede Mon Sep 17 00:00:00 2001 From: Travis Wrightsman Date: Fri, 7 Jul 2017 16:52:04 -0700 Subject: [PATCH] Improve TEannot steps 2, 3, and 4 job scripts --- TEannot/TEannot_AllSteps.sh | 27 +++++++++++++++++---------- TEannot/TEannot_Step2.sh | 2 ++ TEannot/TEannot_Step3.sh | 31 +++++++++++++++++++++++++++---- TEannot/TEannot_Step4.sh | 18 ++++++++++++------ 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/TEannot/TEannot_AllSteps.sh b/TEannot/TEannot_AllSteps.sh index 296aa8f..c838816 100644 --- a/TEannot/TEannot_AllSteps.sh +++ b/TEannot/TEannot_AllSteps.sh @@ -15,6 +15,7 @@ export ProjectName=$(grep "project_name" TEannot.cfg | cut -d" " -f2) # (!) modify these to your project/environment ## (only choose what REPET supports) export ALIGNERS_AVAIL="BLR+RM+CEN" +export SSR_DETECTORS_AVAIL="TRF+RMSSR" # ALIGNERS_AVAIL has to be a string because bash arrays cannot be passed # directly to SLURM jobs; so the string is split into an array here and @@ -23,6 +24,9 @@ IFS='+' read -ra ALIGNERS_AVAIL_ARRAY <<< "$ALIGNERS_AVAIL" # ${#ALIGNERS_AVAIL_ARRAY[@]} gives length of ALIGNERS_AVAIL_ARRAY array NUM_ALIGNERS=${#ALIGNERS_AVAIL_ARRAY[@]} +IFS='+' read -ra SSR_DETECTORS_AVAIL_ARRAY <<< "$SSR_DETECTORS_AVAIL" +NUM_SSR_DETECTORS=${#SSR_DETECTORS_AVAIL_ARRAY[@]} + # Clear the jobs table for the current project ## in case last run failed for some reason while sub-jobs were running MYSQL_HOST=$(grep "repet_host" TEannot.cfg | cut -d" " -f2) @@ -48,17 +52,20 @@ jid_step2=$(sbatch \ TEannot_Step2.sh | \ cut -d" " -f4) -#jid_step3=$(sbatch \ -# --kill-on-invalid-dep=yes \ -# --dependency=afterok:$jid_step2 \ -# TEannot_Step3.sh | \ -# cut -d" " -f4) +jid_step3=$(sbatch \ + --export=ProjectName,ALIGNERS_AVAIL \ + --kill-on-invalid-dep=yes \ + --dependency=afterok:$jid_step2 \ + TEannot_Step3.sh | \ + cut -d" " -f4) -#jid_step4=$(sbatch \ -# --kill-on-invalid-dep=yes \ -# --dependency=afterok:$jid_step1 \ -# TEannot_Step4.sh | \ -# cut -d" " -f4) +jid_step4=$(sbatch \ + --export=ProjectName,SSR_DETECTORS_AVAIL \ + --kill-on-invalid-dep=yes \ + --dependency=afterok:$jid_step1 \ + --array=0-$(( $NUM_SSR_DETECTORS - 1 )) \ + TEannot_Step4.sh | \ + cut -d" " -f4) #jid_step5=$(sbatch \ # --kill-on-invalid-dep=yes \ diff --git a/TEannot/TEannot_Step2.sh b/TEannot/TEannot_Step2.sh index 30a8267..407f78a 100644 --- a/TEannot/TEannot_Step2.sh +++ b/TEannot/TEannot_Step2.sh @@ -38,4 +38,6 @@ ALIGNER=${ALIGNERS_AVAIL_ARRAY[$(( $SLURM_ARRAY_TASK_ID % $NUM_ALIGNERS ))]} if [ ! -d "${OUT_DIR}/${ALIGNER}" ]; then TEannot.py -P $ProjectName -C TEannot.cfg -S 2 -a $ALIGNER $CMD_SUFFIX +else + echo "Step 2 output folder detected, skipping..." fi diff --git a/TEannot/TEannot_Step3.sh b/TEannot/TEannot_Step3.sh index 44f6061..587dc81 100644 --- a/TEannot/TEannot_Step3.sh +++ b/TEannot/TEannot_Step3.sh @@ -4,12 +4,35 @@ #SBATCH --ntasks=1 #SBATCH --mem-per-cpu=2G #SBATCH --time=1-00:00:00 -#SBATCH --output=TEAnnot_step3.stdout +#SBATCH --output=TEannot-step3.stdout #SBATCH --job-name="S3_TEannot" #SBATCH -p intel module load repet/2.5 -source config.txt -# REPET - Step 3 -TEannot.py -P $ProjectName -C TEannot.cfg -S 3 -c BLR+RM+CEN +# REPET TEannot - Step 3 +# Filter and combine HSPs from Step 2 alignment + +if [ ! -n "$ProjectName" ] || [ ! -n "$ALIGNERS_AVAIL" ]; then + echo 'One or more environment variables required by this script' \ + 'are unset. Either run this script through the scheduler script or' \ + 'set the variable(s) and use the --export option of sbatch before' \ + 'restarting.' + exit 1 +fi + +if [ ! -d "${ProjectName}_TEdetect/Comb" ]; then + # if re-running step, drop MySQL tables + MYSQL_HOST=$(grep "repet_host" TEannot.cfg | cut -d" " -f2) + MYSQL_USER=$(grep "repet_user" TEannot.cfg | cut -d" " -f2) + MYSQL_PASS=$(grep "repet_pw" TEannot.cfg | cut -d" " -f2) + MYSQL_DB=$(grep "repet_db" TEannot.cfg | cut -d" " -f2) + + echo "DROP TABLE IF EXISTS ${ProjectName}_chk_allTEs_path;" \ + "DROP TABLE IF EXISTS ${ProjectName}_chr_allTEs_path;" | \ + mysql -h $MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS $MYSQL_DB + + TEannot.py -P $ProjectName -C TEannot.cfg -S 3 -c $ALIGNERS_AVAIL +else + echo "Step 3 output folder detected, skipping..." +fi diff --git a/TEannot/TEannot_Step4.sh b/TEannot/TEannot_Step4.sh index 53ae3dc..21523c4 100644 --- a/TEannot/TEannot_Step4.sh +++ b/TEannot/TEannot_Step4.sh @@ -4,14 +4,20 @@ #SBATCH --ntasks=1 #SBATCH --mem-per-cpu=2G #SBATCH --time=1-00:00:00 -#SBATCH --output=TEAnnot_step4.stdout +#SBATCH --output=TEannot-step4-%a.stdout #SBATCH --job-name="S4_TEannot" #SBATCH -p intel module load repet/2.5 -source config.txt -# REPET - Step 4 -TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s TRF -TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s Mreps -TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s RMSSR +# REPET TEannot - Step 4 +# Search for satellites in the genomic sequence + +IFS='+' read -ra SSR_DETECTORS_AVAIL_ARRAY <<< "${SSR_DETECTORS_AVAIL}" +SSR_DETECTOR=${SSR_DETECTORS_AVAIL_ARRAY[$SLURM_ARRAY_TASK_ID]} + +if [ ! -d "${ProjectName}_SSRdetect/${SSR_DETECTOR}" ]; then + TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s $SSR_DETECTOR +else + echo "Step 4 output folder detected, skipping..." +fi