Skip to content

Commit

Permalink
Improve TEannot steps 2, 3, and 4 job scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
twrightsman committed Jul 7, 2017
1 parent 0c523a1 commit 8bf2106
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 20 deletions.
27 changes: 17 additions & 10 deletions TEannot/TEannot_AllSteps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export ProjectName=$(grep "project_name" TEannot.cfg | cut -d" " -f2)
# (!) Project/environment settings - edit to match your setup.
## Only list tools that REPET supports. Each value is a single '+'-separated
## string (not a bash array) so that it can be handed on to the SLURM jobs.
ALIGNERS_AVAIL="BLR+RM+CEN"
SSR_DETECTORS_AVAIL="TRF+RMSSR"
export ALIGNERS_AVAIL SSR_DETECTORS_AVAIL

# ALIGNERS_AVAIL has to be a string because bash arrays cannot be passed
# directly to SLURM jobs; so the string is split into an array here and
Expand All @@ -23,6 +24,9 @@ IFS='+' read -ra ALIGNERS_AVAIL_ARRAY <<< "$ALIGNERS_AVAIL"
# ${#ALIGNERS_AVAIL_ARRAY[@]} gives length of ALIGNERS_AVAIL_ARRAY array
NUM_ALIGNERS=${#ALIGNERS_AVAIL_ARRAY[@]}

# Split the '+'-separated SSR detector list into an array (IFS is changed for
# this one read only) and record how many detectors it holds; the count is
# used to size the step 4 job array.
IFS='+' read -r -a SSR_DETECTORS_AVAIL_ARRAY <<< "${SSR_DETECTORS_AVAIL}"
NUM_SSR_DETECTORS="${#SSR_DETECTORS_AVAIL_ARRAY[@]}"

# Clear the jobs table for the current project
## in case last run failed for some reason while sub-jobs were running
MYSQL_HOST=$(grep "repet_host" TEannot.cfg | cut -d" " -f2)
Expand All @@ -48,17 +52,20 @@ jid_step2=$(sbatch \
TEannot_Step2.sh | \
cut -d" " -f4)

#jid_step3=$(sbatch \
# --kill-on-invalid-dep=yes \
# --dependency=afterok:$jid_step2 \
# TEannot_Step3.sh | \
# cut -d" " -f4)
# Submit step 3 so that it starts only after the step 2 job (ID held in
# jid_step2) has finished successfully (afterok dependency).
# Only ProjectName and ALIGNERS_AVAIL are named in --export; TEannot_Step3.sh
# exits with an error when either of them is empty.
# --kill-on-invalid-dep=yes asks Slurm to terminate this job if the dependency
# can never be satisfied (see the sbatch documentation).
# The 4th space-separated field of sbatch's output is kept as the job ID.
jid_step3=$(sbatch \
--export=ProjectName,ALIGNERS_AVAIL \
--kill-on-invalid-dep=yes \
--dependency=afterok:$jid_step2 \
TEannot_Step3.sh | \
cut -d" " -f4)

#jid_step4=$(sbatch \
# --kill-on-invalid-dep=yes \
# --dependency=afterok:$jid_step1 \
# TEannot_Step4.sh | \
# cut -d" " -f4)
# Submit step 4 as a job array with one task per SSR detector
# (task indices 0 .. NUM_SSR_DETECTORS-1); TEannot_Step4.sh uses its array
# task ID to pick the detector out of SSR_DETECTORS_AVAIL.
# Unlike step 3, this job depends on step 1 (jid_step1), not on step 2.
# Only ProjectName and SSR_DETECTORS_AVAIL are named in --export.
# The 4th space-separated field of sbatch's output is kept as the job ID.
jid_step4=$(sbatch \
--export=ProjectName,SSR_DETECTORS_AVAIL \
--kill-on-invalid-dep=yes \
--dependency=afterok:$jid_step1 \
--array=0-$(( $NUM_SSR_DETECTORS - 1 )) \
TEannot_Step4.sh | \
cut -d" " -f4)

#jid_step5=$(sbatch \
# --kill-on-invalid-dep=yes \
Expand Down
2 changes: 2 additions & 0 deletions TEannot/TEannot_Step2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ ALIGNER=${ALIGNERS_AVAIL_ARRAY[$(( $SLURM_ARRAY_TASK_ID % $NUM_ALIGNERS ))]}

# Skip the alignment when this task's aligner already has an output folder
# under OUT_DIR; otherwise run TEannot step 2 for that aligner.
if [ -d "${OUT_DIR}/${ALIGNER}" ]; then
  echo "Step 2 output folder detected, skipping..."
else
  # The expansions are left unquoted exactly as before, so CMD_SUFFIX can
  # still expand to several (or zero) separate arguments.
  TEannot.py -P $ProjectName -C TEannot.cfg -S 2 -a $ALIGNER $CMD_SUFFIX
fi
31 changes: 27 additions & 4 deletions TEannot/TEannot_Step3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,35 @@
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=2G
#SBATCH --time=1-00:00:00
#SBATCH --output=TEAnnot_step3.stdout
#SBATCH --output=TEannot-step3.stdout
#SBATCH --job-name="S3_TEannot"
#SBATCH -p intel

module load repet/2.5
source config.txt
# REPET - Step 3

TEannot.py -P $ProjectName -C TEannot.cfg -S 3 -c BLR+RM+CEN
# REPET TEannot - Step 3
# Filter and combine HSPs from Step 2 alignment

# Guard: ProjectName and ALIGNERS_AVAIL must both be non-empty. They are
# expected to arrive through the job environment (sbatch --export); abort with
# exit status 1 and an explanatory message when either one is missing.
if [ -z "$ProjectName" ] || [ -z "$ALIGNERS_AVAIL" ]; then
  echo 'One or more environment variables required by this script' \
    'are unset. Either run this script through the scheduler script or' \
    'set the variable(s) and use the --export option of sbatch before' \
    'restarting.'
  exit 1
fi

# Run step 3 only when its output folder (<ProjectName>_TEdetect/Comb) does not
# exist yet; otherwise report that the step is being skipped.
if [ ! -d "${ProjectName}_TEdetect/Comb" ]; then
  # The output folder is missing, so drop the two *_allTEs_path tables that an
  # earlier, incomplete run of this step may have left behind
  # (DROP TABLE IF EXISTS does nothing when they are absent).
  # Connection settings are the second space-separated field of the matching
  # TEannot.cfg lines.
  MYSQL_HOST=$(grep "repet_host" TEannot.cfg | cut -d" " -f2)
  MYSQL_USER=$(grep "repet_user" TEannot.cfg | cut -d" " -f2)
  MYSQL_PASS=$(grep "repet_pw" TEannot.cfg | cut -d" " -f2)
  MYSQL_DB=$(grep "repet_db" TEannot.cfg | cut -d" " -f2)

  # All expansions are quoted so that glob characters (e.g. '*' or '?' in the
  # password) are passed through literally instead of being expanded by the
  # shell.
  # NOTE(review): -p<password> places the password on mysql's command line,
  # where other users on the node may be able to see it in the process list;
  # consider a client option file instead.
  echo "DROP TABLE IF EXISTS ${ProjectName}_chk_allTEs_path;" \
    "DROP TABLE IF EXISTS ${ProjectName}_chr_allTEs_path;" |
    mysql -h "$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" "$MYSQL_DB"

  # Combine the step 2 results of every aligner listed in ALIGNERS_AVAIL
  # (passed on unchanged as a '+'-separated string).
  TEannot.py -P "$ProjectName" -C TEannot.cfg -S 3 -c "$ALIGNERS_AVAIL"
else
  echo "Step 3 output folder detected, skipping..."
fi
18 changes: 12 additions & 6 deletions TEannot/TEannot_Step4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=2G
#SBATCH --time=1-00:00:00
#SBATCH --output=TEAnnot_step4.stdout
#SBATCH --output=TEannot-step4-%a.stdout
#SBATCH --job-name="S4_TEannot"
#SBATCH -p intel

module load repet/2.5
source config.txt
# REPET - Step 4

TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s TRF
TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s Mreps
TEannot.py -P $ProjectName -C TEannot.cfg -S 4 -s RMSSR
# REPET TEannot - Step 4
# Search for satellites in the genomic sequence

# Fail early, in the same way as the step 3 script, when a variable this
# script relies on is missing. Without this check an unset value silently
# produces an empty detector name further down.
if [ -z "$ProjectName" ] || [ -z "$SSR_DETECTORS_AVAIL" ] ||
  [ -z "$SLURM_ARRAY_TASK_ID" ]; then
  echo 'One or more environment variables required by this script' \
    '(ProjectName, SSR_DETECTORS_AVAIL, SLURM_ARRAY_TASK_ID) are unset.' \
    'Either run this script as a job array through the scheduler script or' \
    'set the variable(s) and use the --export and --array options of sbatch' \
    'before restarting.'
  exit 1
fi

# Split the '+'-separated detector list and pick the entry that belongs to
# this array task.
IFS='+' read -ra SSR_DETECTORS_AVAIL_ARRAY <<< "${SSR_DETECTORS_AVAIL}"
SSR_DETECTOR=${SSR_DETECTORS_AVAIL_ARRAY[$SLURM_ARRAY_TASK_ID]}

# A task ID beyond the end of the list yields an empty name; stop instead of
# testing the parent folder and calling TEannot.py with "-s" and no value.
if [ -z "$SSR_DETECTOR" ]; then
  echo "No SSR detector at index ${SLURM_ARRAY_TASK_ID} of '${SSR_DETECTORS_AVAIL}'."
  exit 1
fi

# Run step 4 for this detector unless its output folder already exists.
if [ ! -d "${ProjectName}_SSRdetect/${SSR_DETECTOR}" ]; then
  TEannot.py -P "$ProjectName" -C TEannot.cfg -S 4 -s "$SSR_DETECTOR"
else
  echo "Step 4 output folder detected, skipping..."
fi

0 comments on commit 8bf2106

Please sign in to comment.