Skip to content

Commit

Permalink
Merge pull request #214 from RoanKanninga/master
Browse files Browse the repository at this point in the history
bugfixing the checkValidationNGS_DNA
  • Loading branch information
BenjaminsM authored Nov 29, 2018
2 parents d39e09a + 06cda69 commit 633caad
Showing 1 changed file with 96 additions and 70 deletions.
166 changes: 96 additions & 70 deletions checkValidationNGS_DNA.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,96 +23,122 @@ EOH
exit 0
}

function doVariantEval(){

folder="${validationFolderTmp}"
output="${outputFolder}"

mkdir -p "${output}"

for i in $(ls "${folder}/"*".${inputType}")
do
name=$(basename $i ".${inputType}")
java -jar ${EBROOTGATK}/GenomeAnalysisTK.jar \
-T VariantEval \
-R /apps/data/1000G/phase1/human_g1k_v37_phiX.fasta \
-o "${output}/output.${name}.eval.grp" \
--eval "${inputFolder}/"*"${name}"*".${inputType}" \
--comp "${i}"

done

for i in $(ls "${folder}/"*".${inputType}")
do

name=$(basename "${i}" ".${inputType}")
check=$(awk '{if (NR==5){if ($11 == "100.00"){print "correct"}}}' "${output}/output.${name}.eval.grp")
if [ "${check}" == "correct" ]
then
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"FOUND BACK"}}'
else
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"Not 100% concordant!"}}'
exit 1
fi
done

}

function doComparisonFiltered (){
refCall="${1}"
if [ "${refCall}" == "referenceCall" ]
then
folder="${validationFolderTmp}/filtered/referenceCall"
output="${outputFolder}/filtered/referenceCall"
else
folder="${validationFolderTmp}/filtered/"
output="${outputFolder}/filtered/"
fi
mkdir -p "${output}"

for i in $(ls "${folder}/"*".${inputType}")
do

name=$(basename "${i}" ".${inputType}")

if [[ "${inputType}" == "vcf.gz" ]]
then
validationSample=$(zcat ${i} | grep -v '^#' | awk '{print $1"-"$2"-"$4"-"$5"-"$7}')
inputSample=$(zcat "${inputFolder}/"*"${name}"*".${inputType}" | grep 18598089 | awk '{print $1"-"$2"-"$4"-"$5"-"$7}')

if [ "${validationSample}" == "${inputSample}" ]
then
if [ "${refCall}" == "referenceCall" ]
then
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,$7,"FOUND BACK,REF CALL"}}'
else
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,$7,"FOUND BACK"}}'
fi
else
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,$7,"Not 100% concordant!"}}'
zcat "${inputFolder}/"*"${name}"*".${inputType}" | grep 18598089
zcat "${i}"
exit 1
fi
fi
done

}


while getopts "i:o:v:t:h" opt;
do
case $opt in h)showHelp;; i)inputFolder="${OPTARG}";; o)outputFolder="${OPTARG}";; v)validationFolder="${OPTARG}";; t)inputType="${OPTARG}";;
case $opt in h)showHelp;; i)inputFolder="${OPTARG}";; o)outputFolder="${OPTARG}";; v)validationFolderPrm="${OPTARG}";; t)inputType="${OPTARG}";;
esac
done

if [[ -z "${inputFolder:-}" ]]; then showHelp ; echo "inputFolder is not specified" ; fi ; echo "inputFolder=${inputFolder}"
if [[ -z "${outputFolder:-}" ]]; then mkdir -p "${inputFolder}/output/" ; outputFolder="${inputFolder}/output/" ; fi ; echo "outputFolder=${outputFolder}"
if [[ -z "${inputType:-}" ]]; then inputType="vcf.gz" ; fi ; echo "inputType=${inputType}"
if [[ -z "${validationFolder:-}" ]]; then validationFolder="/groups/umcg-gd/prm03/projects/validationVcfs/" ; fi ; echo "validationFolder=${validationFolder}"
if [[ -z "${validationFolderPrm:-}" ]]; then validationFolderPrm="/groups/umcg-gd/prm03/projects/validationVcfs/" ; fi ; echo "validationFolderPrm=${validationFolderPrm}"

ml GATK


if [ $(hostname) != "calculon" ]
then
mkdir -p "${inputFolder}/validationVcfs/filtered"
validationFolderTmp=${inputFolder}/validationVcfs/
mkdir -p "${outputFolder}/filtered/"

echo "copying validationVcfs"
if [ -f "${inputFolder}/validationVcfs/DNA087244.${inputType}" ]
if [ -f "${validationFolderTmp}/DNA087244.${inputType}" ]
then
echo "already copied, skipped"
else
scp calculon.hpc.rug.nl:${validationFolder}/*{.gz,tbi} "${inputFolder}/validationVcfs/"
scp calculon.hpc.rug.nl:${validationFolder}/filtered/* "${inputFolder}/validationVcfs/filtered/"
scp -r calculon.hpc.rug.nl:${validationFolderPrm}/ "${validationFolderTmp}/"
#scp calculon.hpc.rug.nl:${validationFolderPrm}/referenceCall/*{.gz,tbi} "${validationFolderTmp}/referenceCall/"
#scp -r calculon.hpc.rug.nl:${validationFolderPrm}/filtered/* "${validationFolderTmp}/filtered/"
fi
validationFolder="${inputFolder}/validationVcfs/"
fi
for i in $(ls "${validationFolder}/"*".${inputType}")
do
name=$(basename $i ".${inputType}")

java -jar ${EBROOTGATK}/GenomeAnalysisTK.jar \
-T VariantEval \
-R /apps/data/1000G/phase1/human_g1k_v37_phiX.fasta \
-o "${outputFolder}/output.${name}.eval.grp" \
--eval "${inputFolder}/"*"${name}"*".${inputType}" \
--comp "${i}"

done

for i in $(ls "${validationFolder}/"*".${inputType}")
do
name=$(basename "${i}" ".${inputType}")

referenceCall=$(grep "0/0" "${i}" | wc -l)
referenceCallMale=$(grep -P "\t0:" "${i}" | wc -l)

check=$(awk '{if (NR==5){if ($11 == "100.00"){print "correct"}}}' "${outputFolder}/output.${name}.eval.grp")
if [[ "${inputType}" == "vcf.gz" ]]
then
if [ "${check}" == "correct" ]
then
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"FOUND BACK"}}'
elif [[ "${referenceCall}" -ne 0 || "${referenceCallMale}" -ne 0 ]]
then
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"FOUND BACK,REF CALL"}}'
else
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"Not 100% concordant!"}}'
exit 1
fi
elif [[ "${inputType}" == "vcf" ]]
then
if [ "${check}" == "correct" ]
then
awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"FOUND BACK"}}' "${i}"
elif [[ "${referenceCall}" -ne 0 || "${referenceCallMale}" -ne 0 ]]
then
awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"FOUND BACK,REF CALL"}}' "${i}"
else
awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,"Not 100% concordant!"}}' "${i}"
exit 1
fi
else
validationFolderTmp=${inputFolder}/validationVcfs/
if [ -f "${validationFolderTmp}/DNA087244.${inputType}" ]
then
echo "already copied, skipped"
else
cp -r calculon.hpc.rug.nl:${validationFolderPrm}/ "${validationFolderTmp}/"
fi
done

for i in $(ls "${validationFolder}/filtered/"*".${inputType}")
do
name=$(basename "${i}" ".${inputType}")
fi

validationSample=$(zcat ${i} | grep -v '^#' | awk '{print $1"-"$2"-"$4"-"$5"-"$7}')
inputSample=$(zcat "${inputFolder}/"*"${name}"*".${inputType}" | grep 18598089 | awk '{print $1"-"$2"-"$4"-"$5"-"$7}')
doVariantEval

if [ "${validationSample}" == "${inputSample}" ]
then
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,$7,"FOUND BACK"}}'
else
zcat "${i}" | awk -v sample="${name}" 'BEGIN {OFS=" "}{if ($1 !~ /^#/){print sample,$1,$2,$4,$5,$7,"Not 100% concordant!"}}'
zcat "${inputFolder}/"*"${name}"*".${inputType}" | grep 18598089
zcat "${i}"
exit 1
fi
done
doComparisonFiltered "no"
doComparisonFiltered "referenceCall"

0 comments on commit 633caad

Please sign in to comment.