Skip to content

Commit

Permalink
Merge pull request #260 from RoanKanninga/master
Browse files Browse the repository at this point in the history
added new script for many-to-many manual concordanceChecks
  • Loading branch information
kdelange authored Nov 30, 2023
2 parents d4a7bfd + df2b736 commit 0e96be8
Showing 1 changed file with 100 additions and 0 deletions.
100 changes: 100 additions & 0 deletions bin/concordanceCheck-ManyToManyVCFs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Strict mode: abort on the first failing command (errexit) and treat the
# expansion of an unset variable as an error (nounset).
set -o errexit -o nounset

function showHelp() {
#
# Display commandline help on STDOUT and terminate the script.
#
# Arguments: $1 - optional exit status to terminate with (default: 0).
# Returns:   never; this function always ends by calling exit.
#
cat <<EOH
===============================================================================================================
Script to do (many-to-many) ConcordanceChecks manually.
Usage:
$(basename -- "${0}") OPTIONS
Options:
-h Show this help.
-i folder with index samples
-c folder with the compareWith samples
-d dataType. DRAGEN or nonDRAGEN (default) DRAGEN outputdata contains only familyNumber+umcgnumber as the samplename in the header of the vcf
-w working directory (default=current dir)
===============================================================================================================
EOH
# Reset the EXIT trap to its default so no trap handler runs when we exit below.
trap - EXIT
exit "${1:-0}"
}

#
# Parse the commandline options into indexFolder, compareWithFolder, data and workDir.
# The leading ':' in the optstring switches getopts to silent error reporting, so
# invalid options and missing option arguments are handled by the ':' and '*' arms
# below instead of being ignored by the case statement.
#
while getopts ":w:i:c:d:h" opt
do
	case "${opt}" in
		h)
			showHelp
			;;
		i)
			indexFolder="${OPTARG}"
			;;
		c)
			compareWithFolder="${OPTARG}"
			;;
		d)
			data="${OPTARG}"
			;;
		w)
			workDir="${OPTARG}"
			;;
		:)
			printf '\nERROR: Option -%s requires an argument!\n\n' "${OPTARG}" >&2
			# showHelp calls exit itself; run it in a subshell so we can still exit non-zero.
			(showHelp) >&2 || true
			exit 1
			;;
		*)
			printf '\nERROR: Invalid option -%s!\n\n' "${OPTARG}" >&2
			# showHelp calls exit itself; run it in a subshell so we can still exit non-zero.
			(showHelp) >&2 || true
			exit 1
			;;
	esac
done

#
# Check the mandatory options and fill in defaults for the optional ones.
# showHelp terminates the shell it runs in with exit status 0, so it is executed
# in a subshell here; that way the script itself can still exit with status 1
# when a mandatory option is missing. Diagnostics are written to STDERR.
#
if [[ -z "${indexFolder:-}" ]]
then
	echo -e '\nERROR: Must specify an indexFolder!\n' >&2
	(showHelp) >&2 || true
	exit 1
fi

if [[ -z "${compareWithFolder:-}" ]]
then
	echo -e '\nERROR: Must specify an compareWithFolder!\n' >&2
	(showHelp) >&2 || true
	exit 1
fi

# dataType defaults to nonDRAGEN; the working directory defaults to the current directory.
data="${data:-nonDRAGEN}"
workDir="${workDir:-$(pwd)}"

# Load CompareGenotypeCalls.
# NOTE(review): 'ml' is not defined in this script; presumably the environment
# provides it as the module-load shortcut that sets EBROOTCOMPAREGENOTYPECALLS - confirm.
ml CompareGenotypeCalls

# Make sure the Java temp dir, the samplesheet dir and the output dir exist
# below the working directory.
tmpDir="${workDir}/tmp"
for requiredDir in "${tmpDir}" "${workDir}/samplesheet/" "${workDir}/output"
do
	mkdir -p "${requiredDir}"
done
#
# Derive the sample name from the path of a *.gz file.
#
# Arguments: $1 - path to the file.
#            $2 - dataType; for 'DRAGEN' the first two '_'-separated fields of the
#                 file name are used, for any other value everything up to the
#                 first '.' of the file name.
# Outputs:   the sample name on STDOUT.
#
function deriveSampleName() {
	local fileName="${1##*/}"
	if [[ "${2}" == 'DRAGEN' ]]
	then
		awk 'BEGIN {FS="_"}{print $1"_"$2}' <<< "${fileName}"
	else
		printf '%s\n' "${fileName%%.*}"
	fi
}

#
# Compare every *.gz file in the compareWith folder against every *.gz file in
# the index folder: write a two-line samplesheet for the pair and run
# CompareGenotypeCalls on it.
#
for compare in "${compareWithFolder}/"*".gz"
do
	# A glob without matches is left as the literal pattern; do not hand that to java.
	if [[ ! -e "${compare}" ]]
	then
		echo "WARNING: skipping ${compare}: file does not exist." >&2
		continue
	fi
	compareWithSampleName="$(deriveSampleName "${compare}" "${data}")"

	for index in "${indexFolder}/"*".gz"
	do
		# Same guard as above for the index folder.
		if [[ ! -e "${index}" ]]
		then
			echo "WARNING: skipping ${index}: file does not exist." >&2
			continue
		fi
		indexSampleName="$(deriveSampleName "${index}" "${data}")"
		pairName="${indexSampleName}_${compareWithSampleName}"

		## create samplesheet
		# printf '%s' writes the values verbatim; echo -e would interpret backslashes in paths.
		sampleSheet="${workDir}/samplesheet/${pairName}.sampleId.txt"
		printf '%s\t%s\t%s\t%s\n' 'data1Id' 'data2Id' 'location1' 'location2' > "${sampleSheet}"
		printf '%s\t%s\t%s\t%s\n' "${indexSampleName}" "${compareWithSampleName}" "${index}" "${compare}" >> "${sampleSheet}"

		java -XX:ParallelGCThreads=1 -Djava.io.tmpdir="${tmpDir}" -Xmx9g -jar "${EBROOTCOMPAREGENOTYPECALLS}/CompareGenotypeCalls.jar" \
			-d1 "${index}" \
			-D1 VCF \
			-d2 "${compare}" \
			-D2 VCF \
			-ac \
			--sampleMap "${sampleSheet}" \
			-o "${workDir}/output/${pairName}" \
			-sva

		echo "${pairName} done: output/${pairName}"
	done
done

0 comments on commit 0e96be8

Please sign in to comment.