diff --git a/config/nxf_call_str.config b/config/nxf_call_str.config index 40b4cfb6c..a79e183da 100644 --- a/config/nxf_call_str.config +++ b/config/nxf_call_str.config @@ -2,7 +2,7 @@ includeConfig 'nxf.config' env { CMD_EXPANSIONHUNTER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/expansionhunter-5.0.0.sif ExpansionHunter" - CMD_STRAGLR="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/straglr-1.4.4_vip_v3.sif straglr-genotype" + CMD_STRAGLR="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/straglr-1.5.1.sif straglr.py" } params { diff --git a/config/nxf_vcf.config b/config/nxf_vcf.config index 45bb5cb73..fae090cfb 100644 --- a/config/nxf_vcf.config +++ b/config/nxf_vcf.config @@ -8,7 +8,7 @@ env { CMD_FILTERVEP = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vep-111.0.sif filter_vep" CMD_STRANGER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/stranger-0.8.1.sif stranger" CMD_VCFREPORT="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-report-7.0.0.sif" - CMD_VCFDECISIONTREE = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-decision-tree-4.1.1.sif" + CMD_VCFDECISIONTREE = "apptainer exec --no-mount home --bind \${TMPDIR} ${projectDir}/_dev/images/vcf-decision-tree-4.2.0-beta.sif" CMD_VCFINHERITANCEMATCHER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-inheritance-matcher-3.1.0.sif" // workaround for SAMtools https://github.com/samtools/samtools/issues/1366#issuecomment-769170935 @@ -33,7 +33,7 @@ process { } 
withLabel:'vcf_classify|vcf_classify_samples|vcf_inheritance' { - memory = '2GB' + memory = '4GB' } withLabel: 'vcf_report' { @@ -64,7 +64,8 @@ params { GRCh38 { capice_model = "${projectDir}/resources/GRCh38/capice_model_v5.1.2-v2.ubj" expansionhunter_variant_catalog = "${projectDir}/resources/GRCh38/expansionhunter_variant_catalog.json" - stranger_catalog = "${projectDir}/resources/GRCh38/variant_catalog_grch38_fixed.json" + str_catalog = "${projectDir}/resources/GRCh38/str_catalog_v1.beta.txt" + str_header = "${projectDir}/resources/GRCh38/str_header.hdr" vep_custom_phylop = "${projectDir}/resources/GRCh38/hg38.phyloP100way.bw" vep_plugin_clinvar = "${projectDir}/resources/GRCh38/clinvar_20240603_stripped.tsv.gz" vep_plugin_gnomad = "${projectDir}/resources/GRCh38/gnomad.total.v4.1.sites.stripped.tsv.gz" @@ -99,19 +100,19 @@ params { } filter { - classes = "VUS,LP,P" + classes = "VUS,LP,P,s" consequences = true } filter_samples { - classes = "U1,U2" + classes = "U1,U2,LB,VUS,LP" } report { include_crams = true max_records = "" max_samples = "" - template = "${projectDir}/resources/vip-report-template-v6.2.0.html" + template = "${projectDir}/resources/vip-report-template-v6.2.0-str-poc.html" metadata = "${projectDir}/resources/field_metadata.json" GRCh38 { diff --git a/install.sh b/install.sh index ad557f3ff..a5c4e50ca 100755 --- a/install.sh +++ b/install.sh @@ -83,11 +83,11 @@ download_files() { urls+=("0efcb85f297f08486cd01690b5f13ba0" "images/mosdepth-0.3.8.sif") urls+=("06ac8a76a307fa42fffd80ab906fd24b" "images/picard-3.1.1.sif") urls+=("9a4b685b26744113d3ea0a3904c02706" "images/samtools-1.17-patch1.sif") urls+=("33f84edc86db09103d835748905fca25" "images/seqtk-1.4.sif") urls+=("f4a3bef9cf6c6bc63c454c211e692b31" "images/spectre-0.2.1-patched.sif") - urls+=("8f6e06847776448e004df8b863571109" "images/straglr-1.4.4_vip_v3.sif") + urls+=("14d9cc4a5b3c6d705eb37cc1aeead9e8" "images/straglr-1.5.1.sif") urls+=("bcc157242cd9b09c66f015c52ef2d61d" 
"images/stranger-0.8.1.sif") - urls+=("57401e7b835fed2f52fafadc0dd744d4" "images/vcf-decision-tree-4.1.1.sif") + urls+=("177bc349312820641800fced563bddb4" "_dev/images/vcf-decision-tree-4.2.0-beta.sif") urls+=("9c4d7b48138f29651cdd45eb8d0cc4b6" "images/vcf-inheritance-matcher-3.1.0.sif") urls+=("53f9265acb2041b2b93c692177d91d74" "images/vcf-report-7.0.0.sif") urls+=("7bffc236a7c65b2b2e2e5f7d64beaa87" "images/vep-111.0.sif") @@ -146,7 +146,7 @@ download_files() { # update utils/install.sh when updating inheritance.tsv urls+=("df31eb0fe9ebd9ae26c8d6f5f7ba6e57" "resources/inheritance_20240115.tsv") urls+=("7138e76a38d6f67935699d06082ecacf" "resources/vep/cache/homo_sapiens_refseq_vep_111_GRCh38.tar.gz") - urls+=("78962f0c7c6fe5c63ef7c66b627c95a0" "resources/vip-report-template-v6.2.0.html") + urls+=("0bd1d5f66f4cf90038c73c1ba72dd820" "resources/vip-report-template-v6.2.0-str-poc.html") # when modifying urls array, please keep list in 'ls -l' order for ((i = 0; i < ${#urls[@]}; i += 2)); do download_file "${base_url}" "${urls[i+1]}" "${urls[i+0]}" "${output_dir}" "${validate}" diff --git a/modules/cram/templates/straglr_call.sh b/modules/cram/templates/straglr_call.sh index f7927ee02..a8e230b28 100644 --- a/modules/cram/templates/straglr_call.sh +++ b/modules/cram/templates/straglr_call.sh @@ -5,8 +5,6 @@ call_short_tandem_repeats () { local args=() args+=("--loci" "!{paramLoci}") args+=("--sample" "!{sampleId}") - args+=("--vcf" "straglr.vcf") - args+=("--tsv" "!{tsvOut}") - if [ -z "!{sampleSex}" ]; then + if [ -n "!{sampleSex}" ]; then args+=("--sex" "!{sampleSex}") fi @@ -14,18 +12,24 @@ call_short_tandem_repeats () { args+=("--min_cluster_size" "!{paramMinClusterSize}") args+=("!{cram}") args+=("!{paramReference}") + args+=("straglr") ${CMD_STRAGLR} "${args[@]}" + + mv straglr.tsv "!{tsvOut}" } index () { - # workaround for https://github.com/molgenis/vip/issues/471 - ${CMD_BCFTOOLS} reheader --fai "!{paramReferenceFai}" --temp-prefix . 
--threads "!{task.cpus}" "straglr.vcf" | ${CMD_BCFTOOLS} sort --temp-dir . --max-mem "!{task.memory.toGiga() - 1}G" --output-type z --output "!{vcfOut}" + awk '/^#CHROM/{print "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">"}1' ./straglr.vcf |\ + awk 'BEGIN{FS=OFS="\t"} /^#/ {print; next} {$8=($8=="." ? "SVTYPE=STR" : "SVTYPE=STR;"$8); print}' > straglr_headered.vcf + ${CMD_BCFTOOLS} reheader --fai "!{paramReferenceFai}" --temp-prefix . --threads "!{task.cpus}" straglr_headered.vcf |\ + ${CMD_BCFTOOLS} sort --temp-dir . --max-mem "!{task.memory.toGiga() - 1}G" --output-type z --output "!{vcfOut}" ${CMD_BCFTOOLS} index --csi --output "!{vcfOutIndex}" --threads "!{task.cpus}" "!{vcfOut}" ${CMD_BCFTOOLS} index --stats "!{vcfOut}" > "!{vcfOutStats}" rm straglr.vcf + rm straglr_headered.vcf } main() { diff --git a/modules/vcf/annotate.nf b/modules/vcf/annotate.nf index 0952b20b7..a0d062217 100644 --- a/modules/vcf/annotate.nf +++ b/modules/vcf/annotate.nf @@ -33,7 +33,8 @@ process annotate { reMMScoresPath = params.vcf.annotate[assembly].vep_plugin_ReMM_scores capiceModelPath = params.vcf.annotate[assembly].capice_model alphScorePath = params.vcf.annotate[assembly].vep_plugin_alphscore - strangerCatalog = params.vcf.annotate[assembly].stranger_catalog + strCatalog = params.vcf.annotate[assembly].str_catalog + strHeader = params.vcf.annotate[assembly].str_header areProbandHpoIdsIndentical = areProbandHpoIdsIndentical(meta.project.samples) gadoScores = meta.gado != null ? 
meta.gado : "" diff --git a/modules/vcf/templates/annotate.sh b/modules/vcf/templates/annotate.sh index 63186f2c5..adaaeb68f 100644 --- a/modules/vcf/templates/annotate.sh +++ b/modules/vcf/templates/annotate.sh @@ -114,15 +114,13 @@ capice_predict() { } stranger() { - cp "!{vcfOut}" stranger_input.vcf.gz - - local args=() - args+=("--repeats-file" "!{strangerCatalog}") - args+=("--loglevel" "ERROR") - args+=("stranger_input.vcf.gz") - - ${CMD_STRANGER} "${args[@]}" | ${CMD_BCFTOOLS} view --no-version --threads "!{task.cpus}" --output-type "z" --output-file "!{vcfOut}" - rm "stranger_input.vcf.gz" + # annotate STR records only (-i) with the STR catalog columns; + # --keep-sites writes records that fail -i unchanged instead of discarding them + cp "!{vcfOut}" str_input.vcf.gz + zcat str_input.vcf.gz |\ + ${CMD_BCFTOOLS} annotate -a "!{strCatalog}" -c CHROM,FROM,TO,repeat_unit,Repeat_id,Gene_id,Disease,HGNCId,-,-,LocusStructure,NormalMax,PathogenicMin -i 'SVTYPE="STR"' --keep-sites -h "!{strHeader}" |\ + ${CMD_BCFTOOLS} view --no-version --threads "!{task.cpus}" --output-type "z" --output-file "!{vcfOut}" + rm "str_input.vcf.gz" } vep() { @@ -215,7 +213,7 @@ main () { fi capice vep - if [ -n "!{strangerCatalog}" ]; then + if [ -n "!{strCatalog}" ]; then stranger fi index diff --git a/resources/decision_tree_GRCh38.json b/resources/decision_tree_GRCh38.json index abade352b..cf27b871b 100644 --- a/resources/decision_tree_GRCh38.json +++ b/resources/decision_tree_GRCh38.json @@ -168,7 +168,7 @@ "nextNode": "gnomAD" }, "outcomeFalse": { - "nextNode": "exit_rm" + "nextNode": "gnomAD" } }, "sv": { @@ -191,7 +191,7 @@ "value": "STR" }, "outcomeTrue": { - "nextNode": "str_status" + "nextNode": "str_sequence" }, "outcomeFalse": { "nextNode": "annotSV" @@ -200,25 +200,21 @@ "nextNode": "annotSV" } }, - "str_status": { - "description": "Stranger str status (normal, pre_mutation, mutation)", - "type": "CATEGORICAL", - "field": "INFO/STR_STATUS", - "outcomeMap": { - "full_mutation": { - "nextNode": "exit_lp" - }, - 
"pre_mutation": { - "nextNode": "exit_vus" - }, - "normal": { - "nextNode": "exit_lb" - } + "str_sequence": { + "description": "STR sequence matches requested repeat unit", + "type": "BOOL", + "query": { + "field": "INFO/repeat_unit", + "operator": "eq_seq", + "value": "field:INFO/RU" + }, + "outcomeTrue": { + "nextNode": "exit_str" }, "outcomeMissing": { "nextNode": "exit_vus" }, - "outcomeDefault": { + "outcomeFalse": { "nextNode": "exit_vus" } }, @@ -430,6 +426,11 @@ "description": "Pathogenic", "type": "LEAF", "class": "P" + }, + "exit_str": { + "description": "Short Tandem Repeat - pathogenicity based on sample info", + "type": "LEAF", + "class": "s" } } } diff --git a/resources/decision_tree_samples.json b/resources/decision_tree_samples.json index 870c88283..e79b893eb 100644 --- a/resources/decision_tree_samples.json +++ b/resources/decision_tree_samples.json @@ -1,6 +1,24 @@ { - "rootNode": "gt", + "rootNode": "vipc", "nodes": { + "vipc": { + "type": "BOOL", + "description": "Variant is classified as short tandem repeat (VIPC equals s)", + "query": { + "field": "INFO/CSQ/VIPC", + "operator": "==", + "value": "s" + }, + "outcomeTrue": { + "nextNode": "str_pathogenic_min" + }, + "outcomeFalse": { + "nextNode": "gt" + }, + "outcomeMissing": { + "nextNode": "gt" + } + }, "gt": { "description": "Genotype", "type": "CATEGORICAL", @@ -86,6 +104,42 @@ "nextNode": "exit_u2" } }, + "str_pathogenic_min": { + "description": "Nr of repeat units above pathogenic_min", + "type": "BOOL", + "query": { + "field": "FORMAT/AC", + "operator": "range_above", + "value": "field:INFO/PathogenicMin" + }, + "outcomeTrue": { + "nextNode": "exit_lp" + }, + "outcomeMissing": { + "nextNode": "exit_vus" + }, + "outcomeFalse": { + "nextNode": "str_normal_max" + } + }, + "str_normal_max": { + "description": "Range of repeat units below normal_max", + "type": "BOOL", + "query": { + "field": "FORMAT/ACR", + "operator": "range_below", + "value": "field:INFO/NormalMax" + }, + "outcomeTrue": { + "nextNode": "exit_lb" + }, + 
"outcomeMissing": { + "nextNode": "exit_vus" + }, + "outcomeFalse": { + "nextNode": "exit_vus" + } + }, "exit_u1": { "description": "Usable: probably", "type": "LEAF", @@ -100,6 +154,21 @@ "description": "Usable: probably not", "type": "LEAF", "class": "U3" + }, + "exit_lb": { + "description": "STR Likely Benign", + "type": "LEAF", + "class": "LB" + }, + "exit_vus": { + "description": "STR VUS", + "type": "LEAF", + "class": "VUS" + }, + "exit_lp": { + "description": "STR Likely Pathogenic", + "type": "LEAF", + "class": "LP" } } } diff --git a/resources/field_metadata.json b/resources/field_metadata.json index 345ba6483..c5ae85ba0 100644 --- a/resources/field_metadata.json +++ b/resources/field_metadata.json @@ -16,6 +16,22 @@ "YL", "MT" ] + }, + "ACR": { + "label": "Allelic copy ranges", + "description": "Straglr: Allelic copy ranges", + "numberType": "PER_GENOTYPE", + "numberCount": 2, + "separator": "-", + "type": "FLOAT" + }, + "AC": { + "label": "Allelic copies", + "description": "Straglr: Allelic copies", + "numberType": "PER_GENOTYPE", + "numberCount": 2, + "separator": "-", + "type": "FLOAT" } }, "info": { diff --git a/utils/apptainer/build.sh b/utils/apptainer/build.sh index cfdacdbf1..70755a26d 100644 --- a/utils/apptainer/build.sh +++ b/utils/apptainer/build.sh @@ -95,8 +95,8 @@ main() { images+=("seqtk-1.4") images+=("spectre-0.2.1-patched") images+=("stranger-0.8.1") - images+=("straglr-1.4.4_vip_v3") - images+=("vcf-decision-tree-4.1.1") + images+=("straglr-1.5.1") + images+=("vcf-decision-tree-4.2.0-beta") images+=("vcf-inheritance-matcher-3.1.0") images+=("vcf-report-7.0.0") diff --git a/utils/apptainer/def/straglr-1.5.1.def b/utils/apptainer/def/straglr-1.5.1.def new file mode 100644 index 000000000..2abba167c --- /dev/null +++ b/utils/apptainer/def/straglr-1.5.1.def @@ -0,0 +1,24 @@ +Bootstrap: localimage +From: sif/build/ubuntu-22.04.sif + +%post + apt-get update + apt-get install -qq curl bzip2 gcc make zlib1g-dev libbz2-dev liblzma-dev 
libdeflate-dev git python3-pip python3 + + apt-get install -qq ncbi-blast+ + + mkdir -p /opt/trf/ + curl -fLs -o /opt/trf/trf "https://github.com/Benson-Genomics-Lab/TRF/releases/download/v4.09.1/trf409.linux64" + chmod +x /opt/trf/trf + + pip install git+https://github.com/bcgsc/straglr.git@v1.5.1#egg=straglr + + # cleanup + pip cache purge + apt-get clean + +%environment + PATH=$PATH:/opt/trf + +%help + Long-read-based human genomic short tandem repeat detection. diff --git a/utils/apptainer/def/vcf-decision-tree-4.1.1.def b/utils/apptainer/def/vcf-decision-tree-4.2.0-beta.def similarity index 61% rename from utils/apptainer/def/vcf-decision-tree-4.1.1.def rename to utils/apptainer/def/vcf-decision-tree-4.2.0-beta.def index 3e9e6698d..879d4e2b8 100644 --- a/utils/apptainer/def/vcf-decision-tree-4.1.1.def +++ b/utils/apptainer/def/vcf-decision-tree-4.2.0-beta.def @@ -6,17 +6,14 @@ From: sif/build/openjdk-17.sif Usage: java -jar /opt/vcf-decision-tree/lib/vcf-decision-tree.jar %post - version_major=4 - version_minor=1 - version_patch=1 # install apk update apk add --virtual=.build-dependencies curl mkdir -p /opt/vcf-decision-tree/lib - curl -Ls -o /opt/vcf-decision-tree/lib/vcf-decision-tree.jar "https://github.com/molgenis/vip-decision-tree/releases/download/v${version_major}.${version_minor}.${version_patch}/vcf-decision-tree.jar" - echo "5c18aa3c302bbc4120c0fae50bca99828f7304837a52d96606bea0133a140780 /opt/vcf-decision-tree/lib/vcf-decision-tree.jar" | sha256sum -c + curl -Ls -o /opt/vcf-decision-tree/lib/vcf-decision-tree.jar "https://download.molgeniscloud.org/downloads/vip/_dev/github/vcf-decision-tree-4.2.0-beta.jar" + echo "52e1b8f9289c5670d9e6626cfd19a572cef4c8102654e41c7bb0763557974233 /opt/vcf-decision-tree/lib/vcf-decision-tree.jar" | sha256sum -c # cleanup apk del .build-dependencies