Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Poc/straglr #631

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/nxf_call_str.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ includeConfig 'nxf.config'

env {
CMD_EXPANSIONHUNTER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/expansionhunter-5.0.0.sif ExpansionHunter"
CMD_STRAGLR="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/straglr-1.4.4_vip_v3.sif straglr-genotype"
CMD_STRAGLR="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/straglr-1.5.1.sif straglr.py"
}

params {
Expand Down
2 changes: 1 addition & 1 deletion config/nxf_cram.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ includeConfig 'nxf_call_sv.config'

params {
cram {
call_snv = true
call_snv = false
call_str = true
call_sv = true
call_cnv = true
Expand Down
13 changes: 7 additions & 6 deletions config/nxf_vcf.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env {
CMD_FILTERVEP = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vep-111.0.sif filter_vep"
CMD_STRANGER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/stranger-0.8.1.sif stranger"
CMD_VCFREPORT="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-report-7.0.0.sif"
CMD_VCFDECISIONTREE = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-decision-tree-4.1.1.sif"
CMD_VCFDECISIONTREE = "apptainer exec --no-mount home --bind \${TMPDIR} ${projectDir}/_dev/images/vcf-decision-tree-4.2.0-beta.sif"
CMD_VCFINHERITANCEMATCHER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-inheritance-matcher-3.1.0.sif"

// workaround for SAMtools https://github.com/samtools/samtools/issues/1366#issuecomment-769170935
Expand All @@ -33,7 +33,7 @@ process {
}

withLabel:'vcf_classify|vcf_classify_samples|vcf_inheritance' {
memory = '2GB'
memory = '4GB'
}

withLabel: 'vcf_report' {
Expand Down Expand Up @@ -64,7 +64,8 @@ params {
GRCh38 {
capice_model = "${projectDir}/resources/GRCh38/capice_model_v5.1.2-v2.ubj"
expansionhunter_variant_catalog = "${projectDir}/resources/GRCh38/expansionhunter_variant_catalog.json"
stranger_catalog = "${projectDir}/resources/GRCh38/variant_catalog_grch38_fixed.json"
str_catalog = "${projectDir}/resources/GRCh38/str_catalog_v1.beta.txt"
str_header = "${projectDir}/resources/GRCh38/str_header.hdr"
vep_custom_phylop = "${projectDir}/resources/GRCh38/hg38.phyloP100way.bw"
vep_plugin_clinvar = "${projectDir}/resources/GRCh38/clinvar_20240603_stripped.tsv.gz"
vep_plugin_gnomad = "${projectDir}/resources/GRCh38/gnomad.total.v4.1.sites.stripped.tsv.gz"
Expand Down Expand Up @@ -99,19 +100,19 @@ params {
}

filter {
classes = "VUS,LP,P"
classes = "VUS,LP,P,s"
consequences = true
}

filter_samples {
classes = "U1,U2"
classes = "U1,U2,LB,VUS,LP"
}

report {
include_crams = true
max_records = ""
max_samples = ""
template = "${projectDir}/resources/vip-report-template-v6.2.0.html"
template = "${projectDir}/resources/vip-report-template-v6.2.0-str-poc.html"
metadata = "${projectDir}/resources/field_metadata.json"

GRCh38 {
Expand Down
8 changes: 3 additions & 5 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,9 @@ download_files() {
urls+=("0efcb85f297f08486cd01690b5f13ba0" "images/mosdepth-0.3.8.sif")
urls+=("06ac8a76a307fa42fffd80ab906fd24b" "images/picard-3.1.1.sif")
urls+=("9a4b685b26744113d3ea0a3904c02706" "images/samtools-1.17-patch1.sif")
urls+=("33f84edc86db09103d835748905fca25" "images/seqtk-1.4.sif")
urls+=("f4a3bef9cf6c6bc63c454c211e692b31" "images/spectre-0.2.1-patched.sif")
urls+=("8f6e06847776448e004df8b863571109" "images/straglr-1.4.4_vip_v3.sif")
urls+=("14d9cc4a5b3c6d705eb37cc1aeead9e8" "images/straglr-1.5.1.sif")
urls+=("bcc157242cd9b09c66f015c52ef2d61d" "images/stranger-0.8.1.sif")
urls+=("57401e7b835fed2f52fafadc0dd744d4" "images/vcf-decision-tree-4.1.1.sif")
urls+=("177bc349312820641800fced563bddb4" "_dev/images/vcf-decision-tree-4.2.0-beta.sif")
urls+=("9c4d7b48138f29651cdd45eb8d0cc4b6" "images/vcf-inheritance-matcher-3.1.0.sif")
urls+=("53f9265acb2041b2b93c692177d91d74" "images/vcf-report-7.0.0.sif")
urls+=("7bffc236a7c65b2b2e2e5f7d64beaa87" "images/vep-111.0.sif")
Expand Down Expand Up @@ -146,7 +144,7 @@ download_files() {
# update utils/install.sh when updating inheritance.tsv
urls+=("df31eb0fe9ebd9ae26c8d6f5f7ba6e57" "resources/inheritance_20240115.tsv")
urls+=("7138e76a38d6f67935699d06082ecacf" "resources/vep/cache/homo_sapiens_refseq_vep_111_GRCh38.tar.gz")
urls+=("78962f0c7c6fe5c63ef7c66b627c95a0" "resources/vip-report-template-v6.2.0.html")
urls+=("0bd1d5f66f4cf90038c73c1ba72dd820" "resources/vip-report-template-v6.2.0-str-poc.html")
# when modifying urls array, please keep list in 'ls -l' order
for ((i = 0; i < ${#urls[@]}; i += 2)); do
download_file "${base_url}" "${urls[i+1]}" "${urls[i+0]}" "${output_dir}" "${validate}"
Expand Down
12 changes: 8 additions & 4 deletions modules/cram/templates/straglr_call.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,31 @@ call_short_tandem_repeats () {
local args=()
args+=("--loci" "!{paramLoci}")
args+=("--sample" "!{sampleId}")
args+=("--vcf" "straglr.vcf")
args+=("--tsv" "!{tsvOut}")
if [ -z "!{sampleSex}" ]; then
args+=("--sex" "!{sampleSex}")
fi
args+=("--min_support" "!{paramMinSupport}")
args+=("--min_cluster_size" "!{paramMinClusterSize}")
args+=("!{cram}")
args+=("!{paramReference}")
args+=("straglr")

${CMD_STRAGLR} "${args[@]}"

mv straglr.tsv "!{tsvOut}"
}

index () {
# workaround for https://github.com/molgenis/vip/issues/471
${CMD_BCFTOOLS} reheader --fai "!{paramReferenceFai}" --temp-prefix . --threads "!{task.cpus}" "straglr.vcf" | ${CMD_BCFTOOLS} sort --temp-dir . --max-mem "!{task.memory.toGiga() - 1}G" --output-type z --output "!{vcfOut}"
awk '/#CHROM*/{print "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">"}1' ./straglr.vcf |\
awk 'BEGIN{FS=OFS="\t"} /^#/ {print; next} {$8="SVTYPE=STR;"$8; print; next;} { print; }' > straglr_headered.vcf
${CMD_BCFTOOLS} reheader --fai "!{paramReferenceFai}" --temp-prefix . --threads "!{task.cpus}" straglr_headered.vcf |\
${CMD_BCFTOOLS} sort --temp-dir . --max-mem "!{task.memory.toGiga() - 1}G" --output-type z --output "!{vcfOut}"

${CMD_BCFTOOLS} index --csi --output "!{vcfOutIndex}" --threads "!{task.cpus}" "!{vcfOut}"
${CMD_BCFTOOLS} index --stats "!{vcfOut}" > "!{vcfOutStats}"

rm straglr.vcf
rm straglr_headered.vcf
}

main() {
Expand Down
3 changes: 2 additions & 1 deletion modules/vcf/annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ process annotate {
reMMScoresPath = params.vcf.annotate[assembly].vep_plugin_ReMM_scores
capiceModelPath = params.vcf.annotate[assembly].capice_model
alphScorePath = params.vcf.annotate[assembly].vep_plugin_alphscore
strangerCatalog = params.vcf.annotate[assembly].stranger_catalog
strCatalog = params.vcf.annotate[assembly].str_catalog
strHeader = params.vcf.annotate[assembly].str_header

areProbandHpoIdsIndentical = areProbandHpoIdsIndentical(meta.project.samples)
gadoScores = meta.gado != null ? meta.gado : ""
Expand Down
18 changes: 8 additions & 10 deletions modules/vcf/templates/annotate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,13 @@ capice_predict() {
}

stranger() {
cp "!{vcfOut}" stranger_input.vcf.gz

local args=()
args+=("--repeats-file" "!{strangerCatalog}")
args+=("--loglevel" "ERROR")
args+=("stranger_input.vcf.gz")

${CMD_STRANGER} "${args[@]}" | ${CMD_BCFTOOLS} view --no-version --threads "!{task.cpus}" --output-type "z" --output-file "!{vcfOut}"
rm "stranger_input.vcf.gz"
# FIXME: bcftools annotate strips all records except STRs because of the -i filter,
# e.g. split all non-STR records into a separate file and merge them back after annotation
cp "!{vcfOut}" str_input.vcf.gz
zcat str_input.vcf.gz |\
${CMD_BCFTOOLS} annotate -a "!{strCatalog}" -c CHROM,FROM,TO,repeat_unit,Repeat_id,Gene_id,Disease,HGNCId,-,-,LocusStructure,NormalMax,PathogenicMin -i 'SVTYPE="STR"' -h "!{strHeader}" |\
${CMD_BCFTOOLS} view --no-version --threads "!{task.cpus}" --output-type "z" --output-file "!{vcfOut}"
rm "str_input.vcf.gz"
}

vep() {
Expand Down Expand Up @@ -215,7 +213,7 @@ main () {
fi
capice
vep
if [ -n "!{strangerCatalog}" ]; then
if [ -n "!{strCatalog}" ]; then
stranger
fi
index
Expand Down
35 changes: 18 additions & 17 deletions resources/decision_tree_GRCh38.json
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@
"nextNode": "gnomAD"
},
"outcomeFalse": {
"nextNode": "exit_rm"
"nextNode": "gnomAD"
}
},
"sv": {
Expand All @@ -191,7 +191,7 @@
"value": "STR"
},
"outcomeTrue": {
"nextNode": "str_status"
"nextNode": "str_sequence"
},
"outcomeFalse": {
"nextNode": "annotSV"
Expand All @@ -200,25 +200,21 @@
"nextNode": "annotSV"
}
},
"str_status": {
"description": "Stranger str status (normal, pre_mutation, mutation)",
"type": "CATEGORICAL",
"field": "INFO/STR_STATUS",
"outcomeMap": {
"full_mutation": {
"nextNode": "exit_lp"
},
"pre_mutation": {
"nextNode": "exit_vus"
},
"normal": {
"nextNode": "exit_lb"
}
"str_sequence": {
"description": "STR sequence matches requested repeat unit",
"type": "BOOL",
"query": {
"field": "INFO/repeat_unit",
"operator": "eq_seq",
"value": "field:INFO/RU"
},
"outcomeTrue": {
"nextNode": "exit_str"
},
"outcomeMissing": {
"nextNode": "exit_vus"
},
"outcomeDefault": {
"outcomeFalse": {
"nextNode": "exit_vus"
}
},
Expand Down Expand Up @@ -430,6 +426,11 @@
"description": "Pathogenic",
"type": "LEAF",
"class": "P"
},
"exit_str": {
"description": "Short Tandem Repeat - pathogenicity based on sample info",
"type": "LEAF",
"class": "s"
}
}
}
71 changes: 70 additions & 1 deletion resources/decision_tree_samples.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
{
"rootNode": "gt",
"rootNode": "vipc",
"nodes": {
"vipc": {
"type": "BOOL",
"description": "VIP class is 's' (short tandem repeat)",
"query": {
"field": "INFO/CSQ/VIPC",
"operator": "==",
"value": "s"
},
"outcomeTrue": {
"nextNode": "str_pathogenic_min"
},
"outcomeFalse": {
"nextNode": "gt"
},
"outcomeMissing": {
"nextNode": "gt"
}
},
"gt": {
"description": "Genotype",
"type": "CATEGORICAL",
Expand Down Expand Up @@ -86,6 +104,42 @@
"nextNode": "exit_u2"
}
},
"str_pathogenic_min": {
"description": "Nr of repeat units above pathogenic_min",
"type": "BOOL",
"query": {
"field": "FORMAT/AC",
"operator": "range_above",
"value": "field:INFO/PathogenicMin"
},
"outcomeTrue": {
"nextNode": "str_pathogenic_min"
},
"outcomeMissing": {
"nextNode": "exit_vus"
},
"outcomeFalse": {
"nextNode": "exit_vus"
}
},
"str_normal_max": {
"description": "Range of repeat units below normal_max",
"type": "BOOL",
"query": {
"field": "FORMAT/ACR",
"operator": "range_below",
"value": "field:INFO/NormalMax"
},
"outcomeTrue": {
"nextNode": "exit_lb"
},
"outcomeMissing": {
"nextNode": "exit_vus"
},
"outcomeFalse": {
"nextNode": "exit_vus"
}
},
"exit_u1": {
"description": "Usable: probably",
"type": "LEAF",
Expand All @@ -100,6 +154,21 @@
"description": "Usable: probably not",
"type": "LEAF",
"class": "U3"
},
"exit_lb": {
"description": "STR Likely Benign",
"type": "LEAF",
"class": "LB"
},
"exit_vus": {
"description": "STR VUS",
"type": "LEAF",
"class": "VUS"
},
"exit_lp": {
"description": "STR Likely Pathogenic",
"type": "LEAF",
"class": "LP"
}
}
}
16 changes: 16 additions & 0 deletions resources/field_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@
"YL",
"MT"
]
},
"ACR": {
"label": "Allelic copy ranges",
"description": "Straglr: Allelic copy ranges",
"numberType": "PER_GENOTYPE",
"numberCount": 2,
"separator": "-",
"type": "FLOAT"
},
"AC": {
"label": "Allelic copies",
"description": "Straglr: Allelic copies",
"numberType": "PER_GENOTYPE",
"numberCount": 2,
"separator": "-",
"type": "FLOAT"
}
},
"info": {
Expand Down
4 changes: 2 additions & 2 deletions utils/apptainer/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ main() {
images+=("seqtk-1.4")
images+=("spectre-0.2.1-patched")
images+=("stranger-0.8.1")
images+=("straglr-1.4.4_vip_v3")
images+=("vcf-decision-tree-4.1.1")
images+=("straglr-1.5.1")
images+=("vcf-decision-tree-4.2.0-beta")
images+=("vcf-inheritance-matcher-3.1.0")
images+=("vcf-report-7.0.0")

Expand Down
24 changes: 24 additions & 0 deletions utils/apptainer/def/straglr-1.5.1.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Bootstrap: localimage
From: sif/build/ubuntu-22.04.sif

%post
apt-get update
apt-get install -qq curl bzip2 gcc make zlib1g-dev libbz2-dev liblzma-dev libdeflate-dev git python3-pip python3

apt install ncbi-blast+ --assume-yes

mkdir -p /opt/trf/
curl -Ls -o /opt/trf/trf "https://github.com/Benson-Genomics-Lab/TRF/releases/download/v4.09.1/trf409.linux64"
chmod +x /opt/trf/trf

pip install git+https://github.com/bcgsc/straglr.git@v1.5.1#egg=straglr

# cleanup
pip cache purge
apt-get clean

%environment
PATH=$PATH:/opt/trf

%help
Long-read-based human genomic short tandem repeat detection.
Loading