Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.14
current_version = 0.2.17
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>[a-z0-9+]+)
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ __pycache__/
build/
venv/
working/
data/
workspace/
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## dev

- [17](https://github.com/umccr/bolt/pull/17) - Make the DRAGEN HRD file optional

- [14](https://github.com/umccr/bolt/pull/14) - gpgr version bump to 2.2.0

- [3](https://github.com/scwatts/bolt/pull/3) - Improve PCGR / CPSR argument handling

- [6](https://github.com/umccr/bolt/pull/6) - Update for oncoanalyser v2.0.0, switching the SV caller from GRIPSS to eSVee
- [6](https://github.com/umccr/bolt/pull/6) - Update for oncoanalyser v2.0.0, switching the SV caller from GRIPSS to eSVee

- [9](https://github.com/umccr/bolt/pull/9) - Add hypermutation sample handling
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,20 @@ the software environment and dependencies. Consequently, dependencies are split

| Name | Docker image URI | Commands |
| --- | --- | --- |
| pcgr | ghcr.io/scwatts/bolt:0.2.14-pcgr | • `bolt smlv_germline report`<br />• `bolt smlv_somatic annotate`<br />• `bolt smlv_somatic report`<br /> |
| gpgr | ghcr.io/scwatts/bolt:0.2.14-gpgr | • `bolt other cancer_report` |
| snpeff | ghcr.io/scwatts/bolt:0.2.14-snpeff | • `bolt sv_somatic annotate` |
| circos | ghcr.io/scwatts/bolt:0.2.14-circos | • `bolt other purple_baf_plot` |
| multiqc | ghcr.io/scwatts/bolt:0.2.14-multiqc | • `bolt other multiqc_report` |
| base | ghcr.io/scwatts/bolt:0.2.14 | • `bolt smlv_germline prepare`<br />• `bolt smlv_somatic rescue`<br />• `bolt smlv_somatic filter`<br />• `bolt sv_somatic prioritise`<br /> |
| pcgr | ghcr.io/scwatts/bolt:0.2.17-pcgr | • `bolt smlv_germline report`<br />• `bolt smlv_somatic annotate`<br />• `bolt smlv_somatic report`<br /> |
| gpgr | ghcr.io/scwatts/bolt:0.2.17-gpgr | • `bolt other cancer_report` |
| snpeff | ghcr.io/scwatts/bolt:0.2.17-snpeff | • `bolt sv_somatic annotate` |
| circos | ghcr.io/scwatts/bolt:0.2.17-circos | • `bolt other purple_baf_plot` |
| multiqc | ghcr.io/scwatts/bolt:0.2.17-multiqc | • `bolt other multiqc_report` |
| base | ghcr.io/scwatts/bolt:0.2.17 | • `bolt smlv_germline prepare`<br />• `bolt smlv_somatic rescue`<br />• `bolt smlv_somatic filter`<br />• `bolt sv_somatic prioritise`<br /> |

## Usage

Given the nature of software dependencies required, it is strongly recommended to run `bolt` commands via the existing
[Docker images](#docker-images):

```bash
docker run -ti -v $(pwd):$(pwd) -w $(pwd) ghcr.io/scwatts/bolt:0.2.14 \
docker run -ti -v $(pwd):$(pwd) -w $(pwd) ghcr.io/scwatts/bolt:0.2.17 \
bolt smlv_somatic filter \
--tumor_name tumor_sample \
--vcf_fp tumor_sample.vcf.gz \
Expand Down
128 changes: 82 additions & 46 deletions bolt/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
######################################
## Variation selection (annotation) ##
######################################
MAX_SOMATIC_VARIANTS = 500_000
MAX_SOMATIC_VARIANTS = 450_000
MAX_SOMATIC_VARIANTS_GNOMAD_FILTER = 0.01


Expand Down Expand Up @@ -35,10 +35,47 @@
'pathogenic',
'uncertain_significance',
}
PCGR_TIERS_RESCUE = {
PCGR_ACTIONABILITY_TIER_RESCUE = {
'1',
'2',
}


################################
## Hypermutated report filter ##
################################
PCGR_TIERS_FILTERING = (
'TIER_1',
'TIER_2',
}
'TIER_3',
'TIER_4',
'NONCODING',
)

# Impact category names listed under the "Hypermutated report filter" section.
# NOTE(review): presumably applied in this order when trimming variants for
# hypermutated samples - confirm against the consuming code.
VEP_IMPACTS_FILTER = (
    'intergenic',
    'intronic',
    'downstream',
    'upstream',
    'impacts_other',
)

# Genomic region category names listed under the "Hypermutated report filter"
# section.
# NOTE(review): ordering may be significant to the consumer - confirm before
# reordering.
GENOMIC_REGIONS_FILTERING = (
    'difficult',
    'none',
    'giab_conf',
)

# Hotspot-related field names listed under the "Hypermutated report filter"
# section.
HOTSPOT_FIELDS_FILTERING = (
    'SAGE_HOTSPOT',
    'hotspot',
    'PCGR_MUTATION_HOTSPOT',
)

# 'PANEL' followed by every hotspot field name above, in the same order.
# NOTE(review): presumably the fields whose presence keeps a variant during
# filtering - confirm against the consuming code.
RETAIN_FIELDS_FILTERING = ('PANEL',) + HOTSPOT_FIELDS_FILTERING


##################################################
Expand All @@ -61,6 +98,8 @@ class VcfFilter(enum.Enum):
ENCODE = 'ENCODE'
GNOMAD_COMMON = 'gnomAD_common'

PCGR_COUNT_LIMIT = 'PCGR_count_limit'

@property
def namespace(self):
return 'FILTER'
Expand All @@ -77,16 +116,14 @@ class VcfInfo(enum.Enum):
SAGE_NOVEL = 'SAGE_NOVEL'
SAGE_RESCUE = 'SAGE_RESCUE'

PCGR_TIER = 'PCGR_TIER'
PCGR_ACTIONABILITY_TIER = 'PCGR_ACTIONABILITY_TIER'
PCGR_CSQ = 'PCGR_CSQ'
PCGR_MUTATION_HOTSPOT = 'PCGR_MUTATION_HOTSPOT'
PCGR_CLINVAR_CLNSIG = 'PCGR_CLINVAR_CLNSIG'
PCGR_CLINVAR_CLASSIFICATION = 'PCGR_CLINVAR_CLASSIFICATION'
PCGR_COSMIC_COUNT = 'PCGR_COSMIC_COUNT'
PCGR_TCGA_PANCANCER_COUNT = 'PCGR_TCGA_PANCANCER_COUNT'
PCGR_ICGC_PCAWG_COUNT = 'PCGR_ICGC_PCAWG_COUNT'

CPSR_FINAL_CLASSIFICATION = 'CPSR_FINAL_CLASSIFICATION'
CPSR_PATHOGENICITY_SCORE = 'CPSR_PATHOGENICITY_SCORE'
CPSR_CLINVAR_CLASSIFICATION = 'CPSR_CLINVAR_CLASSIFICATION'
CPSR_CSQ = 'CPSR_CSQ'

Expand All @@ -112,7 +149,7 @@ class VcfInfo(enum.Enum):

GNOMAD_AF = 'gnomAD_AF'

PCGR_TIER_RESCUE = 'PCGR_TIER_RESCUE'
PCGR_ACTIONABILITY_TIER_RESCUE = 'PCGR_ACTIONABILITY_TIER_RESCUE'
SAGE_HOTSPOT_RESCUE = 'SAGE_HOTSPOT_RESCUE'
CLINICAL_POTENTIAL_RESCUE = 'CLINICAL_POTENTIAL_RESCUE'

Expand All @@ -121,6 +158,8 @@ class VcfInfo(enum.Enum):
RESCUED_FILTERS_EXISTING = 'RESCUED_FILTERS_EXISTING'
RESCUED_FILTERS_PENDING = 'RESCUED_FILTERS_PENDING'

PANEL = 'PANEL'

@property
def namespace(self):
return 'INFO'
Expand Down Expand Up @@ -187,6 +226,9 @@ def namespace(self):
'Description': f'gnomAD AF >= {MAX_GNOMAD_AF}',
},

VcfFilter.PCGR_COUNT_LIMIT: {
'Description': 'Manually filtered to meet PCGR 500,000 variant limit',
},

# INFO
VcfInfo.TUMOR_AF: {
Expand Down Expand Up @@ -226,7 +268,7 @@ def namespace(self):
'Description': 'Variant rescued by a matching SAGE call',
},

VcfInfo.PCGR_TIER: {
VcfInfo.PCGR_ACTIONABILITY_TIER: {
'Number': '1',
'Type': 'String',
'Description': (
Expand All @@ -237,28 +279,29 @@ def namespace(self):
},
VcfInfo.PCGR_CSQ: {
'Number': '.',
'Type': 'String',
'Description': (
'Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|'
'Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|'
'CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|ALLELE_NUM|'
'DISTANCE|STRAND|FLAGS|PICK|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|'
'MANE_SELECT|MANE_PLUS_CLINICAL|TSL|APPRIS|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|'
'UNIPROT_ISOFORM|RefSeq|DOMAINS|HGVS_OFFSET|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|'
'gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|'
'gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|CHECK_REF|'
'NearestExonJB'
),
'Type': 'String',
'Description': (
'Consequence annotations from Ensembl VEP. Format: '
'Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|'
'HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|'
'ALLELE_NUM|DISTANCE|STRAND|FLAGS|PICK|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|'
'MANE|MANE_SELECT|MANE_PLUS_CLINICAL|TSL|APPRIS|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|'
'UNIPROT_ISOFORM|RefSeq|DOMAINS|HGVS_OFFSET|gnomADe_AF|gnomADe_AFR_AF|gnomADe_AMR_AF|'
'gnomADe_ASJ_AF|gnomADe_EAS_AF|gnomADe_FIN_AF|gnomADe_MID_AF|gnomADe_NFE_AF|'
'gnomADe_REMAINING_AF|gnomADe_SAS_AF|CLIN_SIG|SOMATIC|PHENO|CHECK_REF|MOTIF_NAME|'
'MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|TRANSCRIPTION_FACTORS|NearestExonJB|'
'MaxEntScan_alt|MaxEntScan_diff|MaxEntScan_ref'
),
},
VcfInfo.PCGR_MUTATION_HOTSPOT: {
'Number': '.',
'Type': 'String',
'Description': 'Known cancer mutation hotspot, as found in cancerhotspots.org_v2, Gene|Codon|Q-value',
'Description': 'Known cancer mutation hotspot, as found in cancerhotspots.org. Format: GeneSymbol|Entrez_ID|CodonRefAA|Alt_AA|Q-value',
},
VcfInfo.PCGR_CLINVAR_CLNSIG: {
VcfInfo.PCGR_CLINVAR_CLASSIFICATION: {
'Number': '.',
'Type': 'String',
'Description': 'ClinVar clinical significance',
'Description': 'ClinVar - Overall clinical significance of variant on a five-tiered scale',
},
VcfInfo.PCGR_COSMIC_COUNT: {
'Number': '1',
Expand All @@ -276,23 +319,10 @@ def namespace(self):
'Description': 'Count of ICGC PCAWG hits',
},

VcfInfo.CPSR_FINAL_CLASSIFICATION: {
'Number': '1',
'Type': 'String',
'Description': (
'Final variant classification based on the combination of CLINVAR_CLASSIFICTION (for '
'ClinVar-classified variants), and CPSR_CLASSIFICATION (for novel variants)'
),
},
VcfInfo.CPSR_PATHOGENICITY_SCORE: {
'Number': '1',
'Type': 'Float',
'Description': 'Aggregated CPSR pathogenicity score',
},
VcfInfo.CPSR_CLINVAR_CLASSIFICATION: {
'Number': '1',
'Type': 'String',
'Description': 'Clinical significance of variant on a five-tiered scale',
'Description': 'ClinVar - Overall clinical significance of variant on a five-tiered scale',
},
VcfInfo.CPSR_CSQ: {
'Number': '.',
Expand All @@ -301,13 +331,13 @@ def namespace(self):
'Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|'
'Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|'
'Protein_position|Amino_acids|Codons|Existing_variation|ALLELE_NUM|DISTANCE|STRAND|'
'FLAGS|PICK|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|MANE_SELECT|'
'MANE_PLUS_CLINICAL|APPRIS|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|UNIPROT_ISOFORM|RefSeq|'
'DOMAINS|HGVS_OFFSET|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|gnomAD_AF|gnomAD_AFR_AF|'
'gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|'
'gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|CHECK_REF|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|'
'MOTIF_SCORE_CHANGE|TRANSCRIPTION_FACTORS|NearestExonJB|LoF|LoF_filter|LoF_flags|'
'LoF_info'
'FLAGS|PICK|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|MANE|MANE_SELECT|'
'MANE_PLUS_CLINICAL|TSL|APPRIS|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|UNIPROT_ISOFORM|RefSeq|'
'DOMAINS|HGVS_OFFSET|gnomADe_AF|gnomADe_AFR_AF|gnomADe_AMR_AF|gnomADe_ASJ_AF|'
'gnomADe_EAS_AF|gnomADe_FIN_AF|gnomADe_MID_AF|gnomADe_NFE_AF|gnomADe_REMAINING_AF|'
'gnomADe_SAS_AF|CLIN_SIG|SOMATIC|PHENO|CHECK_REF|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|'
'MOTIF_SCORE_CHANGE|TRANSCRIPTION_FACTORS|NearestExonJB|MaxEntScan_alt|MaxEntScan_diff|'
'MaxEntScan_ref'
),
},

Expand All @@ -316,7 +346,7 @@ def namespace(self):
'Type': 'Flag',
'Description': '',
},
VcfInfo.PCGR_TIER_RESCUE: {
VcfInfo.PCGR_ACTIONABILITY_TIER_RESCUE: {
'Number': '0',
'Type': 'Flag',
'Description': '',
Expand Down Expand Up @@ -350,6 +380,12 @@ def namespace(self):
'Description': 'Filters pending prior to variant rescue',
},

VcfInfo.PANEL: {
'Number': '0',
'Type': 'Flag',
'Description': 'UMCCR somatic panel CDS (2,000 bp padding)',
},


# FORMAT
VcfFormat.SAGE_AD: {
Expand Down
Loading
Loading