Skip to content

Commit

Permalink
Merge branch 'hotfix/1.1.4'
Browse files Browse the repository at this point in the history
updated regex to lookup gene name with .search
  • Loading branch information
sb43 committed Sep 1, 2020
2 parents 7254595 + d2fd362 commit 7873dae
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
## 1.1.4
* updated regex to get gene name
## 1.1.3
* fixed bug in gene name comparison with hyphen(-)
* corrected typo in default directory name
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ USER root

MAINTAINER [email protected]

ENV ANNOTATEVCF_VER '1.1.3'
ENV ANNOTATEVCF_VER '1.1.4'

# install system tools
RUN apt-get -yq update
Expand Down
7 changes: 4 additions & 3 deletions annotate/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


def main():
usage = "\n %prog [options] -vcf input.vcf [-drv_json test.json -drv_data test_dir] "
usage = "\n %prog [options] -vcf input.vcf [-filter -np -gt -g -m -lof -hl -o ]"

optParser = argparse.ArgumentParser(prog='annotateVcf',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
Expand All @@ -27,7 +27,7 @@ def main():
required.add_argument("-vcf", "--vcf_file", type=str, dest="vcf_file", required=True,
default=None, help="vcf_file to annotate")

required.add_argument("-filter", "--vcf_filter", type=str, dest="vcf_filter", nargs='+',
optional.add_argument("-filter", "--vcf_filter", type=str, dest="vcf_filter", nargs='+',
required=False, default=['PASS'], help="Include variant sites \
matching vcf FILTER flag(s), multiple flags can be specified \
with space separator")
Expand All @@ -36,7 +36,8 @@ def main():
default=None, help="normal panel file to flag germline variant sites")

optional.add_argument("-gt", "--germline_tag", type=str, dest="germline_tag", required=False,
default="NPGL", help="tag to mark normal panel filtered variants in vcf INFO field")
default="NPGL", help="tag to mark normal panel filtered variants in \
vcf INFO field, only applicable when -np is set")

optional.add_argument("-g", "--lof_genes", type=str, dest="lof_genes", required=False,
default=None, help="LoF gene name file to use annotations")
Expand Down
19 changes: 15 additions & 4 deletions annotate/vcf_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ def annotate_lof_genes(self, genes_file, lof_types):
:param lof_types: lof consequences type string
:return:
"""
get_gene = re.compile(r'.*;VD=(\w+)|.*')
# create dummy genome location file to annotate LoF genes...
get_gene = re.compile(r'\bVD=([-\w]+)')
genome_loc_file = self.outdir + '/genome.tab.gz'
create_dummy_genome(self.vcf_path, genome_loc_file)
genes_outfile = self.outfile_name.format('_genes.vcf')
Expand All @@ -150,9 +150,10 @@ def annotate_lof_genes(self, genes_file, lof_types):
if line.startswith('#'):
lof_fh.write(line)
else:
gene = line.split('VD=')[1].split('|')[0]
gene = get_gene.search(line)[1]
# gene = _get_gene('VD', 0))
# write matching LoF genes....
if gene in lof_gene_list:
if gene.upper() in lof_gene_list:
lof_fh.write(line)
self.merge_vcf_dict['b'] = compress_vcf(lof_outfile)

Expand All @@ -171,9 +172,19 @@ def concat_results(self):


# generic methods ....
def _get_gene(line, gene_field, field_loc):
# Not used ... kept for future implementation of different annotation fields....
# ANN=T|missense_variant|MODERATE|AGAP005273|AGAP005273| [ e.g. 'ANN', 3]
# VD=TP5-TEST1-TEST2|CCDS11118.1|r.276_277insa|c.86_87insA|p.N29fs*14| [ e.g. 'VD', 0 ]
info_list = line.split("\t")[7].split(';')
info_dict = dict(f.split('=') for f in info_list if '=' in f)
gene = info_dict[gene_field].split('|')[field_loc]
return gene.upper()


def get_drv_gene_list(drv_genes):
    """Read a gene list file and return its lines upper-cased.

    :param drv_genes: path to a text file with one gene name per line
    :return: list of upper-cased lines, in file order
    """
    # Read the handle exactly once: a second read() on the same handle starts
    # at end-of-file and returns '', which would leave the list empty.
    with open(drv_genes) as f_drv:
        lof_gene_list = [gene.upper() for gene in f_drv.read().splitlines()]
    return lof_gene_list


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup

config = {
'version': '1.1.3',
'version': '1.1.4',
'name': 'annotateVcf',
'description': 'Tool to annotate and filter vcf files...',
'author': 'Shriram Bhosle',
Expand Down

0 comments on commit 7873dae

Please sign in to comment.