diff --git a/annotate/commandline.py b/annotate/commandline.py index 2f38e71..b81169c 100644 --- a/annotate/commandline.py +++ b/annotate/commandline.py @@ -11,6 +11,7 @@ configdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config/') log_config = configdir + 'logging.conf' info_header = configdir + 'info.header' +filters_json = configdir + 'filters.json' logging.config.fileConfig(log_config) log = logging.getLogger(__name__) version = pkg_resources.require("annotateVcf")[0].version @@ -27,28 +28,21 @@ def main(): required.add_argument("-vcf", "--vcf_file", type=str, dest="vcf_file", required=True, default=None, help="vcf_file to annotate") - optional.add_argument("-filter", "--vcf_filter", type=str, dest="vcf_filter", nargs='+', - required=False, default=['PASS'], help="Include variant sites \ - matching vcf FILTER flag(s), multiple flags can be specified \ - with space separator") + optional.add_argument("-filters", "--vcf_filters", type=str, dest="vcf_filters", required=False, + default=filters_json, help="Include vcf filters \ + configuration file in json (param:value) format \ + [please refer bcftools documentation for more details \ + : http://samtools.github.io/bcftools/bcftools.html#expressions]") optional.add_argument("-np", "--normal_panel", type=str, dest="normal_panel", required=False, default=None, help="normal panel file to flag germline variant sites") - optional.add_argument("-gt", "--germline_tag", type=str, dest="germline_tag", required=False, - default="NPGL", help="tag to mark normal panel filtered variants in \ - vcf INFO field, only applicable when -np is set") - optional.add_argument("-g", "--lof_genes", type=str, dest="lof_genes", required=False, default=None, help="LoF gene name file to use annotations") optional.add_argument("-m", "--mutations", type=str, dest="mutations", required=False, default=None, help="driver mutations file to use for driver variant annotations") - optional.add_argument("-lof", "--lof_type", type=str, dest="lof_type", nargs='+', metavar='N', - required=False, default=["stop_lost", "start_lost", "ess_splice", - "frameshift", "nonsense"], help="Loss of function effect types") - optional.add_argument("-hl", "--header_line", type=str, dest="header_line", required=False, default=info_header, help="vcf info header line and info tag") diff --git a/annotate/config/filters.json b/annotate/config/filters.json new file mode 100644 index 0000000..2668ccb --- /dev/null +++ b/annotate/config/filters.json @@ -0,0 +1,17 @@ +{ + "_#comment": [ + "Add filters to include/ exclude the variants sites to annotate using drivers", + "please refer filter expression formats to add new filters : https://samtools.github.io/bcftools/bcftools.html#expressions" + ], + "include": { + "FILTER": "FILTER=\"PASS\"", + "FORMAT": "FORMAT/VAF[*] > 0.15", + "INFO": "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"", + "INFO_FLAG_GERMLINE": "NPGL" + }, + + "exclude": { + "None": null + } +} + diff --git a/annotate/io_formatter.py b/annotate/io_formatter.py index 7febfc0..0c85fc6 100644 --- a/annotate/io_formatter.py +++ b/annotate/io_formatter.py @@ -5,6 +5,7 @@ import io import tempfile import pkg_resources +import json import shutil from contextlib import contextmanager @@ -24,12 +25,10 @@ def __init__(self, **kwargs): self.genes_file = kwargs.get('lof_genes', None) self.muts_file = kwargs.get('mutations', None) self.np_vcf = kwargs.get('normal_panel', None) - self.np_tag = kwargs['germline_tag'] - self.lof_type = kwargs['lof_type'] self.header_line = kwargs['header_line'] self.outdir = kwargs['outdir'] self.keepTmp = kwargs['keepTmp'] - self.filter = kwargs.get('vcf_filter', None) + self.json_file = kwargs.get('vcf_filters', None) # check input data ... def format(self, input_array): @@ -47,14 +46,12 @@ def format(self, input_array): def _get_formatter(self, input_type): if input_type == 'vcf_file': return self._format_vcf - elif input_type == 'lof_type': - return self._format_lof elif input_type == 'outdir': return self._get_outdir_path - elif input_type == 'format_filter': - return self._format_vcf_filter elif input_type == 'input_status': return self._check_input + elif input_type == 'vcf_filters': + return self._get_filters else: raise ValueError(input_type) @@ -72,19 +69,13 @@ def _check_input(self): def _format_vcf(self): return get_file_metadata(self.vcf_file) - def _format_vcf_filter(self): + def _get_filters(self): """ - formatter function - :return: - """ - return format_filter(self.filter, 'FILTER=') - - def _format_lof(self): + load parameters from json config file """ - format lof consequence types to filter - :return: - """ - return format_filter(self.lof_type, 'INFO/VC=') + inc_filters = ['FORMAT', 'FILTER', 'INFO', 'INFO_FLAG_GERMLINE'] + formatted_filters = parse_filters(self.json_file, 'include', inc_filters) + return formatted_filters def _get_outdir_path(self): """ @@ -113,19 +104,6 @@ def check_inputs(file_dict): return file_dict -def format_filter(filter_vals, filter_type): - """ - - :param filter_vals: for vcf filter field - :param filter_type: filter prefix [e.g., FILTER, INFO, etc ] - :return: formatted filter string - """ - format_store = [] - for val in filter_vals: - format_store.append(filter_type + "\"" + val + "\"") - return ' || '.join(format_store) - - def get_file_metadata(full_file_name): """ takes file path as input and gives its path and processed extension @@ -139,6 +117,25 @@ def get_file_metadata(full_file_name): return file_metadata +def parse_filters(json_file, filter_type, filters): + """ + load filtering parameters from json config file + """ + filter_param_dict = {} + try: + if json_file is None: + sys.exit('Json configuration file must be provided') + with open(json_file, 'r') as cfgfile: + filter_cfg = json.load(cfgfile) + for filter in filters: + filter_param_dict[filter] = filter_cfg[filter_type][filter] + except json.JSONDecodeError as jde: + sys.exit('json error:{}'.format(jde.args[0])) + except FileNotFoundError as fne: + sys.exit('Can not find json file:{}'.format(fne.args[0])) + return filter_param_dict + + @contextmanager def tempdir(mypath): """ diff --git a/annotate/vcf_annotator.py b/annotate/vcf_annotator.py index 6dbf58d..3d18e35 100644 --- a/annotate/vcf_annotator.py +++ b/annotate/vcf_annotator.py @@ -26,7 +26,6 @@ def __init__(self, f, basedir): self.outdir = basedir self.status_dict = f.format(['input_status']) self.input_data = f.format(['vcf_file']) - # set input vcf parameters ... self._set_input_vcf(self.input_data) self.drv_header = f.header_line @@ -42,24 +41,27 @@ def _runAnalysis(self, f): :return: """ status = self.status_dict['input_status'] - vcf_filters = f.format(['format_filter']) - self.vcf_filter = vcf_filters['format_filter'] + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + self.info_filters = vcf_filter_params['INFO'] + self.format_filters = vcf_filter_params['FORMAT'] + self.filter_filters = vcf_filter_params['FILTER'] + self.flag_germline = vcf_filter_params['INFO_FLAG_GERMLINE'] run_status = False a_type = ['normal_panel', 'mutations', 'lof_genes'] for analysis in a_type: if status[analysis] and analysis == 'normal_panel': - logging.info(f"Tagging germline variants with INFO tag:{f.np_tag}") - self.tag_germline_vars(f.np_tag, f.np_vcf) + logging.info(f"Tagging germline variants with INFO tag:{self.flag_germline}") + self.tag_germline_vars(f.np_vcf) run_status = True if status[analysis] and analysis == 'mutations': logging.info("Annotating driver mutations with INFO field:DRV=") self.annot_drv_muts(f.muts_file) run_status = True if status[analysis] and analysis == 'lof_genes': - logging.info("Annotating LoF genes with INFO filed:DRV=LoF") - lof_types = f.format(['lof_type']) - self.annotate_lof_genes(f.genes_file, lof_types['lof_type']) + logging.info("Annotating LoF genes with INFO field:DRV=LoF") + self.annotate_lof_genes(f.genes_file) run_status = True if run_status: logging.info("concatenating results") @@ -79,24 +81,23 @@ def _set_input_vcf(self, input_data): self.vcf_name = input_data['vcf_file']['name'] self.outfile_name = self.outdir + '/' + self.vcf_name + '{}' - def tag_germline_vars(self, np_tag, np_vcf): + def tag_germline_vars(self, np_vcf): """ - use normal panel to tag germline variants and created filtered vcf file to use in + use normal panel to tag germline variants and create filtered vcf file to use in subsequent driver annotation steps ... sets filtered vcf as new user input parameter for downstream analysis add tagged and filtered vcf files to concat in final step - :param np_tag: :param np_vcf: :return: """ tagged_vcf = self.outfile_name.format('_np.vcf.gz') filtered_vcf = self.outfile_name.format('_np_filtered.vcf.gz') - cmd = f"bcftools annotate -a {np_vcf} -i '{self.vcf_filter}' -m '{np_tag}'" \ + cmd = f"bcftools annotate -a {np_vcf} -i '{self.filter_filters}' -m '{self.flag_germline}'" \ f" {self.vcf_path} | bgzip -c >{tagged_vcf} && tabix -p vcf {tagged_vcf}" _run_command(cmd) - cmd = f"bcftools view -i '({self.vcf_filter}) && {np_tag}=0' {tagged_vcf} | " \ + cmd = f"bcftools view -i '{self.flag_germline}=0' {tagged_vcf} | " \ f"bgzip -c >{filtered_vcf} && tabix -p vcf {filtered_vcf}" _run_command(cmd) @@ -118,7 +119,9 @@ def annot_drv_muts(self, muts_file): :return: """ muts_outfile = self.outfile_name.format('_muts.vcf.gz') - cmd = f"bcftools annotate -i '{self.vcf_filter}' --merge-logic DRV:unique" \ + combined_filter = _combine_filters([self.filter_filters, self.format_filters]) + cmd = f"bcftools annotate -i '{combined_filter}'" \ + f" --merge-logic DRV:unique" \ f" -a {muts_file} -h {self.drv_header} " \ f"-c CHROM,FROM,TO,INFO/DRV {self.vcf_path} |" \ f"bcftools annotate -i 'DRV!=\".\" && DRV[*]==VC' | " \ @@ -126,11 +129,10 @@ def annot_drv_muts(self, muts_file): _run_command(cmd) self.merge_vcf_dict['a'] = muts_outfile - def annotate_lof_genes(self, genes_file, lof_types): + def annotate_lof_genes(self, genes_file): """ annotate vcf using known LoF genes :param genes_file: lof gene names file - :param lof_types: lof consequences type string :return: """ # create dummy genome locationo file to annoate LoF genes... @@ -140,9 +142,10 @@ def annotate_lof_genes(self, genes_file, lof_types): genes_outfile = self.outfile_name.format('_genes.vcf') lof_outfile = self.outfile_name.format('_genes_lof.vcf') lof_gene_list = get_drv_gene_list(genes_file) - # map lof effect types to pass variants... - cmd = f"bcftools annotate -a {genome_loc_file} -i '({self.vcf_filter}) && ({lof_types})' " \ + combined_filter = _combine_filters([self.filter_filters, self.format_filters, self.info_filters]) + cmd = f"bcftools annotate -a {genome_loc_file} -i '{combined_filter}' " \ f"-h {self.drv_header} -c CHROM,FROM,TO,INFO/DRV {self.vcf_path} >{genes_outfile}" + _run_command(cmd) with open(lof_outfile, "w") as lof_fh, open(genes_outfile, 'r') as gene_f: for line in gene_f: @@ -172,6 +175,17 @@ def concat_results(self): # generic methods .... +def _combine_filters(filter_array): + """ + :param filter_array: filtring paramters + :return: return formatted filtering parameters if present otherwise () equivalent to no filter... + """ + if any(filter_array): + return f"({') && ('.join(filter(None, filter_array))})" + else: + return "()" + + def _get_gene(line, gene_field, field_loc): # Not used ... kept for future implementation of different annotation fields.... # ANN=T|missense_variant|MODERATE|AGAP005273|AGAP005273| [ e.g. 'ANN', 3] diff --git a/setup.py b/setup.py index 986a553..6d2ac4f 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ 'python_requires': '>= 3.6', 'install_requires': ['tzlocal'], 'packages': ['annotate'], - 'package_data': {'annotate':['config/*.conf','config/*.header']}, + 'package_data': {'annotate':['config/*.conf','config/*.header','config/*.json']}, 'entry_points': { 'console_scripts': ['annotateVcf=annotate.commandline:main'], } diff --git a/tests/test_celline_vcf_input.py b/tests/test_celline_vcf_input.py index 253e498..057e53f 100755 --- a/tests/test_celline_vcf_input.py +++ b/tests/test_celline_vcf_input.py @@ -16,14 +16,12 @@ class TestClass(): test_out = configdir + '/test_output/' options_vcf_celline = {'vcf_file': test_dir + 'celline.vcf.gz', - 'vcf_filter': ['PASS'], 'lof_genes': test_dir + 'lof_genes_v1.0.txt', 'mutations': test_dir + 'driver_mutations_sorted.tsv.gz', - 'lof_type': ["stop_lost","start_lost","ess_splice","frameshift","nonsense"], + 'vcf_filters': test_dir + 'filters.json', 'header_line': test_dir + 'info.header', 'outdir': test_dir + 'tmpout', 'keepTmp': False, - 'germline_tag': 'NPGL', 'normal_panel': test_dir+'np.vcf.gz' } @@ -33,9 +31,11 @@ class TestClass(): }} file_dict = {'input_status': {'mutations': True, 'lof_genes': True, 'normal_panel': True, 'vcf_file': True}} - lof_type = { - 'lof_type': 'INFO/VC="stop_lost" || INFO/VC="start_lost" || INFO/VC="ess_splice" || INFO/VC="frameshift" || INFO/VC="nonsense"'} - my_filter = {'format_filter': 'FILTER="PASS"'} + + info_filter="INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"" + format_filter= "FORMAT/VAF[*] > 0.15" + filters_filter= "FILTER=\"PASS\"" + info_flag_germline="NPGL" # celline output muts_vcf = f"{test_out}/celline_muts.vcf.gz" lof_vcf = f"{test_out}/celline_genes_lof.vcf.gz" @@ -46,23 +46,37 @@ class TestClass(): def test_celline_vcf_input(self): # check input type function f = self.my_formatter - file_metadata = self.file_metadata - assert file_metadata == f.format(['vcf_file']),'test_vcf_input test OK' + assert self.file_metadata == f.format(['vcf_file']),'test_vcf_input test OK' def test_celline_file_input(self): file_dict=self.file_dict f = self.my_formatter assert file_dict == f.format(['input_status']),'test_file_input test OK' - def test_celline_lof_format(self): - lof_type=self.lof_type + def test_celline_info_filter(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.info_filter == vcf_filter_params['INFO'],'test_INFO test OK' + + def test_celline_format_filter(self): f = self.my_formatter - assert lof_type == f.format(['lof_type']),'test_lof_format test OK' + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.format_filter == vcf_filter_params['FORMAT'],'test_FORMAT test OK' - def test_celline_filter_format(self): - my_filter=self.my_filter + def test_celline_filter_filters(self): f = self.my_formatter - assert my_filter == f.format(['format_filter']),'test_filter_format test OK' + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.filters_filter == vcf_filter_params['FILTER'],'test_FILTER test OK' + + def test_celline_info_flag_germline(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.info_flag_germline == vcf_filter_params['INFO_FLAG_GERMLINE'],'test_INFO_FLAG_GERMLINE test OK' + def chek_celline_outdir(slef): self.test_dir + 'tmpout' == self.outdir_path diff --git a/tests/test_input/README b/tests/test_input/README new file mode 100644 index 0000000..400ad06 --- /dev/null +++ b/tests/test_input/README @@ -0,0 +1,11 @@ +annotateVcf -vcf celline.vcf.gz -g lof_genes_v1.0.txt -m driver_mutations_sorted.tsv.gz -np np.vcf.gz -o test_output -t +annotateVcf -vcf input.vcf.gz -g lof_genes_v1.0.txt -m driver_mutations_sorted.tsv.gz -o test_output -t -filters filters.json +bcftools query -f '%CHROM\t%END\n' ../celline.vcf.gz >cell_linergions.txt + +bcftools view /lustre/scratch117/casm/team215/sb43/user_query/cell_line_vafs_comparison/vafcorrect_broad_wgs/vafcorrect_out/output/PDv38is_wgs/snp/PDv38is_wgs_ACH-000879_snp_vaf.vcf.gz -R cell_linergions.txt | bgzip -c >../celline.vcf.gz + +bcftools query -f '%CHROM\t%END\n' ../input.vcf.gz >org_pos.txt + +bcftools view /lustre/scratch117/casm/team215/sb43/organoid_analysis/WGS/p2126/out_vafcorrect/output/WTSI-COLO_005_b/snp/WTSI-COLO_005_b_WTSI-COLO_005_a_DNA_snp_vaf.vcf.gz -R org_pos.txt >input.vcf + + diff --git a/tests/test_input/celline.vcf.gz b/tests/test_input/celline.vcf.gz index f07b2d2..4fc3892 100644 Binary files a/tests/test_input/celline.vcf.gz and b/tests/test_input/celline.vcf.gz differ diff --git a/tests/test_input/celline.vcf.gz.tbi b/tests/test_input/celline.vcf.gz.tbi index eda32ba..a21613f 100644 Binary files a/tests/test_input/celline.vcf.gz.tbi and b/tests/test_input/celline.vcf.gz.tbi differ diff --git a/tests/test_input/filters.json b/tests/test_input/filters.json new file mode 100644 index 0000000..e6b985c --- /dev/null +++ b/tests/test_input/filters.json @@ -0,0 +1,17 @@ +{ + "_#comment": [ + "Add filters to include/ exclude the variants sites to annotate using drivers", + "please refer filter expression formats to add any filters : https://samtools.github.io/bcftools/bcftools.html#expressions" + ], + "include": { + "FILTER": "FILTER=\"PASS\"", + "FORMAT": "FORMAT/VAF[*] > 0.15", + "INFO": "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"", + "INFO_FLAG_GERMLINE": "NPGL" + }, + + "exclude": { + "None": "NA" + } +} + diff --git a/tests/test_input/input.vcf.gz b/tests/test_input/input.vcf.gz index f71ab1c..d1ab9b3 100644 Binary files a/tests/test_input/input.vcf.gz and b/tests/test_input/input.vcf.gz differ diff --git a/tests/test_input/input.vcf.gz.tbi b/tests/test_input/input.vcf.gz.tbi index 28a775b..39bb953 100644 Binary files a/tests/test_input/input.vcf.gz.tbi and b/tests/test_input/input.vcf.gz.tbi differ diff --git a/tests/test_input_lof/filters.json b/tests/test_input_lof/filters.json new file mode 100644 index 0000000..e6b985c --- /dev/null +++ b/tests/test_input_lof/filters.json @@ -0,0 +1,17 @@ +{ + "_#comment": [ + "Add filters to include/ exclude the variants sites to annotate using drivers", + "please refer filter expression formats to add any filters : https://samtools.github.io/bcftools/bcftools.html#expressions" + ], + "include": { + "FILTER": "FILTER=\"PASS\"", + "FORMAT": "FORMAT/VAF[*] > 0.15", + "INFO": "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"", + "INFO_FLAG_GERMLINE": "NPGL" + }, + + "exclude": { + "None": "NA" + } +} + diff --git a/tests/test_input_lof/input.vcf.gz b/tests/test_input_lof/input.vcf.gz index 394d049..ac9e41d 100644 Binary files a/tests/test_input_lof/input.vcf.gz and b/tests/test_input_lof/input.vcf.gz differ diff --git a/tests/test_input_lof/input.vcf.gz.tbi b/tests/test_input_lof/input.vcf.gz.tbi index 2b469a5..0e2d4e0 100644 Binary files a/tests/test_input_lof/input.vcf.gz.tbi and b/tests/test_input_lof/input.vcf.gz.tbi differ diff --git a/tests/test_input_no_vaf_vcf/driver_mutations_sorted.tsv.gz b/tests/test_input_no_vaf_vcf/driver_mutations_sorted.tsv.gz new file mode 100644 index 0000000..26e5a6c Binary files /dev/null and b/tests/test_input_no_vaf_vcf/driver_mutations_sorted.tsv.gz differ diff --git a/tests/test_input_no_vaf_vcf/driver_mutations_sorted.tsv.gz.tbi b/tests/test_input_no_vaf_vcf/driver_mutations_sorted.tsv.gz.tbi new file mode 100644 index 0000000..8dc2ffb Binary files /dev/null and b/tests/test_input_no_vaf_vcf/driver_mutations_sorted.tsv.gz.tbi differ diff --git a/tests/test_input_no_vaf_vcf/filters.json b/tests/test_input_no_vaf_vcf/filters.json new file mode 100644 index 0000000..5dd082c --- /dev/null +++ b/tests/test_input_no_vaf_vcf/filters.json @@ -0,0 +1,17 @@ +{ + "_#comment": [ + "Add filters to include/ exclude the variants sites to annotate using drivers", + "please refer filter expression formats to add any filters : https://samtools.github.io/bcftools/bcftools.html#expressions" + ], + "include": { + "FILTER": "FILTER=\"PASS\"", + "FORMAT": null, + "INFO": "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"", + "INFO_FLAG_GERMLINE": "NPGL" + }, + + "exclude": { + "None": "NA" + } +} + diff --git a/tests/test_input_no_vaf_vcf/info.header b/tests/test_input_no_vaf_vcf/info.header new file mode 100644 index 0000000..bf75f79 --- /dev/null +++ b/tests/test_input_no_vaf_vcf/info.header @@ -0,0 +1 @@ +##INFO= diff --git a/tests/test_input_no_vaf_vcf/input.vcf.gz b/tests/test_input_no_vaf_vcf/input.vcf.gz new file mode 100644 index 0000000..cccee54 Binary files /dev/null and b/tests/test_input_no_vaf_vcf/input.vcf.gz differ diff --git a/tests/test_input_no_vaf_vcf/input.vcf.gz.tbi b/tests/test_input_no_vaf_vcf/input.vcf.gz.tbi new file mode 100644 index 0000000..0ea4b93 Binary files /dev/null and b/tests/test_input_no_vaf_vcf/input.vcf.gz.tbi differ diff --git a/tests/test_input_no_vaf_vcf/lof_genes_v1.0.txt b/tests/test_input_no_vaf_vcf/lof_genes_v1.0.txt new file mode 100644 index 0000000..8d8edf0 --- /dev/null +++ b/tests/test_input_no_vaf_vcf/lof_genes_v1.0.txt @@ -0,0 +1,7 @@ +APC +B2M +ARID1A +DICER1 +TP53 +RNF43 +ARID1B diff --git a/tests/test_lof_with_hyphen.py b/tests/test_lof_with_hyphen.py index 120241c..d8ac209 100755 --- a/tests/test_lof_with_hyphen.py +++ b/tests/test_lof_with_hyphen.py @@ -16,14 +16,12 @@ class TestClass(): test_out = configdir + '/test_output_lof/' options_vcf = {'vcf_file': test_dir + 'input.vcf.gz', - 'vcf_filter': ['PASS'], 'lof_genes': test_dir + 'lof_genes_v1.0.txt', 'mutations': test_dir + 'driver_mutations_sorted.tsv.gz', - 'lof_type': ["stop_lost","start_lost","ess_splice","frameshift","nonsense"], + 'vcf_filters': test_dir + 'filters.json', 'header_line': test_dir + 'info.header', 'outdir': test_dir + "/tmpout", 'keepTmp': False, - 'germline_tag': 'NPGL', 'normal_panel': None } @@ -34,9 +32,9 @@ class TestClass(): }} file_dict = {'input_status': {'mutations': True, 'lof_genes': True, 'normal_panel': False, 'vcf_file': True}} - lof_type = { - 'lof_type': 'INFO/VC="stop_lost" || INFO/VC="start_lost" || INFO/VC="ess_splice" || INFO/VC="frameshift" || INFO/VC="nonsense"'} - my_filter = {'format_filter': 'FILTER="PASS"'} + info_filter = "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"" + format_filter = "FORMAT/VAF[*] > 0.15" + filters_filter = "FILTER=\"PASS\"" #organoid check muts_vcf = f"{test_out}/input_muts.vcf.gz" lof_vcf = f"{test_out}/input_genes_lof.vcf.gz" @@ -56,15 +54,25 @@ def test_matched_file_input(self): f = self.my_formatter assert file_dict == f.format(['input_status']),'test_file_input test OK' - def test_matched_lof_format(self): - lof_type=self.lof_type + + def test_celline_info_filter(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.info_filter == vcf_filter_params['INFO'],'test_INFO test OK' + + def test_celline_format_filter(self): f = self.my_formatter - assert lof_type == f.format(['lof_type']),'test_lof_format test OK' + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.format_filter == vcf_filter_params['FORMAT'],'test_FORMAT test OK' - def test_matched_filter_format(self): - my_filter=self.my_filter + def test_celline_filter_filters(self): f = self.my_formatter - assert my_filter == f.format(['format_filter']),'test_filter_format test OK' + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.filters_filter == vcf_filter_params['FILTER'],'test_FILTER test OK' + def chek_matched_outdir(slef): self.test_dir + 'tmpout' == self.outdir_path diff --git a/tests/test_matched_vcf_input.py b/tests/test_matched_vcf_input.py index 1d38ad8..c392fae 100755 --- a/tests/test_matched_vcf_input.py +++ b/tests/test_matched_vcf_input.py @@ -16,14 +16,12 @@ class TestClass(): test_out = configdir + '/test_output/' options_vcf = {'vcf_file': test_dir + 'input.vcf.gz', - 'vcf_filter': ['PASS'], 'lof_genes': test_dir + 'lof_genes_v1.0.txt', 'mutations': test_dir + 'driver_mutations_sorted.tsv.gz', - 'lof_type': ["stop_lost","start_lost","ess_splice","frameshift","nonsense"], + 'vcf_filters': test_dir + 'filters.json', 'header_line': test_dir + 'info.header', 'outdir': test_dir + "/tmpout", 'keepTmp': False, - 'germline_tag': 'NPGL', 'normal_panel': None } @@ -34,13 +32,13 @@ class TestClass(): }} file_dict = {'input_status': {'mutations': True, 'lof_genes': True, 'normal_panel': False, 'vcf_file': True}} - lof_type = { - 'lof_type': 'INFO/VC="stop_lost" || INFO/VC="start_lost" || INFO/VC="ess_splice" || INFO/VC="frameshift" || INFO/VC="nonsense"'} - my_filter = {'format_filter': 'FILTER="PASS"'} + info_filter = "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"" + format_filter = "FORMAT/VAF[*] > 0.15" + filters_filter = "FILTER=\"PASS\"" #organoid check - muts_vcf = f"{test_out}/input.muts.vcf.gz" - lof_vcf = f"{test_out}/input.genes.lof.vcf.gz" - drv_vcf = f"{test_out}/input.drv.vcf.gz" + muts_vcf = f"{test_out}/input_muts.vcf.gz" + lof_vcf = f"{test_out}/input_genes_lof.vcf.gz" + drv_vcf = f"{test_out}/input_drv.vcf.gz" my_formatter=formatter.IO_Formatter(**options_vcf) outdir_path=my_formatter.format(['outdir']) @@ -56,15 +54,24 @@ def test_matched_file_input(self): f = self.my_formatter assert file_dict == f.format(['input_status']),'test_file_input test OK' - def test_matched_lof_format(self): - lof_type=self.lof_type + def test_celline_info_filter(self): f = self.my_formatter - assert lof_type == f.format(['lof_type']),'test_lof_format test OK' + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.info_filter == vcf_filter_params['INFO'],'test_INFO test OK' - def test_matched_filter_format(self): - my_filter=self.my_filter + def test_celline_format_filter(self): f = self.my_formatter - assert my_filter == f.format(['format_filter']),'test_filter_format test OK' + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.format_filter == vcf_filter_params['FORMAT'],'test_FORMAT test OK' + + def test_celline_filter_filters(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.filters_filter == vcf_filter_params['FILTER'],'test_FILTER test OK' + def chek_matched_outdir(slef): self.test_dir + 'tmpout' == self.outdir_path diff --git a/tests/test_no_vaf_vcf.py b/tests/test_no_vaf_vcf.py new file mode 100755 index 0000000..4921af9 --- /dev/null +++ b/tests/test_no_vaf_vcf.py @@ -0,0 +1,100 @@ +import annotate.io_formatter as formatter +import annotate.vcf_annotator as annotator +import pytest +import os +import filecmp +import unittest.mock + +''' +written test to check codebase integrity +of annotateVcf +''' + +class TestClass(): + configdir = os.path.join(os.path.dirname(os.path.realpath(__file__))) + test_dir = configdir + '/test_input_no_vaf_vcf/' + test_out = configdir + '/test_output_no_vaf_vcf/' + + options_vcf = {'vcf_file': test_dir + 'input.vcf.gz', + 'lof_genes': test_dir + 'lof_genes_v1.0.txt', + 'mutations': test_dir + 'driver_mutations_sorted.tsv.gz', + 'vcf_filters': test_dir + 'filters.json', + 'header_line': test_dir + 'info.header', + 'outdir': test_dir + "/tmpout", + 'keepTmp': False, + 'normal_panel': None + } + + + file_metadata = {'vcf_file': {'name': 'input', + 'ext': '.vcf.gz', + 'path': test_dir + 'input.vcf.gz' + }} + file_dict = {'input_status': {'mutations': True, 'lof_genes': True, + 'normal_panel': False, 'vcf_file': True}} + info_filter = "INFO/VC=\"stop_lost,start_lost,ess_splice,frameshift,nonsense\"" + format_filter = None + filters_filter = "FILTER=\"PASS\"" + #organoid check + muts_vcf = f"{test_out}/input_muts.vcf.gz" + lof_vcf = f"{test_out}/input_genes_lof.vcf.gz" + drv_vcf = f"{test_out}/input_drv.vcf.gz" + + my_formatter=formatter.IO_Formatter(**options_vcf) + outdir_path=my_formatter.format(['outdir']) + + def test_no_VAF_vcf_input(self): + # check input type function + f = self.my_formatter + file_metadata = self.file_metadata + assert file_metadata == f.format(['vcf_file']),'test_vcf_input test OK' + + def test_matched_file_input(self): + file_dict=self.file_dict + f = self.my_formatter + assert file_dict == f.format(['input_status']),'test_file_input test OK' + + + def test_celline_info_filter(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.info_filter == vcf_filter_params['INFO'],'test_INFO test OK' + + def test_celline_format_filter(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.format_filter == vcf_filter_params['FORMAT'],'test_FORMAT test OK' + + def test_celline_filter_filters(self): + f = self.my_formatter + vcf_filters = f.format(['vcf_filters']) + vcf_filter_params = vcf_filters.get('vcf_filters', None) + assert self.filters_filter == vcf_filter_params['FILTER'],'test_FILTER test OK' + + def chek_matched_outdir(slef): + self.test_dir + 'tmpout' == self.outdir_path + + def test_no_VAF_vcf_formatter(self): + f = self.my_formatter + outdir_path = self.outdir_path + with formatter.tempdir(outdir_path['outdir']) as base_dir: + obs_muts_vcf = f"{base_dir}/input_muts.vcf.gz" + obs_lof_vcf = f"{base_dir}/input_genes_lof.vcf.gz" + obs_drv_vcf = f"{base_dir}/input_drv.vcf.gz" + + annotator.VcfAnnotator(f, base_dir) + + exp_muts_vcf_sub = annotator.unheader_vcf(self.muts_vcf, base_dir+'/exp_muts.vcf') + obs_muts_vcf_sub = annotator.unheader_vcf(obs_muts_vcf, base_dir+'/obs_muts.vcf') + assert filecmp.cmp(exp_muts_vcf_sub, obs_muts_vcf_sub, shallow=True), 'muts records in vcf files are identical OK' + + exp_lof_vcf_sub = annotator.unheader_vcf(self.lof_vcf, base_dir+'/exp_genes.lof.vcf') + obs_lof_vcf_sub = annotator.unheader_vcf(obs_lof_vcf, base_dir+'/obs_genes.lof.vcf') + assert filecmp.cmp(exp_lof_vcf_sub, obs_lof_vcf_sub, shallow=True), 'lof records in vcf files are identical OK' + + exp_drv_vcf_sub = annotator.unheader_vcf(self.drv_vcf, base_dir+'/exp_drv.vcf') + obs_drv_vcf_sub = annotator.unheader_vcf(obs_drv_vcf, base_dir+'/obs_drv.vcf') + assert filecmp.cmp(exp_drv_vcf_sub, obs_drv_vcf_sub, shallow=True), 'final drv records in vcf files are identical OK' + diff --git a/tests/test_output/celline_drv.vcf.gz b/tests/test_output/celline_drv.vcf.gz index 002bc8f..0170d13 100644 Binary files a/tests/test_output/celline_drv.vcf.gz and b/tests/test_output/celline_drv.vcf.gz differ diff --git a/tests/test_output/celline_drv.vcf.gz.tbi b/tests/test_output/celline_drv.vcf.gz.tbi index 82d5272..f4b1f5c 100644 Binary files a/tests/test_output/celline_drv.vcf.gz.tbi and b/tests/test_output/celline_drv.vcf.gz.tbi differ diff --git a/tests/test_output/celline_genes_lof.vcf.gz b/tests/test_output/celline_genes_lof.vcf.gz index e7885dd..af01a45 100644 Binary files a/tests/test_output/celline_genes_lof.vcf.gz and b/tests/test_output/celline_genes_lof.vcf.gz differ diff --git a/tests/test_output/celline_genes_lof.vcf.gz.tbi b/tests/test_output/celline_genes_lof.vcf.gz.tbi index 61fcc47..113bd5a 100644 Binary files a/tests/test_output/celline_genes_lof.vcf.gz.tbi and b/tests/test_output/celline_genes_lof.vcf.gz.tbi differ diff --git a/tests/test_output/celline_muts.vcf.gz b/tests/test_output/celline_muts.vcf.gz index 660823a..2dd9f03 100644 Binary files a/tests/test_output/celline_muts.vcf.gz and b/tests/test_output/celline_muts.vcf.gz differ diff --git a/tests/test_output/celline_muts.vcf.gz.tbi b/tests/test_output/celline_muts.vcf.gz.tbi index ef37c95..18e19c7 100644 Binary files a/tests/test_output/celline_muts.vcf.gz.tbi and b/tests/test_output/celline_muts.vcf.gz.tbi differ diff --git a/tests/test_output/celline_np.vcf.gz b/tests/test_output/celline_np.vcf.gz new file mode 100644 index 0000000..bf27cb0 Binary files /dev/null and b/tests/test_output/celline_np.vcf.gz differ diff --git a/tests/test_output/celline_np.vcf.gz.tbi b/tests/test_output/celline_np.vcf.gz.tbi new file mode 100644 index 0000000..376077d Binary files /dev/null and b/tests/test_output/celline_np.vcf.gz.tbi differ diff --git a/tests/test_output/celline_np_filtered.vcf.gz b/tests/test_output/celline_np_filtered.vcf.gz new file mode 100644 index 0000000..c9623cc Binary files /dev/null and b/tests/test_output/celline_np_filtered.vcf.gz differ diff --git a/tests/test_output/celline_np_filtered.vcf.gz.tbi b/tests/test_output/celline_np_filtered.vcf.gz.tbi new file mode 100644 index 0000000..e1fff83 Binary files /dev/null and b/tests/test_output/celline_np_filtered.vcf.gz.tbi differ diff --git a/tests/test_output/input.drv.vcf.gz b/tests/test_output/input.drv.vcf.gz deleted file mode 100644 index e6089bd..0000000 Binary files a/tests/test_output/input.drv.vcf.gz and /dev/null differ diff --git a/tests/test_output/input.drv.vcf.gz.tbi b/tests/test_output/input.drv.vcf.gz.tbi deleted file mode 100644 index a4607b0..0000000 Binary files a/tests/test_output/input.drv.vcf.gz.tbi and /dev/null differ diff --git a/tests/test_output/input.genes.lof.vcf.gz b/tests/test_output/input.genes.lof.vcf.gz deleted file mode 100644 index 3a58a2e..0000000 Binary files a/tests/test_output/input.genes.lof.vcf.gz and /dev/null differ diff --git a/tests/test_output/input.genes.lof.vcf.gz.tbi b/tests/test_output/input.genes.lof.vcf.gz.tbi deleted file mode 100644 index 265ca37..0000000 Binary files a/tests/test_output/input.genes.lof.vcf.gz.tbi and /dev/null differ diff --git a/tests/test_output/input.muts.vcf.gz b/tests/test_output/input.muts.vcf.gz deleted file mode 100644 index f02b6db..0000000 Binary files a/tests/test_output/input.muts.vcf.gz and /dev/null differ diff --git a/tests/test_output/input.muts.vcf.gz.tbi b/tests/test_output/input.muts.vcf.gz.tbi deleted file mode 100644 index 873bd3d..0000000 Binary files a/tests/test_output/input.muts.vcf.gz.tbi and /dev/null differ diff --git a/tests/test_output/input_drv.vcf.gz b/tests/test_output/input_drv.vcf.gz new file mode 100644 index 0000000..9dccb4b Binary files /dev/null and b/tests/test_output/input_drv.vcf.gz differ diff --git a/tests/test_output/input_drv.vcf.gz.tbi b/tests/test_output/input_drv.vcf.gz.tbi new file mode 100644 index 0000000..c04143c Binary files /dev/null and b/tests/test_output/input_drv.vcf.gz.tbi differ diff --git a/tests/test_output/input_genes_lof.vcf.gz b/tests/test_output/input_genes_lof.vcf.gz new file mode 100644 index 0000000..bed1231 Binary files /dev/null and b/tests/test_output/input_genes_lof.vcf.gz differ diff --git a/tests/test_output/input_genes_lof.vcf.gz.tbi b/tests/test_output/input_genes_lof.vcf.gz.tbi new file mode 100644 index 0000000..d7bcf5b Binary files /dev/null and b/tests/test_output/input_genes_lof.vcf.gz.tbi differ diff --git a/tests/test_output/input_muts.vcf.gz b/tests/test_output/input_muts.vcf.gz new file mode 100644 index 0000000..ea1cc4e Binary files /dev/null and b/tests/test_output/input_muts.vcf.gz differ diff --git a/tests/test_output/input_muts.vcf.gz.tbi b/tests/test_output/input_muts.vcf.gz.tbi new file mode 100644 index 0000000..1d5e00c Binary files /dev/null and b/tests/test_output/input_muts.vcf.gz.tbi differ diff --git a/tests/test_output_lof/genome.tab.gz b/tests/test_output_lof/genome.tab.gz deleted file mode 100644 index 18d8d97..0000000 Binary files a/tests/test_output_lof/genome.tab.gz and /dev/null differ diff --git a/tests/test_output_lof/genome.tab.gz.tbi b/tests/test_output_lof/genome.tab.gz.tbi deleted file mode 100644 index 19d6b68..0000000 Binary files a/tests/test_output_lof/genome.tab.gz.tbi and /dev/null differ diff --git a/tests/test_output_lof/input_drv.vcf.gz b/tests/test_output_lof/input_drv.vcf.gz index ea8ae6c..fd52a5b 100644 Binary files a/tests/test_output_lof/input_drv.vcf.gz and b/tests/test_output_lof/input_drv.vcf.gz differ diff --git a/tests/test_output_lof/input_drv.vcf.gz.tbi b/tests/test_output_lof/input_drv.vcf.gz.tbi index da29bf0..eec9523 100644 Binary files a/tests/test_output_lof/input_drv.vcf.gz.tbi and b/tests/test_output_lof/input_drv.vcf.gz.tbi differ diff --git a/tests/test_output_lof/input_genes.vcf b/tests/test_output_lof/input_genes.vcf deleted file mode 100644 index 33af928..0000000 --- a/tests/test_output_lof/input_genes.vcf +++ /dev/null @@ -1,96 +0,0 @@ -##fileformat=VCFv4.1 -##FILTER= -##FILTER== 25 base quality"> -##FILTER= -##FILTER== 15 base quality found in the matched normal"> -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER== 3 mutant allele present in at least 1 percent unmatched normal samples in the unmatched VCF."> -##FILTER== 10 on each strand but mutant allele is only present on one strand"> -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##SAMPLE= -##SAMPLE= -##cavemanVersion=1.13.14 -##cgpAnalysisProc_20190930.1=5346618 -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20190924 -##reference=/lustre/scratch119/casm/team78pipelines/canpipe/live/data/analysis/2126_2577635/CaVEMan/genome.fa -##source_20190930.1=AnnotateVcf.pl -##vcfProcessLog=,InputVCFSource=,InputVCFParam=> -##vcfProcessLog_20190930.1=,InputVCFSource=,InputVCFVer=<1.8.8>,InputVCFParam=> -##vcfProcessLog_20190930.2=,InputVCFSource=,InputVCFVer=<3.5.0>,InputVCFParam=> -##INFO= -##bcftools_annotateVersion=1.10.2-64-gf66af73+htslib-1.10.2-85-g16f62c5 -##bcftools_annotateCommand=annotate -a /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/tests/tmpout/tmp_jp6ts6o/genome.tab.gz -i '(FILTER="PASS") && (INFO/VC="stop_lost" || INFO/VC="start_lost" || INFO/VC="ess_splice" || INFO/VC="frameshift" || INFO/VC="nonsense")' -h /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/tests/test_input_lof/info.header -c CHROM,FROM,TO,INFO/DRV /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/tests/test_input_lof/input.vcf.gz; Date=Sat Aug 29 15:57:54 2020 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOUR -chr1 161193208 77c913be-df72-11e9-aae8-d600ec72acc9 C A . PASS DP=63;MP=1;GP=1.8e-07;TG=CC/AACCC;TP=0.89;SG=CC/AAACC;SP=0.091;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=ADAMTS4|CCDS1223.1|r.2340+5g>u|c.1911+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=ADAMTS4|CCDS1223.1|r.2340+5g>u|c.1911+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:12:0:0:0:15:0:0:0 0|1:9:12:0:0:5:9:1:0:0.39 -chr1 219210652 77cc542a-df72-11e9-aae8-d600ec72acc9 G A . PASS DP=91;MP=1;GP=3e-13;TG=GG/AAAAG;TP=0.64;SG=GG/AAAGG;SP=0.36;ASRD=0.92;CLPM=0;ASMD=139.5;VT=Sub;VD=LYPLAL1|CCDS1522.1|r.524+5g>a|c.477+5G>A|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=LYPLAL1|CCDS1522.1|r.524+5g>a|c.477+5G>A|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:26:0:0:0:21:0:0 0|1:17:1:7:0:13:0:6:0:0.68 -chr1 247934046 77ce1eae-df72-11e9-aae8-d600ec72acc9 C A . PASS DP=76;MP=1;GP=1.5e-13;TG=CC/AACCC;TP=0.7;SG=CC/ACCCC;SP=0.28;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=OR2AJ1|ENST00000318244|r.580c>a|c.278C>A|p.S93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=OR2AJ1|ENST00000318244|r.580c>a|c.278C>A|p.S93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:22:0:0:0:26:0:0:0 0|1:2:9:0:0:7:10:0:0:0.32 -chr10 113160663 77deb084-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=72;MP=1;GP=1.1e-09;TG=CC/CCTTT;TP=0.54;SG=CC/CCCTT;SP=0.46;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=TCF7L2|CCDS53577.1|r.1713c>u|c.1363C>T|p.R455*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TCF7L2|CCDS53577.1|r.1713c>u|c.1363C>T|p.R455*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:17:0:0:1:17:0:0:0 0|1:0:13:0:8:0:6:0:10:0.49 -chr13 43888891 780b9c20-df72-11e9-aae8-d600ec72acc9 G T . PASS DP=89;MP=1;GP=1.9e-12;TG=GG/GGGGT;TP=1;SG=GG/GGGTT;SP=0.0017;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=LACC1|CCDS9391.1|r.1527g>u|c.1042G>T|p.E348*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=LACC1|CCDS9391.1|r.1527g>u|c.1042G>T|p.E348*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:23:0:0:0:21:0:0 0|1:0:0:22:3:0:0:17:3:0.13 -chr15 44715437 781d409c-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=77;MP=1;GP=6.4e-14;TG=CC/TTTTT;TP=0.96;SG=CC/CTTTT;SP=0.036;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=B2M-TEST|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=B2M|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:33:0:0:0:16:1:0:0 1|1:0:0:0:7:0:1:0:19:0.96 -chr18 56686854 783e6100-df72-11e9-aae8-d600ec72acc9 G T . PASS DP=69;MP=1;GP=1.7e-13;TG=GG/GGTTT;TP=0.75;SG=GG/GTTTT;SP=0.13;ASRD=0.87;CLPM=0;ASMD=131;VT=Sub;VD=WDR7|CCDS11962.1|r.809-1g>u|c.598-1G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=WDR7|CCDS11962.1|r.809-1g>u|c.598-1G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:29:0:0:0:19:0:0 0|1:0:0:6:7:1:0:2:5:0.57 -chr19 45821379 784421ee-df72-11e9-aae8-d600ec72acc9 C A . PASS DP=51;MP=1;GP=2.8e-06;TG=CC/AAACC;TP=0.85;SG=CC/AAAAC;SP=0.1;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=SYMPK|CCDS12676.2|r.3138+5g>u|c.2893+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=SYMPK|CCDS12676.2|r.3138+5g>u|c.2893+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:14:0:0:0:9:0:0:0 0|1:8:5:0:0:9:6:0:0:0.61 -chr2 1453776 78468510-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=71;MP=1;GP=8.3e-12;TG=CC/CCCTT;TP=0.68;SG=CC/CCTTT;SP=0.31;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=TPO|CCDS1643.1|r.656c>u|c.565C>T|p.R189*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TPO|CCDS1643.1|r.656c>u|c.565C>T|p.R189*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:25:0:0:0:17:0:0:0 0|1:0:9:0:6:0:7:0:7:0.45 -chr2 96610725 785c6560-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=81;MP=0.88;GP=2.5e-09;TG=CC/CCCCT;TP=0.88;SG=CC/CCCCC;SP=0.12;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=KANSL3|CCDS46361.1|r.1396+1g>a|c.1319+1G>A|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=KANSL3|CCDS46361.1|r.1396+1g>a|c.1319+1G>A|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:21:0:0:0:12:0:0:0 0|1:0:23:0:3:0:20:0:2:0.1 -chr2 165901790 78635988-df72-11e9-aae8-d600ec72acc9 G A . PASS DP=81;MP=1;GP=2.1e-12;TG=GG/AAAAG;TP=0.65;SG=GG/AAAGG;SP=0.35;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=TTC21B|CCDS33315.1|r.2827c>u|c.2689C>T|p.Q897*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TTC21B|CCDS33315.1|r.2827c>u|c.2689C>T|p.Q897*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:22:0:0:0:22:0:0 0|1:15:0:7:0:11:0:4:0:0.7 -chr2 178775881 7864149a-df72-11e9-aae8-d600ec72acc9 C A . PASS DP=67;MP=1;GP=4.5e-10;TG=CC/AAACC;TP=0.52;SG=CC/AACCC;SP=0.48;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=TTN|CCDS59435.1|r.6208g>u|c.5983G>T|p.E1995*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TTN|CCDS59435.1|r.6208g>u|c.5983G>T|p.E1995*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:18:0:0:0:18:0:0:0 0|1:10:6:0:0:5:10:0:0:0.48 -chr4 47645090 78b1b89e-df72-11e9-aae8-d600ec72acc9 T A . PASS DP=71;MP=1;GP=1.6e-09;TG=TT/ATTTT;TP=0.99;SG=TT/AATTT;SP=0.014;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=CORIN|CCDS3477.1|r.1948a>u|c.1948A>T|p.K650*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=CORIN|CCDS3477.1|r.1948a>u|c.1948A>T|p.K650*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:0:18:0:0:0:16:0 0|1:3:0:0:19:3:0:0:12:0.16 -chr5 17634694 78c6c4be-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=85;MP=1;GP=2.3e-12;TG=CC/CCCCT;TP=0.61;SG=CC/CCCTT;SP=0.39;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=AC233724.12|ENST00000512227|r.235c>u|c.235C>T|p.Q79*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=AC233724.12|ENST00000512227|r.235c>u|c.235C>T|p.Q79*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:23:0:0:0:21:0:0:0 0|1:0:10:0:7:0:20:0:4:0.27 -chr5 112815507 78d23984-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=74;MP=1;GP=3.7e-15;TG=CC/TTTTT;TP=0.87;SG=CC/CTTTT;SP=0.13;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=APC-TEST|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=APC|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:28:0:0:0:25:0:0:0 1|1:0:0:0:11:0:1:0:9:0.95 -chrX 135545510 7928ae68-df72-11e9-aae8-d600ec72acc9 A T . PASS DP=84;MP=1;GP=2.7e-11;TG=AA/AAAAT;TP=1;SG=AA/AAATT;SP=0.0022;ASRD=0.95;CLPM=0;ASMD=143.5;VT=Sub;VD=INTS6L|CCDS35401.1|r.611a>u|c.277A>T|p.R93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=INTS6L|CCDS35401.1|r.611a>u|c.277A>T|p.R93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:18:0:0:0:22:0:0:0:0 0|1:18:0:0:3:20:0:0:3:0.14 diff --git a/tests/test_output_lof/input_genes_lof.vcf.gz b/tests/test_output_lof/input_genes_lof.vcf.gz index 0b535f1..a6b37d2 100644 Binary files a/tests/test_output_lof/input_genes_lof.vcf.gz and b/tests/test_output_lof/input_genes_lof.vcf.gz differ diff --git a/tests/test_output_lof/input_genes_lof.vcf.gz.tbi b/tests/test_output_lof/input_genes_lof.vcf.gz.tbi index a3593bd..983ac03 100644 Binary files a/tests/test_output_lof/input_genes_lof.vcf.gz.tbi and b/tests/test_output_lof/input_genes_lof.vcf.gz.tbi differ diff --git a/tests/test_output_lof/input_muts.vcf.gz b/tests/test_output_lof/input_muts.vcf.gz index 70bcc98..a54e813 100644 Binary files a/tests/test_output_lof/input_muts.vcf.gz and b/tests/test_output_lof/input_muts.vcf.gz differ diff --git a/tests/test_output_lof/input_muts.vcf.gz.tbi b/tests/test_output_lof/input_muts.vcf.gz.tbi index e2145b9..c084986 100644 Binary files a/tests/test_output_lof/input_muts.vcf.gz.tbi and b/tests/test_output_lof/input_muts.vcf.gz.tbi differ diff --git a/tests/test_output_no_vaf_vcf/input_drv.vcf.gz b/tests/test_output_no_vaf_vcf/input_drv.vcf.gz new file mode 100644 index 0000000..4d83284 Binary files /dev/null and b/tests/test_output_no_vaf_vcf/input_drv.vcf.gz differ diff --git a/tests/test_output_no_vaf_vcf/input_drv.vcf.gz.tbi b/tests/test_output_no_vaf_vcf/input_drv.vcf.gz.tbi new file mode 100644 index 0000000..a576936 Binary files /dev/null and b/tests/test_output_no_vaf_vcf/input_drv.vcf.gz.tbi differ diff --git a/tests/test_output_no_vaf_vcf/input_genes.vcf b/tests/test_output_no_vaf_vcf/input_genes.vcf new file mode 100644 index 0000000..11b5f03 --- /dev/null +++ b/tests/test_output_no_vaf_vcf/input_genes.vcf @@ -0,0 +1,97 @@ +##fileformat=VCFv4.1 +##FILTER= +##FILTER== 25 base quality"> +##FILTER= +##FILTER== 15 base quality found in the matched normal"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER== 3 mutant allele present in at least 1 percent unmatched normal samples in the unmatched VCF."> +##FILTER== 10 on each strand but mutant allele is only present on one strand"> +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##SAMPLE= +##SAMPLE= +##cavemanVersion=1.14.1 +##cgpAnalysisProc_20200116.1=5524663 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20200115 +##reference=/lustre/scratch119/casm/team78pipelines/canpipe/live/data/analysis/2126_2577635/CaVEMan/genome.fa +##source_20200119.1=AnnotateVcf.pl +##vcfProcessLog=,InputVCFSource=,InputVCFParam=> +##vcfProcessLog_20200116.1=,InputVCFSource=,InputVCFVer=<1.8.9>,InputVCFParam=> +##vcfProcessLog_20200119.1=,InputVCFSource=,InputVCFVer=<3.5.2>,InputVCFParam=> +##bcftools_viewVersion=1.12+htslib-1.12 +##bcftools_viewCommand=view -R lof_pos.txt /lustre/scratch117/casm/team215/sb43/organoid_analysis/WGS/p2126/out_vafcorrect//WTSI-COLO_005_1pre.caveman_c.annot.vcf.gz; Date=Tue Mar 30 14:23:07 2021 +##INFO= +##bcftools_annotateVersion=1.12+htslib-1.12 +##bcftools_annotateCommand=annotate -a test_output/tmpdtvu0kq9/genome.tab.gz -i '(FILTER="PASS" ) && ( INFO/VC="stop_lost,start_lost,ess_splice,frameshift,nonsense")' -h /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/annotate/config/info.header -c CHROM,FROM,TO,INFO/DRV input.vcf.gz; Date=Tue Mar 30 16:55:58 2021 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOUR +chr1 161193208 3883f734-37d6-11ea-a6a1-4d80907df7c2 C A . PASS DP=63;MP=1;GP=1.5e-07;TG=CC/ACC;TP=0.99;SG=CC/AAC;SP=0.014;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=ADAMTS4|CCDS1223.1|r.2340+5g>u|c.1911+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=ADAMTS4|CCDS1223.1|r.2340+5g>u|c.1911+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:12:0:0:0:15:0:0:0 0|1:9:12:0:0:5:9:1:0:0.39 +chr1 219210652 38868fa8-37d6-11ea-a6a1-4d80907df7c2 G A . PASS DP=91;MP=1;GP=2.6e-13;TG=GG/AAG;TP=1;SG=GG/AGG;SP=3.4e-06;ASRD=0.92;CLPM=0;ASMD=139.5;VT=Sub;VD=LYPLAL1|CCDS1522.1|r.524+5g>a|c.477+5G>A|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=LYPLAL1|CCDS1522.1|r.524+5g>a|c.477+5G>A|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:26:0:0:0:21:0:0 0|1:17:1:7:0:13:0:6:0:0.68 +chr1 247934046 3887f190-37d6-11ea-a6a1-4d80907df7c2 C A . PASS DP=76;MP=1;GP=1.3e-13;TG=CC/ACC;TP=1;SG=CC/AAC;SP=0.0014;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=OR2AJ1|ENST00000318244|r.580c>a|c.278C>A|p.S93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=OR2AJ1|ENST00000318244|r.580c>a|c.278C>A|p.S93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:22:0:0:0:26:0:0:0 0|1:2:9:0:0:7:10:0:0:0.32 +chr10 113160663 38932966-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=72;MP=1;GP=9.3e-10;TG=CC/CCT;TP=0.52;SG=CC/CTT;SP=0.48;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=TCF7L2|CCDS53577.1|r.1713c>u|c.1363C>T|p.R455*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TCF7L2|CCDS53577.1|r.1713c>u|c.1363C>T|p.R455*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:17:0:0:1:17:0:0:0 0|1:0:13:0:8:0:6:0:10:0.49 +chr13 43888891 38b522aa-37d6-11ea-a6a1-4d80907df7c2 G T . PASS DP=89;MP=1;GP=2e-12;TG=GG/GGGT;TP=1;SG=GG/GGTT;SP=1.6e-05;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=LACC1|CCDS9391.1|r.1527g>u|c.1042G>T|p.E348*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=LACC1|CCDS9391.1|r.1527g>u|c.1042G>T|p.E348*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:23:0:0:0:21:0:0 0|1:0:0:22:3:0:0:17:3:0.13 +chr15 44715437 38c39ba0-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=77;MP=1;GP=6.8e-14;TG=CC/TT;TP=1;SG=CC/CT;SP=6.5e-06;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=B2M|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=B2M|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:33:0:0:0:16:1:0:0 1|1:0:0:0:7:0:1:0:19:0.96 +chr18 56686854 38d994dc-37d6-11ea-a6a1-4d80907df7c2 G T . PASS DP=69;MP=1;GP=1.2e-13;TG=GG/GT;TP=1;SG=GT/GT;SP=1.2e-13;ASRD=0.87;CLPM=0;ASMD=131;VT=Sub;VD=WDR7|CCDS11962.1|r.809-1g>u|c.598-1G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=WDR7|CCDS11962.1|r.809-1g>u|c.598-1G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:29:0:0:0:19:0:0 0|1:0:0:6:7:1:0:2:5:0.57 +chr19 45821379 38ddcfca-37d6-11ea-a6a1-4d80907df7c2 C A . PASS DP=51;MP=1;GP=2.3e-06;TG=CC/AAC;TP=0.99;SG=CC/ACC;SP=0.01;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=SYMPK|CCDS12676.2|r.3138+5g>u|c.2893+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;VC=ess_splice;VW=SYMPK|CCDS12676.2|r.3138+5g>u|c.2893+5G>T|p.?|protein_coding:CDS:extended_cis_splice_site:substitution:splice_site_variant:transcript_variant|SO:0000010:SO:0000316:SO:0001993:SO:1000002:SO:0001629:SO:0001576;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:14:0:0:0:9:0:0:0 0|1:8:5:0:0:9:6:0:0:0.61 +chr2 1453776 38df3004-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=71;MP=1;GP=7.7e-12;TG=CC/CCTT;TP=0.94;SG=CC/CCCT;SP=0.056;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=TPO|CCDS1643.1|r.656c>u|c.565C>T|p.R189*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TPO|CCDS1643.1|r.656c>u|c.565C>T|p.R189*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:25:0:0:0:17:0:0:0 0|1:0:9:0:6:0:7:0:7:0.45 +chr2 165901790 38f5dd86-37d6-11ea-a6a1-4d80907df7c2 G A . PASS DP=81;MP=1;GP=1.8e-12;TG=GG/AAG;TP=1;SG=GG/AGG;SP=1.6e-05;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=TTC21B|CCDS33315.1|r.2827c>u|c.2689C>T|p.Q897*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TTC21B|CCDS33315.1|r.2827c>u|c.2689C>T|p.Q897*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:22:0:0:0:22:0:0 0|1:15:0:7:0:11:0:4:0:0.7 +chr2 178775881 38f67ec6-37d6-11ea-a6a1-4d80907df7c2 C A . PASS DP=67;MP=1;GP=4.2e-10;TG=CC/AACC;TP=0.98;SG=CC/ACCC;SP=0.013;ASRD=0.93;CLPM=0;ASMD=141;VT=Sub;VD=TTN|CCDS59435.1|r.6208g>u|c.5983G>T|p.E1995*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=TTN|CCDS59435.1|r.6208g>u|c.5983G>T|p.E1995*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:18:0:0:0:18:0:0:0 0|1:10:6:0:0:5:10:0:0:0.48 +chr4 47645090 392fb844-37d6-11ea-a6a1-4d80907df7c2 T A . PASS DP=71;MP=1;GP=1.4e-09;TG=TT/ATT;TP=1;SG=TT/TTT;SP=1.3e-05;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=CORIN|CCDS3477.1|r.1948a>u|c.1948A>T|p.K650*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=CORIN|CCDS3477.1|r.1948a>u|c.1948A>T|p.K650*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:0:0:18:0:0:0:16:0 0|1:3:0:0:19:3:0:0:12:0.16 +chr5 17634694 393ed4e6-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=85;MP=1;GP=1.9e-12;TG=CC/CCT;TP=1;SG=CC/CTT;SP=4.3e-06;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=AC233724.12|ENST00000512227|r.235c>u|c.235C>T|p.Q79*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=AC233724.12|ENST00000512227|r.235c>u|c.235C>T|p.Q79*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:23:0:0:0:21:0:0:0 0|1:0:10:0:7:0:20:0:4:0.27 +chr5 112815507 3948d8a6-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=74;MP=1;GP=3.8e-15;TG=CC/TT;TP=1;SG=CC/CT;SP=0.00055;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=APC|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=APC|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:28:0:0:0:25:0:0:0 1|1:0:0:0:11:0:1:0:9:0.95 +chrX 135545510 398ea07a-37d6-11ea-a6a1-4d80907df7c2 A T . PASS DP=84;MP=1;GP=2.8e-11;TG=AA/AAAT;TP=1;SG=AA/AATT;SP=2.3e-05;ASRD=0.95;CLPM=0;ASMD=143.5;VT=Sub;VD=INTS6L|CCDS35401.1|r.611a>u|c.277A>T|p.R93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=INTS6L|CCDS35401.1|r.611a>u|c.277A>T|p.R93*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:18:0:0:0:22:0:0:0:0 0|1:18:0:0:3:20:0:0:3:0.14 diff --git a/tests/test_output_lof/input_genes_lof.vcf b/tests/test_output_no_vaf_vcf/input_genes_lof.vcf similarity index 69% rename from tests/test_output_lof/input_genes_lof.vcf rename to tests/test_output_no_vaf_vcf/input_genes_lof.vcf index 52fe81d..ea73295 100644 --- a/tests/test_output_lof/input_genes_lof.vcf +++ b/tests/test_output_no_vaf_vcf/input_genes_lof.vcf @@ -41,8 +41,8 @@ ##INFO= ##SAMPLE= ##SAMPLE= -##cavemanVersion=1.13.14 -##cgpAnalysisProc_20190930.1=5346618 +##cavemanVersion=1.14.1 +##cgpAnalysisProc_20200116.1=5524663 ##contig= ##contig= ##contig= @@ -68,15 +68,17 @@ ##contig= ##contig= ##contig= -##fileDate=20190924 +##fileDate=20200115 ##reference=/lustre/scratch119/casm/team78pipelines/canpipe/live/data/analysis/2126_2577635/CaVEMan/genome.fa -##source_20190930.1=AnnotateVcf.pl -##vcfProcessLog=,InputVCFSource=,InputVCFParam=> -##vcfProcessLog_20190930.1=,InputVCFSource=,InputVCFVer=<1.8.8>,InputVCFParam=> -##vcfProcessLog_20190930.2=,InputVCFSource=,InputVCFVer=<3.5.0>,InputVCFParam=> +##source_20200119.1=AnnotateVcf.pl +##vcfProcessLog=,InputVCFSource=,InputVCFParam=> +##vcfProcessLog_20200116.1=,InputVCFSource=,InputVCFVer=<1.8.9>,InputVCFParam=> +##vcfProcessLog_20200119.1=,InputVCFSource=,InputVCFVer=<3.5.2>,InputVCFParam=> +##bcftools_viewVersion=1.12+htslib-1.12 +##bcftools_viewCommand=view -R lof_pos.txt /lustre/scratch117/casm/team215/sb43/organoid_analysis/WGS/p2126/out_vafcorrect//WTSI-COLO_005_1pre.caveman_c.annot.vcf.gz; Date=Tue Mar 30 14:23:07 2021 ##INFO= -##bcftools_annotateVersion=1.10.2-64-gf66af73+htslib-1.10.2-85-g16f62c5 -##bcftools_annotateCommand=annotate -a /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/tests/tmpout/tmp_jp6ts6o/genome.tab.gz -i '(FILTER="PASS") && (INFO/VC="stop_lost" || INFO/VC="start_lost" || INFO/VC="ess_splice" || INFO/VC="frameshift" || INFO/VC="nonsense")' -h /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/tests/test_input_lof/info.header -c CHROM,FROM,TO,INFO/DRV /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/tests/test_input_lof/input.vcf.gz; Date=Sat Aug 29 15:57:54 2020 +##bcftools_annotateVersion=1.12+htslib-1.12 +##bcftools_annotateCommand=annotate -a test_output/tmpdtvu0kq9/genome.tab.gz -i '(FILTER="PASS" ) && ( INFO/VC="stop_lost,start_lost,ess_splice,frameshift,nonsense")' -h /nfs/users/nfs_s/sb43/scripts/annotatevcf_github/annotate/config/info.header -c CHROM,FROM,TO,INFO/DRV input.vcf.gz; Date=Tue Mar 30 16:55:58 2021 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOUR -chr15 44715437 781d409c-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=77;MP=1;GP=6.4e-14;TG=CC/TTTTT;TP=0.96;SG=CC/CTTTT;SP=0.036;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=B2M-TEST|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=B2M|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:33:0:0:0:16:1:0:0 1|1:0:0:0:7:0:1:0:19:0.96 -chr5 112815507 78d23984-df72-11e9-aae8-d600ec72acc9 C T . PASS DP=74;MP=1;GP=3.7e-15;TG=CC/TTTTT;TP=0.87;SG=CC/CTTTT;SP=0.13;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=APC-TEST|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=APC|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:28:0:0:0:25:0:0:0 1|1:0:0:0:11:0:1:0:9:0.95 +chr15 44715437 38c39ba0-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=77;MP=1;GP=6.8e-14;TG=CC/TT;TP=1;SG=CC/CT;SP=6.5e-06;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=B2M|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=B2M|CCDS10113.1|r.152c>u|c.82C>T|p.Q28*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:33:0:0:0:16:1:0:0 1|1:0:0:0:7:0:1:0:19:0.96 +chr5 112815507 3948d8a6-37d6-11ea-a6a1-4d80907df7c2 C T . PASS DP=74;MP=1;GP=3.8e-15;TG=CC/TT;TP=1;SG=CC/CT;SP=0.00055;ASRD=0.97;CLPM=0;ASMD=146;VT=Sub;VD=APC|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;VC=nonsense;VW=APC|CCDS4107.1|r.903c>u|c.847C>T|p.R283*|protein_coding:exon:CDS:substitution:codon_variant:stop_gained|SO:0000010:SO:0000147:SO:0000316:SO:1000002:SO:0001581:SO:0001587;DRV=LoF GT:FAZ:FCZ:FGZ:FTZ:RAZ:RCZ:RGZ:RTZ:PM 0|0:0:28:0:0:0:25:0:0:0 1|1:0:0:0:11:0:1:0:9:0.95 diff --git a/tests/test_output_no_vaf_vcf/input_genes_lof.vcf.gz b/tests/test_output_no_vaf_vcf/input_genes_lof.vcf.gz new file mode 100644 index 0000000..7c0e10b Binary files /dev/null and b/tests/test_output_no_vaf_vcf/input_genes_lof.vcf.gz differ diff --git a/tests/test_output_no_vaf_vcf/input_genes_lof.vcf.gz.tbi b/tests/test_output_no_vaf_vcf/input_genes_lof.vcf.gz.tbi new file mode 100644 index 0000000..e9182b3 Binary files /dev/null and b/tests/test_output_no_vaf_vcf/input_genes_lof.vcf.gz.tbi differ diff --git a/tests/test_output_no_vaf_vcf/input_muts.vcf.gz b/tests/test_output_no_vaf_vcf/input_muts.vcf.gz new file mode 100644 index 0000000..96f4c33 Binary files /dev/null and b/tests/test_output_no_vaf_vcf/input_muts.vcf.gz differ diff --git a/tests/test_output_no_vaf_vcf/input_muts.vcf.gz.tbi b/tests/test_output_no_vaf_vcf/input_muts.vcf.gz.tbi new file mode 100644 index 0000000..d9dcc1a Binary files /dev/null and b/tests/test_output_no_vaf_vcf/input_muts.vcf.gz.tbi differ