Skip to content

Commit

Permalink
Merge branch 'TASK-6766' of https://github.com/opencb/opencga into TA…
Browse files Browse the repository at this point in the history
…SK-6766
  • Loading branch information
jtarraga committed Sep 9, 2024
2 parents ea2d888 + faf1d72 commit 3e92c9e
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 64 deletions.
104 changes: 41 additions & 63 deletions opencga-app/app/analysis/qc/sample_qc/sample_qc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
import subprocess

import sys
import os
import logging
Expand Down Expand Up @@ -30,79 +30,57 @@ def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sam
self.id_ = id_

def run(self):
    """
    Run the sample QC steps that are currently implemented (only the BCFTools stats)
    :return: None
    """
    # TODO check input data before running any step. Planned calls:
    #   self.checking_data()
    #   check_data()
    #   relatedness()
    #   inferred_sex()
    #   if info_file.somatic == True && config.mutational_signature.skip == False:
    #       mutational_signature()
    #   mendelian_errors()

    # VCF stats is the only step executed for now
    input_vcf = self.vcf_file
    self.bcftools_stats(vcf_file=input_vcf)

    # TODO run all necessary steps for this QC (e.g. relatedness). Planned calls:
    #   self.step1()
    #   missingness()
    #   heterozygosity()
    #   roh()
    #   upd()

    # TODO return results
    return None

def step1(self):
    """
    Placeholder QC step: requests the 'step1' output dir; the step commands are not implemented yet
    :return: None
    """
    # Output dir for this step, located under the parent output dir
    step_dir = create_output_dir([self.output_parent_dir, 'step1'])

    # TODO execute this step commands (they are expected to write into step_dir)
    return None
def bcftools_stats(self, vcf_file):
    """
    Calculates VCF stats using BCFTools
    :param str vcf_file: VCF file to get stats from
    :return: None
    """
    # Function-scope import: only this method needs it
    import shlex

    # Creating output dir for bcftools
    output_dir = create_output_dir([self.output_parent_dir, 'bcftools'])
    stats_file = os.path.join(output_dir, 'bcftools_stats.txt')

    # Running bcftools. The command is a single string with a '>' redirection, so it has to be
    # interpreted by a shell: both paths are quoted to keep blanks or shell metacharacters in a
    # path from splitting or altering the command
    cmd_bcftools = 'bcftools stats -v {vcf} > {out}'.format(vcf=shlex.quote(vcf_file),
                                                            out=shlex.quote(stats_file))
    execute_bash_command(cmd=cmd_bcftools)
    LOGGER.info("BCFTools stats calculated successfully for {file}".format(file=vcf_file))

# Hard-coded inputs passed to run() when this file is executed as a script
# NOTE(review): vcf_file is a developer-local path, presumably for manual testing only; confirm
vcf_file = "/home/mbleda/Downloads/KFSHRC/CGMQ2022-02850.vcf.gz"
output_dir = "/tmp/qc_tests/"
# Empty placeholders: run() accepts them but does not forward them to any step yet
sample_ids = ""
info_file = ""
config = ""

def run(vcf_file, sample_ids, info_file, config, output_dir):
    """
    Run the sample QC steps that are currently implemented (only the BCFTools stats)
    :param vcf_file: VCF file to run the QC on
    :param sample_ids: Not used yet
    :param info_file: Not used yet
    :param config: Not used yet
    :param output_dir: Output directory
    :return: None
    """
    # Steps planned to run before the VCF stats (not implemented yet):
    #   check_data()
    #   relatedness()
    #   inferred_sex()
    #   if info_file.somatic == True && config.mutational_signature.skip == False:
    #       mutational_signature()
    #   mendelian_errors()

    bcftools_stats(output_dir=output_dir, vcf_file=vcf_file)

    # Steps planned to run after the VCF stats (not implemented yet):
    #   missingness()
    #   heterozygosity()
    #   roh()
    #   upd()
    return None


def bcftools_stats(vcf_file, output_dir):
    """
    Calculates VCF stats using BCFTools and plots them using plot-vcfstats
    :param vcf_file: VCF file to get stats from
    :param output_dir: Output directory
    :return: None
    :raises Exception: Propagated from exec_bash_command when a command returns a non-zero code
    """
    # Function-scope import: only this function needs it
    import shlex

    stats_file = os.path.join(output_dir, 'bcftools_stats.txt')
    plots_prefix = os.path.join(output_dir, 'bcftools_stats_plots')

    # Both commands are interpreted by a shell, so every path is quoted to keep blanks or shell
    # metacharacters from splitting or altering the command
    exec_bash_command(cmd_line='bcftools stats -v {vcf} > {out}'.format(vcf=shlex.quote(vcf_file),
                                                                        out=shlex.quote(stats_file)))
    # No return code check needed: exec_bash_command raises when the command fails
    print("BCFTools stats calculated successfully for {file}".format(file=vcf_file))

    # Plot stats using plot-vcfstats (run once)
    cmd_vcfstats = 'plot-vcfstats -p {prefix} {stats}'.format(prefix=shlex.quote(plots_prefix),
                                                              stats=shlex.quote(stats_file))
    exec_bash_command(cmd_line=cmd_vcfstats)
    LOGGER.info("Plot stats using plot-vcfstats executed successfully for {file}".format(file=vcf_file))

    # TODO: Future implementation for vcf stats plots
    # plot_bcftools_stats(file=output_dir + '/bcftools_stats.txt', prefix="stats", output=output_dir)

def exec_bash_command(cmd_line):
    """
    Run a shell command (e.g. bcftools), and return output
    :param str cmd_line: Command line, interpreted by the shell (pipes and redirections are allowed)
    :return: Tuple with the return code (always 0 on return) and the captured stdout as bytes
    :raises Exception: If the command finishes with a non-zero return code
    """
    # run() waits for the process and collects both streams, so no extra wait() call is needed
    completed = subprocess.run(cmd_line,
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    if completed.returncode != 0:
        # Streams are decoded for the message only, so it shows readable text and not a bytes repr
        raise Exception(
            "Command line {} got return code {}.\nSTDOUT: {}\nSTDERR: {}".format(
                cmd_line, completed.returncode,
                completed.stdout.decode(errors='replace'),
                completed.stderr.decode(errors='replace')))

    return completed.returncode, completed.stdout

if __name__ == '__main__':
    # NOTE(review): sys.exit() raises SystemExit, so none of the statements after the next line are
    # executed. It looks like two alternative entry points (the run() function and the
    # SampleQCExecutor class) ended up together here; confirm which one should remain
    sys.exit(run(vcf_file=vcf_file, sample_ids=sample_ids, info_file=info_file, config=config, output_dir=output_dir))
    # Hard-coded inputs for a manual run
    # NOTE(review): vcf_file is a developer-local path, presumably for manual testing only; confirm
    vcf_file = "/home/mbleda/Downloads/KFSHRC/CGMQ2022-02850.vcf.gz"
    info_file = ""
    bam_file = ""
    config = ""
    output_parent_dir = "/tmp/qc_tests/"
    sample_ids = []
    id_ = ""
    # The value returned by run() is used as the process exit status
    se = SampleQCExecutor(vcf_file=vcf_file, info_file=info_file, bam_file=bam_file, config=config,
                          output_parent_dir=output_parent_dir, sample_ids=sample_ids, id_=id_)
    sys.exit(se.run())
10 changes: 9 additions & 1 deletion opencga-app/app/analysis/qc/variant_qc.main.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,15 @@ def main():
shutil.copy(info_jsons[i], qc_outdir_fpath)

# Execute QC
qc_executor(vcf_files[i], info_jsons[i], bam_files[i], config, qc_outdir_fpath, sample_ids, id_).run()
qc_executor(
vcf_file=vcf_files[i],
info_file=info_jsons[i],
bam_file=bam_files[i],
config=config,
output_parent_dir=qc_outdir_fpath,
sample_ids=sample_ids,
id_=id_
).run()

# Script entry point: the value returned by main() is used as the process exit status
if __name__ == '__main__':
    sys.exit(main())

0 comments on commit 3e92c9e

Please sign in to comment.