From f271e48bd2d0ac56cb472f0e6c254283e30083de Mon Sep 17 00:00:00 2001 From: dapregi Date: Wed, 11 Sep 2024 16:48:44 +0100 Subject: [PATCH] qc: draft code migration from zetta-bioinfo #TASK-6766 --- .../app/analysis/qc/family_qc/family_qc.py | 253 +++++++---- .../qc/individual_qc/individual_qc.py | 411 +++++++++++++++--- .../app/analysis/qc/sample_qc/sample_qc.py | 79 +++- .../app/analysis/qc/variant_qc.main.py | 4 +- 4 files changed, 570 insertions(+), 177 deletions(-) diff --git a/opencga-app/app/analysis/qc/family_qc/family_qc.py b/opencga-app/app/analysis/qc/family_qc/family_qc.py index 88698ba8aa..6537724d37 100644 --- a/opencga-app/app/analysis/qc/family_qc/family_qc.py +++ b/opencga-app/app/analysis/qc/family_qc/family_qc.py @@ -9,6 +9,7 @@ LOGGER = logging.getLogger('variant_qc_logger') + class FamilyQCExecutor: def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sample_ids, id_): """Create output dir @@ -29,19 +30,24 @@ def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sam self.sample_ids = sample_ids self.id_ = id_ - def filter_rename_variants_vcf(self,pop_freq_fpath,outdir_fpath): + # Loading configuration + config_fhand = open(self.config, 'r') + self.config_json = json.load(config_fhand) + config_fhand.close() + + def filter_rename_variants_vcf(self, pop_freq_fpath, outdir_fpath): # Reading VCF - vcf_fhand = gzip.open(self.vcf_file,'r') + vcf_fhand = gzip.open(self.vcf_file, 'r') # Reading pop_freq file - input_pop_freq_fhand = open(str(pop_freq_fpath),'r') + input_pop_freq_fhand = open(str(pop_freq_fpath), 'r') LOGGER.debug('Getting variant IDs to include in the VCF from file: "{}"'.format(pop_freq_fpath)) - variant_ids_to_include = [line.strip().split()[1] for line in input_pop_freq_fhand] + variant_ids_to_include = [line.strip().split()[1] for line in input_pop_freq_fhand] # Create output dir and file - filtered_vcf_outdir_fpath = create_output_dir(path_elements=[str(outdir_fpath),'filtered_vcf']) 
+ filtered_vcf_outdir_fpath = create_output_dir(path_elements=[str(outdir_fpath), 'filtered_vcf']) output_file_name = 'filtered_vcf_' + str(self.vcf_file.split('/')[-1]) - filtered_vcf_fpath = os.path.join(filtered_vcf_outdir_fpath,output_file_name) - filtered_vcf_fhand = gzip.open(filtered_vcf_fpath,'wt') + filtered_vcf_fpath = os.path.join(filtered_vcf_outdir_fpath, output_file_name) + filtered_vcf_fhand = gzip.open(filtered_vcf_fpath, 'wt') LOGGER.debug('Generating filtered VCF with variant IDs under ID column: "{}"'.format(filtered_vcf_fpath)) # Generate VCF with variant IDs under ID column @@ -54,7 +60,7 @@ def filter_rename_variants_vcf(self,pop_freq_fpath,outdir_fpath): else: # Getting variant data variant_items = line.strip().split() - variant_id = ':'.join(variant_items[0],variant_items[1],variant_items[3],variant_items[4]) + variant_id = ':'.join([variant_items[0], variant_items[1], variant_items[3], variant_items[4]]) if 'chr' not in variant_items[0]: variant_id = 'chr' + variant_id if variant_id in variant_ids_to_include: @@ -66,10 +72,12 @@ def filter_rename_variants_vcf(self,pop_freq_fpath,outdir_fpath): return filtered_vcf_fpath def get_samples_individuals_info(self): - family_info_json = json.load(self.info_file) + family_info_fhand = open(self.info_file) + family_info_json = json.load(family_info_fhand) samples_individuals = {} for sample in self.sample_ids: - samples_individuals[sample] = {'individualId': '','individualSex': 0, 'fatherId': 'NA','motherId': 'NA', 'familyMembersRoles': 'NA'} + samples_individuals[sample] = {'individualId': '', 'individualSex': 0, 'fatherId': 'NA', 'motherId': 'NA', + 'familyMembersRoles': 'NA'} LOGGER.debug('Getting individual information for each sample') for member in family_info_json['members']: @@ -83,7 +91,9 @@ def get_samples_individuals_info(self): elif (member['sex']['id']).upper() == 'FEMALE' or member['karyotypicSex'] == 'XX': samples_individuals[sample_member['id']]['individualSex'] = 2 else: - 
LOGGER.warning('Sex information for individual "{}" (sample "{}") is not available. Hence, sex code for the fam file will be 0.'.format(member['id'],sample_member['id'])) + LOGGER.warning( + 'Sex information for individual "{}" (sample "{}") is not available. Hence, sex code for the fam file will be 0.'.format( + member['id'], sample_member['id'])) pass # Filling in father info if 'id' in member['father'].keys(): @@ -92,10 +102,11 @@ def get_samples_individuals_info(self): if 'id' in member['mother'].keys(): samples_individuals[sample_member['id']]['motherId'] = member['mother']['id'] # Filling in family roles info for the individual - samples_individuals[sample_member['id']]['familyMembersRoles'] = family_info_json['roles'][member['id']] + samples_individuals[sample_member['id']]['familyMembersRoles'] = family_info_json['roles'][ + member['id']] # Checking if individual information for each sample was found - for sample,individual_info in samples_individuals.items(): + for sample, individual_info in samples_individuals.items(): if individual_info['individualId'] == '': LOGGER.warning('No individual information available for sample "{}".'.format(sample['id'])) else: @@ -104,30 +115,33 @@ def get_samples_individuals_info(self): # Return samples_individuals dictionary return samples_individuals - def generate_files_for_plink_fam_file(self,outdir_fpath): + def generate_files_for_plink_fam_file(self, outdir_fpath): # Getting family id and sample_individuals_info family_id = self.id_ samples_individuals = self.get_samples_individuals_info() # Generating text file to update sex information sex_information_output_file_name = family_id + '_individual_sample_sex_information.txt' - sex_information_output_fpath = os.path.join(str(outdir_fpath),sex_information_output_file_name) - sex_information_output_fhand = open(sex_information_output_fpath,'w') - LOGGER.debug('Generating text file to update individual, sample, sex information: "{}"'.format(sex_information_output_fpath)) + 
sex_information_output_fpath = os.path.join(str(outdir_fpath), sex_information_output_file_name) + sex_information_output_fhand = open(sex_information_output_fpath, 'w') + LOGGER.debug('Generating text file to update individual, sample, sex information: "{}"'.format( + sex_information_output_fpath)) # Generating text file to update parent-offspring relationships parent_offspring_output_file_name = family_id + '_parent_offspring_relationships.txt' - parent_offspring_output_fpath = os.path.join(str(outdir_fpath),parent_offspring_output_file_name) - parent_offspring_output_fhand = open(parent_offspring_output_fpath,'w') - LOGGER.debug('Generating text file to update parent-offspring relationships: "{}"'.format(parent_offspring_output_fpath)) + parent_offspring_output_fpath = os.path.join(str(outdir_fpath), parent_offspring_output_file_name) + parent_offspring_output_fhand = open(parent_offspring_output_fpath, 'w') + LOGGER.debug( + 'Generating text file to update parent-offspring relationships: "{}"'.format(parent_offspring_output_fpath)) for sample in self.sample_ids: # Individual information for that sample individual_info = samples_individuals[sample] # Structure = FamilyID SampleID Sex - sex_information_output_fhand.write(('/t'.join([family_id,sample,individual_info['individualSex']])) + '\n') + sex_information_output_fhand.write( + ('\t'.join([family_id, sample, str(individual_info['individualSex'])])) + '\n') # Structure = FamilyID SampleID FatherID MotherID - parent_offspring_info = [family_id,sample,0,0] + parent_offspring_info = [family_id, sample, str(0), str(0)] father_id = individual_info['fatherId'] mother_id = individual_info['motherId'] if father_id != 'NA': @@ -140,61 +154,80 @@ def generate_files_for_plink_fam_file(self,outdir_fpath): if individual_info['individualId'] == mother_id: parent_offspring_info[3] = sample_id break - parent_offspring_output_fhand.write(('/t'.join(parent_offspring_info)) + '\n') - LOGGER.info('Text file generated to update 
individual, sample, sex information: "{}"'.format(sex_information_output_fpath)) - LOGGER.info('Text file generated to update parent-offspring relationships: "{}"'.format(parent_offspring_output_fpath)) + parent_offspring_output_fhand.write(('\t'.join(parent_offspring_info)) + '\n') + LOGGER.info('Text file generated to update individual, sample, sex information: "{}"'.format( + sex_information_output_fpath)) + LOGGER.info( + 'Text file generated to update parent-offspring relationships: "{}"'.format(parent_offspring_output_fpath)) # Return paths of text files generated. First path: individual, sample, sex information file. Second path: parent-offspring information file. - return [sex_information_output_fpath,parent_offspring_output_fpath] + return [sex_information_output_fpath, parent_offspring_output_fpath] def relatedness_results_data_model(method): - relatedness_json = [ - { - "method": "", - "maf": "", - "scores": [ - { - "sampleId1": "", - "sampleId2": "", - "reportedRelationship": "", - "inferredRelationship": "", - "validation": "", - "values": { - "RT": "", - "ez": "", - "z0": "", - "z1": "", - "z2": "", - "PiHat": "" - } + relatedness_json = { + "method": "", + "software": { + "name": "", + "version": "", + "commit": "", + "params": {} + }, + "scores": [ + { + "sampleId1": "", + "sampleId2": "", + "reportedRelationship": "", + "inferredRelationship": "", + "validation": "", + "values": { + "RT": "", + "ez": "", + "z0": "", + "z1": "", + "z2": "", + "PiHat": "" } - ] + } + ], + "images": [ + { + "name": "", + "base64": "", + "description": "" + } + ], + "attributes": { + "cli": "", + "files": [], + "JOB_ID": "" } - ] + } + method = str(method) if method == "PLINK/IBD": LOGGER.info('Relatedness method to be used: "{}"'.format(method)) relatedness_json["method"] = "PLINK/IBD" - relatedness_json["maf"] = "1000G:ALL>0.3" + relatedness_json["software"]["name"] = "plink" + relatedness_json["software"]["version"] = "1.9" else: LOGGER.info('Relatedness method to 
be used: "{}"'.format(method)) - relatedness_json["method"] = str(method) - relatedness_json["maf"] = "NA" + relatedness_json["method"] = method # Return relatedness json data model with method info filled in. return relatedness_json - def relatedness_plink(self,filtered_vcf_fpath,pop_freq_fpath,pop_exclude_var_fpath,outdir_fpath,plink_path,method="PLINK/IBD"): + def relatedness_plink(self, filtered_vcf_fpath, pop_freq_fpath, pop_exclude_var_fpath, outdir_fpath, plink_path, + method="PLINK/IBD"): LOGGER.info('Method: {}'.format(method)) - plink_outdir_fpath = create_output_dir(path_elements=[str(outdir_fpath),'plink_IBD']) + plink_outdir_fpath = create_output_dir(path_elements=[str(outdir_fpath), 'plink_IBD']) sex_info_fpath, parent_offspring_fpath = self.generate_files_for_plink_fam_file(outdir_fpath=plink_outdir_fpath) # Preparing PLINK commands plink_path = str(plink_path) files_prefix = self.id_ + "_plink_relatedness_results" - plink_output_folder_files_prefix = os.path.join(plink_outdir_fpath,files_prefix) + plink_output_folder_files_prefix = os.path.join(plink_outdir_fpath, files_prefix) cmd_plink_files = ' '.join([plink_path, "--vcf", str(filtered_vcf_fpath), "--make-bed", "--const-fid", self.id_, - "--chr","1-22", + "--chr", "1-22", "--not-chr", "X,Y,MT", "--allow-extra-chr", "--snps-only", @@ -207,33 +240,35 @@ def relatedness_plink(self,filtered_vcf_fpath,pop_freq_fpath,pop_exclude_var_fpa plink_files = execute_bash_command(cmd_plink_files) if plink_files[0] == 0: plink_files_generated = os.listdir(plink_outdir_fpath) - LOGGER.info('Files available in directory "{}":\n{}'.format(plink_outdir_fpath,plink_files_generated)) + LOGGER.info('Files available in directory "{}":\n{}'.format(plink_outdir_fpath, plink_files_generated)) cmd_plink_ibd = ' '.join([plink_path, - "--bfile", plink_output_folder_files_prefix, - "--genome", "rel-check", - "--read-freq", str(pop_freq_fpath), - "--exclude", str(pop_exclude_var_fpath), - "--out", 
plink_output_folder_files_prefix]) + "--bfile", plink_output_folder_files_prefix, + "--genome", "rel-check", + "--read-freq", str(pop_freq_fpath), + "--exclude", str(pop_exclude_var_fpath), + "--out", plink_output_folder_files_prefix]) LOGGER.debug("Performing IBD analysis") plink_ibd = execute_bash_command(cmd_plink_ibd) if plink_ibd[0] == 0: plink_files_generated = os.listdir(plink_outdir_fpath) - LOGGER.info('Files available in directory "{}":\n{}'.format(plink_outdir_fpath,plink_files_generated)) + LOGGER.info('Files available in directory "{}":\n{}'.format(plink_outdir_fpath, plink_files_generated)) plink_genome_fpath = plink_output_folder_files_prefix + '.genome' if os.path.isfile(plink_genome_fpath) == False: - LOGGER.error('File "{}" does not exist. Check:\nSTDOUT: "{}"\nSTDERR: "{}"'.format(plink_genome_fpath,plink_ibd[1],plink_ibd[2])) - raise Exception('File "{}" does not exist. Check:\nSTDOUT: "{}"\nSTDERR: "{}"'.format(plink_genome_fpath,plink_ibd[1],plink_ibd[2])) + LOGGER.error('File "{}" does not exist. Check:\nSTDOUT: "{}"\nSTDERR: "{}"'.format(plink_genome_fpath, + plink_ibd[1], + plink_ibd[2])) + raise Exception( + 'File "{}" does not exist. 
Check:\nSTDOUT: "{}"\nSTDERR: "{}"'.format(plink_genome_fpath, + plink_ibd[1], plink_ibd[2])) else: # Return method used and path of .genome file generated by PLINK - return [method,plink_genome_fpath] + return [method, plink_genome_fpath] - def relatedness_validation(reported_result,inferred_result): + def relatedness_validation(reported_result, inferred_result): LOGGER.debug('Comparing reported {} and inferred {} results'.format(reported_result, inferred_result)) - if 'UNKNOWN' in reported_result or 'UNKNOWN' in inferred_result: - validation = "UNKNOWN" - elif reported_result == "" or inferred_result == "": + if 'UNKNOWN' in reported_result or reported_result == "": validation = "UNKNOWN" else: if reported_result == inferred_result: @@ -243,17 +278,17 @@ def relatedness_validation(reported_result,inferred_result): # Return validation result return validation - def relatedness_inference(self,relatedness_thresholds_fpath,method,plink_genome_fpath): + def relatedness_inference(self, relatedness_thresholds_fpath, method, plink_genome_fpath): # Reading relatedness thresholds file (.tsv) LOGGER.debug('Getting relatedness thresholds from file: "{}"'.format(relatedness_thresholds_fpath)) relatedness_thresholds_fhand = open(str(relatedness_thresholds_fpath)) relationship_groups_thresholds_dict = {} - for index,line in enumerate(relatedness_thresholds_fhand): + for index, line in enumerate(relatedness_thresholds_fhand): relatedness_thresholds_row_values = line.strip().split() if index == 0: relatedness_thresholds_file_header = relatedness_thresholds_row_values continue - for column,value in enumerate(relatedness_thresholds_row_values): + for column, value in enumerate(relatedness_thresholds_row_values): if relatedness_thresholds_file_header[column] == 'relationship': relationship_groups_thresholds_dict[value] == {} else: @@ -265,7 +300,7 @@ def relatedness_inference(self,relatedness_thresholds_fpath,method,plink_genome_ relatedness_results = 
self.relatedness_results_data_model(method) relatedness_scores = [] - for index,line in enumerate(input_genome_file_fhand): + for index, line in enumerate(input_genome_file_fhand): genome_file_row_values = line.strip().split() if index == 0: continue @@ -281,76 +316,101 @@ def relatedness_inference(self,relatedness_thresholds_fpath,method,plink_genome_ score["values"]["PiHat"] = str(genome_file_row_values[9]) # Inferring family relationship block: - LOGGER.debug("Inferring family relationship between sample {} and sample {} ".format(str(genome_file_row_values[1]),str(genome_file_row_values[3]))) + LOGGER.debug( + "Inferring family relationship between sample {} and sample {} ".format(str(genome_file_row_values[1]), + str(genome_file_row_values[3]))) inference_groups = [] - for relationship,values in relationship_groups_thresholds_dict.items(): + for relationship, values in relationship_groups_thresholds_dict.items(): # Check if PI_HAT, Z0, Z1, Z2 values (from PLINK .genome file) are within range (internal thresholds) - if (values['minPiHat'] <= score["values"]["PiHat"] <= values['maxPiHat']) and (values['minZ0'] <= score["values"]["z0"] <= values['maxZ0']) and (values['minZ1'] <= score["values"]["z1"] <= values['maxZ1']) and (values['minZ2'] <= score["values"]["z2"] <= values['maxZ2']): + if (values['minPiHat'] <= score["values"]["PiHat"] <= values['maxPiHat']) and ( + values['minZ0'] <= score["values"]["z0"] <= values['maxZ0']) and ( + values['minZ1'] <= score["values"]["z1"] <= values['maxZ1']) and ( + values['minZ2'] <= score["values"]["z2"] <= values['maxZ2']): inference_groups.append(str(relationship)) continue if len(inference_groups) == 0: score["inferredRelationship"] = "UNKNOWN" - LOGGER.info("UNKNOWN family relationship inferred between sample {} and sample {} ".format(str(genome_file_row_values[1]),str(genome_file_row_values[3]))) + LOGGER.info("UNKNOWN family relationship inferred between sample {} and sample {} ".format( + 
str(genome_file_row_values[1]), str(genome_file_row_values[3]))) else: score["inferredRelationship"] = ', '.join(inference_groups) - LOGGER.info("Family relationship inferred between sample {} and sample {} ".format(str(genome_file_row_values[1]),str(genome_file_row_values[3]))) + LOGGER.info( + "Family relationship inferred between sample {} and sample {} ".format(str(genome_file_row_values[1]), + str(genome_file_row_values[3]))) relatedness_scores.append(score) relatedness_results["scores"] = relatedness_scores # Return dict/json with plink and inferred results return relatedness_results - def relatedness_report(self,relatedness_inference_results): + def relatedness_report(self, relatedness_inference_results): samples_individuals = self.get_samples_individuals_info() # Getting reported family relationship block: relatedness_results = relatedness_inference_results for score_result in relatedness_inference_results["scores"]: - LOGGER.debug('Getting reported relatedness information for sample {} and sample {}'.format(score_result["sampleId1"],score_result["sampleId2"])) + LOGGER.debug( + 'Getting reported relatedness information for sample {} and sample {}'.format(score_result["sampleId1"], + score_result[ + "sampleId2"])) reported_relationship = [] individual1_info = samples_individuals[score_result["sampleId1"]] individual2_info = samples_individuals[score_result["sampleId2"]] if individual1_info["individualId"] == "" or individual2_info["individualId"] == "": - LOGGER.warning('No individual information available for sample {} and sample {}). Hence reported family relationship UNKNOWN'.format(score_result["sampleId1"],score_result["sampleId2"])) + LOGGER.warning( + 'No individual information available for sample {} and sample {}). 
Hence reported family relationship UNKNOWN'.format( + score_result["sampleId1"], score_result["sampleId2"])) relatedness_results["scores"]["reportedRelationship"] = "UNKNOWN" continue else: - unknown_results = [False,False] + unknown_results = [False, False] if individual1_info["individualId"] in individual2_info["familyMembersRoles"].keys(): - reported_relationship.append(individual2_info["familyMembersRoles"][individual1_info["individualId"]]) + reported_relationship.append( + individual2_info["familyMembersRoles"][individual1_info["individualId"]]) else: reported_relationship.append("UNKNOWN") unknown_results[0] = True if individual2_info["individualId"] in individual1_info["familyMembersRoles"].keys(): - reported_relationship.append(individual1_info["familyMembersRoles"][individual2_info["individualId"]]) + reported_relationship.append( + individual1_info["familyMembersRoles"][individual2_info["individualId"]]) else: reported_relationship.append("UNKNOWN") unknown_results[1] = True if all(unknown_results): relatedness_results["scores"]["reportedRelationship"] = "UNRELATED" - LOGGER.info("UNRELATED family relationship found for sample {} (individual: {}) and sample {} (individual: {})".format(score_result["sampleId1"], individual1_info["individualId"],score_result["sampleId2"], individual2_info["individualId"])) + LOGGER.info( + "UNRELATED family relationship found for sample {} (individual: {}) and sample {} (individual: {})".format( + score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"], + individual2_info["individualId"])) elif any(unknown_results): - LOGGER.warning('Family relationship discrepancy found for sample {} (individual: {}) and sample {} (individual: {}). 
Hence reported family relationship UNKNOWN'.format(score_result["sampleId1"], individual1_info["individualId"],score_result["sampleId2"], individual2_info["individualId"])) + LOGGER.warning( + 'Family relationship discrepancy found for sample {} (individual: {}) and sample {} (individual: {}). Hence reported family relationship UNKNOWN'.format( + score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"], + individual2_info["individualId"])) relatedness_results["scores"]["reportedRelationship"] = "UNKNOWN" else: relatedness_results["scores"]["reportedRelationship"] = ', '.join(reported_relationship) - LOGGER.info("Family relationship reported for sample {} (individual: {}) and sample {} (individual: {})".format(score_result["sampleId1"], individual1_info["individualId"],score_result["sampleId2"], individual2_info["individualId"])) + LOGGER.info( + "Family relationship reported for sample {} (individual: {}) and sample {} (individual: {})".format( + score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"], + individual2_info["individualId"])) # Validating reported vs inferred family relationship results block: - validation_result = self.relatedness_validation(relatedness_results["scores"]["reportedRelationship"],relatedness_results["scores"]["inferredRelationship"]) + validation_result = self.relatedness_validation(relatedness_results["scores"]["reportedRelationship"], + relatedness_results["scores"]["inferredRelationship"]) relatedness_results["scores"]["validation"] = validation_result # Return dict/json with plink, inferred, reported and validation results return relatedness_results - def relatedness_results_json(self,relatedness_results, outdir_fpath): + def relatedness_results_json(self, relatedness_results, outdir_fpath): relatedness_output_dir_fpath = outdir_fpath # Generating json file with relatedness results relatedness_results_file_name = 'relatedness.json' - relatedness_results_fpath = 
os.path.join(relatedness_output_dir_fpath,relatedness_results_file_name) + relatedness_results_fpath = os.path.join(relatedness_output_dir_fpath, relatedness_results_file_name) LOGGER.debug('Generating json file with relatedness results. File path: "{}"'.format(relatedness_results_fpath)) - json.dump(relatedness_results,open(relatedness_results_fpath,'w')) + json.dump(relatedness_results, open(relatedness_results_fpath, 'w')) LOGGER.info('Json file with relatedness results generated. File path: "{}"'.format(relatedness_results_fpath)) # Return json file path with relatedness results @@ -363,18 +423,21 @@ def relatedness(self): relatedness_thresholds_fpath = "/path/to/relatedness_thresholds.tsv" # Create output dir for relatedness analysis - relatedness_output_dir_fpath = create_output_dir(path_elements=[self.output_parent_dir,'relatedness']) + relatedness_output_dir_fpath = create_output_dir(path_elements=[self.output_parent_dir, 'relatedness']) # Filtering VCF and renaming variants - filtered_vcf_fpath = self.filter_rename_variants_vcf(pop_freq_fpath,relatedness_output_dir_fpath) + filtered_vcf_fpath = self.filter_rename_variants_vcf(pop_freq_fpath, relatedness_output_dir_fpath) # Performing IBD analysis from PLINK - method, plink_genome_fpath = self.relatedness_plink(filtered_vcf_fpath,pop_freq_fpath,pop_exclude_var_fpath,relatedness_output_dir_fpath) + method, plink_genome_fpath = self.relatedness_plink(filtered_vcf_fpath, pop_freq_fpath, pop_exclude_var_fpath, + relatedness_output_dir_fpath) # Inferring family relationships - relatedness_inference_dict = self.relatedness_inference(relatedness_thresholds_fpath,method,plink_genome_fpath) + relatedness_inference_dict = self.relatedness_inference(relatedness_thresholds_fpath, method, + plink_genome_fpath) # Getting reported family relationships and validating inferred vs reported results relatedness_results_dict = self.relatedness_report(relatedness_inference_dict) # Generating file with results - 
relatedness_results_json_fpath = self.relatedness_results_json(relatedness_results_dict,relatedness_output_dir_fpath) + relatedness_results_json_fpath = self.relatedness_results_json(relatedness_results_dict, + relatedness_output_dir_fpath) def run(self): # Checking data diff --git a/opencga-app/app/analysis/qc/individual_qc/individual_qc.py b/opencga-app/app/analysis/qc/individual_qc/individual_qc.py index 075087ff98..9e0628ce75 100644 --- a/opencga-app/app/analysis/qc/individual_qc/individual_qc.py +++ b/opencga-app/app/analysis/qc/individual_qc/individual_qc.py @@ -1,15 +1,16 @@ #!/usr/bin/env python3 -import subprocess -import sys -#import bokeh import os import logging - -from utils import create_output_dir +import json +import gzip +from utils import create_output_dir, execute_bash_command LOGGER = logging.getLogger('variant_qc_logger') +chrx_vars = "data/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chrX.recalibrated_variants_filtered_annotated_chrX.prune.in" +chrx_var_frq = "data/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chrX.recalibrated_variants_filtered_annotated_chrX.frq" + class IndividualQCExecutor: def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sample_ids, id_): """Create output dir @@ -30,85 +31,351 @@ def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sam self.sample_ids = sample_ids self.id_ = id_ - def run(self): + # Loading configuration + config_fhand = open(self.config, 'r') + self.config_json = json.load(config_fhand) + config_fhand.close() + +# def run(self): # Checking data # self.checking_data() # TODO check input data # Running sample QC steps # self.step1() # TODO run all encessary steps for this QC (e.g. 
relatedness) - + if self.bam_file != None: + self.compute_coverage_metrics(bam_file=self.bam_file, output_dir=self.output_parent_dir) + # check the output of the compute_coverage_metrics is OK and continue with the checks + self.calculate_aneuploidies() + self.calculate_coverage_based_inferred_sex() + else: + LOGGER.warning("Skipping coverage-based inferred sex: BIGWIG file not found for sample '" + sample_id + "'") + pass # Return results - # ... # TODO return results +# # ... # TODO return results pass - def step1(self): +# def step1(self): # Create output dir for this step output_dir = create_output_dir([self.output_parent_dir, 'step1']) # Run step1 # ... # TODO execute this step commands +# pass + + def compute_coverage_metrics(self, bam_file, output_dir): + """ + Given a BigWig file, it calculates the average coverage per chromosome and autosomes + :param bw_file: Path to the BigWig file for the sample + :param output_dir: Output directory + + :return: dict with median coverage per chr1-22,X,Y and autosomes + """ + # Create output dir for this step + output_dir = create_output_dir([self.output_parent_dir, 'coverage_metrics']) + + bam_file = "file.bam" + #cr = crpb.CountReadsPerBin([bam_file], binLength=50, stepSize=50) + + def calculate_aneuploidies(self, coverage, output_dir): + # Create output dir for this step + output_dir = create_output_dir([self.output_parent_dir, 'aneuploidies']) + pass + + def calculate_coverage_based_inferred_sex(bw_file, config, coverage, output_dir): + """ + Calculates inferred sex based on coverage if BAM information is available and checks if matches against the expected + :param bw_file: Path to the BigWig file for the sample + :param output_dir: Output directory + :return: + """ + chromosomes = ['chrX', 'chrY', "autosomes"] + if all(chromosomes) in coverage and all(coverage[chromosomes]) is not None: + # Calculate ratio X-chrom / autosomes + ratio_chrX = float(coverage["chrX"] / coverage["autosomes"]) + # Calculate ratio Y-chrom 
/ autosomes + ratio_chrY = float(coverage["chrY"] / coverage["autosomes"]) + + + else: + LOGGER.warning("Coverage-based inferred sex not calculate for sample {sample}. Missing or invalid coverage " + "values".format(sample=sample_ids)) + + def filter_variants(self, chrx_vars, outdir_path): + """ + Annotates input VCF with chromosome coordinate ID and filters for LD pruned PASS chr X variants using BCFTools. + :param self: Input data + :param chrX_vars: List of LD pruned chr X variants + :param outdir_path: Output directory path + :return: + """ + individual_id = self.id_ + # Read in input files: + individual_info_json = json.load(self.info_file) + vcf_file = gzip.open(self.vcf_file,'r') + good_vars = open(chrx_vars, 'r') + for sample in self.sample_ids: + sample_id = sample + # Create output directory: + filtered_vcf_name = individual_id + "_" + sample_id + '_passed_variants.vcf.gz' + filtered_vcf_path = os.path.join(str(outdir_path),filtered_vcf_name) + + # Annotate input VCF with chromosome coordinate ID: + bcftools_annotate_cmd = "bcftools annotate --set-id '%CHROM\:%POS\:%REF\:%FIRST_ALT' " + vcf_file + " -Oz -o " + os.path.join(outdir_path, "/annotated.vcf.gz") + execute_bash_command(cmd=bcftools_annotate_cmd) + LOGGER.info("{file} annotated with chromosome coordinate IDs".format(file=vcf_file)) + + # Filter for chromosome X LD pruned variants: + bcftools_variant_filter_cmd = "bcftools view --include ID==@" + good_vars + " " + os.path.join(outdir_path, "/annotated.vcf.gz") + " -Oz -o " + os.path.join(outdir_path, "/varfilter.vcf.gz") + execute_bash_command(cmd=bcftools_variant_filter_cmd) + LOGGER.info("annotated.vcf.gz filtered for chr X LD pruned variants") + + # Check if FILTER field contains PASS variants: + varcount_all_cmd = "bcftools view -i 'FILTER="PASS"' " + vcf_file + " zgrep -vw '#/|CHROM' | wc -l" + all_pass_vars = execute_bash_command(cmd=varcount_all_cmd) + LOGGER.info("checking for PASS variants in the FILTER field of '{}'".format(vcf_file)) + 
+ # Filter for PASS variants: + bcftools_pass_filter_cmd = "bcftools view -i 'FILTER="PASS"' " + os.path.join(outdir_path, "/varfilter.vcf.gz") + " -Oz -o " + filtered_vcf_path + execute_bash_command(cmd=bcftools_pass_filter_cmd) + varcount_filt_cmd = "bcftools view -i 'FILTER="PASS"' " + filtered_vcf_path + " zgrep -vw '#\|CHROM' | wc -l" + chrX_variants = execute_bash_command(cmd=varcount_filt_cmd) + if all_pass_vars == 0: + LOGGER.info("WARNING: no FILTER information available, input data will not be filtered for PASS variants, results may be unreliable. There are '{}' chr X LD pruned variants after filtering".format(chrX_variants)) + elif chrX_variants < 1000: # NB, need to come up with a good threshold - this is a random suggestion + LOGGER.info("WARNING: poor quality data, results may be unreliable. There are '{}' good quality chr X LD pruned variants after filtering".format(chrX_variants)) + else: + LOGGER.info("There are '{}' good quality chr X LD pruned variants after filtering") + # Return annotated, filtered VCF path: + return filtered_vcf_path pass -vcf_file = "/home/mbleda/Downloads/KFSHRC/CGMQ2022-02850.vcf.gz" -output_dir = "/tmp/qc_tests/" -sample_ids = "" -info_file = "" -config = "" - - -def run(vcf_file, sample_ids, info_file, config, output_dir): - inferred_sex(vcf_file=vcf_file, output_dir=output_dir) - # missingness() - # heterozygosity () - # roh() - # upd() - -def exec_bash_command(cmd_line): - """ - Run a bash command (e.g. 
bcftools), and return output - """ - po = subprocess.Popen(cmd_line, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - - stdout, stderr = po.communicate() - po.wait() - return_code = po.returncode - - if return_code != 0: - raise Exception( - "Command line {} got return code {}.\nSTDOUT: {}\nSTDERR: {}".format(cmd_line, return_code, stdout, - stderr)) - - return po.returncode, stdout - - -def bcftools_stats(vcf_file, output_dir): - """ - Calculates VCF stats using BCFTools - :param vcf_file: VCF file to get stats from - :param output_dir: Output directory - :return: - """ - bcftools_stats_output = exec_bash_command(cmd_line='bcftools stats -v ' + vcf_file + ' > ' + output_dir + '/bcftools_stats.txt') - if bcftools_stats_output[0] == 0: - print("BCFTools stats calculated successfully for {file}".format(file=vcf_file)) - #plot_bcftools_stats(file=output_dir + '/bcftools_stats.txt', prefix="stats", output=output_dir) - -def inferred_sex(vcf_file, output_dir): - """ - Calculates inferred sex and checks if matches against the expected - :param vcf_file: VCF file containing variants for chrX and chrY. This VCF should have - been previously filtered to contain variants - :param output_dir: Output directory - :return: - """ - binary = "/home/mbleda/soft/plink/plink_linux_x86_64_20231018/plink" - plink_sex_output = exec_bash_command(cmd_line=binary + 'bcftools stats -v ' + vcf_file + ' > ' + output_dir + '/bcftools_stats.txt') - # if bcftools_stats_output[0] == 0: - # print("BCFTools stats calculated successfully for {file}".format(file=vcf_file)) - # plot_bcftools_stats(file=output_dir + '/bcftools_stats.txt', prefix="stats", output=output_dir) - -if __name__ == '__main__': - sys.exit(run(vcf_file=vcf_file, sample_ids=sample_ids, info_file=info_file, config=config, output_dir=output_dir)) + + def get_individual_sex(self): + """ + Retrieve individual sex for sample and recode for Plink input. 
+        """
+        individual_info_json = json.load(open(self.info_file, 'r'))
+        indsex = individual_info_json['sex']['id']
+        karsex = individual_info_json['karyotypicSex']
+        ind_metadata = {}
+        for sample in self.sample_ids:
+            ind_metadata[sample] = {'individualId': individual_info_json['id'], 'individualSex': 0, 'sampleId': 'NA'}
+
+        LOGGER.debug("Retrieve sex information and recode")
+        for sam in individual_info_json['samples']:
+            if sam['id'] in self.sample_ids:
+                ind_metadata[sam['id']]['sampleId'] = sam['id']
+                if indsex.upper() == 'MALE' or karsex == 'XY':
+                    ind_metadata[sam['id']]['individualSex'] = 1
+                elif indsex.upper() == 'FEMALE' or karsex == 'XX':
+                    ind_metadata[sam['id']]['individualSex'] = 2
+                else:
+                    LOGGER.info("Sex information for Individual '{}' (Sample '{}') is not available, sex will be inferred".format(individual_info_json['id'],sam['id']))
+        # Check individual information for each sample is present:
+        for sample,individual_info in ind_metadata.items():
+            if individual_info['individualSex'] == 0:
+                LOGGER.warning("No individual information available for sample '{}'.".format(individual_info['sampleId']))
+            else:
+                LOGGER.info("Individual information for sample '{}' found".format(individual_info['sampleId']))
+        # Return sex information:
+        return ind_metadata
+        pass
+
+
+    def generate_plink_fam_file_input(self,output_dir):
+        # Retrieve sex information:
+        individual_id = self.id_
+        ind_metadata = self.get_individual_sex()
+
+        # Generate a text file to update the Plink fam file with sex information:
+        sex_info_file_name = individual_id + '_sex_information.txt'
+        sex_info_path = os.path.join(str(output_dir),sex_info_file_name)
+        sex_info_input = open(sex_info_path,'w')
+        LOGGER.debug("Generating text file to update the input sex information: '{}'".format(sex_info_path))
+
+        for sample in self.sample_ids:
+            # Individual information for sample:
+            individual_info = ind_metadata[sample]
+            # Plink sex update file format: familyId individualId sex (note, for the purpose of simplicity, 
individualId replaces familyId below)
+            ###### BECAUSE IT'S POSSIBLE TO HAVE MULTIPLE FAMILY IDS & THE FAMILY ID ISN'T ACTUALLY REQUIRED FOR THE ANALYSIS
+            sex_info_input.write(('\t'.join([sample,sample,str(individual_info['individualSex'])])) + '\n')
+        LOGGER.info("Text file generated to update the Plink fam file with sex information: '{}'".format(sex_info_path))
+        sex_info_input.close()
+        # Return paths of text files generated:
+        return sex_info_path
+        pass
+
+
+    def inferred_sex_results_data_model(self, sex_method):
+        inferredSex = [
+            {
+                "method": "",
+                "sampleId": "",
+                "inferredKaryotypicSex": "", #### I'M NOT SURE I LIKE THIS - I'D RATHER HAVE "inferredSex" - it's specific to the coverage based inference
+                "software": {
+                    "name": "plink",
+                    "version": "1.9",
+                    "commit": "",
+                    "params": {
+                        "key": "value"
+                    }
+                },
+                "values": {
+                    "sampleId": "",
+                    "PEDSEX": "",
+                    "SNPSEX": "",
+                    "STATUS": "",
+                    "F": ""
+                },
+                "images": [
+                    {
+                        "name": "Sex check",
+                        "base64": "",
+                        "description": "TO UPDATE"
+                    }
+                ],
+                "attributes": {
+                    "cli": "",
+                    "files": [],
+                    "JOB_ID": ""
+                }
+            }
+        ]
+        if sex_method == "Plink/check-sex":
+            LOGGER.info("Relatedness method used: '{}'".format(sex_method))
+            inferredSex[0]['method'] = str(sex_method)
+        elif sex_method == "Plink/impute-sex":
+            LOGGER.info("Relatedness method used: '{}'".format(sex_method))
+            inferredSex[0]['method'] = str(sex_method)
+            inferredSex[0]['values']['PEDSEX'] = "NA"
+            inferredSex[0]['values']['STATUS'] = "INFERRED"
+# elif sex_method == "MartaCoverageBasedSexMethodName":
+# LOGGER.info("Relatedness method used: '{}'".format(sex_method))
+# #etc
+        else:
+            LOGGER.warning("No method for sex inference defined.")
+        # Return inferredSex data model json with method specific fields filled in:
+        return inferredSex
+        pass
+
+
+    def check_sex_plink(self,filtered_vcf_path,chrx_var_frq,plink_outdir,sex_method="Plink/check-sex"):
+        # Define Plink check-sex methodology:
+        LOGGER.info("Method: {}".format(sex_method))
+        plink_outdir_path = create_output_dir(path_elements=[str(plink_outdir),"plink_check_sex"])
+        
sex_info_path = self.generate_plink_fam_file_input(output_dir=plink_outdir_path)
+
+        # Plink parameters:
+        file_prefix = self.id_ + "plink_sex_check_results"
+        plink_output_prefix = os.path.join(plink_outdir,file_prefix)
+
+        # Convert input to Plink format:
+        plink_convert_input = ' '.join([plink,
+                                        "--vcf", str(filtered_vcf_path),
+                                        "--make-bed",
+                                        "--keep-allele-order",
+                                        "--update-sex", sex_info_path,
+                                        "--split-x", "hg38", "no-fail",
+                                        "--out", plink_output_prefix])
+        LOGGER.debug("Generating Plink check-sex input files")
+        plink_input_files = execute_bash_command(plink_convert_input)
+        if plink_input_files[0] == 0:
+            plink_files_generated = os.listdir(plink_outdir)
+            LOGGER.info("Files available: '{}':\n{}".format(plink_outdir,plink_files_generated))
+
+        # Run Plink check-sex analysis:
+        plink_check_sex_input = ' '.join([plink,
+                                          "--bfile", plink_output_prefix,
+                                          "--read-freq", str(chrx_var_frq),
+                                          "--check-sex",
+                                          "--out", plink_output_prefix])
+        LOGGER.debug("Performing Plink check-sex")
+        plink_sexcheck = execute_bash_command(plink_check_sex_input)
+        if plink_sexcheck[0] == 0:
+            plink_files_generated = os.listdir(plink_outdir)
+            LOGGER.info("Files available: '{}':\n{}".format(plink_outdir,plink_files_generated))
+
+        plink_sexcheck_path = plink_output_prefix + '.sexcheck'
+        if os.path.isfile(plink_sexcheck_path) == False:
+            LOGGER.error("File '{}' does not exist. Check:\nSTDOUT: '{}'\nSTDERR: '{}'".format(plink_sexcheck_path,plink_sexcheck[1],plink_sexcheck[2]))
+            raise Exception("File '{}' does not exist. 
Check:\nSTDOUT: '{}'\nSTDERR: '{}'".format(plink_sexcheck_path,plink_sexcheck[1],plink_sexcheck[2]))
+        else:
+            # Return method used and path of the output .sexcheck file generated by Plink
+            return [sex_method,plink_sexcheck_path]
+        pass
+
+
+    def impute_sex_plink(self,filtered_vcf_path,chrx_var_frq,plink_outdir,sex_method="Plink/impute-sex"):
+        # Define Plink impute-sex methodology:
+        LOGGER.info("Method: {}".format(sex_method))
+        plink_outdir_path = create_output_dir(path_elements=[str(plink_outdir),"plink_impute_sex"])
+
+        # Plink parameters:
+        file_prefix = self.id_ + "plink_impute_check_results"
+        plink_output_prefix = os.path.join(plink_outdir,file_prefix)
+
+        # Convert input to Plink format:
+        plink_convert_input = ' '.join([plink,
+                                        "--vcf", str(filtered_vcf_path),
+                                        "--make-bed",
+                                        "--keep-allele-order",
+                                        "--split-x", "hg38", "no-fail",
+                                        "--out", plink_output_prefix])
+        LOGGER.debug("Generating Plink sex-check input files")
+        plink_input_files = execute_bash_command(plink_convert_input)
+        if plink_input_files[0] == 0:
+            plink_files_generated = os.listdir(plink_outdir)
+            LOGGER.info("Files available: '{}':\n{}".format(plink_outdir,plink_files_generated))
+
+        # Run Plink impute-sex analysis:
+        plink_impute_sex_input = ' '.join([plink,
+                                           "--bfile", plink_output_prefix,
+                                           "--read-freq", str(chrx_var_frq),
+                                           "--impute-sex",
+                                           "--out", plink_output_prefix])
+        LOGGER.debug("Performing Plink impute-sex")
+        plink_imputesex = execute_bash_command(plink_impute_sex_input)
+        if plink_imputesex[0] == 0:
+            plink_files_generated = os.listdir(plink_outdir)
+            LOGGER.info("Files available: '{}':\n{}".format(plink_outdir,plink_files_generated))
+
+        plink_sexcheck_path = plink_output_prefix + '.sexcheck'
+        if os.path.isfile(plink_sexcheck_path) == False:
+            LOGGER.error("File '{}' does not exist. Check:\nSTDOUT: '{}'\nSTDERR: '{}'".format(plink_sexcheck_path,plink_imputesex[1],plink_imputesex[2]))
+            raise Exception("File '{}' does not exist. 
Check:\nSTDOUT: '{}'\nSTDERR: '{}'".format(plink_sexcheck_path,plink_sexcheck[1],plink_sexcheck[2])) + else: + # Return method used and path of the output .sexcheck file generated by Plink + return [sex_method,plink_sexcheck_path] + pass + + + def run_inferred_sex(self): + """ + Run Plink check-sex or impute-sex as appropriate, dependent on the availability of user supplied sex information. + :param self: Input data + :filtered_vcf_path: Input VCF filtered for LD pruned chr X variants + :param chrX_var_frq: 1000 Genomes population based allele frequencies for the selected LD pruned chr X variants + :param plink_outdir: Output directory path + :return: + """ + # Retrieve sex information: + individual_id = self.id_ + ind_metadata = self.get_individual_sex() + + # Run inferred sex analysis: + if ind_metadata['individualSex'] == 0: + inferred_sex_output = impute_sex_plink(TO UPDATE) + LOGGER.info("Running Plink impute-sex") + else: + inferred_sex_output = check_sex_plink(TO UPDATE) + LOGGER.info("Running Plink check-sex") + + + + + def plot_results(TO UPDATE): + # Plot results: + pass + + + diff --git a/opencga-app/app/analysis/qc/sample_qc/sample_qc.py b/opencga-app/app/analysis/qc/sample_qc/sample_qc.py index 3b7f0756bd..66fa020b75 100644 --- a/opencga-app/app/analysis/qc/sample_qc/sample_qc.py +++ b/opencga-app/app/analysis/qc/sample_qc/sample_qc.py @@ -3,12 +3,13 @@ import sys import os import logging - +import json from utils import create_output_dir, execute_bash_command LOGGER = logging.getLogger('variant_qc_logger') + class SampleQCExecutor: def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sample_ids, id_): """Create output dir @@ -29,16 +30,21 @@ def __init__(self, vcf_file, info_file, bam_file, config, output_parent_dir, sam self.sample_ids = sample_ids self.id_ = id_ + # Loading configuration + config_fhand = open(self.config, 'r') + self.config_json = json.load(config_fhand) + config_fhand.close() + + def run(self): - # 
check_data() - # relatedness() - # inferred_sex() - # if info_file.somatic == True && config.mutational_signature.skip == False: - # mutational_signature() - # mendelian_errors() - # return ; - self.bcftools_stats(vcf_file=self.vcf_file) + # Genome plot + if 'skip' not in self.config_json or 'genomePlot' not in self.config_json['skip']: + self.create_genome_plot() + + + + # self.bcftools_stats(vcf_file=self.vcf_file) # missingness() # heterozygosity () @@ -49,6 +55,61 @@ def run(self): # ... # TODO return results pass + def get_genome_plot_vcfs(self, output_dir, gp_config_json): + + # Creating VCF files for each variant type + snvs_fpath = os.path.join(output_dir, 'snvs.tsv') + indels_fpath = os.path.join(output_dir, 'indels.tsv') + cnvs_fpath = os.path.join(output_dir, 'cnvs.tsv') + rearrs_fpath = os.path.join(output_dir, 'rearrs.tsv') + + # TODO Filtering VCF + vcf_fhand = open(self.vcf_file, 'r') + # /opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotLocalAnalysisExecutor.java + vcf_fhand.close() + + return snvs_fpath, indels_fpath, cnvs_fpath, rearrs_fpath + + def create_genome_plot(self): + # Creating output dir for this step + output_dir = create_output_dir([self.output_parent_dir, 'genome-plot']) + + # Getting genome plot config file + gp_config_fpath = self.config_json['genomePlot']['configFile'] + gp_config_fhand = open(gp_config_fpath, 'r') + gp_config_json = json.load(gp_config_fhand) + gp_config_fhand.close() + + # Getting variants + snvs_fpath, indels_fpath, cnvs_fpath, rearrs_fpath = self.get_genome_plot_vcfs(output_dir, gp_config_json) + + # Creating CMD + # /analysis/R/genome-plot/circos.R + # https://github.com/opencb/opencga/blob/TASK-6766/opencga-analysis/src/main/R/genome-plot/circos.R + # TODO input/output paths + cmd = ( + ' R CMD Rscript --vanilla /data/input/circos.R' + ' --genome_version hg38' + ' --out_path /data/output' + ' --plot_title {plot_title}' + ' --out_format {out_format}' + ' 
/data/output/{snvs_fname}' + ' /data/output/{indels_fname}' + ' /data/output/{cnvs_fname}' + ' /data/output/{rearrs_fname}' + ' {sample_id}' + ).format( + plot_title=gp_config_json['title'], + out_format='png', + snvs_fname=os.path.basename(snvs_fpath), + indels_fname=os.path.basename(indels_fpath), + cnvs_fname=os.path.basename(cnvs_fpath), + rearrs_fname=os.path.basename(rearrs_fpath), + sample_id=self.id_ + ) + + return_code, stdout, stderr = execute_bash_command(cmd) + def bcftools_stats(self, vcf_file): """ diff --git a/opencga-app/app/analysis/qc/variant_qc.main.py b/opencga-app/app/analysis/qc/variant_qc.main.py index 263686e869..581056c28b 100644 --- a/opencga-app/app/analysis/qc/variant_qc.main.py +++ b/opencga-app/app/analysis/qc/variant_qc.main.py @@ -35,7 +35,7 @@ def get_parser(): help='comma-separated BAM file paths') parser.add_argument('-q', '--qc-type', dest='qc_type', choices = ['sample', 'individual', 'family'], required=True, help='type of QC') - parser.add_argument('-c', '--config', dest='config', + parser.add_argument('-c', '--config', dest='config', required=True, help='configuration file path') parser.add_argument('-o', '--output-dir', dest='output_dir', help='output directory path') @@ -217,6 +217,8 @@ def main(): shutil.copy(vcf_files[i], qc_outdir_fpath) LOGGER.debug('Copying info JSON file "{}" in output directory "{}"'.format(info_jsons[i], qc_outdir_fpath)) shutil.copy(info_jsons[i], qc_outdir_fpath) + LOGGER.debug('Copying config JSON file "{}" in output directory "{}"'.format(config, qc_outdir_fpath)) + shutil.copy(config, qc_outdir_fpath) # Execute QC qc_executor(