From c2b8a067ae638bdd0e317084d306b877f34fcbee Mon Sep 17 00:00:00 2001 From: Ximena <70326255+xim56@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:32:37 +0100 Subject: [PATCH 1/3] qc: Fix relatedness_validation function and mark as PASS if reported relationship groups are a subset of inferred relationship groups #TASK-6775 #TASK-6766 --- .../app/analysis/qc/family_qc/family_qc.py | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/opencga-app/app/analysis/qc/family_qc/family_qc.py b/opencga-app/app/analysis/qc/family_qc/family_qc.py index 9d84316fa1..0dc5cf070c 100644 --- a/opencga-app/app/analysis/qc/family_qc/family_qc.py +++ b/opencga-app/app/analysis/qc/family_qc/family_qc.py @@ -288,13 +288,11 @@ def relatedness_validation(reported_result, inferred_result): else: reported_result = set(reported_result.split(', ')) inferred_result = set(inferred_result.split(', ')) - if len(reported_result) != len(inferred_result): - validation = "FAIL" + if reported_result == inferred_result or reported_result.issubset(inferred_result): + validation = "PASS" else: - if reported_result == inferred_result: - validation = "PASS" - else: - validation = "FAIL" + validation = "FAIL" + # Return validation result return validation @@ -364,27 +362,23 @@ def relatedness_report(self, relatedness_inference_results): # Getting reported family relationship block: relatedness_results = relatedness_inference_results for score_result in relatedness_inference_results["scores"]: - LOGGER.debug( - 'Getting reported relatedness information for sample {} and sample {}'.format(score_result["sampleId1"], - score_result[ - "sampleId2"])) + LOGGER.debug('Getting reported relatedness information for sample {} and sample {}'.format(score_result["sampleId1"], score_result["sampleId2"])) reported_relationship = [] individual1_info = samples_individuals[score_result["sampleId1"]] individual2_info = samples_individuals[score_result["sampleId2"]] if individual1_info["individualId"] == "" or individual2_info["individualId"] == "": - LOGGER.warning( - 'No individual information available for sample {} and sample {}). Hence reported family relationship UNKNOWN'.format( + LOGGER.warning('No individual information available for sample {} and sample {}). Hence reported family relationship UNKNOWN'.format( score_result["sampleId1"], score_result["sampleId2"])) relatedness_results["scores"]["reportedRelationship"] = "UNKNOWN" continue else: unknown_results = [False, False] if individual1_info["individualId"] in individual2_info["familyMembersRoles"].keys(): - reported_relationship.append( - individual2_info["familyMembersRoles"][individual1_info["individualId"]]) + reported_relationship.append(individual2_info["familyMembersRoles"][individual1_info["individualId"]]) else: reported_relationship.append("UNKNOWN") unknown_results[0] = True + if individual2_info["individualId"] in individual1_info["familyMembersRoles"].keys(): reported_relationship.append( individual1_info["familyMembersRoles"][individual2_info["individualId"]]) @@ -401,19 +395,16 @@ def relatedness_report(self, relatedness_inference_results): elif any(unknown_results): LOGGER.warning( 'Family relationship discrepancy found for sample {} (individual: {}) and sample {} (individual: {}). Hence reported family relationship UNKNOWN'.format( - score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"], - individual2_info["individualId"])) + score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"],individual2_info["individualId"])) score_result["reportedRelationship"] = "UNKNOWN" else: score_result["reportedRelationship"] = ', '.join(reported_relationship) LOGGER.info( "Family relationship reported for sample {} (individual: {}) and sample {} (individual: {})".format( - score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"], - individual2_info["individualId"])) + score_result["sampleId1"], individual1_info["individualId"], score_result["sampleId2"],individual2_info["individualId"])) # Validating reported vs inferred family relationship results block: - validation_result = FamilyQCExecutor.relatedness_validation(score_result["reportedRelationship"], - score_result["inferredRelationship"]) + validation_result = FamilyQCExecutor.relatedness_validation(score_result["reportedRelationship"], score_result["inferredRelationship"]) score_result["validation"] = validation_result # Return dict/json with plink, inferred, reported and validation results From 041bc33719ae1a44811928c7504943741d9d5d04 Mon Sep 17 00:00:00 2001 From: Ximena <70326255+xim56@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:48:10 +0100 Subject: [PATCH 2/3] qc: Rename function to generate file with relatedness results to generate_relatedness_results_file and remove some unnecessary new lines #TASK-6775 #TASK-6766 --- opencga-app/app/analysis/qc/family_qc/family_qc.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/opencga-app/app/analysis/qc/family_qc/family_qc.py b/opencga-app/app/analysis/qc/family_qc/family_qc.py index 0dc5cf070c..1cf1d3a550 100644 --- a/opencga-app/app/analysis/qc/family_qc/family_qc.py +++ b/opencga-app/app/analysis/qc/family_qc/family_qc.py @@ -410,7 +410,7 @@ def relatedness_report(self, relatedness_inference_results): # Return dict/json with plink, inferred, reported and validation results return relatedness_results - def relatedness_results_json(self, relatedness_results, outdir_fpath): + def generate_relatedness_results_file(self, relatedness_results, outdir_fpath): relatedness_output_dir_fpath = outdir_fpath # Generating json file with relatedness results @@ -435,16 +435,13 @@ def relatedness(self): # Filtering VCF and renaming variants filtered_vcf_fpath = self.filter_rename_variants_vcf(pop_freq_fpath, relatedness_output_dir_fpath) # Performing IBD analysis from PLINK - method, plink_genome_fpath = self.relatedness_plink(filtered_vcf_fpath, pop_freq_fpath, pop_exclude_var_fpath, - relatedness_output_dir_fpath) + method, plink_genome_fpath = self.relatedness_plink(filtered_vcf_fpath, pop_freq_fpath, pop_exclude_var_fpath, relatedness_output_dir_fpath) # Inferring family relationships - relatedness_inference_dict = self.relatedness_inference(relatedness_thresholds_fpath, method, - plink_genome_fpath) + relatedness_inference_dict = self.relatedness_inference(relatedness_thresholds_fpath, method, plink_genome_fpath) # Getting reported family relationships and validating inferred vs reported results relatedness_results_dict = self.relatedness_report(relatedness_inference_dict) # Generating file with results - relatedness_results_json_fpath = self.relatedness_results_json(relatedness_results_dict, - relatedness_output_dir_fpath) + relatedness_results_json_fpath = self.generate_relatedness_results_file(relatedness_results_dict, relatedness_output_dir_fpath) def run(self): # Checking data From 67b2859057bb795db71d4733e4104236e65281a3 Mon Sep 17 00:00:00 2001 From: Ximena <70326255+xim56@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:52:05 +0100 Subject: [PATCH 3/3] qc: Convert generate_relatedness_results_file to static method #TASK-6775 #TASK-6766 --- opencga-app/app/analysis/qc/family_qc/family_qc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/opencga-app/app/analysis/qc/family_qc/family_qc.py b/opencga-app/app/analysis/qc/family_qc/family_qc.py index 1cf1d3a550..3030975e6b 100644 --- a/opencga-app/app/analysis/qc/family_qc/family_qc.py +++ b/opencga-app/app/analysis/qc/family_qc/family_qc.py @@ -410,7 +410,8 @@ def relatedness_report(self, relatedness_inference_results): # Return dict/json with plink, inferred, reported and validation results return relatedness_results - def generate_relatedness_results_file(self, relatedness_results, outdir_fpath): + @staticmethod + def generate_relatedness_results_file(relatedness_results, outdir_fpath): relatedness_output_dir_fpath = outdir_fpath # Generating json file with relatedness results @@ -424,7 +425,7 @@ def generate_relatedness_results_file(self, relatedness_results, outdir_fpath): return relatedness_results_fpath def relatedness(self): - # Set up. Prepare reference file paths to use them later: + # Setup. Prepare reference file paths to use them later: pop_freq_fpath = "/path/to/pop_freq_prune_in.frq" pop_exclude_var_fpath = "/path/to/pop_exclude_var.prune.out" relatedness_thresholds_fpath = "/path/to/relatedness_thresholds.tsv"