From 16cd1f425a9aee65d28bf29ef1e98949938499a3 Mon Sep 17 00:00:00 2001 From: thomasyu888 Date: Mon, 23 Jan 2023 23:24:41 -0800 Subject: [PATCH 1/6] Do not push fusion files --- genie/database_to_staging.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index a95c5f5d..3ac690cc 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -7,7 +7,6 @@ import os import re import subprocess -import time from typing import List import pandas as pd @@ -1775,16 +1774,16 @@ def stagingToCbio( wes_panelids, ) - store_fusion_files( - syn, - consortiumReleaseSynId, - genieVersion, - fusionSynId, - keepForCenterConsortiumSamples, - keepForMergedConsortiumSamples, - current_release_staging, - CENTER_MAPPING_DF, - ) + # store_fusion_files( + # syn, + # consortiumReleaseSynId, + # genieVersion, + # fusionSynId, + # keepForCenterConsortiumSamples, + # keepForMergedConsortiumSamples, + # current_release_staging, + # CENTER_MAPPING_DF, + # ) store_sv_files( syn, From 63f6807ee4a775f0113236956fe34add8c774102 Mon Sep 17 00:00:00 2001 From: thomasyu888 Date: Mon, 23 Jan 2023 23:27:48 -0800 Subject: [PATCH 2/6] Don't push out fusion files --- genie/database_to_staging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index 3ac690cc..d2ca78e1 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -1837,7 +1837,7 @@ def revise_metadata_files(syn, consortiumid, genie_version=None): i["id"], downloadLocation=GENIE_RELEASE_DIR, ifcollision="overwrite.local" ) for i in release_files - if "meta" in i["name"] + if "meta" in i["name"] and i['name'] != "meta_fusions.txt" ] for meta_ent in meta_file_ents: From 413250029c1dfbdd946d53b893976bb5efe9bd7e Mon Sep 17 00:00:00 2001 From: Rixing Xu Date: Tue, 2 May 2023 23:08:33 -0700 Subject: [PATCH 3/6] remove fusions from consortium release --- genie/database_to_staging.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index 6b8a74a8..72a9aca2 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -1924,8 +1924,11 @@ def create_link_version( release_file["name"] != "data_clinical.txt" or release_type == "consortium" ) is_gene_panel = release_file["name"].startswith("data_gene_panel") + is_depreciated_file = ( + release_file["name"] in ["data_fusions.txt"] + ) - if not_folder and not_public and not is_gene_panel: + if not_folder and not_public and not is_gene_panel and not is_depreciated_file: syn.store( synapseclient.Link( release_file["id"], From ca8951f57f2e17a84bdcf6c2804cc20f2634b006 Mon Sep 17 00:00:00 2001 From: Rixing Xu Date: Tue, 2 May 2023 23:31:13 -0700 Subject: [PATCH 4/6] add linting --- genie/database_to_staging.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index 72a9aca2..788ff45b 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -1811,7 +1811,7 @@ def revise_metadata_files(syn, consortiumid, genie_version=None): i["id"], downloadLocation=GENIE_RELEASE_DIR, ifcollision="overwrite.local" ) for i in release_files - if "meta" in i["name"] and i['name'] != "meta_fusions.txt" + if "meta" in i["name"] and i["name"] != "meta_fusions.txt" ] for meta_ent in meta_file_ents: @@ -1924,9 +1924,7 @@ def create_link_version( release_file["name"] != "data_clinical.txt" or release_type == "consortium" ) is_gene_panel = release_file["name"].startswith("data_gene_panel") - is_depreciated_file = ( - release_file["name"] in ["data_fusions.txt"] - ) + is_depreciated_file = release_file["name"] in ["data_fusions.txt"] if not_folder and not_public and not is_gene_panel and not is_depreciated_file: syn.store( From bdf205c79654d912662a4f70aa7e741239ecd78f Mon Sep 17 00:00:00 2001 From: Rixing Xu Date: Wed, 3 May 2023 11:36:33 -0700 Subject: [PATCH 5/6] remove store_fusion_files func --- genie/database_to_staging.py | 89 ------------------------------------ 1 file changed, 89 deletions(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index 788ff45b..8163214b 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -644,85 +644,6 @@ def store_gene_panel_files( return genePanelEntities -# TODO: add to load.py -def store_fusion_files( - syn, - release_synid, - genie_version, - fusion_synid, - keep_for_center_consortium_samples, - keep_for_merged_consortium_samples, - current_release_staging, - center_mappingdf, -): - """ - Create, filter, configure, and store fusion file - - Args: - syn: Synapse object - release_synid: Synapse id to store release file - genie_version: GENIE version (ie. v6.1-consortium) - fusion_synid: Fusion database synid - keep_for_center_consortium_samples: Samples to keep for center files - keep_for_merged_consortium_samples: Samples to keep for merged file - current_release_staging: Staging flag - center_mappingdf: Center mapping dataframe - """ - logger.info("MERING, FILTERING, STORING FUSION FILES") - FusionsDf = extract.get_syntabledf( - syn, - "select HUGO_SYMBOL,ENTREZ_GENE_ID,CENTER,TUMOR_SAMPLE_BARCODE,FUSION," - f"DNA_SUPPORT,RNA_SUPPORT,METHOD,FRAME from {fusion_synid}", - ) - version = syn.create_snapshot_version(fusion_synid, comment=genie_version) - - FusionsDf["ENTREZ_GENE_ID"].mask( - FusionsDf["ENTREZ_GENE_ID"] == 0, float("nan"), inplace=True - ) - - if not current_release_staging: - FusionsStagingDf = FusionsDf[ - FusionsDf["TUMOR_SAMPLE_BARCODE"].isin(keep_for_center_consortium_samples) - ] - for center in center_mappingdf.center: - center_fusion = FusionsStagingDf[FusionsStagingDf["CENTER"] == center] - if not center_fusion.empty: - center_fusion.to_csv( - FUSIONS_CENTER_PATH % center, sep="\t", index=False - ) - load.store_file( - syn=syn, - filepath=FUSIONS_CENTER_PATH % center, - version_comment=genie_version, - parentid=center_mappingdf["stagingSynId"][ - center_mappingdf["center"] == center - ][0], - ) - - FusionsDf = FusionsDf[ - FusionsDf["TUMOR_SAMPLE_BARCODE"].isin(keep_for_merged_consortium_samples) - ] - FusionsDf.rename(columns=transform._col_name_to_titlecase, inplace=True) - - # Remove duplicated Fusions - FusionsDf = FusionsDf[ - ~FusionsDf[["Hugo_Symbol", "Tumor_Sample_Barcode", "Fusion"]].duplicated() - ] - # FusionsDf.to_csv(FUSIONS_PATH, sep="\t", index=False) - fusionText = process_functions.removePandasDfFloat(FusionsDf) - fusions_path = os.path.join(GENIE_RELEASE_DIR, "data_fusions.txt") - with open(fusions_path, "w") as fusion_file: - fusion_file.write(fusionText) - load.store_file( - syn=syn, - filepath=fusions_path, - parentid=release_synid, - version_comment=genie_version, - name="data_fusions.txt", - used=f"{fusion_synid}.{version}", - ) - - # TODO: add to load.py def store_sv_files( syn: synapseclient.Synapse, @@ -1748,16 +1669,6 @@ def stagingToCbio( wes_panelids, ) - # store_fusion_files( - # syn, - # consortiumReleaseSynId, - # genieVersion, - # fusionSynId, - # keepForCenterConsortiumSamples, - # keepForMergedConsortiumSamples, - # current_release_staging, - # CENTER_MAPPING_DF, - # ) store_sv_files( syn, From 4ee9d42e48bd0e19c6ecae532512b7ba48124016 Mon Sep 17 00:00:00 2001 From: Rixing Xu Date: Wed, 3 May 2023 14:19:56 -0700 Subject: [PATCH 6/6] remove space --- genie/database_to_staging.py | 1 - 1 file changed, 1 deletion(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index a24c4cf2..3a420873 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -1667,7 +1667,6 @@ def stagingToCbio( wes_panelids, ) - store_sv_files( syn, consortiumReleaseSynId,