From c14e1fc23c08575f5da0228a42753589c0ac9aae Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 25 Oct 2024 12:39:21 -0500 Subject: [PATCH] update code to pre and post process underscores in node names --- spras/meo.py | 9 ++++++++- test/MEO/input/meo-edges.txt | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/spras/meo.py b/spras/meo.py index b614d4c4..3382c54b 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -88,6 +88,8 @@ def generate_inputs(data, filename_map): # TODO test whether this selection is needed, what values could the column contain that we would want to # include or exclude? nodes = nodes.loc[nodes[node_type]] + # replace _'s with ꧁SEP꧂ + nodes['NODEID'] = nodes['NODEID'].str.replace('_', '꧁SEP꧂') nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False) # Create network file @@ -95,7 +97,9 @@ def generate_inputs(data, filename_map): # Format network file edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') - + # replace _'s with ꧁SEP꧂ + edges['Interactor1'] = edges['Interactor1'].str.replace('_', '꧁SEP꧂') + edges['Interactor2'] = edges['Interactor2'].str.replace('_', '꧁SEP꧂') edges.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) @@ -181,6 +185,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # Columns Source Type Target Oriented Weight df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) if not df.empty: + # Replace ꧁SEP꧂ with _ + df['Source'] = df['Source'].str.replace('꧁SEP꧂', '_') + df['Target'] = df['Target'].str.replace('꧁SEP꧂', '_') # Keep only edges that were assigned an orientation (direction) df = df.loc[df['Oriented']] # TODO what should be the edge rank? diff --git a/test/MEO/input/meo-edges.txt b/test/MEO/input/meo-edges.txt index 25e05b80..dbad11c3 100644 --- a/test/MEO/input/meo-edges.txt +++ b/test/MEO/input/meo-edges.txt @@ -1,6 +1,6 @@ -GeneA (pp) GeneC 0.5 -GeneB (pp) GeneC 0.5 -GeneC (pp) GeneD 0.5 +Gene_A (pp) Gene_C 0.5 +GeneB (pp) Gene_C 0.5 +Gene_C (pp) GeneD 0.5 GeneD (pp) GeneE 0.5 GeneD (pp) GeneG 0.5 GeneE (pp) GeneF 0.5