Skip to content

Commit

Permalink
Update code to pre- and post-process underscores in node names
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Oct 25, 2024
1 parent a26f4d0 commit c14e1fc
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
9 changes: 8 additions & 1 deletion spras/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,18 @@ def generate_inputs(data, filename_map):
# TODO test whether this selection is needed, what values could the column contain that we would want to
# include or exclude?
nodes = nodes.loc[nodes[node_type]]
# replace _'s with ꧁SEP꧂
nodes['NODEID'] = nodes['NODEID'].str.replace('_', '꧁SEP꧂')
nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False)

# Create network file
edges = data.get_interactome()

# Format network file
edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)')

# replace _'s with ꧁SEP꧂
edges['Interactor1'] = edges['Interactor1'].str.replace('_', '꧁SEP꧂')
edges['Interactor2'] = edges['Interactor2'].str.replace('_', '꧁SEP꧂')
edges.to_csv(filename_map['edges'], sep='\t', index=False,
columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False)

Expand Down Expand Up @@ -181,6 +185,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Columns Source Type Target Oriented Weight
df = raw_pathway_df(raw_pathway_file, sep='\t', header=0)
if not df.empty:
# Replace ꧁SEP꧂ with _
df['Source'] = df['Source'].str.replace('꧁SEP꧂', '_')
df['Target'] = df['Target'].str.replace('꧁SEP꧂', '_')
# Keep only edges that were assigned an orientation (direction)
df = df.loc[df['Oriented']]
# TODO what should be the edge rank?
Expand Down
6 changes: 3 additions & 3 deletions test/MEO/input/meo-edges.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-GeneA (pp) GeneC 0.5
-GeneB (pp) GeneC 0.5
-GeneC (pp) GeneD 0.5
+Gene_A (pp) Gene_C 0.5
+GeneB (pp) Gene_C 0.5
+Gene_C (pp) GeneD 0.5
GeneD (pp) GeneE 0.5
GeneD (pp) GeneG 0.5
GeneE (pp) GeneF 0.5

0 comments on commit c14e1fc

Please sign in to comment.