Skip to content

Commit

Permalink
Merge pull request #182 from ntalluri/issue-133
Browse files Browse the repository at this point in the history
Omics Integrator 2 Testing Error
  • Loading branch information
agitter authored Sep 22, 2024
2 parents cd1efd7 + 2e2c5c1 commit 7b07916
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 5 deletions.
16 changes: 11 additions & 5 deletions spras/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,20 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
"""
# Omicsintegrator2 returns a single line file if no network is found
num_lines = sum(1 for line in open(raw_pathway_file))
# Omicsintegrator2 has corrupted output; list of correct column names
sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] # the order of edge attributes in the NetworkX graph is not guaranteed.

if num_lines < 2:
df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
else:
df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line
df = df.take([0, 1], axis=1)
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct
df = df[df['in_solution'] == True] # the 'in_solution' column exists when the forest is not empty.
df = df.take([0, 1], axis=1) # the first two columns in the df will be 'protein1' and 'protein2', followed by the edge attributes.
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
else: # corrupted data
df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])

df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
protein1 protein2 cost
B A 0.52
B C 0.73
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
protein1 protein2 in_solution cost
B A True 0.52
B C True 0.73
15 changes: 15 additions & 0 deletions test/parse-outputs/test_parse_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
INDIR = "test/parse-outputs/input/"
OUTDIR = "test/parse-outputs/output/"
EXPDIR = "test/parse-outputs/expected/"
OI2_EDGE_CASES_INDIR = 'test/parse-outputs/input/omicsintegrator-edge-cases/'

# DOMINO input is the concatenated module_0.html and module_1.html file from
# the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
Expand Down Expand Up @@ -37,3 +38,17 @@ def test_empty_file(self):

runner.parse_output(algo, test_file, out_file)
assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)

def test_oi2_miss_insolution(self):
    """Check the parsed output for the 'miss-insolution' Omics Integrator 2 edge case.

    Runs ``runner.parse_output`` for the ``omicsintegrator2`` algorithm on the
    ``omicsintegrator2-miss-insolution-raw-pathway.txt`` fixture and asserts
    that the standardized output is byte-for-byte identical to the shared
    ``empty-pathway-expected.txt`` file.
    """
    # NOTE(review): judging by the fixture name, the raw file presumably lacks
    # the 'in_solution' column -- confirm against the fixture contents.
    # Plain string literals: none of these paths interpolate anything, so the
    # f-prefix was unnecessary.
    test_file = OI2_EDGE_CASES_INDIR + "omicsintegrator2-miss-insolution-raw-pathway.txt"
    out_file = OUTDIR + "omicsintegrator2-miss-insolution-pathway.txt"

    runner.parse_output('omicsintegrator2', test_file, out_file)
    # shallow=False forces a content comparison instead of an os.stat() signature check.
    assert filecmp.cmp(out_file, EXPDIR + "empty-pathway-expected.txt", shallow=False)

def test_oi2_wrong_order(self):
    """Check the parsed output for the 'wrong-order' Omics Integrator 2 edge case.

    Runs ``runner.parse_output`` for the ``omicsintegrator2`` algorithm on the
    ``omicsintegrator2-wrong-order-raw-pathway.txt`` fixture and asserts that
    the standardized output is byte-for-byte identical to
    ``omicsintegrator2-pathway-expected.txt``.
    """
    # NOTE(review): judging by the fixture name, the raw file presumably lists
    # its edge-attribute columns in a non-default order -- confirm against the
    # fixture contents.
    # Plain string literals: none of these paths interpolate anything, so the
    # f-prefix was unnecessary.
    test_file = OI2_EDGE_CASES_INDIR + "omicsintegrator2-wrong-order-raw-pathway.txt"
    out_file = OUTDIR + "omicsintegrator2-wrong-order-pathway.txt"

    runner.parse_output('omicsintegrator2', test_file, out_file)
    # shallow=False forces a content comparison instead of an os.stat() signature check.
    assert filecmp.cmp(out_file, EXPDIR + "omicsintegrator2-pathway-expected.txt", shallow=False)

0 comments on commit 7b07916

Please sign in to comment.