Skip to content

Commit

Permalink
Merge pull request #190 from ntalluri/meo_node_id_error
Browse files Browse the repository at this point in the history
Update MEO Node ID Error
  • Loading branch information
agitter authored Nov 8, 2024
2 parents a26f4d0 + de53952 commit 1a34151
Show file tree
Hide file tree
Showing 16 changed files with 108 additions and 19 deletions.
17 changes: 15 additions & 2 deletions spras/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@

__all__ = ['MEO', 'write_properties']

# Replaces all underscores in the node names with a Unicode separator
# MEO keeps only the substring up to the first underscore when parsing node names
# https://github.com/agitter/meo/blob/1fe57e8ff3952c494e2b14dfdc563a84596e2fcd/src/alg/Vertex.java#L56-L71
underscore_replacement = '꧁SEP꧂'


# Only supports the Random orientation algorithm
# Does not support MINSAT or MAXCSP
Expand Down Expand Up @@ -63,6 +68,8 @@ def write_properties(filename=Path('properties.txt'), edges=None, sources=None,
- MEO tracks the directionality of the original edges, but all of its output edges are directed.
- To remain accurate to MEO's design we will also treat the output graph as directed
"""


class MEO(PRM):
required_inputs = ['sources', 'targets', 'edges']

Expand All @@ -88,18 +95,21 @@ def generate_inputs(data, filename_map):
# TODO test whether this selection is needed, what values could the column contain that we would want to
# include or exclude?
nodes = nodes.loc[nodes[node_type]]
# replace _'s with underscore_replacement
nodes['NODEID'] = nodes['NODEID'].str.replace('_', underscore_replacement)
nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False)

# Create network file
edges = data.get_interactome()

# Format network file
edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)')

# replace _'s with underscore_replacement
edges['Interactor1'] = edges['Interactor1'].str.replace('_', underscore_replacement)
edges['Interactor2'] = edges['Interactor2'].str.replace('_', underscore_replacement)
edges.to_csv(filename_map['edges'], sep='\t', index=False,
columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False)


# TODO add parameter validation
# TODO document required arguments
@staticmethod
Expand Down Expand Up @@ -181,6 +191,9 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Columns Source Type Target Oriented Weight
df = raw_pathway_df(raw_pathway_file, sep='\t', header=0)
if not df.empty:
# Replace underscore_replacement with _
df['Source'] = df['Source'].str.replace(underscore_replacement, '_')
df['Target'] = df['Target'].str.replace(underscore_replacement, '_')
# Keep only edges that were assigned an orientation (direction)
df = df.loc[df['Oriented']]
# TODO what should be the edge rank?
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#Interactor1 Interactor2 Weight
A B 0.98
test_A B 0.98
B C 0.77
2 changes: 1 addition & 1 deletion test/generate-inputs/expected/domino-network-expected.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
ID_interactor_A ppi ID_interactor_B
ENSG0A ppi ENSG0B
ENSG0test_A ppi ENSG0B
ENSG0B ppi ENSG0C
2 changes: 1 addition & 1 deletion test/generate-inputs/expected/meo-edges-expected.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
A (pp) B 0.98
test꧁SEP꧂A (pp) B 0.98
B (pp) C 0.77
4 changes: 2 additions & 2 deletions test/generate-inputs/expected/mincostflow-edges-expected.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
A B 0.98
test_A B 0.98
B C 0.77
B A 0.98
B test_A 0.98
C B 0.77
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
protein1 protein2 weight directionality
A B 0.98 U
test_A B 0.98 U
B C 0.77 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
protein1 protein2 cost
A B 0.52
test_A B 0.52
B C 0.73
4 changes: 2 additions & 2 deletions test/generate-inputs/expected/pathlinker-network-expected.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#Interactor1 Interactor2 Weight
A B 0.98
test_A B 0.98
B C 0.77
B A 0.98
B test_A 0.98
C B 0.77
2 changes: 2 additions & 0 deletions test/generate-inputs/inputs/network.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
test_A B 0.98 U
B C 0.77 U
3 changes: 3 additions & 0 deletions test/generate-inputs/inputs/node-prizes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
NODEID prize active
test_A 2 true
C 5.7 true
1 change: 1 addition & 0 deletions test/generate-inputs/inputs/sources.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test_A
1 change: 1 addition & 0 deletions test/generate-inputs/inputs/targets.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
C
69 changes: 69 additions & 0 deletions test/generate-inputs/inputs/test_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
hash_length: 7
container_framework: docker
unpack_singularity: false
container_registry:
base_url: docker.io
owner: reedcompbio

algorithms:
- name: "pathlinker"
params:
include: true
run1:
k: range(100,201,100)

- name: "omicsintegrator1"
params:
include: true
run1:
b: [5, 6]
w: np.linspace(0,5,2)
d: [10]

- name: "omicsintegrator2"
params:
include: true
run1:
b: [4]
g: [0]
run2:
b: [2]
g: [3]

- name: "meo"
params:
include: true
run1:
max_path_length: [3]
local_search: ["Yes"]
rand_restarts: [10]

- name: "mincostflow"
params:
include: true
run1:
flow: [1] # The flow must be an int
capacity: [1]

- name: "allpairs"
params:
include: true

- name: "domino"
params:
include: true
run1:
slice_threshold: [0.3]
module_threshold: [0.05]

datasets:
-
# Labels can only contain letters, numbers, or underscores
label: test_data
node_files: ["node-prizes.txt", "sources.txt", "targets.txt"]
# DataLoader.py can currently only load a single edge file, which is the primary network
edge_files: ["network.txt"]
# Placeholder
other_files: []
# Relative path from the spras directory
data_dir: "test/generate-inputs/inputs"
8 changes: 4 additions & 4 deletions test/generate-inputs/test_generate_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
'domino': 'network',
'pathlinker': 'network',
'allpairs': 'network'
}
}


class TestGenerateInputs:
Expand All @@ -29,14 +29,14 @@ def setup_class(cls):
Path(OUTDIR).mkdir(parents=True, exist_ok=True)

def test_prepare_inputs_networks(self):
config_loc = os.path.join("config", "config.yaml")
config_loc = os.path.join("test", "generate-inputs", "inputs", "test_config.yaml")

with open(config_loc) as config_file:
config = yaml.load(config_file, Loader=yaml.FullLoader)
test_file = "test/generate-inputs/output/test_pickled_dataset.pkl"

data0_dataset = next((ds for ds in config["datasets"] if ds["label"] == "data0"), None)
runner.merge_input(data0_dataset, test_file)
test_dataset = next((ds for ds in config["datasets"] if ds["label"] == "test_data"), None)
runner.merge_input(test_dataset, test_file)

for algo in algo_exp_file.keys():
inputs = runner.get_required_inputs(algo)
Expand Down
4 changes: 2 additions & 2 deletions test/parse-outputs/expected/meo-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Node1 Node2 Rank Direction
GENEA GENEC 1 D
GENEC GENEB 1 D
GENE_A GENE_C 1 D
GENE_C GENEB 1 D
4 changes: 2 additions & 2 deletions test/parse-outputs/input/meo-raw-pathway.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Source Type Target Oriented Weight
GENEA pp GENEC true 0.5
GENEC pd GENEB true 0.5
GENE꧁SEP꧂A pp GENE꧁SEP꧂C true 0.5
GENE꧁SEP꧂C pd GENEB true 0.5

0 comments on commit 1a34151

Please sign in to comment.