-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
contributing guide: adding local neighborhood #161
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,14 @@ | ||
# Create a Docker image for the Local Neighborhood algorithm here

# Local Neighborhood wrapper

FROM python:3.12.3-alpine3.20

WORKDIR /LocalNeighborhood

# Copy the py file to the working directory
COPY local_neighborhood.py .

# The network and node input files are intentionally NOT copied into the image.
# They are supplied at run time (the wrapper mounts them as volumes and passes
# their paths via --network / --nodes), so baking example data into the image
# would only enlarge it and tie the build to files that need not exist.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't need to copy the data into the image. |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It isn't necessary to commit these copies of the files that are in the |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
A|B|E | ||
C|B | ||
C|D | ||
D|E | ||
A|E |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
A|B | ||
C|B | ||
C|D | ||
D|E | ||
A|E |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
A | ||
B |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
A|B | ||
C|B | ||
A|E |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
A|B | ||
C|B | ||
A|E |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,136 @@ | ||||
from spras.prm import PRM | ||||
from pathlib import Path | ||||
from spras.containers import prepare_volume, run_container | ||||
from spras.util import add_rank_column | ||||
import pandas as pd | ||||
from spras.interactome import reinsert_direction_col_undirected | ||||
|
||||
__all__ = ['LocalNeighborhood'] | ||||
|
||||
|
||||
class LocalNeighborhood(PRM):
    """
    Wrapper for the Local Neighborhood algorithm.

    Provides the three steps used for an algorithm wrapper in this file:
    writing the input files from a dataset, running the algorithm inside a
    container, and converting the raw output into the universal format.
    """

    # Keys that must be present in the filename_map given to generate_inputs.
    # NOTE(review): 'nodetypes' is kept so existing callers keep working; Local
    # Neighborhood has only one kind of node, so a more neutral name could be
    # considered in a coordinated rename.
    required_inputs = ['network', 'nodetypes']

    @staticmethod
    def generate_inputs(data, filename_map):
        """
        Access fields from the dataset and write the required input files
        @param data: dataset
        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
        @return: None, the node and network files are written to the paths in filename_map
        @raise ValueError: if a required filename is missing or the dataset has neither
                           a prize column nor active, sources, and targets columns
        """
        for input_type in LocalNeighborhood.required_inputs:
            if input_type not in filename_map:
                raise ValueError(f"{input_type} filename is missing")

        # Select the nodes of interest from whichever node columns the dataset provides
        if data.contains_node_columns('prize'):
            node_df = data.request_node_columns(['prize'])
        elif data.contains_node_columns(['active', 'sources', 'targets']):
            node_df = data.request_node_columns(['active', 'sources', 'targets'])
        else:
            raise ValueError("Local Neighborhood requires node prizes or sources and targets")

        # Only the node identifiers are written, one per line, so no derived
        # prize values need to be computed here
        node_df.to_csv(filename_map['nodetypes'], sep='\t', index=False, columns=['NODEID'], header=False)

        # The network file is a headerless, pipe-delimited list of the two interactor columns
        edges_df = data.get_interactome()
        edges_df.to_csv(filename_map['network'], sep='|', index=False,
                        columns=['Interactor1', 'Interactor2'],
                        header=False)

    @staticmethod
    def run(nodetypes=None, network=None, output_file=None, container_framework="docker"):
        """
        Run Local Neighborhood in a container
        @param nodetypes: input file of node identifiers (required)
        @param network: input network file (required)
        @param output_file: path to the output pathway file (required)
        @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
        @raise ValueError: if any of the required arguments is missing
        """
        if not nodetypes or not network or not output_file:
            raise ValueError('Required Local Neighborhood arguments are missing')

        work_dir = '/spras'

        # Each volume is a tuple (src, dest)
        volumes = list()

        bind_path, node_file = prepare_volume(nodetypes, work_dir)
        volumes.append(bind_path)

        bind_path, network_file = prepare_volume(network, work_dir)
        volumes.append(bind_path)

        # The output file path is passed directly to the algorithm, so it is
        # mapped into the container like the input files
        bind_path, mapped_out_file = prepare_volume(output_file, work_dir)
        volumes.append(bind_path)

        command = ['python',
                   '/LocalNeighborhood/local_neighborhood.py',
                   '--network', network_file,
                   '--nodes', node_file,
                   '--output', mapped_out_file]

        print(f'Running Local Neighborhood with arguments: {" ".join(command)}', flush=True)

        container_suffix = "local-neighborhood"
        out = run_container(container_framework,
                            container_suffix,
                            command,
                            volumes,
                            work_dir)
        print(out)

    @staticmethod
    def parse_output(raw_pathway_file, standardized_pathway_file):
        """
        Convert a predicted pathway into the universal format
        @param raw_pathway_file: pathway file produced by an algorithm's run function
        @param standardized_pathway_file: the same pathway written in the universal format
        """
        try:
            # The raw output is a headerless, pipe-delimited edge list
            df = pd.read_csv(raw_pathway_file, sep='|', header=None)
        except pd.errors.EmptyDataError:
            # An empty raw pathway yields an empty standardized pathway file
            with open(standardized_pathway_file, 'w'):
                pass
            return
        # NOTE(review): presumably these helpers append the rank and the
        # undirected direction columns of the universal format; confirm
        # against spras.util and spras.interactome
        df = add_rank_column(df)
        df = reinsert_direction_col_undirected(df)
        df.to_csv(standardized_pathway_file, index=False, header=False, sep='\t')
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
A|B | ||
C|B | ||
A|E |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
A B 1 U | ||
B C 1 U | ||
A D 1 U | ||
C D 1 U | ||
C E 1 U | ||
C F 1 U |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
A|B | ||
B|C | ||
A|D | ||
C|D | ||
C|E | ||
C|F |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you haven't explicitly tagged your image with a version, the default tag is latest