Skip to content

[WIP] Rewrite bubble detection. #181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
56764fc
First stab at creating bubbles, with source and sink labels and relat…
Balletie Jun 16, 2015
da47b66
Replace test graph with one that conserves flow. Update tests accordi…
Balletie Jun 16, 2015
15ca256
Remove visited set and keep track of processed rels using property
Balletie Jun 16, 2015
d1e4a10
Create bubbles in PathExpander instead, correctly this time.
Balletie Jun 17, 2015
e72becc
Test the bubbles with a new graph, update and enable old tests
Balletie Jun 17, 2015
2fde348
Also test for multiple source nodes in bubbles. Currently fails
Balletie Jun 17, 2015
9120094
Add some comments, split some functions.
gfokkema Jun 18, 2015
df682eb
Add back propagateSourceIDs, and also store the propagatedSources for…
Balletie Jun 18, 2015
c99a7e9
WIP: Initial clustering implementation for bubbles
Balletie Jun 18, 2015
b7d94c9
Trim source and sink of clustered bubbles, return source and sink as …
Balletie Jun 18, 2015
dc82975
Fix some perfomance issues: get rid of nested transactions, do explic…
Balletie Jun 18, 2015
c695e29
Test correctly, and fix bugs that arose (see description)
Balletie Jun 18, 2015
998e07f
Set the interestingness property. Get the individual score in Cluster…
Balletie Jun 18, 2015
eccd398
Extend the test for individual nodes, and fix bug that arose
Balletie Jun 19, 2015
07fbc0a
Performance improvement: use a query when clustering a large bubble
Balletie Jun 19, 2015
27ad184
Extend clustertest with tests for 1) duplicates 2) missing 3) nested …
Balletie Jun 20, 2015
a4093a2
Keep a map from bubble IDs to their nested bubble IDs. also merge eve…
Balletie Jun 20, 2015
ed0c423
Completely rewrite clustering to use a recursive traversal.
Balletie Jun 20, 2015
90796cb
Also test the combined graphs for correct clustering
Balletie Jun 20, 2015
e41a259
Fix off-by-one error in AllClustersQuery. Test passing.
Balletie Jun 20, 2015
deffb97
Keep track of sink nodes that are visited, so there are no duplicates
Balletie Jun 20, 2015
7c19a06
Merge remote-tracking branch 'jente/redo-strain' into skip-cluster-bu…
gfokkema Jun 20, 2015
4332cb2
Continue clustering on visited nodes, pretend source is not there whe…
Balletie Jun 22, 2015
c35543e
Work in progress for a new bubble detection algorithm.
Balletie Jun 22, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ public interface Graph extends AnnotationCollection {

/**
* Return a list of nodes that belong to the same cluster as the given startId.
* @param startNodes the start nodes
* @param start the start nodes
* @param end the maximum rank of the cluster
* @param threshold the clustering threshold
* @return a list representing the cluster
*/
Map<Integer, List<Cluster>> getAllClusters(List<String> startNodes, int end, int threshold);
Map<Integer, List<Cluster>> getAllClusters(int start, int end, int threshold);

/**
* Sets the interestingness strategy which calculates the interestingness when
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ public int getRankFromBasePair(int base) {
}

@Override
public Map<Integer, List<Cluster>> getAllClusters(List<String> startNodes,
public Map<Integer, List<Cluster>> getAllClusters(int start,
int end, int threshold) {
return query(new AllClustersQuery(startNodes, end, threshold, is));
return query(new AllClustersQuery(start, end, threshold, is));
}

@Override
Expand Down Expand Up @@ -268,8 +268,12 @@ public void setInterestingnessStrategy(InterestingnessStrategy is) {
* order, to assign ranks and scores to nodes.
*/
protected void analyze() {
// Rank the graph.
execute(e -> new AnalyzeCommand(rootIterator()).execute(e));
ResourceIterator<Node> roots;
try (Transaction tx = service.beginTx()) {
roots = rootIterator();
new AnalyzeCommand(roots).execute(service);
tx.success();
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package nl.tudelft.dnainator.graph.impl;

import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;

/**
 * Static helpers for querying the {@link RelTypes#NEXT} relationships of
 * neo4j nodes. This class only holds static methods and cannot be instantiated.
 */
public final class Neo4jUtil {

    /** Hidden constructor: this is a static-only utility class. */
    private Neo4jUtil() {
    }

    /**
     * @param n the {@link Node}
     * @return the number of NEXT relationships ending in the node.
     */
    public static int inDegree(Node n) {
        return degree(n, Direction.INCOMING);
    }

    /**
     * @param n the {@link Node}
     * @return the number of NEXT relationships starting at the node.
     */
    public static int outDegree(Node n) {
        return degree(n, Direction.OUTGOING);
    }

    /**
     * @param n the {@link Node}
     * @return the NEXT relationships starting at the node.
     */
    public static Iterable<Relationship> outgoing(Node n) {
        return relationships(n, Direction.OUTGOING);
    }

    /**
     * @param n the {@link Node}
     * @return the NEXT relationships ending in the node.
     */
    public static Iterable<Relationship> incoming(Node n) {
        return relationships(n, Direction.INCOMING);
    }

    /** Counts the NEXT relationships of the node in the given direction. */
    private static int degree(Node node, Direction direction) {
        return node.getDegree(RelTypes.NEXT, direction);
    }

    /** Fetches the NEXT relationships of the node in the given direction. */
    private static Iterable<Relationship> relationships(Node node, Direction direction) {
        return node.getRelationships(RelTypes.NEXT, direction);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ public enum NodeLabels implements Label {
ANNOTATION,
DRMUTATION,
SOURCE,
NODE
NODE,
BUBBLE_SOURCE,
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ public enum RelTypes implements RelationshipType {
ANNOTATED,
NEXT,
SOURCE,
MUTATION,
BUBBLE_SOURCE_OF
}
Original file line number Diff line number Diff line change
@@ -1,45 +1,37 @@
package nl.tudelft.dnainator.graph.impl.command;

import java.util.HashMap;
import java.util.Map;
import java.util.Iterator;

import nl.tudelft.dnainator.graph.impl.NodeLabels;
import nl.tudelft.dnainator.graph.impl.RelTypes;
import nl.tudelft.dnainator.graph.impl.properties.AnnotationProperties;
import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties;
import nl.tudelft.dnainator.graph.impl.properties.SourceProperties;
import nl.tudelft.dnainator.graph.impl.query.BubbleSkipper;
import nl.tudelft.dnainator.graph.interestingness.Scores;

import org.neo4j.collection.primitive.Primitive;
import org.neo4j.collection.primitive.PrimitiveLongIterator;
import org.neo4j.collection.primitive.PrimitiveLongSet;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.traversal.InitialBranchState.State;
import org.neo4j.graphdb.traversal.InitialBranchState;
import org.neo4j.graphdb.traversal.Uniqueness;

import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.BASE_DIST;
import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.RANK;
import static nl.tudelft.dnainator.graph.impl.Neo4jUtil.incoming;
import static nl.tudelft.dnainator.graph.impl.Neo4jUtil.inDegree;
import static nl.tudelft.dnainator.graph.impl.Neo4jUtil.outgoing;
import static nl.tudelft.dnainator.graph.impl.Neo4jUtil.outDegree;
import static org.neo4j.helpers.collection.IteratorUtil.loop;

/**
* The {@link AnalyzeCommand} creates a topological ordering and
* ranks the nodes in the Neo4j database accordingly.
*/
public class AnalyzeCommand implements Command {
private static final int INIT_CAP = 4096;
private static final String LABEL = "n";
private static final String GET_NODES_BASEDIST =
"MATCH (n:" + NodeLabels.NODE.name() + ")-[:" + RelTypes.SOURCE.name() + "]-s, "
+ " (t {" + SourceProperties.SOURCE.name() + ": \"TKK_REF\"})"
+ "WHERE NOT (n-->t)"
+ " AND {dist} >= n." + SequenceProperties.BASE_DIST.name()
+ " AND {dist} < n." + SequenceProperties.BASE_DIST.name()
+ " + n." + Scores.SEQ_LENGTH.name() + " RETURN n AS " + LABEL;
private ResourceIterator<Node> roots;
private PrimitiveLongSet bubbleSources;

/**
* Create a new {@link AnalyzeCommand} that will
Expand All @@ -48,55 +40,136 @@ public class AnalyzeCommand implements Command {
*/
public AnalyzeCommand(ResourceIterator<Node> roots) {
this.roots = roots;
this.bubbleSources = Primitive.longSet();
}

/**
* Return a topological ordering on the specified database service.
* @param service the database service
* @return a topological ordering, starting from the roots
*/
@SuppressWarnings("unchecked")
public Iterable<Node> topologicalOrder(GraphDatabaseService service) {
return topologicalOrder(service, Primitive.longSet());
}

private Iterable<Node> topologicalOrder(GraphDatabaseService service,
PrimitiveLongSet processed) {
return service.traversalDescription()
// Depth first order, for creating bubbles.
.depthFirst()
.expand(new TopologicalPathExpander()
, new State<>(processed, null))
.expand(new TopologicalPathExpander(), InitialBranchState.NO_STATE)
// We manage uniqueness for ourselves.
.uniqueness(Uniqueness.NONE)
.traverse(loop(roots))
.nodes();
}

/**
 * Builds the traversal that is used to look for a bubble starting at the given source.
 * The traversal is breadth first and expands paths with the {@link BubbleSkipper}.
 * @param service the database service.
 * @param source the node to start from.
 * @return the nodes of a breadth first traversal, starting from the given source.
 */
public Iterable<Node> bubbleTraverser(GraphDatabaseService service, Node source) {
return service.traversalDescription()
.breadthFirst()
// Skip nested bubbles.
.expand(BubbleSkipper.get())
.traverse(source)
.nodes();
}

@Override
public void execute(GraphDatabaseService service) {
try (
Transaction tx = service.beginTx();
// Our set is located "off heap", i.e. not managed by the garbage collector.
// It is automatically closed after the try block, which frees the allocated memory.
PrimitiveLongSet processed = Primitive.offHeapLongSet(INIT_CAP)
) {
for (Node n : topologicalOrder(service, processed)) {
rankDest(n);
for (Node n : topologicalOrder(service)) {
rankDest(n);
if (!bubbleSources.contains(n.getId()) && outDegree(n) >= 2) {
System.out.println("--> Begin Recursion level: 0");;
tryBubble(service, n, 0);
System.out.println("--> End Recursion level: 0");;
}
scoreDRMutations(service);
tx.success();
}
}

/**
 * Tries to find a bubble that has the given node as its source.
 * <p>
 * The start node is recorded in {@code bubbleSources}, after which the nodes
 * produced by {@link #bubbleTraverser} are visited one by one. While visiting,
 * the set of "end relationships" (outgoing relationships of visited nodes whose
 * end node has not been visited yet) is maintained. As soon as all of those
 * relationships end in the same node, the currently visited node is considered
 * as sink: when its in-degree equals the number of end relationships and the
 * nodes in between form a simple bubble, the start node gets the
 * {@link NodeLabels#BUBBLE_SOURCE} label and a
 * {@link RelTypes#BUBBLE_SOURCE_OF} relationship to the sink. In every other
 * case the attempt is abandoned.
 * <p>
 * Branching nodes encountered on the way are tried recursively first, unless
 * they were already tried before. This mirrors the check in {@code execute}
 * and prevents duplicate BUBBLE_SOURCE_OF relationships and repeated work.
 * @param service the database service.
 * @param start the candidate source node of the bubble.
 * @param recursionLevel the current nesting depth, only used for the trace output.
 */
private void tryBubble(GraphDatabaseService service, Node start, int recursionLevel) {
    System.out.println("Try bubble for source: " + start.getProperty("ID"));
    // Remember that this node has been tried, so it is never tried twice.
    bubbleSources.add(start.getId());
    PrimitiveLongSet pathNodes = Primitive.longSet();
    PrimitiveLongSet endRelationships = Primitive.longSet();
    Iterator<Node> it = bubbleTraverser(service, start).iterator();
    // The traversal yields the start node first: skip it, but do open its paths.
    advancePaths(endRelationships, it.next());
    while (it.hasNext()) {
        Node n = it.next();
        System.out.println("Current node: " + n.getProperty("ID"));
        if (convergentPaths(service, endRelationships)) {
            System.out.println("Try bubble: " + start.getProperty("ID") + ", " + n.getProperty("ID"));
            if (inDegree(n) != endRelationships.size()) {
                System.out.println("In-degree not equal to number of paths, giving up on: " + start.getProperty("ID"));
                return;
            }
            if (!isSimpleBubble(service, pathNodes, start.getId(), n.getId())) {
                System.out.println("Not a simple bubble.");
                return;
            }
            System.out.println("Found bubble: " + start.getProperty("ID") + ", " + n.getProperty("ID"));
            start.addLabel(NodeLabels.BUBBLE_SOURCE);
            start.createRelationshipTo(n, RelTypes.BUBBLE_SOURCE_OF);
            return;
        }
        pathNodes.add(n.getId());
        // Only recurse into branching nodes that have not been tried yet.
        if (outDegree(n) >= 2 && !bubbleSources.contains(n.getId())) {
            System.out.println("--> Begin Recursion level: " + (recursionLevel + 1));
            tryBubble(service, n, recursionLevel + 1);
            System.out.println("--> End Recursion level: " + (recursionLevel + 1));
        }
        advancePaths(endRelationships, n);
    }
    System.out.println("Giving up for source: " + start.getProperty("ID"));
}

/**
 * Checks whether the nodes between source and sink are only connected, through
 * NEXT relationships, to each other, to the source or to the sink.
 * @param service the database service.
 * @param pathNodes the ids of the nodes between source and sink.
 * @param source the id of the source node.
 * @param sink the id of the sink node.
 * @return false as soon as a path node has a NEXT relationship to any other node,
 *         true otherwise.
 */
private boolean isSimpleBubble(GraphDatabaseService service,
        PrimitiveLongSet pathNodes, long source, long sink) {
    for (PrimitiveLongIterator ids = pathNodes.iterator(); ids.hasNext();) {
        Node inner = service.getNodeById(ids.next());
        for (Relationship rel : inner.getRelationships(RelTypes.NEXT)) {
            System.out.println("Test foreign relationship: " + rel.getStartNode().getProperty("ID") + " -> " + rel.getEndNode().getProperty("ID"));
            long neighbour = rel.getOtherNode(inner).getId();
            boolean insideBubble = neighbour == sink
                    || neighbour == source
                    || pathNodes.contains(neighbour);
            if (!insideBubble) {
                return false;
            }
        }
    }
    return true;
}

/**
 * Checks whether all given relationships end in one and the same node.
 * @param service the database service.
 * @param endRelationships the ids of the relationships to check.
 * @return true when every relationship has the same end node, false when at
 *         least two end nodes differ or when there are no relationships at all.
 */
private boolean convergentPaths(GraphDatabaseService service,
        PrimitiveLongSet endRelationships) {
    PrimitiveLongIterator it = endRelationships.iterator();
    // Without any open path there is nothing that can converge.
    if (!it.hasNext()) {
        return false;
    }
    // Every other end node is compared against the end node of the first relationship.
    long expected = service.getRelationshipById(it.next()).getEndNode().getId();
    while (it.hasNext()) {
        long endID = service.getRelationshipById(it.next()).getEndNode().getId();
        if (endID != expected) {
            return false;
        }
    }
    return true;
}

/**
 * Moves the tracked paths past the given node: the NEXT relationships that
 * end in the node are dropped from the set, the NEXT relationships that
 * start at the node are added to it.
 * @param endRelationships the ids of the relationships at the end of the paths.
 * @param n the node to advance over.
 */
private void advancePaths(PrimitiveLongSet endRelationships,
        Node n) {
    // The paths that arrived at this node are consumed first...
    incoming(n).forEach(arriving -> endRelationships.remove(arriving.getId()));
    // ...and are then continued along each relationship leaving it.
    outgoing(n).forEach(leaving -> endRelationships.add(leaving.getId()));
}

private void rankDest(Node n) {
int baseSource = (int) n.getProperty(BASE_DIST.name())
+ (int) n.getProperty(Scores.SEQ_LENGTH.name());
int rankSource = (int) n.getProperty(RANK.name()) + 1;

for (Relationship r : n.getRelationships(RelTypes.NEXT, Direction.OUTGOING)) {
for (Relationship r : outgoing(n)) {
Node dest = r.getEndNode();

if ((int) dest.getProperty(BASE_DIST.name()) < baseSource) {
Expand All @@ -107,30 +180,4 @@ private void rankDest(Node n) {
}
}
}

/**
 * Counts drug resistance mutations on the nodes they apply to.
 * For every ANNOTATED relationship of every DRMUTATION node a base distance
 * is computed, and the DR_MUT score of each node returned by the
 * GET_NODES_BASEDIST query for that distance is incremented by one.
 * @param service the graph service
 */
private void scoreDRMutations(GraphDatabaseService service) {
    Map<String, Object> params = new HashMap<>(1);
    ResourceIterator<Node> annotations = service.findNodes(NodeLabels.DRMUTATION);
    while (annotations.hasNext()) {
        Node annotation = annotations.next();
        for (Relationship annotated : annotation.getRelationships(RelTypes.ANNOTATED)) {
            // Start of the annotation on the reference, minus the start of the
            // annotated node on the reference, plus the base distance of that node.
            int basedist = (int) annotation.getProperty(AnnotationProperties.STARTREF.name())
                    - (int) annotated.getStartNode().getProperty(SequenceProperties.STARTREF.name())
                    + (int) annotated.getStartNode().getProperty(SequenceProperties.BASE_DIST.name());

            params.put("dist", basedist);
            ResourceIterator<Node> mutations = service.execute(GET_NODES_BASEDIST, params)
                    .columnAs(LABEL);
            while (mutations.hasNext()) {
                Node mutated = mutations.next();
                int score = (int) mutated.getProperty(Scores.DR_MUT.name(), 0);
                mutated.setProperty(Scores.DR_MUT.name(), score + 1);
            }
        }
    }
}
}
Loading