diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java
index 0ef4187d..6423a94f 100644
--- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java
+++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/AnalyzeCommand.java
@@ -79,14 +79,24 @@ public void execute(GraphDatabaseService service) {
 			// It is automatically closed after the try block, which frees the allocated memory.
 			PrimitiveLongSet processed = Primitive.offHeapLongSet(INIT_CAP)
 		) {
 			for (Node n : topologicalOrder(service, processed)) {
 				rankDest(n);
+				scoreIndependentMutation(service, n);
 			}
 			scoreDRMutations(service);
 			tx.success();
 		}
 	}
 
+	/**
+	 * Run a {@link MutationFinderCommand} on the given node.
+	 * @param service the database service the command is executed on.
+	 * @param n the node handed to the command.
+	 */
+	private void scoreIndependentMutation(GraphDatabaseService service, Node n) {
+		new MutationFinderCommand(n).execute(service);
+	}
+
 	/**
 	 * Rank the destination nodes of the outgoing edges of the given node.
 	 * @param n the source node of the destination nodes to be ranked.
diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/MutationFinderCommand.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/MutationFinderCommand.java
new file mode 100644
index 00000000..72e1d964
--- /dev/null
+++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/MutationFinderCommand.java
@@ -0,0 +1,53 @@
+package nl.tudelft.dnainator.graph.impl.command;
+
+import nl.tudelft.dnainator.graph.impl.RelTypes;
+import nl.tudelft.dnainator.graph.interestingness.Scores;
+
+import org.neo4j.graphdb.Direction;
+import org.neo4j.graphdb.GraphDatabaseService;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.Path;
+import org.neo4j.graphdb.Transaction;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Command that scores one node: it traverses the graph from that node and stores the
+ * number of distinct end nodes of the paths returned by the traversal on the node.
+ */
+public class MutationFinderCommand implements Command {
+	private final Node mutation;
+
+	/**
+	 * Create a command for the given node.
+	 * @param mutation the node the traversal starts from and the score is stored on.
+	 */
+	public MutationFinderCommand(Node mutation) {
+		this.mutation = mutation;
+	}
+
+	/**
+	 * Traverse breadth-first over outgoing {@code SOURCE} and incoming {@code ANCESTOR_OF}
+	 * relationships, filtered by a fresh {@link PhyloEvaluator}, and store the number of
+	 * distinct end nodes under the key {@code Scores.INDEP_MUT.name()}.
+	 * @param service the database service used for the transaction and the traversal.
+	 */
+	@Override
+	public void execute(GraphDatabaseService service) {
+		Set<Node> commonAncestors = new HashSet<>();
+		try (Transaction tx = service.beginTx()) {
+			for (Path p : service.traversalDescription()
+					.breadthFirst()
+					.relationships(RelTypes.SOURCE, Direction.OUTGOING)
+					.relationships(RelTypes.ANCESTOR_OF, Direction.INCOMING)
+					.evaluator(new PhyloEvaluator())
+					.traverse(mutation)
+			) {
+				commonAncestors.add(p.endNode());
+			}
+			mutation.setProperty(Scores.INDEP_MUT.name(), commonAncestors.size());
+			tx.success();
+		}
+	}
+}
diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/PhyloEvaluator.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/PhyloEvaluator.java
new file mode 100644
index 00000000..9e14ad3a
--- /dev/null
+++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/impl/command/PhyloEvaluator.java
@@ -0,0 +1,57 @@
+package nl.tudelft.dnainator.graph.impl.command;
+
+import nl.tudelft.dnainator.graph.impl.NodeLabels;
+import nl.tudelft.dnainator.graph.impl.RelTypes;
+
+import org.neo4j.graphdb.Direction;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.Path;
+import org.neo4j.graphdb.Relationship;
+import org.neo4j.graphdb.traversal.Evaluation;
+import org.neo4j.graphdb.traversal.Evaluator;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Stateful traversal evaluator. It keeps a set of nodes ("clusters") that grows while
+ * paths are evaluated and is never cleared, so the outcome for a path depends on the
+ * paths evaluated before it.
+ */
+public class PhyloEvaluator implements Evaluator {
+	private final Set<Node> clusters = new HashSet<>();
+
+	/**
+	 * Decide what to do with the end node of the given path.
+	 * <ul>
+	 * <li>{@code NODE}-labelled: excluded, traversal continues.</li>
+	 * <li>{@code SOURCE}-labelled: added to the set, excluded, traversal continues.</li>
+	 * <li>otherwise: included and pruned as soon as one end node of an outgoing
+	 * {@code ANCESTOR_OF} relationship is not in the set; else added to the set,
+	 * excluded, traversal continues.</li>
+	 * </ul>
+	 * @param path the path whose end node is evaluated.
+	 * @return the evaluation for the given path.
+	 */
+	@Override
+	public Evaluation evaluate(Path path) {
+		Node end = path.endNode();
+		if (end.hasLabel(NodeLabels.NODE)) {
+			return Evaluation.EXCLUDE_AND_CONTINUE;
+		}
+		if (end.hasLabel(NodeLabels.SOURCE)) {
+			clusters.add(end);
+			return Evaluation.EXCLUDE_AND_CONTINUE;
+		}
+
+		for (Relationship rel : end.getRelationships(Direction.OUTGOING,
+				RelTypes.ANCESTOR_OF)) {
+			if (!clusters.contains(rel.getEndNode())) {
+				return Evaluation.INCLUDE_AND_PRUNE;
+			}
+		}
+
+		clusters.add(end);
+		return Evaluation.EXCLUDE_AND_CONTINUE;
+	}
+}
diff --git a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/interestingness/Scores.java b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/interestingness/Scores.java index 22f90511..00e9005b 100644 --- a/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/interestingness/Scores.java +++ b/dnainator-core/src/main/java/nl/tudelft/dnainator/graph/interestingness/Scores.java @@ -22,6 +22,12 @@ public int applyImportanceModifier(int rawScore) { } return multipliers[rawScore]; } + }, + INDEP_MUT("independentMutation") { + @Override + public int applyImportanceModifier(int rawScore) { + return rawScore * 100; + } }; private String name; diff --git a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java index 4a843406..e7e2c177 100644 ---
a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java +++ b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/Neo4jGraphTest.java @@ -40,7 +40,6 @@ import java.util.stream.Collectors; import static nl.tudelft.dnainator.graph.impl.properties.SequenceProperties.ID; - import static org.hamcrest.Matchers.lessThan; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -111,9 +110,10 @@ private static File getTreeFile() throws URISyntaxException { @Test public void testNodeLookup() { // CHECKSTYLE.OFF: MagicNumber - SequenceNode node1 = new SequenceNodeImpl("2", Arrays.asList("ASDF", "ASD"), 1, 5, "TATA"); - SequenceNode node2 = new SequenceNodeImpl("3", Arrays.asList("ASDF"), 5, 9, "TATA"); - SequenceNode node3 = new SequenceNodeImpl("5", Arrays.asList("ASDF"), 4, 8, "TATA"); + SequenceNode node1 = new SequenceNodeImpl("2", Arrays.asList("TKK_001", + "TKK_002"), 1, 5, "TATA"); + SequenceNode node2 = new SequenceNodeImpl("3", Arrays.asList("TKK_001"), 5, 9, "TATA"); + SequenceNode node3 = new SequenceNodeImpl("5", Arrays.asList("TKK_001"), 4, 8, "TATA"); assertEquals(node1, db.getNode("2")); assertEquals(node2, db.getNode("3")); assertEquals(node3, db.getNode("5")); @@ -126,7 +126,7 @@ public void testNodeLookup() { @Test public void testRootLookup() { // CHECKSTYLE.OFF: MagicNumber - SequenceNode root = new SequenceNodeImpl("5", Arrays.asList("ASDF"), 4, 8, "TATA"); + SequenceNode root = new SequenceNodeImpl("5", Arrays.asList("TKK_001"), 4, 8, "TATA"); assertEquals(root, db.getRootNode()); // CHECKSTYLE.ON: MagicNumber } @@ -239,13 +239,13 @@ public void testQueryFilter() { @Test public void testQuerySources() { GraphQueryDescription qd = new GraphQueryDescription() - .containsSource("ASDF"); + .containsSource("TKK_001"); Set expect = new HashSet<>(); Collections.addAll(expect, "2", "5", "3", "7", "8", "11"); assertUnorderedIDEquals(expect, db.queryNodes(qd)); // Also test for multiple 
sources (reusing the old one) - qd = qd.containsSource("ASD"); + qd = qd.containsSource("TKK_002"); Collections.addAll(expect, "9", "10"); assertUnorderedIDEquals(expect, db.queryNodes(qd));
diff --git a/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/command/MutationFinderCommandTest.java b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/command/MutationFinderCommandTest.java
new file mode 100644
index 00000000..06d0e2b2
--- /dev/null
+++ b/dnainator-core/src/test/java/nl/tudelft/dnainator/graph/impl/command/MutationFinderCommandTest.java
@@ -0,0 +1,119 @@
+package nl.tudelft.dnainator.graph.impl.command;
+
+import nl.tudelft.dnainator.annotation.impl.AnnotationCollectionImpl;
+import nl.tudelft.dnainator.annotation.impl.AnnotationImpl;
+import nl.tudelft.dnainator.core.EnrichedSequenceNode;
+import nl.tudelft.dnainator.core.impl.SequenceNodeFactoryImpl;
+import nl.tudelft.dnainator.graph.impl.Neo4jBatchBuilder;
+import nl.tudelft.dnainator.graph.impl.Neo4jGraph;
+import nl.tudelft.dnainator.graph.impl.NodeLabels;
+import nl.tudelft.dnainator.graph.impl.command.MutationFinderCommand;
+import nl.tudelft.dnainator.graph.impl.properties.SequenceProperties;
+import nl.tudelft.dnainator.parser.EdgeParser;
+import nl.tudelft.dnainator.parser.NodeParser;
+import nl.tudelft.dnainator.parser.TreeParser;
+import nl.tudelft.dnainator.parser.exceptions.ParseException;
+import nl.tudelft.dnainator.parser.impl.EdgeParserImpl;
+import nl.tudelft.dnainator.parser.impl.NodeParserImpl;
+import nl.tudelft.dnainator.tree.TreeNode;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.neo4j.graphdb.Node;
+import org.neo4j.io.fs.FileUtils;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URISyntaxException;
+import java.util.Collection;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests {@link MutationFinderCommand} against a database built from the
+ * {@code advancedtopo} fixtures in the {@code strains} test resources.
+ */
+public class MutationFinderCommandTest {
+	private static final String DB_PATH = "target/neo4j-tree-junit";
+	private static Neo4jGraph db;
+	private static InputStream nodeFile;
+	private static InputStream edgeFile;
+	private static AnnotationImpl first;
+	private static AnnotationImpl middle;
+	private static AnnotationImpl last;
+
+	/**
+	 * Setup the database and construct the graph.
+	 * @throws URISyntaxException when the tree file resource cannot be converted to a URI.
+	 */
+	@BeforeClass
+	public static void setUp() throws URISyntaxException {
+		try {
+			FileUtils.deleteRecursively(new File(DB_PATH));
+			nodeFile = getNodeFile();
+			edgeFile = getEdgeFile();
+			NodeParser np = new NodeParserImpl(new SequenceNodeFactoryImpl(),
+					new BufferedReader(new InputStreamReader(nodeFile, "UTF-8")));
+			EdgeParser ep = new EdgeParserImpl(new BufferedReader(
+					new InputStreamReader(edgeFile, "UTF-8")));
+			TreeNode phylo = new TreeParser(getTreeFile()).parse();
+			db = (Neo4jGraph) new Neo4jBatchBuilder(DB_PATH, new AnnotationCollectionImpl(), phylo)
+				.constructGraph(np, ep)
+				.build();
+		} catch (IOException e) {
+			fail("Couldn't initialize DB");
+		} catch (ParseException e) {
+			fail("Couldn't parse file: " + e.getMessage());
+		}
+		//CHECKSTYLE.OFF: MagicNumber
+		first = new AnnotationImpl("first", 0, 10, true);
+		middle = new AnnotationImpl("middle", 5, 25, true);
+		last = new AnnotationImpl("last", 20, 30, true);
+		//CHECKSTYLE.ON: MagicNumber
+		db.addAnnotation(first);
+		db.addAnnotation(middle);
+		db.addAnnotation(last);
+	}
+
+	private static InputStream getNodeFile() {
+		return MutationFinderCommandTest.class.getResourceAsStream("/strains/advancedtopo.node.graph");
+	}
+
+	private static InputStream getEdgeFile() {
+		return MutationFinderCommandTest.class.getResourceAsStream("/strains/advancedtopo.edge.graph");
+	}
+
+	private static File getTreeFile() throws URISyntaxException {
+		return new File(MutationFinderCommandTest.class.getResource("/strains/advancedtopo.nwk")
+				.toURI());
+	}
+
+	/**
+	 * Run the command on the node whose ID property is "6".
+	 * NOTE(review): this only checks that the command completes without throwing;
+	 * the score it stores is not asserted.
+	 */
+	@Test
+	public void testIndependentMutations() {
+		db.execute(e -> {
+			Node node = e.findNode(NodeLabels.NODE, SequenceProperties.ID.name(), "6");
+			new MutationFinderCommand(node).execute(e);
+		});
+	}
+
+	/**
+	 * Shut the database down and delete its files.
+	 * @throws IOException when the database could not be deleted
+	 */
+	@AfterClass
+	public static void cleanUp() throws IOException {
+		db.shutdown();
+		FileUtils.deleteRecursively(new File(DB_PATH));
+	}
+}
diff --git a/dnainator-core/src/test/resources/strains/advancedtopo.edge.graph b/dnainator-core/src/test/resources/strains/advancedtopo.edge.graph
new file mode 100644
index 00000000..b70d9128
--- /dev/null
+++ b/dnainator-core/src/test/resources/strains/advancedtopo.edge.graph
@@ -0,0 +1,6 @@
+1 2
+2 3
+3 4
+5 6
+6 7
+7 8
\ No newline at end of file
diff --git a/dnainator-core/src/test/resources/strains/advancedtopo.node.graph b/dnainator-core/src/test/resources/strains/advancedtopo.node.graph
new file mode 100644
index 00000000..b6aeb8f3
--- /dev/null
+++ b/dnainator-core/src/test/resources/strains/advancedtopo.node.graph
@@ -0,0 +1,16 @@
+> 1 | TKK_001 | 1 | 5
+TATA
+> 2 | TKK_001,TKK_002 | 2 | 6
+TATA
+> 3 | TKK_001,TKK_002,TKK_003 | 3 | 7
+TATA
+> 4 | TKK_003,TKK_004,TKK_005,TKK_006 | 4 | 8
+TATA
+> 5 | TKK_006 | 5 | 9
+TATA
+> 6 | TKK_001,TKK_003,TKK_005 | 6 | 10
+TATA
+> 7 | TKK_004,TKK_002,TKK_003 | 7 | 11
+TATA
+> 8 | TKK_001,TKK_002,TKK_003,TKK_004,TKK_005,TKK_006 | 8 | 12
+TATA
\ No newline at end of file
diff --git a/dnainator-core/src/test/resources/strains/advancedtopo.nwk b/dnainator-core/src/test/resources/strains/advancedtopo.nwk
new file mode 100644
index 00000000..de38d994
--- /dev/null
+++ b/dnainator-core/src/test/resources/strains/advancedtopo.nwk
@@ -0,0 +1 @@
+((TKK_001:0.1,TKK_002:0.2),(TKK_003:0.3,((TKK_004:0.4,TKK_005:0.5),TKK_006:0.6)))
\ No newline at end of file
diff
--git a/dnainator-core/src/test/resources/strains/topo.node.graph b/dnainator-core/src/test/resources/strains/topo.node.graph index d89b3d73..a5c73b67 100644 --- a/dnainator-core/src/test/resources/strains/topo.node.graph +++ b/dnainator-core/src/test/resources/strains/topo.node.graph @@ -1,16 +1,16 @@ -> 2 | ASDF,ASD | 1 | 5 +> 2 | TKK_001,TKK_002 | 1 | 5 TATA -> 9 | ASD | 2 | 6 +> 9 | TKK_002 | 2 | 6 TATA -> 10 | ASD | 3 | 7 +> 10 | TKK_002 | 3 | 7 TATA -> 5 | ASDF | 4 | 8 +> 5 | TKK_001 | 4 | 8 TATA -> 3 | ASDF | 5 | 9 +> 3 | TKK_001 | 5 | 9 TATA -> 7 | ASDF | 6 | 10 +> 7 | TKK_001,TKK_004 | 6 | 10 TATA -> 11 | ASD,FDSA,ASDF | 7 | 11 +> 11 | TKK_001,TKK_002,TKK_003 | 7 | 11 TATA -> 8 | ASDF | 8 | 12 +> 8 | TKK_001,TKK_002 | 8 | 12 TATA \ No newline at end of file diff --git a/dnainator-core/src/test/resources/strains/topo.nwk b/dnainator-core/src/test/resources/strains/topo.nwk index b39014db..b1dae237 100644 --- a/dnainator-core/src/test/resources/strains/topo.nwk +++ b/dnainator-core/src/test/resources/strains/topo.nwk @@ -1 +1 @@ -(FDSA:0.1,(ASDF:0.2,ASD:0.3)) \ No newline at end of file +((TKK_001:0.1,TKK_002:0.2),(TKK_003:0.3,TKK_004:0.4)) \ No newline at end of file