Skip to content

Commit 04cdd59

Browse files
committed
Add hyperparameter tuning to node embeddings
1 parent bcfac5d commit 04cdd59

7 files changed

+1766
-150
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Creates a smaller projection by sampling the original graph using "Common Neighbour Aware Random Walk"
// (procedure "gds.graph.sample.cnarw").
// Parameters:
//   $dependencies_projection                - name of the existing projection that is sampled
//   $dependencies_projection_sampling_ratio - sampling ratio, converted with toFloat() before it is used
// Returns the statistics yielded by the procedure call without further processing.
// NOTE(review): the new graph is named '<projection>-sampled-cleaned', but the graph that is sampled
// is '<projection>' without a '-cleaned' suffix. Confirm that sampling the non-cleaned projection is intended.
2+
3+
CALL gds.graph.sample.cnarw(
4+
$dependencies_projection + '-sampled-cleaned', // name of the sampled graph that gets created
5+
$dependencies_projection, // name of the graph to sample from
6+
{
7+
samplingRatio: toFloat($dependencies_projection_sampling_ratio) // presumably passed as a string by the caller, hence toFloat - TODO confirm
8+
}
9+
)
10+
YIELD graphName, fromGraphName, nodeCount, relationshipCount, startNodeCount, projectMillis
11+
RETURN graphName, fromGraphName, nodeCount, relationshipCount, startNodeCount, projectMillis

cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ CALL gds.fastRP.stream(
44
$dependencies_projection + '-cleaned', {
55
embeddingDimension: toInteger($dependencies_projection_embedding_dimension)
66
,randomSeed: 30
7+
,normalizationStrength: 0.3
78
,relationshipWeightProperty: $dependencies_projection_weight_property
89
}
910
)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Node Embeddings 1d using Fast Random Projection: Stream for Hyper-Parameter tuning. Requires "Add_file_name and_extension.cypher".
// Runs on the projection named '<$dependencies_projection>-cleaned' and streams one embedding per node.
// Parameters:
//   $dependencies_projection                                                 - base name of the projection
//   $dependencies_projection_embedding_dimension                             - embedding dimension (toInteger)
//   $dependencies_projection_embedding_random_seed                           - random seed (toInteger)
//   $dependencies_projection_fast_random_projection_normalization_strength   - normalization strength (toFloat)
//   $dependencies_projection_fast_random_projection_forth_iteration_weight   - weight of the fourth iteration (toFloat)
//   $dependencies_projection_weight_property                                 - relationship weight property name
// Returns one distinct row per code unit with its names, element id, project name, community id, centrality and embedding.
2+
3+
CALL gds.fastRP.stream(
4+
$dependencies_projection + '-cleaned', {
5+
embeddingDimension: toInteger($dependencies_projection_embedding_dimension)
6+
,randomSeed: toInteger($dependencies_projection_embedding_random_seed)
7+
,normalizationStrength: toFloat($dependencies_projection_fast_random_projection_normalization_strength)
8+
,iterationWeights: [0.0, 0.0, 1.0, toFloat($dependencies_projection_fast_random_projection_forth_iteration_weight)] // first three weights are fixed, only the fourth is tunable; the parameter name spells "forth" (sic) and must be passed exactly like that
9+
,relationshipWeightProperty: $dependencies_projection_weight_property
10+
}
11+
)
12+
YIELD nodeId, embedding
13+
WITH gds.util.asNode(nodeId) AS codeUnit // resolve the streamed node id to the node itself
14+
,embedding
15+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit) // optional: rows without a containing Java artifact are kept
16+
WITH *, artifact.name AS artifactName
17+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit) // optional: rows without a containing TS project are kept
18+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName // last path segment of the project root directory
19+
RETURN DISTINCT
20+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName // first non-null of these properties
21+
,codeUnit.name AS shortCodeUnitName
22+
,elementId(codeUnit) AS nodeElementId
23+
,coalesce(artifactName, projectName) AS projectName // the Java artifact name wins over the TS project name
24+
,coalesce(codeUnit.communityLeidenId, 0) AS communityId // 0 when the node has no communityLeidenId property
25+
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality // 0.01 when the node has no centralityPageRank property
26+
,embedding
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Node Embeddings 3c using Node2Vec: Stream. Requires "Add_file_name and_extension.cypher".
// Runs on the projection named '<$dependencies_projection>-cleaned' and streams one embedding per node.
// Every Node2Vec setting below is supplied as a query parameter and converted with toInteger()/toFloat(),
// except the relationship weight property name, which is passed through unchanged.
// Returns one distinct row per code unit with its names, element id, project name, community id, centrality and embedding.
2+
3+
CALL gds.node2vec.stream(
4+
$dependencies_projection + '-cleaned', {
5+
embeddingDimension: toInteger($dependencies_projection_embedding_dimension)
6+
,randomSeed: toInteger($dependencies_projection_embedding_random_seed)
7+
,iterations: toInteger($dependencies_projection_node2vec_iterations)
8+
,inOutFactor: toFloat($dependencies_projection_node2vec_in_out_factor)
9+
,returnFactor: toFloat($dependencies_projection_node2vec_return_factor)
10+
,windowSize: toInteger($dependencies_projection_node2vec_window_size)
11+
,walksPerNode: toInteger($dependencies_projection_node2vec_walks_per_node)
12+
,walkLength: toInteger($dependencies_projection_node2vec_walk_length)
13+
,negativeSamplingRate: toInteger($dependencies_projection_node2vec_negative_sampling_rate)
14+
,positiveSamplingFactor: toFloat($dependencies_projection_node2vec_positive_sampling_factor)
15+
,relationshipWeightProperty: $dependencies_projection_weight_property
16+
}
17+
)
18+
YIELD nodeId, embedding
19+
WITH gds.util.asNode(nodeId) AS codeUnit // resolve the streamed node id to the node itself
20+
,embedding
21+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit) // optional: rows without a containing Java artifact are kept
22+
WITH *, artifact.name AS artifactName
23+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit) // optional: rows without a containing TS project are kept
24+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName // last path segment of the project root directory
25+
RETURN DISTINCT
26+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName // first non-null of these properties
27+
,codeUnit.name AS shortCodeUnitName
28+
,elementId(codeUnit) AS nodeElementId
29+
,coalesce(artifactName, projectName) AS projectName // the Java artifact name wins over the TS project name
30+
,coalesce(codeUnit.communityLeidenId, 0) AS communityId // 0 when the node has no communityLeidenId property
31+
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality // 0.01 when the node has no centralityPageRank property
32+
,embedding

0 commit comments

Comments
 (0)