From ab73271781e8e9a0715f784936df4b371d64c338 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 6 May 2024 12:53:34 +0200 Subject: [PATCH 01/16] Add Config parameters and basic top-level method 'get.commit.network' will delegate calls to corresponding methods, depending on 'commit.relation' config parameter in NetworkConf Signed-off-by: Leo Sendelbach --- util-conf.R | 12 ++++++++++++ util-networks.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/util-conf.R b/util-conf.R index d1b8c0c8..35e5303e 100644 --- a/util-conf.R +++ b/util-conf.R @@ -837,6 +837,18 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 1 ), + commit.relation = list( + default = "cochange", + type = "character", + allowed = c("cochange", "commit.interaction"), + allowed.number = Inf + ), + commit.directed = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), edges.for.base.artifacts = list( default = TRUE, type = "logical", diff --git a/util-networks.R b/util-networks.R index a9b19e11..e4581fac 100644 --- a/util-networks.R +++ b/util-networks.R @@ -44,6 +44,7 @@ requireNamespace("lubridate") # for date conversion ## vertex types TYPE.AUTHOR = "Author" TYPE.ARTIFACT = "Artifact" +TYPE.COMMIT = "Commit" ## edge types TYPE.EDGES.INTRA = "Unipartite" @@ -929,6 +930,51 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(net) }, + #' Get the generic commit network. 
+ #' + #' @return the generic artifact network + get.commit.network = function() { + logging::loginfo("Constructing artifact network.") + + ## construct network + relations = private$network.conf$get.value("commit.relation") + networks = lapply(relations, function(relation) { + network = switch( + relation, + cochange = private$get.commit.network.cochange(), + commit.interaction = private$get.commit.network.commit.interaction(), + stop(sprintf("The artifact relation '%s' does not exist.", relation)) + ) + + ## set edge attributes on all edges + igraph::E(network)$type = TYPE.EDGES.INTRA + igraph::E(network)$relation = relation + + ## set vertex attribute 'kind' on all edges, corresponding to relation + vertex.kind = private$get.vertex.kind.for.relation(relation) + network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) + + return(network) + }) + net = merge.networks(networks) + + ## set vertex and edge attributes for identifaction + igraph::V(net)$type = TYPE.COMMIT + + ## simplify network if wanted + if (private$network.conf$get.value("simplify")) { + net = simplify.network(net, simplify.multiple.relations = + private$network.conf$get.value("simplify.multiple.relations")) + } + + ## add range attribute for later analysis (if available) + if ("RangeData" %in% class(private$proj.data)) { + attr(net, "range") = private$proj.data$get.range() + } + + return(net) + }, + #' Get the (real) bipartite network. #' #' @return the bipartite network From 3ed87e9e5cffe247ab9bc3b4ae04d4fb5e838261 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 6 May 2024 14:15:26 +0200 Subject: [PATCH 02/16] Add functions for network construction functions 'get.commit.network.cochange' and 'get.commit.network.commit.interaction' are called in 'get.commit.network'. Also add 'group.commits.by.data.column', a helper function used in constructing the cochange commit network. 
Signed-off-by: Leo Sendelbach --- util-data.R | 29 ++++++++++++++++ util-networks.R | 91 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/util-data.R b/util-data.R index 988146a5..8d68765f 100644 --- a/util-data.R +++ b/util-data.R @@ -2143,6 +2143,35 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, + ## * * processed data ---------------------------------------------- + + #' Group the commits of the given \code{data.source} by the given \code{group.column}. + #' For each group, the column \code{"hash"} is duplicated and prepended to each + #' group's data as first column (see below for details). + #' + #' Example: To obtain the commits that changed the same source-code artifact, + #' call \code{group.commits.by.data.column("commits", "artifact")}. + #' + #' @param data.source The specified data source. One of \code{"commits"}, + #' \code{"mails"}, and \code{"issues"}. [default: "commits"] + #' @param group.column The column to group the commits of the given \code{data.source} by + #' [default: "artifact"] + #' + #' @return a list mapping each distinct item in \code{group.column} to all corresponding + #' data items from \code{data.source}, with the column \code{"hash"} duplicated + #' as first column (with name \code{"data.vertices"}) + #' + #' @seealso ProjectData$group.data.by.column + group.commits.by.data.column = function(data.source = c("commits", "mails", "issues"), + group.column = "artifact") { + logging::loginfo("Grouping commits by data column.") + + ## store the commits per group that is determined by 'group.column' + mylist = self$group.data.by.column(data.source, group.column, "hash") + + return(mylist) + }, + #' Group the authors of the given \code{data.source} by the given \code{group.column}. #' For each group, the column \code{"author.name"} is duplicated and prepended to each #' group's data as first column (see below for details). 
diff --git a/util-networks.R b/util-networks.R index e4581fac..8e221021 100644 --- a/util-networks.R +++ b/util-networks.R @@ -123,6 +123,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.callgraph = NULL, # igraph artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph + commit.network.commit.interaction = NULL, #igraph + commit.network.cochange = NULL, #igraph ## * * relation-to-vertex-kind mapping ----------------------------- @@ -680,6 +682,87 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, + #' Build and get the commit network with commit-interactions as the relation. + #' + #' @return the commit-interaction commit network + get.commit.network.commit.interaction = function() { + + logging::logdebug("get.commit.network.commit.interaction: starting.") + + ## do not compute anything more than once + if (!is.null(private$commit.network.commit.interaction)) { + logging::logdebug("get.commit.network.commit.interaction: finished. 
(already existing)") + return(private$commit.network.commit.interaction) + } + + ## get the commits that appear in the commit-interaction data as the vertices of the network + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.hash"]], + private$proj.data$get.commit.interactions()[["commit.hash"]])) + vertices = data.frame(name = vertices) + + ## get the commit-interaction data as the edge data of the network + edges = private$proj.data$get.commit.interactions() + ## set the commits as the 'to' and 'from' of the network and order the dataframe + edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", + "file", "base.author", "base.func", "base.file")] + colnames(edges)[1] = "to" + colnames(edges)[2] = "from" + commit.net.data = list(vertices = vertices, edges = edges) + ## construct the network + commit.net = construct.network.from.edge.list( + commit.net.data[["vertices"]], + commit.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("commit.directed"), + available.edge.attributes = private$proj.data$ + get.data.columns.for.data.source("commit.interactions") + ) + + private$commit.network.commit.interaction = commit.net + logging::logdebug("get.commit.network.commit.interaction: finished.") + + return(commit.net) + }, + + #' Get the co-change-based commit network, + #' If it does not already exist build it first. + #' + #' @return the commit network with cochange relation + get.commit.network.cochange = function() { + + logging::logdebug("get.commit.network.cochange: starting.") + + ## do not compute anything more than once + if (!is.null(private$artifacts.network.cochange)) { + logging::logdebug("get.commit.network.cochange: finished. 
(already existing)") + return(private$commit.network.cochange) + } + + ## construct edge list based on commit--artifact data + commit.net.data.raw = private$proj.data$group.commits.by.data.column("commits", "artifact") + commit.net.data = construct.edge.list.from.key.value.list( + commit.net.data.raw, + network.conf = private$network.conf, + directed = FALSE, + respect.temporal.order = TRUE + ) + + ## construct network from obtained data + commit.net = construct.network.from.edge.list( + commit.net.data[["vertices"]], + commit.net.data[["edges"]], + network.conf = private$network.conf, + directed = FALSE, + available.edge.attributes = private$proj.data$get.data.columns.for.data.source("commits") + ) + + ## store network + private$commit.network.cochange = commit.net + logging::logdebug("get.commit.network.cochange: finished.") + + return(commit.net) + }, + ## * * bipartite relations ------------------------------------------ #' Get the key-value data for the bipartite relations, @@ -754,6 +837,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$artifacts.network.cochange = NULL private$artifacts.network.issue = NULL private$artifacts.network.mail = NULL + private$commit.network.commit.interaction = NULL + private$commit.network.cochange = NULL private$proj.data = private$proj.data.original if (private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() @@ -932,9 +1017,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' Get the generic commit network. 
#' - #' @return the generic artifact network + #' @return the generic commit network get.commit.network = function() { - logging::loginfo("Constructing artifact network.") + logging::loginfo("Constructing commit network.") ## construct network relations = private$network.conf$get.value("commit.relation") @@ -943,7 +1028,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", relation, cochange = private$get.commit.network.cochange(), commit.interaction = private$get.commit.network.commit.interaction(), - stop(sprintf("The artifact relation '%s' does not exist.", relation)) + stop(sprintf("The commit relation '%s' does not exist.", relation)) ) ## set edge attributes on all edges From 93b551875d46b8e0eba415871eb79746bba81e72 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 15 May 2024 12:36:29 +0200 Subject: [PATCH 03/16] Add test file for commit network Also add first test for commit-interaction based commit network and fixed a minor error in network creation Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 89 ++++++++++++++++++++++++++++++++++++ util-networks.R | 2 +- 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 tests/test-networks-commit.R diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R new file mode 100644 index 00000000..c5b310cf --- /dev/null +++ b/tests/test-networks-commit.R @@ -0,0 +1,89 @@ +## This file is part of coronet, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2024 by Leo Sendelbach + +## All Rights Reserved. + + +context("Network-building functionality.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" # function, feature, file, featureexpression + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + + +## +## Tests for commit-network construction +## + + + +patrick::with_parameters_test_that("Network construction with commit-interactions as relation", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "commit.interaction", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", 
"1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), + interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), + base.author = c("Olaf", "Thomas", "Karl", "Thomas"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") + ) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) \ No newline at end of file diff --git a/util-networks.R b/util-networks.R index 8e221021..1a4de64a 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1036,7 +1036,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", igraph::E(network)$relation = relation ## set vertex attribute 'kind' on all edges, corresponding to relation - vertex.kind = private$get.vertex.kind.for.relation(relation) + vertex.kind = TYPE.COMMIT network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) return(network) From dd90d9a8e2e521ca04fd7a659b1e0e6bb6fd622a Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 15 May 2024 13:34:32 +0200 Subject: [PATCH 04/16] Change how vertex kind is initialized Initializing vertex kind to 'TYPE.COMMIT' in the correct position Signed-off-by: Leo Sendelbach --- util-networks.R 
| 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/util-networks.R b/util-networks.R index 1a4de64a..bee20195 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1035,15 +1035,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", igraph::E(network)$type = TYPE.EDGES.INTRA igraph::E(network)$relation = relation - ## set vertex attribute 'kind' on all edges, corresponding to relation - vertex.kind = TYPE.COMMIT - network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) - return(network) }) net = merge.networks(networks) ## set vertex and edge attributes for identifaction + igraph::V(net)$kind = TYPE.COMMIT igraph::V(net)$type = TYPE.COMMIT ## simplify network if wanted From 8e863a2fa26dc6f4ddd0d242712627061dae283c Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 May 2024 15:18:53 +0200 Subject: [PATCH 05/16] Add tests and minor fixes for cochange network Tests for each artifact type, parameterized for directed attribute Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 136 ++++++++++++++++++++++++++++++++++- util-networks.R | 6 +- 2 files changed, 139 insertions(+), 3 deletions(-) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index c5b310cf..249accfc 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -25,7 +25,6 @@ context("Network-building functionality.") CF.DATA = file.path(".", "codeface-data") CF.SELECTION.PROCESS = "testing" CASESTUDY = "test" -ARTIFACT = "feature" # function, feature, file, featureexpression ## use only when debugging this file independently if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") @@ -82,6 +81,141 @@ patrick::with_parameters_test_that("Network construction with commit-interaction relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + 
expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) + +patrick::with_parameters_test_that("Network construction with cochange as relation, file as artifact", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("File", "File"), + artifact = c("test.c", "test2.c"), + weight = c(1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange") + ) + + if (test.directed) { + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + } + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) + +patrick::with_parameters_test_that("Network construction with cochange as relation, function as artifact", { + ## configuration 
object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "function") + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Function", "Function", "Function", "Function", "Function", "Function"), + artifact = c("File_Level", "File_Level", "File_Level", "File_Level", "File_Level", "File_Level"), + weight = c(1, 1, 1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, + TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange") + ) + + if (test.directed) { + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + } + network = 
igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) + +patrick::with_parameters_test_that("Network construction with cochange as relation, feature as artifact", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "feature") + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + + if (test.directed) { + 
edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + } + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), diff --git a/util-networks.R b/util-networks.R index bee20195..9e6998a6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -743,16 +743,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", commit.net.data = construct.edge.list.from.key.value.list( commit.net.data.raw, network.conf = private$network.conf, - directed = FALSE, + directed = private$network.conf$get.value("commit.directed"), respect.temporal.order = TRUE ) + commit.net.data$edges <- commit.net.data$edges[, -which(colnames(commit.net.data$edges) + %in% c("date", "hash", "file"))] ## construct network from obtained data commit.net = construct.network.from.edge.list( commit.net.data[["vertices"]], commit.net.data[["edges"]], network.conf = private$network.conf, - directed = FALSE, + directed = private$network.conf$get.value("commit.directed"), available.edge.attributes = private$proj.data$get.data.columns.for.data.source("commits") ) From 175d385eb67792a284216de698bf7980eeac0d35 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 May 2024 17:28:24 +0200 Subject: [PATCH 06/16] Add commit network to 'get.networks' Commit Network now also built when calling function 'get.networks'. 
Signed-off-by: Leo Sendelbach --- util-networks.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/util-networks.R b/util-networks.R index 9e6998a6..e3f7e5dc 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1180,12 +1180,15 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", authors.net = self$get.author.network() ## artifact relation artifacts.net = self$get.artifact.network() + ## commit relation + commit.net = self$get.commit.network() return(list( "authors.to.artifacts" = authors.to.artifacts, "bipartite.net" = bipartite.net, "authors.net" = authors.net, - "artifacts.net" = artifacts.net + "artifacts.net" = artifacts.net, + "commit.net" = commit.net )) }, From f9b329319e04471a9bc252a2a3541d9bfca9185c Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 29 May 2024 12:33:27 +0200 Subject: [PATCH 07/16] Add commit network to showcase show how to construct commit network in showcase. Also fixed bug that resulted in showcase crashing. Signed-off-by: Leo Sendelbach --- showcase.R | 9 ++++++++- util-networks.R | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/showcase.R b/showcase.R index 74da2497..d115c1c7 100644 --- a/showcase.R +++ b/showcase.R @@ -24,6 +24,7 @@ ## Copyright 2021 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann ## Copyright 2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -65,6 +66,7 @@ ARTIFACT = "feature" # function, feature, file, featureexpression (only relevant AUTHOR.RELATION = "mail" # mail, cochange, issue ARTIFACT.RELATION = "cochange" # cochange, callgraph, mail, issue +COMMIT.RELATION = "commit.interaction" # commit.interaction, cochange ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -73,13 +75,16 @@ ARTIFACT.RELATION = "cochange" # cochange, callgraph, mail, issue ## initialize project configuration proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) proj.conf$update.value("commits.filter.base.artifact", TRUE) +proj.conf$update.value("commit.interactions", TRUE) ## specify that custom event timestamps should be read from 'custom-events.list' proj.conf$update.value("custom.event.timestamps.file", "custom-events.list") proj.conf$print() ## initialize network configuration net.conf = NetworkConf$new() -net.conf$update.values(updated.values = list(author.relation = AUTHOR.RELATION, artifact.relation = ARTIFACT.RELATION)) +net.conf$update.values(updated.values = list(author.relation = AUTHOR.RELATION, + artifact.relation = ARTIFACT.RELATION, + commit.relation = COMMIT.RELATION)) net.conf$print() ## get ranges @@ -141,6 +146,7 @@ x$get.author.network() x$update.network.conf(updated.values = list(author.directed = FALSE)) x$get.author.network() x$get.artifact.network() +x$get.commit.network() x$reset.environment() x$get.networks() x$update.network.conf(updated.values = list(author.only.committers = FALSE, author.directed = FALSE)) @@ -201,6 +207,7 @@ y$update.network.conf(updated.values = list(edge.attributes = c("date"))) y$get.author.network() y$update.network.conf(updated.values = list(edge.attributes = c("hash"))) y$get.artifact.network() +y$get.commit.network() y$get.networks() y$update.network.conf(updated.values = list(author.only.committers = FALSE, author.directed = TRUE)) h = y$get.bipartite.network() diff --git a/util-networks.R b/util-networks.R index 
e3f7e5dc..b54e4e24 100644 --- a/util-networks.R +++ b/util-networks.R @@ -733,7 +733,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::logdebug("get.commit.network.cochange: starting.") ## do not compute anything more than once - if (!is.null(private$artifacts.network.cochange)) { + if (!is.null(private$commit.network.cochange)) { logging::logdebug("get.commit.network.cochange: finished. (already existing)") return(private$commit.network.cochange) } From 05c3bc09cb1d396fd59c34a88030cdca58fd04dd Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 19 Jun 2024 15:45:07 +0200 Subject: [PATCH 08/16] Add date attribute to vertices In this process, also refactor 'construct.edge.list.from.key.value.list' method. Some more comments might be necessary. Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 15 ++ util-networks.R | 284 +++++++++++++++++++++-------------- 2 files changed, 190 insertions(+), 109 deletions(-) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 249accfc..d8023e3b 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -105,6 +105,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10"), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -148,6 +153,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10"), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -196,6 +206,11 @@ 
patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32"), kind = TYPE.COMMIT, type = TYPE.COMMIT ) diff --git a/util-networks.R b/util-networks.R index b54e4e24..cfaece00 100644 --- a/util-networks.R +++ b/util-networks.R @@ -355,7 +355,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network.conf = private$network.conf, directed = FALSE, respect.temporal.order = TRUE, - artifact.edges = TRUE + network.type = "artifact" ) ## construct network from obtained data @@ -740,15 +740,15 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct edge list based on commit--artifact data commit.net.data.raw = private$proj.data$group.commits.by.data.column("commits", "artifact") + commit.net.data = construct.edge.list.from.key.value.list( commit.net.data.raw, network.conf = private$network.conf, directed = private$network.conf$get.value("commit.directed"), - respect.temporal.order = TRUE + respect.temporal.order = TRUE, + network.type = "commit" ) - commit.net.data$edges <- commit.net.data$edges[, -which(colnames(commit.net.data$edges) - %in% c("date", "hash", "file"))] ## construct network from obtained data commit.net = construct.network.from.edge.list( commit.net.data[["vertices"]], @@ -1318,14 +1318,16 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' i.e., whether to only add edges from the later event to the previous one. #' If \code{NA} is passed, the default value is taken. 
#' [default: directed] -#' @param artifact.edges whether the key value data represents edges in an artifact network based -#' on the cochange relation -#' [default: FALSE] +#' @param network.type the type of network for which the key value data is to be used as edges [default: "author"] #' #' @return a list of two data.frames named 'vertices' and 'edges' (compatible with return value #' of \code{igraph::as.data.frame}) construct.edge.list.from.key.value.list = function(list, network.conf, directed = FALSE, - respect.temporal.order = directed, artifact.edges = FALSE) { + respect.temporal.order = directed, + network.type = c("author", "artifact", "commit")) { + + network.type = match.arg.or.default(network.type, default = "author", several.ok = FALSE) + logging::loginfo("Create edges.") logging::logdebug("construct.edge.list.from.key.value.list: starting.") @@ -1347,7 +1349,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed ## replace it with the \code{author.name} attribute as artifacts cannot cause ## edges in artifact networks, authors can edge.attributes = network.conf$get.value("edge.attributes") - if (artifact.edges) { + if (network.type == "artifact") { artifact.index = match("artifact", edge.attributes, nomatch = NA) if (!is.na(artifact.index)) { edge.attributes = edge.attributes[-artifact.index] @@ -1355,138 +1357,202 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed } } + ## if edges in a commit network contain 'date', 'hash' or 'file' attributes, remove them + ## as they belong to commits, which are the vertices in commit networks + if (network.type == "commit") { + cols.which = which(edge.attributes %in% c("date", "hash", "file")) + edge.attributes <- edge.attributes[-cols.which] + } + if (respect.temporal.order) { ## for all subsets (sets), connect all items in there with the previous ones - edge.list.data = parallel::mclapply(list, function(set) { - number.edges = sum(seq_len(nrow(set)) 
- 1) - logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", - match(attr(set, "group.name"), keys), keys.number, - attr(set, "group.type"), attr(set, "group.name"), number.edges) - - ## Skip artifacts with many, many edges - if (number.edges > network.conf$get.value("skip.threshold")) { - logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", - attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) - return(NULL) - } + edge.list.data = parallel::mclapply(list, construct.edges.temporal.order, network.conf, + edge.attributes, keys, keys.number, network.type) - ## queue of already processed artifacts - edge.list.set = data.frame() - vertices.processed.set = c() + edge.list = plyr::rbind.fill(edge.list.data) + vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) - ## connect the current item to all previous ones - for (item.no in seq_len(nrow(set))) { - item = set[item.no, ] + } else { - ## get vertex data - item.vertex = item[["data.vertices"]] + ## for all items in the sublists, construct the cartesian product + edge.list.data = parallel::mclapply(list, construct.edges.no.temporal.order, network.conf, + edge.attributes, keys, keys.number) - ## get edge attributes - cols.which = edge.attributes %in% colnames(item) - item.edge.attrs = item[ , edge.attributes[cols.which], drop = FALSE] + edge.list = plyr::rbind.fill(edge.list.data) + vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) - ## construct edges - combinations = expand.grid(item.vertex, vertices.processed.set, stringsAsFactors = FALSE) - if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) { - combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes - } - edge.list.set = rbind(edge.list.set, combinations) # add to edge list + } - ## mark current item as 
processed - vertices.processed.set = c(vertices.processed.set, item.vertex) - } + logging::logdebug("construct.edge.list.from.key.value.list: finished.") - ## store set of processed vertices - attr(edge.list.set, "vertices.processed") = vertices.processed.set + if (network.type == "commit") { + vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.dates.processed")) ) + return(list( + vertices = data.frame( + name = unique(vertices.processed), + date = unique(vertices.dates.processed) + ), + edges = edge.list + )) + } else { + return(list( + vertices = data.frame( + name = unique(vertices.processed) + ), + edges = edge.list + )) + } +} - logging::logdebug("Constructing edges for %s '%s': finished.", attr(set, "group.type"), attr(set, "group.name")) +#' Constructs edge list from the given key value list respecting temporal order. +#' Helper method which is called by 'construct.edge.list.by.key.value.list'. +#' +#' @param list the given key value list +#' @param network.conf the network configuration +#' @param edge.attributes the attributes that should be on the edges of the network +#' @param keys the keays of the key value list +#' @param keys.number the amount of keys in the key value list +#' @param network.type the type of network that should be created +#' +#' @return the data for the edge list +construct.edges.temporal.order = function(set, network.conf, edge.attributes, keys, keys.number, network.type) { + number.edges = sum(seq_len(nrow(set)) - 1) + logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", + match(attr(set, "group.name"), keys), keys.number, + attr(set, "group.type"), attr(set, "group.name"), number.edges) + + ## Skip artifacts with many, many edges + if (number.edges > network.conf$get.value("skip.threshold")) { + logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", + attr(set, "group.type"), attr(set, "group.name"), 
network.conf$get.value("skip.threshold")) + return(NULL) + } - return(edge.list.set) - }) + ## queue of already processed artifacts + edge.list.set = data.frame() + vertices.processed.set = c() - edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + ## connect the current item to all previous ones + for (item.no in seq_len(nrow(set))) { + item = set[item.no, ] - } else { + ## get vertex data + item.vertex = item[["data.vertices"]] + if (network.type == "commit") { + item.vertex = data.frame(commit = item.vertex, date = get.date.string(item[["date"]])) + } - ## for all items in the sublists, construct the cartesian product - edge.list.data = parallel::mclapply(list, function(set) { - number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1)) - logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", - match(attr(set, "group.name"), keys), keys.number, - attr(set, "group.type"), attr(set, "group.name"), number.edges) - - ## Skip artifacts with many, many edges - if (number.edges > network.conf$get.value("skip.threshold")) { - logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", - attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) - return(NULL) - } + ## get edge attributes + cols.which = edge.attributes %in% colnames(item) + item.edge.attrs = item[ , edge.attributes[cols.which], drop = FALSE] - ## get vertex data - vertices = unique(set[["data.vertices"]]) + ## construct edges + combinations = c() + if (network.type == "commit") { + combinations = expand.grid(item.vertex[["commit"]], vertices.processed.set[["commit"]], stringsAsFactors = FALSE) + } else { + combinations = expand.grid(item.vertex, vertices.processed.set, stringsAsFactors = FALSE) + } - ## break if there is no author - if (length(vertices) < 1) { - return(NULL) - } + 
if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) { + combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes + } + edge.list.set = rbind(edge.list.set, combinations) # add to edge list + + ## mark current item as processed + if (network.type == "commit") { + vertices.processed.set = data.frame(commit = c(vertices.processed.set[["commit"]], item.vertex[["commit"]]), + date = c(vertices.processed.set[["date"]], item.vertex[["date"]])) + } else { + vertices.processed.set = c(vertices.processed.set, item.vertex) + } + } - ## if there is only one author, just create the vertex, but no edges - if (length(vertices) == 1) { - edges = data.frame() - attr(edges, "vertices.processed") = vertices # store set of processed vertices - return(edges) - } + ## store set of processed vertices + if (network.type == "commit") { + attr(edge.list.set, "vertices.processed") = vertices.processed.set[["commit"]] + attr(edge.list.set, "vertices.dates.processed") = vertices.processed.set[["date"]] + } else { + attr(edge.list.set, "vertices.processed") = vertices.processed.set + } + + logging::logdebug("Constructing edges for %s '%s': finished.", attr(set, "group.type"), attr(set, "group.name")) - ## get combinations - combinations = combn(vertices, 2) # all unique pairs of authors + return(edge.list.set) +} + +#' Constructs edge list from the given key value list not respecting temporal order. +#' Helper method which is called by 'construct.edge.list.by.key.value.list'. 
+#' +#' @param list the given key value list +#' @param network.conf the network configuration +#' @param edge.attributes the attributes that should be on the edges of the network +#' @param keys the keays of the key value list +#' @param keys.number the amount of keys in the key value list +#' +#' @return the data for the edge list +construct.edges.no.temporal.order = function(set, network.conf, edge.attributes, keys, keys.number) { + number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1)) + logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", + match(attr(set, "group.name"), keys), keys.number, + attr(set, "group.type"), attr(set, "group.name"), number.edges) + + ## Skip artifacts with many, many edges + if (number.edges > network.conf$get.value("skip.threshold")) { + logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", + attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) + return(NULL) + } - ## construct edge list - edges = apply(combinations, 2, function(comb) { + ## get vertex data + vertices = unique(set[["data.vertices"]]) - ## iterate over each of the two data vertices of the current combination to determine the edges - ## for which it is the sender of the edge and use the second one as the receiver of the edge - edges.by.comb.item = lapply(comb, function(comb.item) { - ## basic edge data - edge = data.frame(from = comb.item, to = comb[comb != comb.item]) + ## break if there is no author + if (length(vertices) < 1) { + return(NULL) + } - ## get edge attibutes - edge.attrs = set[set[["data.vertices"]] %in% comb.item, ] # get data for current combination item - cols.which = edge.attributes %in% colnames(edge.attrs) - edge.attrs = edge.attrs[ , edge.attributes[cols.which], drop = FALSE] + ## if there is only one author, just create the vertex, but no edges + if (length(vertices) == 1) { + edges = data.frame() + attr(edges, 
"vertices.processed") = vertices # store set of processed vertices + return(edges) + } - # add edge attributes to edge list - edgelist = cbind(edge, edge.attrs) - return(edgelist) - }) + ## get combinations + combinations = combn(vertices, 2) # all unique pairs of authors - ## union the edge lists for the combination items - edges.union = plyr::rbind.fill(edges.by.comb.item) - return(edges.union) + ## construct edge list + edges = apply(combinations, 2, function(comb) { - }) - edges = plyr::rbind.fill(edges) + ## iterate over each of the two data vertices of the current combination to determine the edges + ## for which it is the sender of the edge and use the second one as the receiver of the edge + edges.by.comb.item = lapply(comb, function(comb.item) { + ## basic edge data + edge = data.frame(from = comb.item, to = comb[comb != comb.item]) - ## store set of processed vertices - attr(edges, "vertices.processed") = vertices + ## get edge attibutes + edge.attrs = set[set[["data.vertices"]] %in% comb.item, ] # get data for current combination item + cols.which = edge.attributes %in% colnames(edge.attrs) + edge.attrs = edge.attrs[ , edge.attributes[cols.which], drop = FALSE] - return(edges) + # add edge attributes to edge list + edgelist = cbind(edge, edge.attrs) + return(edgelist) }) - edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + ## union the edge lists for the combination items + edges.union = plyr::rbind.fill(edges.by.comb.item) + return(edges.union) - } + }) + edges = plyr::rbind.fill(edges) - logging::logdebug("construct.edge.list.from.key.value.list: finished.") + ## store set of processed vertices + attr(edges, "vertices.processed") = vertices - return(list( - vertices = data.frame( - name = unique(vertices.processed) - ), - edges = edge.list - )) + return(edges) } #' Construct a network from the given lists of vertices and edges. 
From cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 26 Jun 2024 14:50:34 +0200 Subject: [PATCH 09/16] Add new function for adding vertex attributes New function allows adding vertex attributes from commit data to commit network vertices Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 52 +++++++++++++++++++++++++++++++++++- util-networks-covariates.R | 35 ++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index d8023e3b..8d94bec1 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -235,4 +235,54 @@ patrick::with_parameters_test_that("Network construction with cochange as relati }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) -)) \ No newline at end of file +)) + +test_that("Adding vertex attributes to a commit network", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "feature") + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = FALSE)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + network.new.attr = add.vertex.attribute.commit.network(network.built,proj.data, "author.name", "NO_AUTHOR") + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 
16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT, + author.name = c("Björn", + "Olaf", + "Olaf", + "Karl", + "Thomas") + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + + network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.new.attr, network)) +}) \ No newline at end of file diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 95a3021a..ed9c2ea2 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -22,6 +22,7 @@ ## Copyright 2022 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann ## Copyright 2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -140,6 +141,40 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com return(nets.with.attr) } +#' Utility function to add a vertex attribute from commit data to a commit network. 
+#' +#' @param network the commit network +#' @param project.data the project data from which to extract the values +#' @param attr.name the name of the attribute +#' @param default.value the dafault value of the attribute +#' if it does not occur in the commit data +#' +#' @return a networks with new vertex attribute +add.vertex.attribute.commit.network = function(network, project.data, + attr.name, default.value) { + # get the commit data and extract the required data + commit.data = project.data$get.commits() + hashes = commit.data[["hash"]] + attribute = commit.data[[attr.name]] + attribute.values = c() + for (hash.num in seq_along(igraph::V(network))) { + # for each vertex, finc the position in the data frame + hash = igraph::V(network)[hash.num]$name + hash.index = match(hash, hashes, nomatch = NA) + + value = c() + # extract the correct value from the data or use the default value + if (!is.na(hash.index)) { + value = attribute[[hash.index]] + } else { + value = default.value + } + attribute.values = c(attribute.values, value) + } + net.with.attr = igraph::set.vertex.attribute(network, attr.name, value = attribute.values) + +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Author network functions ------------------------------------------------ From 21c67c1644e85a30e3a97abb93e6cfb2621e0801 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 3 Jul 2024 15:21:22 +0200 Subject: [PATCH 10/16] Add usage of new utility method to showcase 'add.vertex.attribute.commit.network' is now used in showcase. Also minor changes to documentation and performance improvement in cochange commit network creation. 
Signed-off-by: Leo Sendelbach --- showcase.R | 2 ++ tests/test-networks-commit.R | 42 +++++++++++++++++++++++++++++++++++- util-networks-covariates.R | 3 +++ util-networks.R | 3 +-- 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/showcase.R b/showcase.R index d115c1c7..3d2aece7 100644 --- a/showcase.R +++ b/showcase.R @@ -239,6 +239,8 @@ sample.pull.requests = add.vertex.attribute.author.issue.count(my.networks, x.da ## add vertex attributes for the project-level network x.net.as.list = list("1970-01-01 00:00:00-2030-01-01 00:00:00" = x$get.author.network()) sample.entire = add.vertex.attribute.author.commit.count(x.net.as.list, x.data, aggregation.level = "complete") +## add vertex attributes to commit network +add.vertex.attribute.commit.network(x$get.commit.network(), x.data, "author.name", "NO_AUTHOR") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 8d94bec1..fa3f70c4 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -249,7 +249,7 @@ test_that("Adding vertex attributes to a commit network", { network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.commit.network() - network.new.attr = add.vertex.attribute.commit.network(network.built,proj.data, "author.name", "NO_AUTHOR") + network.new.attr = add.vertex.attribute.commit.network(network.built, proj.data, "author.name", "NO_AUTHOR") ## build the expected network vertices = data.frame( name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", @@ -285,4 +285,44 @@ test_that("Adding vertex attributes to a commit network", { network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) expect_true(igraph::identical_graphs(network.new.attr, network)) + + network.new.attr = add.vertex.attribute.commit.network(network.new.attr, proj.data, "commit.id", "NO_ID") + + ## build the expected 
network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT, + author.name = c("Björn", + "Olaf", + "Olaf", + "Karl", + "Thomas"), + commit.id = c("", "", + "", "", "") + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + + network.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.new.attr, network.two)) }) \ No newline at end of file diff --git a/util-networks-covariates.R b/util-networks-covariates.R index ed9c2ea2..1e476277 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -142,6 +142,9 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com } #' Utility function to add a vertex attribute from commit data to a commit network. +#' Attribute name should be a column name of the commit data dataframe. 
+#' Default column names can be seen in 'COMMITS.LIST.COLUMNS' in 'util-read.R', +#' though more might be possible. #' #' @param network the commit network #' @param project.data the project data from which to extract the values diff --git a/util-networks.R b/util-networks.R index cfaece00..d1c49325 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1462,8 +1462,7 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke ## mark current item as processed if (network.type == "commit") { - vertices.processed.set = data.frame(commit = c(vertices.processed.set[["commit"]], item.vertex[["commit"]]), - date = c(vertices.processed.set[["date"]], item.vertex[["date"]])) + vertices.processed.set = rbind(vertices.processed.set, item.vertex) } else { vertices.processed.set = c(vertices.processed.set, item.vertex) } From 94207542c5407382078dfa54ae82bc4f25ccbdb9 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 17 Jul 2024 16:48:29 +0200 Subject: [PATCH 11/16] Add missing edge attributes attribute 'date' added to cochange commit network edges, attribute artifact.type added to all networks based on commit interactions Signed-off-by: Leo Sendelbach --- tests/test-networks-artifact.R | 2 ++ tests/test-networks-author.R | 1 + tests/test-networks-commit.R | 63 +++++++++++++++++++--------------- util-networks.R | 12 +++++-- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 432840fc..1d847b54 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -252,6 +252,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction "test3.c::test_function", "test2.c::test2"), base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + artifact.type = c("File", "File", "File", "File"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, 
TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") @@ -301,6 +302,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + artifact.type = c("Function", "Function", "Function", "Function"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 2910ba51..d343a0c5 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -720,6 +720,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction base.func = c("test2.c::test2", "test2.c::test2", "test3.c::test_function", "test2.c::test2"), base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + artifact.type = c("CommitInteraction", "CommitInteraction", "CommitInteraction", "CommitInteraction"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index fa3f70c4..8ddb87db 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -76,6 +76,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction base.func = c("test2.c::test2", "test2.c::test2", "test3.c::test_function", "test2.c::test2"), base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + artifact.type = c("CommitInteraction", "CommitInteraction", "CommitInteraction", "CommitInteraction"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, 
TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") @@ -105,17 +106,18 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:32", - "2016-07-12 16:06:10"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10")), kind = TYPE.COMMIT, type = TYPE.COMMIT ) edges = data.frame( from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:32")), artifact.type = c("File", "File"), artifact = c("test.c", "test2.c"), weight = c(1, 1), @@ -124,7 +126,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati ) if (test.directed) { - edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) @@ -153,11 +155,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:32", - "2016-07-12 16:06:10"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10")), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -168,6 
+170,8 @@ patrick::with_parameters_test_that("Network construction with cochange as relati to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:05:41", + "2016-07-12 16:06:32", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Function", "Function", "Function", "Function", "Function", "Function"), artifact = c("File_Level", "File_Level", "File_Level", "File_Level", "File_Level", "File_Level"), weight = c(1, 1, 1, 1, 1, 1), @@ -177,7 +181,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati ) if (test.directed) { - edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) @@ -206,11 +210,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:10", - "2016-07-12 16:06:32"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32")), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -219,6 +223,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", 
"0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), @@ -227,7 +232,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati ) if (test.directed) { - edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) @@ -257,11 +262,11 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:10", - "2016-07-12 16:06:32"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32")), kind = TYPE.COMMIT, type = TYPE.COMMIT, author.name = c("Björn", @@ -275,6 +280,7 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), @@ -295,11 +301,11 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", 
"1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:10", - "2016-07-12 16:06:32"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32")), kind = TYPE.COMMIT, type = TYPE.COMMIT, author.name = c("Björn", @@ -315,6 +321,7 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), diff --git a/util-networks.R b/util-networks.R index d1c49325..16d7f064 100644 --- a/util-networks.R +++ b/util-networks.R @@ -248,6 +248,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[1] = "to" colnames(edges)[2] = "from" colnames(edges)[4] = "hash" + edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -401,6 +402,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] + edges = cbind(edges, data.frame(artifact.type = c("File"))) colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data @@ 
-410,6 +412,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] + edges = cbind(edges, data.frame(artifact.type = c("Function"))) colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning @@ -705,6 +708,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set the commits as the 'to' and 'from' of the network and order the dataframe edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] + edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) colnames(edges)[1] = "to" colnames(edges)[2] = "from" commit.net.data = list(vertices = vertices, edges = edges) @@ -1360,7 +1364,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed ## if edges in a commit network contain 'date', 'hash' or 'file' attributes, remove them ## as they belong to commits, which are the vertices in commit networks if (network.type == "commit") { - cols.which = which(edge.attributes %in% c("date", "hash", "file")) + cols.which = which(edge.attributes %in% c("hash", "file")) edge.attributes <- edge.attributes[-cols.which] } @@ -1391,7 +1395,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed return(list( vertices = data.frame( name = unique(vertices.processed), - date = unique(vertices.dates.processed) + date = get.date.from.string(unique(vertices.dates.processed)) ), edges = edge.list )) @@ -1429,6 +1433,10 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke return(NULL) } + if (network.type == "commit") { + set = set[order(set[["date"]]), ] + } + ## queue of already processed artifacts edge.list.set = data.frame() vertices.processed.set = c() From 860d4ee551255e3d0ff393133d0ae8ad7ff5e749 Mon Sep 17 00:00:00 2001 
From: Leo Sendelbach Date: Thu, 25 Jul 2024 13:16:46 +0200 Subject: [PATCH 12/16] Add minor non-functional fixes to adress review Added linebreaks, fixed spelling, removed cbind Signed-off-by: Leo Sendelbach --- util-networks-covariates.R | 7 +++---- util-networks.R | 26 ++++++++++++++------------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 1e476277..5709126a 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -152,7 +152,7 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com #' @param default.value the dafault value of the attribute #' if it does not occur in the commit data #' -#' @return a networks with new vertex attribute +#' @return a network with new vertex attribute add.vertex.attribute.commit.network = function(network, project.data, attr.name, default.value) { # get the commit data and extract the required data @@ -160,9 +160,8 @@ add.vertex.attribute.commit.network = function(network, project.data, hashes = commit.data[["hash"]] attribute = commit.data[[attr.name]] attribute.values = c() - for (hash.num in seq_along(igraph::V(network))) { - # for each vertex, finc the position in the data frame - hash = igraph::V(network)[hash.num]$name + for (hash in igraph::V(network)$name) { + # for each vertex, find the position in the data frame hash.index = match(hash, hashes, nomatch = NA) value = c() diff --git a/util-networks.R b/util-networks.R index 16d7f064..352794ad 100644 --- a/util-networks.R +++ b/util-networks.R @@ -248,7 +248,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[1] = "to" colnames(edges)[2] = "from" colnames(edges)[4] = "hash" - edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) + edges[["artifact.type"]] = "CommitInteraction" author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -402,7 
+402,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - edges = cbind(edges, data.frame(artifact.type = c("File"))) + edges[["artifact.type"]] = "File" colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data @@ -412,7 +412,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - edges = cbind(edges, data.frame(artifact.type = c("Function"))) + edges[["artifact.type"]] = "Function" colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning @@ -698,7 +698,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$commit.network.commit.interaction) } - ## get the authors that appear in the commit-interaction data as the vertices of the network + ## get the hashes that appear in the commit-interaction data as the vertices of the network vertices = unique(c(private$proj.data$get.commit.interactions()[["base.hash"]], private$proj.data$get.commit.interactions()[["commit.hash"]])) vertices = data.frame(name = vertices) @@ -708,7 +708,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set the commits as the 'to' and 'from' of the network and order the dataframe edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] - edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) + edges[["artifact.type"]] = "CommitInteraction" colnames(edges)[1] = "to" colnames(edges)[2] = "from" commit.net.data = list(vertices = vertices, edges = edges) @@ -1391,7 +1391,8 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed 
logging::logdebug("construct.edge.list.from.key.value.list: finished.") if (network.type == "commit") { - vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.dates.processed")) ) + vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, + function(data) attr(data, "vertices.dates.processed")) ) return(list( vertices = data.frame( name = unique(vertices.processed), @@ -1412,10 +1413,10 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed #' Constructs edge list from the given key value list respecting temporal order. #' Helper method which is called by 'construct.edge.list.by.key.value.list'. #' -#' @param list the given key value list +#' @param set the given key value list #' @param network.conf the network configuration #' @param edge.attributes the attributes that should be on the edges of the network -#' @param keys the keays of the key value list +#' @param keys the keys of the key value list #' @param keys.number the amount of keys in the key value list #' @param network.type the type of network that should be created #' @@ -1458,12 +1459,13 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke ## construct edges combinations = c() if (network.type == "commit") { - combinations = expand.grid(item.vertex[["commit"]], vertices.processed.set[["commit"]], stringsAsFactors = FALSE) + combinations = expand.grid(item.vertex[["commit"]], + vertices.processed.set[["commit"]], stringsAsFactors = FALSE) } else { combinations = expand.grid(item.vertex, vertices.processed.set, stringsAsFactors = FALSE) } - if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) { + if (nrow(combinations) > 0 && nrow(item.edge.attrs) == 1) { combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes } edge.list.set = rbind(edge.list.set, combinations) # add to edge list @@ -1492,10 +1494,10 @@ construct.edges.temporal.order = 
function(set, network.conf, edge.attributes, ke #' Constructs edge list from the given key value list not respecting temporal order. #' Helper method which is called by 'construct.edge.list.by.key.value.list'. #' -#' @param list the given key value list +#' @param set the given key value list #' @param network.conf the network configuration #' @param edge.attributes the attributes that should be on the edges of the network -#' @param keys the keays of the key value list +#' @param keys the keys of the key value list #' @param keys.number the amount of keys in the key value list #' #' @return the data for the edge list From 849123a8b7d898fbb1343745ecffc1f6000c9367 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 25 Jul 2024 13:24:05 +0200 Subject: [PATCH 13/16] Add missing 'artifact.type' to networks Networks based on commit interaction data now correctly have an edge attribute called 'artifact.type'. Value of column 'artifact.type' in commit interaction data is 'CommitInteraction' until potentially overwritten in artifact network construction Signed-off-by: Leo Sendelbach --- util-read.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/util-read.R b/util-read.R index f4fe7025..ecf60c27 100644 --- a/util-read.R +++ b/util-read.R @@ -863,14 +863,15 @@ create.empty.pasta.list = function() { COMMIT.INTERACTION.LIST.COLUMNS = c( "func", "commit.hash", "file", "base.hash", "base.func", "base.file", - "base.author", "interacting.author" + "base.author", "interacting.author", + "artifact.type" ) ## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character", "character", "character", "character", "character", - "character", "character" + "character", "character", "character" ) COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME = "GLOBAL" @@ -952,6 +953,7 @@ read.commit.interactions = function(data.path = NULL) { ## Author data will be merged from commit data 
in \code{update.commit.interactions}. interactions["base.author"] = NA_character_ interactions["interacting.author"] = NA_character_ + interactions["artifact.type"] = "CommitInteraction" return(interactions) }))) From 3fb7437b68950303916b62984fa449732c70353e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 25 Jul 2024 14:22:03 +0200 Subject: [PATCH 14/16] Fix endless recursion problem Add check for calling function in the beginning of 'update.commit.interactions'. Also contains minor fixes to address PR comments and updates tests to reflect changes made in previous commit. Signed-off-by: Leo Sendelbach --- README.md | 7 +++ showcase.R | 4 +- tests/test-data.R | 10 +++-- tests/test-networks-commit.R | 3 ++ tests/test-read.R | 14 +++--- util-data.R | 12 ++--- util-networks-covariates.R | 5 +-- util-networks.R | 86 +++++++++++++++++++++--------------- 8 files changed, 84 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 86b2671c..58b2c82e 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,11 @@ There are four types of networks that can be built using this library: author ne * The vertices in an artifact network denote any kind of artifact, e.g., source-code artifact (such as features or files) or communication artifact (such as mail threads or issues). All artifact-type vertices are uniquely identifiable by their name. There are only unipartite edges among artifacts in this type of network. * The relations (i.e., the edges' meaning and source) can be configured using the [`NetworkConf`](#networkconf) attribute `artifact.relation`. The relation also describes which kinds of artifacts are represented as vertices in the network. (For example, if "mail" is selected as `artifact.relation`, only mail-thread vertices are included in the network.) +- Commit networks + * The vertices in a commit network denote any commits in the data. All vertices + are uniquely identifiable by the hash of the commit.
There are only unipartite edges among commits in this type of network. + * The relations (i.e., the edges' meaning and source) can be configured using the [`NetworkConf`](#networkconf) attribute `commit.relation`. The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). + - Bipartite networks * The vertices in a bipartite network denote both authors and artifacts. There are only bipartite edges from authors to artifacts in this type of network. * The relations (i.e., the edges' meaning and source) can be configured using the [`NetworkConf`](#networkconf) attribute `artifact.relation`. @@ -249,6 +254,7 @@ Relations determine which information is used to construct edges among the verti - `cochange` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who change the same source-code artifact are connected with an edge. * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), source-code artifacts that are concurrently changed in the same commit are connected with an edge. + * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected if they change the same artifact. * For bipartite networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), authors get linked to all source-code artifacts they have changed in their respective commits. - `mail` @@ -269,6 +275,7 @@ Relations determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge.
* For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. + * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit interaction data. * This relation does not apply for bipartite networks. #### Edge-construction algorithms for author networks diff --git a/showcase.R b/showcase.R index 3d2aece7..4cb95d4a 100644 --- a/showcase.R +++ b/showcase.R @@ -239,8 +239,8 @@ sample.pull.requests = add.vertex.attribute.author.issue.count(my.networks, x.da ## add vertex attributes for the project-level network x.net.as.list = list("1970-01-01 00:00:00-2030-01-01 00:00:00" = x$get.author.network()) sample.entire = add.vertex.attribute.author.commit.count(x.net.as.list, x.data, aggregation.level = "complete") -## add vertex attributes to commit network -add.vertex.attribute.commit.network(x$get.commit.network(), x.data, "author.name", "NO_AUTHOR") +## add vertex attributes to commit network. 
Default value 'NO_AUTHOR' is used if vertex is not in commit data +add.vertex.attribute.commit.network(x$get.commit.network(), x.data, attr.name = "author.name", default.value = "NO_AUTHOR") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / diff --git a/tests/test-data.R b/tests/test-data.R index 88ce0e42..c983946d 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -564,15 +564,15 @@ test_that("Compare two ProjectData Objects with commit.interactions", { proj.data.two$set.commits(create.empty.commits.list()) ## create empty data frame of correct size - commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 9)) ## assure that the correct type is used - for(i in seq_len(8)) { + for(i in seq_len(9)) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## set everything except for authors as expected colnames(commit.interactions.data.expected) = c("commit.hash", "base.hash", "func", "file", - "base.func", "base.file", "base.author", - "interacting.author") + "base.func", "base.file","artifact.type", + "base.author", "interacting.author") commit.interactions.data.expected[["commit.hash"]] = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", @@ -588,6 +588,8 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.interactions.data.expected[["base.func"]] = c("test2.c::test2", "test2.c::test2", "test3.c::test_function", "test2.c::test2") commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") + commit.interactions.data.expected[["artifact.type"]] = c("CommitInteraction", "CommitInteraction", + "CommitInteraction", "CommitInteraction") expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 
8ddb87db..7de34eed 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -83,6 +83,9 @@ patrick::with_parameters_test_that("Network construction with commit-interaction ) network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) + + network.new.attr = add.vertex.attribute.commit.network(network.built, proj.data, "deleted.lines", "NO_DATA") + expect_identical(igraph::V(network.new.attr)$deleted.lines, c("0", "0","0", "NO_DATA", "0", "NO_DATA")) }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) diff --git a/tests/test-read.R b/tests/test-read.R index c617e091..f01d16c1 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -505,15 +505,15 @@ test_that("Read the commit-interactions data.", { commit.interactions.data.read = read.commit.interactions(proj.conf$get.value("datapath")) ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 9)) ## assure that the correct type is used - for(i in seq_len(8)) { + for(i in seq_len(ncol(commit.interactions.data.expected))) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## set everything except for authors as expected colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", "base.hash", "base.func", "base.file", "base.author", - "interacting.author") + "interacting.author", "artifact.type") commit.interactions.data.expected[["commit.hash"]] = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "0a1a5c523d835459c42f33e863623138555e2526", @@ -529,6 +529,8 @@ test_that("Read the commit-interactions data.", { commit.interactions.data.expected[["base.func"]] = c("test3.c::test_function", "test2.c::test2", "test2.c::test2", "test2.c::test2") 
commit.interactions.data.expected[["base.file"]] = c("test3.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["artifact.type"]] = c("CommitInteraction", "CommitInteraction", + "CommitInteraction", "CommitInteraction") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") @@ -543,11 +545,11 @@ test_that("Read the empty commit-interactions data.", { commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ test_empty_proximity/proximity") ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 8)) + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 9)) colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", "base.hash", "base.func", "base.file", - "base.author", "interacting.author") - for(i in seq_len(8)) { + "base.author", "interacting.author", "artifact.type") + for(i in seq_len(ncol(commit.interactions.data.expected))) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## check the results diff --git a/util-data.R b/util-data.R index 8d68765f..7f2a971a 100644 --- a/util-data.R +++ b/util-data.R @@ -415,7 +415,10 @@ ProjectData = R6::R6Class("ProjectData", #' #' This method should be called whenever the field \code{commit.interactions} is changed. 
update.commit.interactions = function() { - if (self$is.data.source.cached("commit.interactions")) { + stacktrace = get.stacktrace(sys.calls()) + caller = get.second.last.element(stacktrace) + if (self$is.data.source.cached("commit.interactions") && + (is.na(caller)|| paste(caller, collapse = " ") != "self$set.commits(commit.data)")) { if (!self$is.data.source.cached("commits.unfiltered")) { self$get.commits() } @@ -2143,8 +2146,6 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, - ## * * processed data ---------------------------------------------- - #' Group the commits of the given \code{data.source} by the given \code{group.column}. #' For each group, the column \code{"hash"} is duplicated and prepended to each #' group's data as first column (see below for details). @@ -2162,12 +2163,11 @@ ProjectData = R6::R6Class("ProjectData", #' as first column (with name \code{"data.vertices"}) #' #' @seealso ProjectData$group.data.by.column - group.commits.by.data.column = function(data.source = c("commits", "mails", "issues"), - group.column = "artifact") { + group.commits.by.data.column = function(group.column = "artifact") { logging::loginfo("Grouping commits by data column.") ## store the commits per group that is determined by 'group.column' - mylist = self$group.data.by.column(data.source, group.column, "hash") + mylist = self$group.data.by.column("commits", group.column, "hash") return(mylist) }, diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 5709126a..700b5e9f 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -149,8 +149,8 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com #' @param network the commit network #' @param project.data the project data from which to extract the values #' @param attr.name the name of the attribute -#' @param default.value the dafault value of the attribute -#' if it does not occur in the commit data +#' @param default.value the default 
value that is used if the current hash +#' is not contained in the commit data at all #' #' @return a network with new vertex attribute add.vertex.attribute.commit.network = function(network, project.data, @@ -174,7 +174,6 @@ add.vertex.attribute.commit.network = function(network, project.data, attribute.values = c(attribute.values, value) } net.with.attr = igraph::set.vertex.attribute(network, attr.name, value = attribute.values) - } diff --git a/util-networks.R b/util-networks.R index 352794ad..dd27f36f 100644 --- a/util-networks.R +++ b/util-networks.R @@ -123,8 +123,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.callgraph = NULL, # igraph artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph - commit.network.commit.interaction = NULL, #igraph - commit.network.cochange = NULL, #igraph + commits.network.commit.interaction = NULL, #igraph + commits.network.cochange = NULL, #igraph ## * * relation-to-vertex-kind mapping ----------------------------- @@ -248,7 +248,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[1] = "to" colnames(edges)[2] = "from" colnames(edges)[4] = "hash" - edges[["artifact.type"]] = "CommitInteraction" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = "CommitInteraction" + } author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -402,7 +404,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - edges[["artifact.type"]] = "File" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = ARTIFACT.CODEFACE[[proj.conf.artifact]] + } colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data @@ -412,7 +416,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges 
= edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - edges[["artifact.type"]] = "Function" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = ARTIFACT.CODEFACE[[proj.conf.artifact]] + } colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning @@ -693,9 +699,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::logdebug("get.commit.network.commit.interaction: starting.") ## do not compute anything more than once - if (!is.null(private$commit.network.commit.interaction)) { + if (!is.null(private$commits.network.commit.interaction)) { logging::logdebug("get.commit.network.commit.interaction: finished. (already existing)") - return(private$commit.network.commit.interaction) + return(private$commits.network.commit.interaction) } ## get the hashes that appear in the commit-interaction data as the vertices of the network @@ -708,7 +714,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set the commits as the 'to' and 'from' of the network and order the dataframe edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] - edges[["artifact.type"]] = "CommitInteraction" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = "CommitInteraction" + } colnames(edges)[1] = "to" colnames(edges)[2] = "from" commit.net.data = list(vertices = vertices, edges = edges) @@ -722,13 +730,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.data.columns.for.data.source("commit.interactions") ) - private$commit.network.commit.interaction = commit.net + private$commits.network.commit.interaction = commit.net logging::logdebug("get.commit.network.commit.interaction: finished.") return(commit.net) }, - #' Get the co-change-based commit network, + #' Get the cochange-based commit network, #' If it does not already exist build it first. 
#' #' @return the commit network with cochange realtion @@ -737,13 +745,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::logdebug("get.commit.network.cochange: starting.") ## do not compute anything more than once - if (!is.null(private$commit.network.cochange)) { + if (!is.null(private$commits.network.cochange)) { logging::logdebug("get.commit.network.cochange: finished. (already existing)") - return(private$commit.network.cochange) + return(private$commits.network.cochange) } ## construct edge list based on commit--artifact data - commit.net.data.raw = private$proj.data$group.commits.by.data.column("commits", "artifact") + commit.net.data.raw = private$proj.data$group.commits.by.data.column("artifact") commit.net.data = construct.edge.list.from.key.value.list( commit.net.data.raw, @@ -763,7 +771,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ) ## store network - private$commit.network.cochange = commit.net + private$commits.network.cochange = commit.net logging::logdebug("get.commit.network.cochange: finished.") return(commit.net) @@ -843,8 +851,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$artifacts.network.cochange = NULL private$artifacts.network.issue = NULL private$artifacts.network.mail = NULL - private$commit.network.commit.interaction = NULL - private$commit.network.cochange = NULL + private$commits.network.commit.interaction = NULL + private$commits.network.cochange = NULL private$proj.data = private$proj.data.original if (private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() @@ -1192,7 +1200,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", "bipartite.net" = bipartite.net, "authors.net" = authors.net, "artifacts.net" = artifacts.net, - "commit.net" = commit.net + "commits.net" = commit.net )) }, @@ -1322,7 +1330,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' i.e., whether to only add edges from the later event to the previous one. 
#' If \code{NA} is passed, the default value is taken. #' [default: directed] -#' @param network.type the type of network for which the key value data is to be used as edges [default: "author"] +#' @param network.type the type of network for which the key value data is to be used as edges +#' (one out of "author", "artifact", or "commit")[default: "author"] #' #' @return a list of two data.frames named 'vertices' and 'edges' (compatible with return value #' of \code{igraph::as.data.frame}) @@ -1361,11 +1370,11 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed } } - ## if edges in a commit network contain 'date', 'hash' or 'file' attributes, remove them + ## if edges in a commit network contain 'hash' or 'file' attributes, remove them ## as they belong to commits, which are the vertices in commit networks if (network.type == "commit") { cols.which = which(edge.attributes %in% c("hash", "file")) - edge.attributes <- edge.attributes[-cols.which] + edge.attributes = edge.attributes[-cols.which] } if (respect.temporal.order) { @@ -1375,7 +1384,9 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed edge.attributes, keys, keys.number, network.type) edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + vertices.processed = unlist(parallel::mclapply(edge.list.data, function(data) { + return(attr(data, "vertices.processed")) + })) } else { @@ -1384,28 +1395,31 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed edge.attributes, keys, keys.number) edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + vertices.processed = unlist(parallel::mclapply(edge.list.data, function(data) { + return(attr(data, "vertices.processed")) + })) } 
logging::logdebug("construct.edge.list.from.key.value.list: finished.") if (network.type == "commit") { - vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, - function(data) attr(data, "vertices.dates.processed")) ) + vertices.dates.processed = unlist(parallel::mclapply(edge.list.data, function(data) { + return (attr(data, "vertices.dates.processed")) + })) return(list( - vertices = data.frame( - name = unique(vertices.processed), - date = get.date.from.string(unique(vertices.dates.processed)) - ), - edges = edge.list + vertices = data.frame( + name = unique(vertices.processed), + date = get.date.from.string(unique(vertices.dates.processed)) + ), + edges = edge.list )) } else { return(list( - vertices = data.frame( - name = unique(vertices.processed) - ), - edges = edge.list + vertices = data.frame( + name = unique(vertices.processed) + ), + edges = edge.list )) } } @@ -1504,13 +1518,13 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke construct.edges.no.temporal.order = function(set, network.conf, edge.attributes, keys, keys.number) { number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1)) logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", - match(attr(set, "group.name"), keys), keys.number, - attr(set, "group.type"), attr(set, "group.name"), number.edges) + match(attr(set, "group.name"), keys), keys.number, + attr(set, "group.type"), attr(set, "group.name"), number.edges) ## Skip artifacts with many, many edges if (number.edges > network.conf$get.value("skip.threshold")) { logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", - attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) + attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) return(NULL) } From 170bc66eb779d7cf2ab504db7c3f4ec483103838 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach 
Date: Wed, 7 Aug 2024 15:30:30 +0200 Subject: [PATCH 15/16] Update News.md and minor fix Include this PR's changelog in the NEWS.md Add constant for commit interaction artifact type Move check for avoiding infinite recursion to the correct position and add commentary Signed-off-by: Leo Sendelbach --- NEWS.md | 4 ++++ README.md | 4 ++-- util-conf.R | 2 ++ util-data.R | 12 +++++++----- util-networks.R | 4 ++-- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3d093756..1047a961 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ - Add line-based code coverage reports into CI pipeline. Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) - Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) +- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. 
This includes adding new config parameters and a function for adding vertex attributes to a commit network(PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) ### Changed/Improved @@ -19,10 +20,13 @@ - Replace deprecated `igraph` functions by their preferred alternatives (PR #264, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) +- Refactor function `construct.edge.list.from.key.value.list` to be more readable(PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) +- Fix networks based upon commit interaction data to also have the attribute `artifact.type`(PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367) +- Fix endless recursion that could occur when commit interaction data was configured and commit data is empty (PR #263, 3fb7437b68950303916b62984fa449732c70353e) ## 4.4 diff --git a/README.md b/README.md index 58b2c82e..804b376c 100644 --- a/README.md +++ b/README.md @@ -630,7 +630,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - `author.relation` * The relation(s) among authors, encoded as edges in an author network * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! - * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] + * possible values: [*`"mail"`*, `"cochange"`, `"issue"`, `commit.interaction`] - `author.directed` * The directedness of edges in an author network * [`TRUE`, *`FALSE`*] @@ -649,7 +649,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. 
- `artifact.relation` * The relation(s) among artifacts, encoded as edges in an artifact network * **Note**: Additionally, this relation configures also the author--artifact relation in bipartite and multi networks! - * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] + * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`, `commit.interaction`] - `artifact.directed` * The directedness of edges in an artifact network * **Note**: This parameter does only affect the `issue` relation, as the `cochange` relation is always undirected, while the `callgraph` relation is always directed. For the `mail`, we currently do not have data available to exhibit edge information. diff --git a/util-conf.R b/util-conf.R index 35e5303e..85aec34a 100644 --- a/util-conf.R +++ b/util-conf.R @@ -63,6 +63,8 @@ ARTIFACT.CODEFACE = list( "file" = "File" ) +ARTIFACT.COMMIT.INTERACTION = "CommitInteraction" + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Conf -------------------------------------------------------------------- diff --git a/util-data.R b/util-data.R index 7f2a971a..90c01ca4 100644 --- a/util-data.R +++ b/util-data.R @@ -415,11 +415,13 @@ ProjectData = R6::R6Class("ProjectData", #' #' This method should be called whenever the field \code{commit.interactions} is changed. update.commit.interactions = function() { - stacktrace = get.stacktrace(sys.calls()) - caller = get.second.last.element(stacktrace) - if (self$is.data.source.cached("commit.interactions") && - (is.na(caller)|| paste(caller, collapse = " ") != "self$set.commits(commit.data)")) { - if (!self$is.data.source.cached("commits.unfiltered")) { + if (self$is.data.source.cached("commit.interactions")) { + ## check if caller was 'set.commits'. If so, or if commits are already filtered, + ## do not get the commits again. 
+ stacktrace = get.stacktrace(sys.calls()) + caller = get.second.last.element(stacktrace) + if (!self$is.data.source.cached("commits.unfiltered") && + (is.na(caller) || paste(caller, collapse = " ") != "self$set.commits(commit.data)")) { self$get.commits() } diff --git a/util-networks.R b/util-networks.R index dd27f36f..da1b1da6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -249,7 +249,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[2] = "from" colnames(edges)[4] = "hash" if (nrow(edges) > 0) { - edges[["artifact.type"]] = "CommitInteraction" + edges[["artifact.type"]] = ARTIFACT.COMMIT.INTERACTION } author.net.data = list(vertices = vertices, edges = edges) ## construct the network @@ -715,7 +715,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] if (nrow(edges) > 0) { - edges[["artifact.type"]] = "CommitInteraction" + edges[["artifact.type"]] = ARTIFACT.COMMIT.INTERACTION } colnames(edges)[1] = "to" colnames(edges)[2] = "from" From 5842073b8d97622bdae87907c7de7bc370cc2bbb Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 8 Aug 2024 11:18:47 +0200 Subject: [PATCH 16/16] Update `README.md` and `NEWS.md` Minor changes in response to reviews. Also added a use for constant `ARTIFACT.COMMIT.INTERACTION` that was previously overlooked. 
Signed-off-by: Leo Sendelbach --- NEWS.md | 8 +++----- README.md | 14 ++++++++++---- util-read.R | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1047a961..dddf0ac9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,13 +6,13 @@ ### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e, PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367, 3fb7437b68950303916b62984fa449732c70353e, 170bc66eb779d7cf2ab504db7c3f4ec483103838) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) - Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction`
(PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) - Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) - Add line-based code coverage reports into CI pipeline. Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) - Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) -- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. This includes adding new config parameters and a function for adding vertex attributes to a commit network(PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) +- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. 
This includes adding new config parameters and the function `add.vertex.attribute.commit.network` for adding vertex attributes to a commit network (PR #263, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) ### Changed/Improved @@ -20,13 +20,11 @@ - Replace deprecated `igraph` functions by their preferred alternatives (PR #264, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) -- Refactor function `construct.edge.list.from.key.value.list` to be more readable(PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) +- Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) -- Fix networks based upon commit interaction data to also have the attribute `artifact.type`(PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367) -- Fix endless recursion that could occur when commit interaction data was configured and commit data is empty (PR #263, 3fb7437b68950303916b62984fa449732c70353e) ## 4.4 diff --git a/README.md b/README.md index 804b376c..dc2cba45 100644 --- a/README.md +++ b/README.md @@ -237,7 +237,7 @@ There are four types of networks that can be built using this library: author ne - Commit networks * The vertices in a commit network denote any commits in the data. All vertices are uniquely identifyable by the hash of the commit. There are only unipartite edges among commits in this type of network. - * The relations (i.e., the edges meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`.
The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). + * The relations (i.e., the edges' meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`. The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). - Bipartite networks * The vertices in a bipartite network denote both authors and artifacts. There are only bipartite edges from authors to artifacts in this type of network. @@ -275,7 +275,7 @@ Relations determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. - * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit interaction data. + * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit-interaction data. * This relation does not apply for bipartite networks. #### Edge-construction algorithms for author networks @@ -630,7 +630,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - `author.relation` * The relation(s) among authors, encoded as edges in an author network * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! 
- * possible values: [*`"mail"`*, `"cochange"`, `"issue"`, `commit.interaction`] + * possible values: [*`"mail"`*, `"cochange"`, `"issue"`, `"commit.interaction"`] - `author.directed` * The directedness of edges in an author network * [`TRUE`, *`FALSE`*] @@ -649,11 +649,17 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - `artifact.relation` * The relation(s) among artifacts, encoded as edges in an artifact network * **Note**: Additionally, this relation configures also the author--artifact relation in bipartite and multi networks! - * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`, `commit.interaction`] + * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`, `"commit.interaction"`] - `artifact.directed` * The directedness of edges in an artifact network * **Note**: This parameter does only affect the `issue` relation, as the `cochange` relation is always undirected, while the `callgraph` relation is always directed. For the `mail`, we currently do not have data available to exhibit edge information. * [`TRUE`, *`FALSE`*] +- `commit.relation` + * The relation(s) among commits, encoded as edges in a commit network + * possible values: [*`"cochange"`*, `"commit.interaction"`] +- `commit.directed` + * The directedness of edges in a commit network + * [`TRUE`, *`FALSE`*] - `edge.attributes` * The list of edge-attribute names and information * a subset of the following as a single vector: diff --git a/util-read.R b/util-read.R index ecf60c27..06c082e5 100644 --- a/util-read.R +++ b/util-read.R @@ -953,7 +953,7 @@ read.commit.interactions = function(data.path = NULL) { ## Author data will be merged from commit data in \code{update.commit.interactions}. interactions["base.author"] = NA_character_ interactions["interacting.author"] = NA_character_ - interactions["artifact.type"] = "CommitInteraction" + interactions["artifact.type"] = ARTIFACT.COMMIT.INTERACTION return(interactions) })))