From 83bfc0e55c584926406bc9587afb00b9baa1baff Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 23 Aug 2017 20:00:23 +0200 Subject: [PATCH 01/40] Introduce file with network metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 metrics.R diff --git a/metrics.R b/metrics.R new file mode 100644 index 00000000..e9b00c9e --- /dev/null +++ b/metrics.R @@ -0,0 +1,33 @@ + +requireNamespace("igraph") + + + + + +hub.indegree = function(network){ + degrees = igraph::degree(network, mode = c("in")) + vertex = which.max(degrees) + return(igraph::V(network)[vertex]) +} + +density = function(network) { + density = igraph::graph.density(network) + return(density) +} + +avg.outdegree = function(network) { + outdegrees = igraph::degree(network, mode = c("out")) + avg = mean(outdegrees) + return(avg) +} + +avg.pathlength = function(network) { + lengths = igraph::shortest.paths(network, V(network), mode = "out", weights = NA) + lengths = unname(lengths + + + + + +} From 76014b2b8e5d11f484cb05b53f0f0932fe875768 Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 23 Aug 2017 23:45:06 +0200 Subject: [PATCH 02/40] Add more network metrics, change issue data folder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 43 +++++++++++++++++++++++++++++++++++++++++-- util-conf.R | 7 +++---- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/metrics.R b/metrics.R index e9b00c9e..91acedac 100644 --- a/metrics.R +++ b/metrics.R @@ -23,11 +23,50 @@ avg.outdegree = function(network) { } avg.pathlength = function(network) { - lengths = igraph::shortest.paths(network, V(network), mode = "out", weights = NA) - lengths = unname(lengths + return(igraph::average.path.length(network, directed = TRUE, unconnected = FALSE)) +} + 
+clustering.coeff = function(network) { + local.cc = igraph::transitivity(network, type = "local", vids = NULL) + cc = mean(local.cc, na.rm = TRUE) + return(cc) +} + +# Not sure if this is correct +modularity = function(network) { + comm = igraph::cluster_walktrap(network) + mod = igraph::modularity(network, igraph::membership(comm)) + return(mod) +} + +smallworldness = function(network) { + +} + + +determine.smallworldness = function(g) { + + # construct Erdös-Renyi network with same number of nodes and edges as g + h = erdos.renyi.game(n=vcount(g), p.or.m=ecount(g), type="gnm", directed=TRUE) + + ## compute clustering coefficients + g.cc = transitivity(g) + h.cc = transitivity(h) + ## compute average shortest-path length + g.l = average.path.length(g) + h.l = average.path.length(h) + ## binary decision + # intermediate variables + gamma = g.cc / h.cc + lambda = g.l / h.l + # indicator s.delta + s.delta = gamma / lambda + # if s.delta > 1, then the network is a small-world network + #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) + return (s.delta) } diff --git a/util-conf.R b/util-conf.R index 17e73d29..8db89d78 100644 --- a/util-conf.R +++ b/util-conf.R @@ -265,8 +265,8 @@ ProjectConf = R6::R6Class("ProjectConf", #' @param casestudy the current casestudy #' #' @return the path to the issues folder - get.issues.folder = function(data, selection.process, casestudy) { - return(file.path(data, private$subfolder.results, selection.process, paste(casestudy, "issues", sep = "_"))) + get.issues.folder = function(data, selection.process, project) { + return(file.path(data, private$subfolder.results, selection.process, project)) }, #' Construct and return the path to a Codeface configuration. 
@@ -317,7 +317,6 @@ ProjectConf = R6::R6Class("ProjectConf", conf$artifact = artifact conf$artifact.short = ARTIFACT.TO.ABBREVIATION[[ conf$artifact ]] conf$artifact.codeface = ARTIFACT.CODEFACE[[ conf$artifact ]] - ## store path to actual Codeface data conf$datapath = private$get.results.folder(data, selection.process, conf[["project"]], tagging) ## store path to call graphs @@ -327,7 +326,7 @@ ProjectConf = R6::R6Class("ProjectConf", ## store path to pasta data conf$datapath.pasta = private$get.pasta.folder(data, selection.process, casestudy) ## store path to issue data - conf$datapath.issues = private$get.issues.folder(data, selection.process, casestudy) + conf$datapath.issues = private$get.issues.folder(data, selection.process, conf[["project"]]) ## READ REVISIONS META-DATA From ba08a24a1ba789f7cd4a7b07600bfc483a65356b Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Thu, 24 Aug 2017 09:48:20 +0200 Subject: [PATCH 03/40] Add namespace requirements to smallworldness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/metrics.R b/metrics.R index 91acedac..9c288ccd 100644 --- a/metrics.R +++ b/metrics.R @@ -40,22 +40,23 @@ modularity = function(network) { } smallworldness = function(network) { - + smallworldness <- determine.smallworldness(network) # smallworldness(nw.data$nw) # + return(smallworldness) } determine.smallworldness = function(g) { # construct Erdös-Renyi network with same number of nodes and edges as g - h = erdos.renyi.game(n=vcount(g), p.or.m=ecount(g), type="gnm", directed=TRUE) + h = igraph::erdos.renyi.game(n=igraph::vcount(g), p.or.m=igraph::gsize(g), type="gnm", directed=TRUE) ## compute clustering coefficients - g.cc = transitivity(g) - h.cc = transitivity(h) + g.cc = igraph::transitivity(g) + h.cc = igraph::transitivity(h) ## compute average shortest-path length - g.l = 
average.path.length(g) - h.l = average.path.length(h) + g.l = igraph::average.path.length(g) + h.l = igraph::average.path.length(h) ## binary decision # intermediate variables From 6adabaf58acbeecccfb27ad56d1b42a925b3144a Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Thu, 31 Aug 2017 09:52:57 +0200 Subject: [PATCH 04/40] Continue implementing metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/metrics.R b/metrics.R index 9c288ccd..cf50daf1 100644 --- a/metrics.R +++ b/metrics.R @@ -39,6 +39,7 @@ modularity = function(network) { return(mod) } +# requires simplified network smallworldness = function(network) { smallworldness <- determine.smallworldness(network) # smallworldness(nw.data$nw) # return(smallworldness) @@ -71,3 +72,37 @@ determine.smallworldness = function(g) { return (s.delta) } + + +amount.nodes = function(network) { + return(igraph::vcount(network)) +} + +power.law.fitting = function(network) { + v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) + + ## Power-law fiting + ## (from Mitchell Joblin , Siemens AG, 2012, 2013) + p.fit = igraph::power.law.fit(v.degree, implementation="plfit") + param.names = c("alpha", "xmin", "KS.p") + res = list() + res[param.names] = p.fit[param.names] + + ## Check percent of vertices under power-law + res$num.power.law = length(which(v.degree >= res$xmin)) + res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) + + return(cbind(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law)) +} + +generate.hierarchy = function(network) { + degrees = igraph::degree(network, mode="total") + cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) + + degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + cluster.coeff = subset(cluster.coeff, 
!(is.nan(cluster.coeff) | cluster.coeff == 0)) + + names.of.points = row.names(as.data.frame(degrees.without.cc)) +} + + From aeb456f4e6bca8ddca8516950d5ea4ce008f2c03 Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 6 Sep 2017 15:26:44 +0200 Subject: [PATCH 05/40] Add more metrics for network analysis. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/metrics.R b/metrics.R index cf50daf1..f2cf160b 100644 --- a/metrics.R +++ b/metrics.R @@ -1,19 +1,11 @@ requireNamespace("igraph") - - - - hub.indegree = function(network){ degrees = igraph::degree(network, mode = c("in")) vertex = which.max(degrees) - return(igraph::V(network)[vertex]) -} - -density = function(network) { - density = igraph::graph.density(network) - return(density) + node = igraph::V(network)[vertex] + return(node) } avg.outdegree = function(network) { @@ -22,6 +14,15 @@ avg.outdegree = function(network) { return(avg) } +node.degrees = function(network) { + return(igraph::degree(network, mode="total")) +} + +density = function(network) { + density = igraph::graph.density(network) + return(density) +} + avg.pathlength = function(network) { return(igraph::average.path.length(network, directed = TRUE, unconnected = FALSE)) } @@ -32,16 +33,19 @@ clustering.coeff = function(network) { return(cc) } -# Not sure if this is correct modularity = function(network) { comm = igraph::cluster_walktrap(network) mod = igraph::modularity(network, igraph::membership(comm)) return(mod) } +amount.nodes = function(network) { + return(igraph::vcount(network)) +} + # requires simplified network smallworldness = function(network) { - smallworldness <- determine.smallworldness(network) # smallworldness(nw.data$nw) # + smallworldness <- determine.smallworldness(network) return(smallworldness) } @@ -73,11 +77,6 @@ determine.smallworldness = 
function(g) { return (s.delta) } - -amount.nodes = function(network) { - return(igraph::vcount(network)) -} - power.law.fitting = function(network) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) @@ -102,7 +101,6 @@ generate.hierarchy = function(network) { degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - names.of.points = row.names(as.data.frame(degrees.without.cc)) + return(data.frame(x = log(degrees.without.cc), y = cluster.coeff)) } - From c1ad7dc17eea14078a60dc6426349403d3f8e1b3 Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 6 Sep 2017 21:08:24 +0200 Subject: [PATCH 06/40] Indroduce first plot, fix error when loading issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 2 +- plot-metrics.R | 10 ++++++++++ util-conf.R | 2 +- util-read.R | 10 ++++------ 4 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 plot-metrics.R diff --git a/metrics.R b/metrics.R index f2cf160b..8a4f20f6 100644 --- a/metrics.R +++ b/metrics.R @@ -101,6 +101,6 @@ generate.hierarchy = function(network) { degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(x = log(degrees.without.cc), y = cluster.coeff)) + return(data.frame(deg = log(degrees.without.cc), cc = cluster.coeff)) } diff --git a/plot-metrics.R b/plot-metrics.R new file mode 100644 index 00000000..86591bf9 --- /dev/null +++ b/plot-metrics.R @@ -0,0 +1,10 @@ + +requireNamespace("ggplot2") + + +plot.hierarchy = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg, color = deg)) + + ggplot2::geom_point() + + ggplot2::geom_smooth() + return(plot) +} diff --git a/util-conf.R b/util-conf.R index 8739f456..e1c86133 100644 --- 
a/util-conf.R +++ b/util-conf.R @@ -516,6 +516,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, #' @return the path to the configuration folder get.configurations.folder = function(data, selection.process) { return(file.path(data, private$subfolder.configurations, selection.process)) + }, #' Construct and return the path to a Codeface configuration. @@ -589,7 +590,6 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, logging::logerror("Artifact '%s' cannot be converted to a proper Codeface tagging! Stopping...", artifact) stop("Stopped due to wrong configuration parameters!") } - ## construct file name for configuration conf.file = private$construct.conf.path(data, selection.process, casestudy, tagging) diff --git a/util-read.R b/util-read.R index 72491987..cee5fceb 100644 --- a/util-read.R +++ b/util-read.R @@ -336,14 +336,11 @@ read.issues = function(data.path) { ## set proper column names colnames(issue.data) = c( "issue.id", "issue.state", "creation.date", "closing.date", "is.pull.request", # issue information - "author.id", "author.name", "author.email", # author information + "author.name", "author.mail", # author information "date", # the date - "event.name" # the event describing the row's entry + "ref.name", "event.name" # the event describing the row's entry ) - ## remove unneeded columns from data - issue.data["author.id"] = NULL - ## set pattern for issue ID for better recognition issue.data[["issue.id"]] = sprintf("", issue.data[["issue.id"]]) @@ -351,9 +348,10 @@ read.issues = function(data.path) { issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) ## convert dates and sort by 'date' column + print(issue.data) issue.data[["date"]] = as.POSIXct(issue.data[["date"]]) issue.data[["creation.date"]] = as.POSIXct(issue.data[["creation.date"]]) - issue.data[["closing.date"]][ issue.data[["closing.date"]] == "null" ] = NA + issue.data[["closing.date"]][ issue.data[["closing.date"]] == "" ] = NA 
issue.data[["closing.date"]] = as.POSIXct(issue.data[["closing.date"]]) issue.data = issue.data[order(issue.data[["date"]], decreasing = FALSE), ] # sort! From 545850aeae438a872838c7ecbd0ef13e0ca0f5ab Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Mon, 11 Sep 2017 15:27:50 +0200 Subject: [PATCH 07/40] Introduce handling of incomplete ranges Add functionality to cut data sources to the same date ranges Add parameter in NetworkConf for that purpose Add cutting functionalities in the NetworkBuilder fixes #38 Signed-off-by: Christian Hechtl --- util-conf.R | 6 ++ util-data.R | 150 +++++++++++++++++++++++++++++++++++++++++++++++- util-networks.R | 56 ++++++++++++++++++ 3 files changed, 210 insertions(+), 2 deletions(-) diff --git a/util-conf.R b/util-conf.R index 6b625917..d5493a5c 100644 --- a/util-conf.R +++ b/util-conf.R @@ -385,6 +385,12 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, type = "numeric", allowed = Inf, allowed.number = 1 + ), + unify.date.ranges = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 ) ) diff --git a/util-data.R b/util-data.R index 38a319c4..7b5bba31 100644 --- a/util-data.R +++ b/util-data.R @@ -13,6 +13,18 @@ requireNamespace("R6") # for R6 classes requireNamespace("logging") # for logging requireNamespace("parallel") # for parallel computation +## / / / / / / / / / / / / / / +## Constant +## + +## mapping of relation to data source +RELATION.TO.DATASOURCE = list( + "cochange" = "commits", + "callgraph" = "commits", + "mail" = "mails", + "issue" = "issues" +) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## ProjectData ------------------------------------------------------------- @@ -40,8 +52,10 @@ ProjectData = R6::R6Class("ProjectData", mails = NULL, # data.frame ## authors authors = NULL, # list - ##issues + ## issues issues = NULL, #data.frame + ## timestamps of mail, issue and commit data + data.timestamps = NULL, #data.frame ## * * 
filtering commits ------------------------------------------- @@ -155,6 +169,45 @@ ProjectData = R6::R6Class("ProjectData", } } return(data) + }, + + #' Call the getters of the specified data sources in order to + #' initialize the sources and extract the timestamps. + #' + #' @param data.sources the data sources to be prepated + prepare.timestamps = function(data.sources) { + if("mails" %in% data.sources) { + self$get.mails() + } + if("commits" %in% data.sources) { + self$get.commits.raw() + } + if("issues" %in% data.sources) { + self$get.issues() + } + + }, + + #' Extract the earliest and the latest date from the specified data source + #' and store it to the timestamps data.frame. + #' + #' @param source the specified data source + extract.timestamps = function(source) { + if(is.null(private$data.timestamps)) { + private$data.timestamps = data.frame(row.names = c("start", "end")) + } + if(source == "mails") { + private$data.timestamps$mails = c(as.POSIXct(min(private$mails$date)), + as.POSIXct(max(private$mails$date))) + } else if(source == "commits") { + private$data.timestamps$commits = c(as.POSIXct(min(private$commits.raw$date)), + as.POSIXct(max(private$commits.raw$date))) + + } else if(source == "issues") { + private$data.timestamps$issues = c(as.POSIXct(min(private$issues$creation.date)), + as.POSIXct(max(private$issues$creation.date))) + + } } ), @@ -196,6 +249,7 @@ ProjectData = R6::R6Class("ProjectData", private$mails = NULL private$authors = NULL private$pasta = NULL + private$data.timestamps = NULL }, ## * * configuration ----------------------------------------------- @@ -298,6 +352,10 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered.empty) }, + set.commits.filtered.empty = function(data) { + private$commits.filtered.empty = data + }, + #' Get the list of commits without the base artifact. #' If it doesn´t already exist call the filter method. 
#' @@ -313,6 +371,10 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered) }, + set.commits.filtered = function(data) { + private$commits.filtered = data + }, + #' Get the complete list of commits. #' If it doesn´t already exist call the read method first. #' @@ -327,6 +389,7 @@ ProjectData = R6::R6Class("ProjectData", private$project.conf$get.value("artifact") ) } + private$extract.timestamps(source = "commits") return(private$commits.raw) }, @@ -407,6 +470,7 @@ ProjectData = R6::R6Class("ProjectData", private$mails = private$add.pasta.data(private$mails) } } + private$extract.timestamps(source = "mails") return(private$mails) }, @@ -487,6 +551,88 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, + set.artifacts = function(artifacts) { + logging::loginfo("Setting artifact data.") + private$artifacts = artifacts + }, + + ## get the list of issues + get.issues = function() { + logging::loginfo("Getting issue data") + + ## if issues have not been read yet do this + if(is.null(private$issues)) { + private$issues = read.issues(self$get.data.path.issues()) + } + private$extract.timestamps(source = "issues") + + return(private$issues) + }, + + #' Set the issue data to the given new data. + #' + #' @param issues the given new data + set.issues = function(issues) { + logging::loginfo("Setting issue data.") + private$issues = issues + }, + + #' Get the timestamps (earliest and latest date) of the specified data sources. + #' If 'simple' is TRUE return the overall latest start and earliest end date + #' in order to cut the specified data sources to the same date ranges. 
+ #' + #' @param data.sources the specified data sources + #' @param simple whether or not the timestamps get simplified + #' + #' @return a data.frame with the timestamps + get.data.timestamps = function(data.sources = c("mails", "commits", "issues"), simple = FALSE) { + private$prepare.timestamps(data.sources = data.sources) + if(is.null(private$data.timestamps)) { + logging::logwarn("No timestamps available.") + return(data.frame()) + } else if(simple == FALSE) { + timestamps = subset(private$data.timestamps, select = data.sources) + return(timestamps) + } else { + subset.timestamps = private$data.timestamps[data.sources] + timestamps.buffer = data.frame(max = apply(subset.timestamps,1,max), + min = apply(subset.timestamps,1,min)) + timestamps = data.frame(start = timestamps.buffer["start", "max"], + end = timestamps.buffer["end", "min"]) + + return(timestamps) + } + + }, + + #' Cut the specified data sources to the same date range depending on the extracted + #' timestamps. + #' + #' @param data.sources the specified data sources + #' + #' @return a list of the cut data.sources + get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { + timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) + result = list() + if("mails" %in% data.sources) { + mails.cut = self$get.mails()[which(private$mails$date >= timestamps$start),] + mails.cut = mails.cut[which(mails.cut$date <= timestamps$end),] + result[["mails"]] = mails.cut + } + if("commits" %in% data.sources) { + commits.cut = self$get.commits.raw()[which(private$commits.raw$date >= timestamps$start),] + commits.cut = commits.cut[which(commits.cut$date <= timestamps$end),] + result[["commits"]] = commits.cut + } + if("issues" %in% data.sources) { + issues.cut = self$get.issues()[which(private$issues$creation.date >= timestamps$start),] + issues.cut = issues.cut[which(issues.cut$creation.date <= timestamps$end),] + result[["issues"]] = issues.cut + } + + 
return(result) + }, + #' Get single pasta items. #' For a given 'message.id', the associated 'commit.hash' is returned. #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. @@ -736,7 +882,7 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, return(private$revision.callgraph) } - ) + ) ) diff --git a/util-networks.R b/util-networks.R index 47ef2f73..68fba05b 100644 --- a/util-networks.R +++ b/util-networks.R @@ -60,6 +60,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## * * data and configuration -------------------------------------- proj.data = NULL, + proj.data.original = NULL, network.conf = NULL, ## * * network caching --------------------------------------------- @@ -72,6 +73,50 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph + ## * * data cutting --------------------------------------------- + + #' Clone the current data object and replace the specified + #' data sources by the cut ones + #' + #' @param cut.data the cut data sources + #' + #' @return the clone + clone.data = function(cut.data) { + clone = private$proj.data$clone() + if("mails" %in% names(cut.data)) { + clone$set.mails(cut.data$mails) + } + if("commits" %in% names(cut.data)) { + clone$set.commits.raw(cut.data$commits) + clone$set.commits.filtered(NULL) + clone$set.commits.filtered.empty(NULL) + } + if("issues" %in% names(cut.data)) { + clone$set.issues(cut.data$issues) + } + return(clone) + }, + + #' Cut the data sources of the data object to the same date ranges. + cut.data.to.same.timestamps = function() { + cut.data = private$proj.data$get.data.cut.to.same.date(data.sources = private$get.data.sources()) + clone = private$clone.data(cut.data = cut.data) + private$proj.data.original = private$proj.data + private$proj.data = clone + }, + + #' Determine which data sources should be cut depending on the artifact and author relation. 
+ #' + #' @return the data sources to be cut + get.data.sources = function() { + author.relation = private$network.conf$get.variable("author.relation") + artifact.relation = private$network.conf$get.variable("artifact.relation") + data.sources = c(RELATION.TO.DATASOURCE[[author.relation]]) + data.sources = c(data.sources, RELATION.TO.DATASOURCE[[artifact.relation]]) + data.sources = unique(data.sources) + return(data.sources) + }, + ## * * author networks --------------------------------------------- #' Get the co-change-based author relation as network. @@ -379,6 +424,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (class(self)[1] == "ProjectData") logging::loginfo("Initialized data object %s", self$get.class.name()) + + if(private$network.conf$get.variable("unify.date.ranges")) { + private$cut.data.to.same.timestamps() + } }, ## * * resetting environment --------------------------------------- @@ -390,6 +439,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$authors.network.cochange = NULL private$artifacts.network.cochange = NULL private$artifacts.network.callgraph = NULL + if(!is.null(private$proj.data.original)) { + private$proj.data = private$proj.data.original + private$proj.data.original = NULL + if(private$network.conf$get.variable("unify.date.ranges")) { + private$cut.data.to.same.timestamps() + } + } }, ## * * configuration ----------------------------------------------- From 5921070b874ba1aada50d429481baea37bef5cbe Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Mon, 11 Sep 2017 15:52:57 +0200 Subject: [PATCH 08/40] Adjust getter of NetworkConf to new type Signed-off-by: Christian Hechtl --- util-networks.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/util-networks.R b/util-networks.R index 68fba05b..dddfc049 100644 --- a/util-networks.R +++ b/util-networks.R @@ -109,8 +109,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @return the data sources to be cut get.data.sources = function() { - 
author.relation = private$network.conf$get.variable("author.relation") - artifact.relation = private$network.conf$get.variable("artifact.relation") + author.relation = private$network.conf$get.value("author.relation") + artifact.relation = private$network.conf$get.value("artifact.relation") data.sources = c(RELATION.TO.DATASOURCE[[author.relation]]) data.sources = c(data.sources, RELATION.TO.DATASOURCE[[artifact.relation]]) data.sources = unique(data.sources) @@ -425,7 +425,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (class(self)[1] == "ProjectData") logging::loginfo("Initialized data object %s", self$get.class.name()) - if(private$network.conf$get.variable("unify.date.ranges")) { + if(private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() } }, @@ -442,7 +442,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if(!is.null(private$proj.data.original)) { private$proj.data = private$proj.data.original private$proj.data.original = NULL - if(private$network.conf$get.variable("unify.date.ranges")) { + if(private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() } } From 7187e1f015e2434ebf5376775721513b23177e4a Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 13 Sep 2017 22:49:55 +0200 Subject: [PATCH 09/40] Add plot functions for metrics and change encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 64 +++++++++++++++++++++++--------------------------- plot-metrics.R | 61 ++++++++++++++++++++++++++++++++++++++++++++++- util-conf.R | 2 +- util-read.R | 8 +++---- 4 files changed, 95 insertions(+), 40 deletions(-) diff --git a/metrics.R b/metrics.R index 8a4f20f6..a0f3aefb 100644 --- a/metrics.R +++ b/metrics.R @@ -1,66 +1,61 @@ - requireNamespace("igraph") -hub.indegree = function(network){ +metrics.hub.indegree = function(network, project){ degrees = igraph::degree(network, mode = c("in")) 
vertex = which.max(degrees) - node = igraph::V(network)[vertex] - return(node) + df = data.frame("name" = names(vertex), "degree" = unname(vertex), "project" = project) + return(df) } -avg.outdegree = function(network) { +metrics.avg.outdegree = function(network, project) { outdegrees = igraph::degree(network, mode = c("out")) avg = mean(outdegrees) - return(avg) + df = data.frame("project" = project, "avg.degree" = avg) + return(df) } -node.degrees = function(network) { - return(igraph::degree(network, mode="total")) +metrics.node.degrees = function(network) { + degrees = igraph::degree(network, mode="total") + return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } -density = function(network) { +metrics.density = function(network, project) { density = igraph::graph.density(network) - return(density) + return(data.frame("project" = project, "density" = unname(density))) } -avg.pathlength = function(network) { - return(igraph::average.path.length(network, directed = TRUE, unconnected = FALSE)) +metrics.avg.pathlength = function(network, project) { + return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = TRUE, unconnected = FALSE))) } -clustering.coeff = function(network) { +metrics.clustering.coeff = function(network, project) { local.cc = igraph::transitivity(network, type = "local", vids = NULL) cc = mean(local.cc, na.rm = TRUE) - return(cc) + return(data.frame("project" = project, "clustering.coeff" = cc)) } -modularity = function(network) { +metrics.modularity = function(network, project) { comm = igraph::cluster_walktrap(network) mod = igraph::modularity(network, igraph::membership(comm)) - return(mod) + return(data.frame("project" = project, "modularity" = mod)) } -amount.nodes = function(network) { - return(igraph::vcount(network)) +metrics.amount.nodes = function(network, project) { + return(data.frame("project" = project, "amount.nodes" = igraph::vcount(network))) } # requires simplified 
network -smallworldness = function(network) { - smallworldness <- determine.smallworldness(network) - return(smallworldness) -} - - -determine.smallworldness = function(g) { +metrics.smallworldness = function(network, project) { # construct Erdös-Renyi network with same number of nodes and edges as g - h = igraph::erdos.renyi.game(n=igraph::vcount(g), p.or.m=igraph::gsize(g), type="gnm", directed=TRUE) + h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=TRUE) ## compute clustering coefficients - g.cc = igraph::transitivity(g) + g.cc = igraph::transitivity(network) h.cc = igraph::transitivity(h) ## compute average shortest-path length - g.l = igraph::average.path.length(g) + g.l = igraph::average.path.length(network) h.l = igraph::average.path.length(h) ## binary decision @@ -74,10 +69,10 @@ determine.smallworldness = function(g) { # if s.delta > 1, then the network is a small-world network #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - return (s.delta) + return (data.frame("project" = project, "smallworldness" = s.delta)) } -power.law.fitting = function(network) { +metrics.power.law.fitting = function(network) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -90,17 +85,18 @@ power.law.fitting = function(network) { ## Check percent of vertices under power-law res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) - - return(cbind(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law)) + df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) + browser() + return(data.frame("power.law" = names(df), "value" = c(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law))) } -generate.hierarchy = function(network) { +metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = 
igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(deg = log(degrees.without.cc), cc = cluster.coeff)) + return(data.frame(deg = log(degrees.without.cc), cc = log(cluster.coeff))) } diff --git a/plot-metrics.R b/plot-metrics.R index 86591bf9..31e65b80 100644 --- a/plot-metrics.R +++ b/plot-metrics.R @@ -1,10 +1,69 @@ requireNamespace("ggplot2") +#plots the maximum indegree one or more projects as a bar diagram +metrics.plot.hub.indegree = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.avg.outdegree = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.degree, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.node.degrees = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = name)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.density = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = density, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.avg.pathlength = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.papthlength, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.clustering.coeff = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = clustering.coeff, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.modularity = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = modularity, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.amount.nodes = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = amount.nodes, x = project)) + + ggplot2::geom_bar(stat="identity") + 
return(plot) +} -plot.hierarchy = function(df) { +metrics.plot.smallworldness = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = smallworldness, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.power.law.fitting = function(df) { + +} + +metrics.plot.hierarchy = function(df) { plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg, color = deg)) + ggplot2::geom_point() + ggplot2::geom_smooth() return(plot) } + diff --git a/util-conf.R b/util-conf.R index e1c86133..4e4c67bb 100644 --- a/util-conf.R +++ b/util-conf.R @@ -621,7 +621,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, ## read revisions file revisions.file = file.path(conf$datapath, "revisions.list") revisions.df <- try(read.table(revisions.file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## break if the list of revisions is empty or any other error occurs if (inherits(revisions.df, 'try-error')) { logging::logerror("There are no revisions available for the current casestudy.") diff --git a/util-read.R b/util-read.R index cee5fceb..df9b76a5 100644 --- a/util-read.R +++ b/util-read.R @@ -39,7 +39,7 @@ read.commits.raw = function(data.path, artifact) { ## read data.frame from disk (as expected from save.list.to.file) [can be empty] commit.data <- try(read.table(file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the list of commits is empty if (inherits(commit.data, 'try-error')) { @@ -164,7 +164,7 @@ read.mails = function(data.path) { ## read data.frame from disk (as expected from save.list.to.file) [can be empty] mail.data <- try(read.table(file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the 
list of mails is empty if (inherits(mail.data, 'try-error')) { @@ -228,7 +228,7 @@ read.authors = function(data.path) { ## read data.frame from disk (as expected from save.list.to.file) [can be empty] authors.df <- try(read.table(file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## break if the list of authors is empty if (inherits(authors.df, 'try-error')) { @@ -324,7 +324,7 @@ read.issues = function(data.path) { ## read issues from disk [can be empty] issue.data = try(read.table(filepath, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the list of commits is empty if (inherits(issue.data, 'try-error')) { From 3e616d8610f5f2b5c0084373dfda9230bdcfaacb Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Thu, 21 Sep 2017 00:31:34 +0200 Subject: [PATCH 10/40] Visual changes to hierarchy plot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 9 ++++----- plot-metrics.R | 7 ++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metrics.R b/metrics.R index a0f3aefb..66cf69c5 100644 --- a/metrics.R +++ b/metrics.R @@ -15,7 +15,7 @@ metrics.avg.outdegree = function(network, project) { } metrics.node.degrees = function(network) { - degrees = igraph::degree(network, mode="total") + degrees = sort(igraph::degree(network, mode="total"), decreasing = TRUE) return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -25,7 +25,7 @@ metrics.density = function(network, project) { } metrics.avg.pathlength = function(network, project) { - return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = TRUE, unconnected = FALSE))) + return(data.frame("project" = project, "avg.pathlength" = 
igraph::average.path.length(network, directed = TRUE, unconnected = TRUE))) } metrics.clustering.coeff = function(network, project) { @@ -55,8 +55,8 @@ metrics.smallworldness = function(network, project) { h.cc = igraph::transitivity(h) ## compute average shortest-path length - g.l = igraph::average.path.length(network) - h.l = igraph::average.path.length(h) + g.l = igraph::average.path.length(network, unconnected = TRUE) + h.l = igraph::average.path.length(h, unconnected = TRUE) ## binary decision # intermediate variables @@ -86,7 +86,6 @@ metrics.power.law.fitting = function(network) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - browser() return(data.frame("power.law" = names(df), "value" = c(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law))) } diff --git a/plot-metrics.R b/plot-metrics.R index 31e65b80..2779903d 100644 --- a/plot-metrics.R +++ b/plot-metrics.R @@ -61,9 +61,10 @@ metrics.plot.power.law.fitting = function(df) { } metrics.plot.hierarchy = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg, color = deg)) + - ggplot2::geom_point() + - ggplot2::geom_smooth() + pred = predict(lm(cc ~ deg, data = df)) + plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg)) + + ggplot2::geom_point(ggplot2::aes(color = deg)) + + ggplot2::geom_line(ggplot2::aes(y = pred)) return(plot) } From 90cdc092dd11db9f4ee57fd1d1e36a4b354d5edf Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 21 Sep 2017 13:51:46 +0200 Subject: [PATCH 11/40] Rebuild cutting mechanism for incomplete Ranges The cutting is replaced by the already existing splitting mechanism Signed-off-by: Christian Hechtl --- util-data.R | 79 +++++++++---------------------------------------- util-networks.R | 40 +++++-------------------- 2 files changed, 21 insertions(+), 98 deletions(-) diff --git 
a/util-data.R b/util-data.R index 7b5bba31..220602d1 100644 --- a/util-data.R +++ b/util-data.R @@ -197,15 +197,15 @@ ProjectData = R6::R6Class("ProjectData", private$data.timestamps = data.frame(row.names = c("start", "end")) } if(source == "mails") { - private$data.timestamps$mails = c(as.POSIXct(min(private$mails$date)), - as.POSIXct(max(private$mails$date))) + private$data.timestamps$mails = c(min(private$mails$date), + max(private$mails$date)) } else if(source == "commits") { - private$data.timestamps$commits = c(as.POSIXct(min(private$commits.raw$date)), - as.POSIXct(max(private$commits.raw$date))) + private$data.timestamps$commits = c(min(private$commits.raw$date), + max(private$commits.raw$date)) } else if(source == "issues") { - private$data.timestamps$issues = c(as.POSIXct(min(private$issues$creation.date)), - as.POSIXct(max(private$issues$creation.date))) + private$data.timestamps$issues = c(min(private$issues$date), + max(private$issues$date)) } } @@ -352,10 +352,6 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered.empty) }, - set.commits.filtered.empty = function(data) { - private$commits.filtered.empty = data - }, - #' Get the list of commits without the base artifact. #' If it doesn´t already exist call the filter method. #' @@ -371,10 +367,6 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered) }, - set.commits.filtered = function(data) { - private$commits.filtered = data - }, - #' Get the complete list of commits. #' If it doesn´t already exist call the read method first. 
#' @@ -551,32 +543,6 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, - set.artifacts = function(artifacts) { - logging::loginfo("Setting artifact data.") - private$artifacts = artifacts - }, - - ## get the list of issues - get.issues = function() { - logging::loginfo("Getting issue data") - - ## if issues have not been read yet do this - if(is.null(private$issues)) { - private$issues = read.issues(self$get.data.path.issues()) - } - private$extract.timestamps(source = "issues") - - return(private$issues) - }, - - #' Set the issue data to the given new data. - #' - #' @param issues the given new data - set.issues = function(issues) { - logging::loginfo("Setting issue data.") - private$issues = issues - }, - #' Get the timestamps (earliest and latest date) of the specified data sources. #' If 'simple' is TRUE return the overall latest start and earliest end date #' in order to cut the specified data sources to the same date ranges. @@ -586,17 +552,15 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return a data.frame with the timestamps get.data.timestamps = function(data.sources = c("mails", "commits", "issues"), simple = FALSE) { + data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) private$prepare.timestamps(data.sources = data.sources) - if(is.null(private$data.timestamps)) { - logging::logwarn("No timestamps available.") - return(data.frame()) - } else if(simple == FALSE) { + if(simple == FALSE) { timestamps = subset(private$data.timestamps, select = data.sources) return(timestamps) } else { subset.timestamps = private$data.timestamps[data.sources] - timestamps.buffer = data.frame(max = apply(subset.timestamps,1,max), - min = apply(subset.timestamps,1,min)) + timestamps.buffer = data.frame(max = apply(subset.timestamps, 1, max), + min = apply(subset.timestamps, 1, min)) timestamps = data.frame(start = timestamps.buffer["start", "max"], end = timestamps.buffer["end", "min"]) @@ 
-612,25 +576,11 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return a list of the cut data.sources get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { + data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) - result = list() - if("mails" %in% data.sources) { - mails.cut = self$get.mails()[which(private$mails$date >= timestamps$start),] - mails.cut = mails.cut[which(mails.cut$date <= timestamps$end),] - result[["mails"]] = mails.cut - } - if("commits" %in% data.sources) { - commits.cut = self$get.commits.raw()[which(private$commits.raw$date >= timestamps$start),] - commits.cut = commits.cut[which(commits.cut$date <= timestamps$end),] - result[["commits"]] = commits.cut - } - if("issues" %in% data.sources) { - issues.cut = self$get.issues()[which(private$issues$creation.date >= timestamps$start),] - issues.cut = issues.cut[which(issues.cut$creation.date <= timestamps$end),] - result[["issues"]] = issues.cut - } - - return(result) + timestamps.vector = c(timestamps$start, timestamps$end) + result = split.data.time.based(self, bins = timestamps.vector) + return(result[[1]]) }, #' Get single pasta items. 
@@ -790,7 +740,6 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) } - ) ) diff --git a/util-networks.R b/util-networks.R index dddfc049..a31fb3b4 100644 --- a/util-networks.R +++ b/util-networks.R @@ -75,34 +75,11 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## * * data cutting --------------------------------------------- - #' Clone the current data object and replace the specified - #' data sources by the cut ones - #' - #' @param cut.data the cut data sources - #' - #' @return the clone - clone.data = function(cut.data) { - clone = private$proj.data$clone() - if("mails" %in% names(cut.data)) { - clone$set.mails(cut.data$mails) - } - if("commits" %in% names(cut.data)) { - clone$set.commits.raw(cut.data$commits) - clone$set.commits.filtered(NULL) - clone$set.commits.filtered.empty(NULL) - } - if("issues" %in% names(cut.data)) { - clone$set.issues(cut.data$issues) - } - return(clone) - }, #' Cut the data sources of the data object to the same date ranges. cut.data.to.same.timestamps = function() { cut.data = private$proj.data$get.data.cut.to.same.date(data.sources = private$get.data.sources()) - clone = private$clone.data(cut.data = cut.data) - private$proj.data.original = private$proj.data - private$proj.data = clone + private$proj.data = cut.data }, #' Determine which data sources should be cut depending on the artifact and author relation. 
@@ -111,9 +88,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.data.sources = function() { author.relation = private$network.conf$get.value("author.relation") artifact.relation = private$network.conf$get.value("artifact.relation") - data.sources = c(RELATION.TO.DATASOURCE[[author.relation]]) - data.sources = c(data.sources, RELATION.TO.DATASOURCE[[artifact.relation]]) - data.sources = unique(data.sources) + data.sources = unique(c(RELATION.TO.DATASOURCE[[author.relation]], + RELATION.TO.DATASOURCE[[artifact.relation]])) return(data.sources) }, @@ -417,6 +393,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @param network.conf the network configuration initialize = function(project.data, network.conf) { private$proj.data = project.data + private$proj.data.original = project.data if(!missing(network.conf) && "NetworkConf" %in% class(network.conf)) { private$network.conf = network.conf @@ -439,12 +416,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$authors.network.cochange = NULL private$artifacts.network.cochange = NULL private$artifacts.network.callgraph = NULL - if(!is.null(private$proj.data.original)) { - private$proj.data = private$proj.data.original - private$proj.data.original = NULL - if(private$network.conf$get.value("unify.date.ranges")) { - private$cut.data.to.same.timestamps() - } + private$proj.data = private$proj.data.original + if(private$network.conf$get.value("unify.date.ranges")) { + private$cut.data.to.same.timestamps() } }, From 4be6f2fc9eccb477ea8973f48d19e5aaea47fce3 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 4 Oct 2017 13:01:21 +0200 Subject: [PATCH 12/40] Fix minor bugs in cutting mechanism The timestamps are now extracted when the issue getter is called A warning message is printed when the data sources don't overlap Add project data getter in the NetworkBuilder for testing reasons Signed-off-by: Christian Hechtl --- util-data.R | 8 +++++++- util-networks.R | 4 ++++ 2 files changed, 11 insertions(+), 1 
deletion(-) diff --git a/util-data.R b/util-data.R index 220602d1..5085cc00 100644 --- a/util-data.R +++ b/util-data.R @@ -510,6 +510,9 @@ ProjectData = R6::R6Class("ProjectData", if(is.null(private$issues)) { private$issues = read.issues(self$get.data.path.issues()) } + + private$extract.timestamps(source = "issues") + return(private$issues) }, @@ -579,6 +582,9 @@ ProjectData = R6::R6Class("ProjectData", data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) timestamps.vector = c(timestamps$start, timestamps$end) + if(timestamps$start > timestamps$end) { + logging::logwarn("The datasources don't overlap. The result will be empty.") + } result = split.data.time.based(self, bins = timestamps.vector) return(result[[1]]) }, @@ -831,7 +837,7 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, return(private$revision.callgraph) } - ) + ) ) diff --git a/util-networks.R b/util-networks.R index a31fb3b4..57a31b1e 100644 --- a/util-networks.R +++ b/util-networks.R @@ -456,6 +456,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$network.conf$update.value(entry, value) }, + get.project.data = function() { + return(private$proj.data) + }, + #' Update the network configuration based on the given list #' of values and reset the environment afterwards #' From 9373d0172a29349e587f6cfec850ba59f6a792b6 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 4 Oct 2017 13:02:11 +0200 Subject: [PATCH 13/40] Add tests for the cutting mechanism on data and network side Signed-off-by: Christian Hechtl --- tests/test-data-cut.R | 51 +++++++++++++++++++++++++++++++++++++ tests/test-networks-cut.R | 53 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 tests/test-data-cut.R create mode 100644 tests/test-networks-cut.R diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R new file mode 
100644 index 00000000..07182c0c --- /dev/null +++ b/tests/test-data-cut.R @@ -0,0 +1,51 @@ +## (c) Christian Hechtl, 2017 +## hechtl@fim.uni-passau.de + + +context("Cutting functionality on ProjectData side.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Cut commit and mail data to same date range.", { + + ## configurations + + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + data.sources = c("mails", "commits") + + ## construct objects + + x.data = ProjectData$new(proj.conf) + + commit.data.expected = data.frame(commit.id=sprintf("", c(32712,32712,32713,32713)), + date=as.POSIXct(c("2016-07-12 15:58:59","2016-07-12 15:58:59","2016-07-12 16:00:45", + "2016-07-12 16:00:45")), + author.name=c("Claus Hunsen","Claus Hunsen","Olaf","Olaf"), + author.email=c("hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org", + "olaf@example.org"), + hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338","5a5ec9675e98187e1e92561e1888aa6f04faa338"), + changed.files=as.integer(c(1,1,1,1)), + added.lines=as.integer(c(1,1,1,1)), + deleted.lines=as.integer(c(1,1,0,0)), + diff.size=as.integer(c(2,2,1,1)), + file=c("test.c","test.c","test.c","test.c"), + artifact=c("A","defined(A)","A","defined(A)"), + artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), + artifact.diff.size=as.integer(c(1,1,1,1))) + + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.raw() + + expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + +}) diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R new file mode 100644 index 
00000000..9f7007c2 --- /dev/null +++ b/tests/test-networks-cut.R @@ -0,0 +1,53 @@ +## (c) Christian Hechtl, 2017 +## hechtl@fim.uni-passau.de + + +context("Cutting functionality on NetworkBuilder side.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Cut commit and mail data to same date range.", { + + ## configurations + + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + net.conf = NetworkConf$new() + net.conf$update.value(entry = "unify.date.ranges", value = TRUE) + + ## construct objects + + x.data = ProjectData$new(proj.conf) + x = NetworkBuilder$new(x.data, net.conf) + + commit.data.expected = data.frame(commit.id=sprintf("", c(32712,32712,32713,32713)), + date=as.POSIXct(c("2016-07-12 15:58:59","2016-07-12 15:58:59","2016-07-12 16:00:45", + "2016-07-12 16:00:45")), + author.name=c("Claus Hunsen","Claus Hunsen","Olaf","Olaf"), + author.email=c("hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org", + "olaf@example.org"), + hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338","5a5ec9675e98187e1e92561e1888aa6f04faa338"), + changed.files=as.integer(c(1,1,1,1)), + added.lines=as.integer(c(1,1,1,1)), + deleted.lines=as.integer(c(1,1,0,0)), + diff.size=as.integer(c(2,2,1,1)), + file=c("test.c","test.c","test.c","test.c"), + artifact=c("A","defined(A)","A","defined(A)"), + artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), + artifact.diff.size=as.integer(c(1,1,1,1))) + + commit.data = x$get.project.data()$get.commits.raw() + + expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + +}) From 1ec406c7dce7ad36648daf1ec36886dfc90d9ad1 Mon Sep 
17 00:00:00 2001 From: Christian Hechtl Date: Thu, 5 Oct 2017 21:58:01 +0200 Subject: [PATCH 14/40] Add documentation to new getter Add checking of mail data in the cutting tests. Signed-off-by: Christian Hechtl --- tests/test-data-cut.R | 13 +++++++++++++ tests/test-networks-cut.R | 13 +++++++++++++ util-data.R | 1 - util-networks.R | 4 ++++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index 07182c0c..0cf8420c 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -44,8 +44,21 @@ test_that("Cut commit and mail data to same date range.", { artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), artifact.diff.size=as.integer(c(1,1,1,1))) + mail.data.expected = data.frame(author.name=c("Thomas"), + author.email=c("thomas@example.org"), + message.id=c("<65a1sf31sagd684dfv31@mail.gmail.com>"), + date=as.POSIXct(c("2016-07-12 16:04:40")), + date.offset=as.integer(c(100)), + subject=c("Re: Fw: busybox 2 tab"), + thread=sprintf("", c(9))) + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.raw() + rownames(commit.data) = 1:nrow(commit.data) + + mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails() + rownames(mail.data) = 1:nrow(mail.data) expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") }) diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index 9f7007c2..c7c5e4e3 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -46,8 +46,21 @@ test_that("Cut commit and mail data to same date range.", { artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), artifact.diff.size=as.integer(c(1,1,1,1))) + mail.data.expected = data.frame(author.name=c("Thomas"), + author.email=c("thomas@example.org"), + message.id=c("<65a1sf31sagd684dfv31@mail.gmail.com>"), + 
date=as.POSIXct(c("2016-07-12 16:04:40")), + date.offset=as.integer(c(100)), + subject=c("Re: Fw: busybox 2 tab"), + thread=sprintf("", c(9))) + commit.data = x$get.project.data()$get.commits.raw() + rownames(commit.data) = 1:nrow(commit.data) + + mail.data = x$get.project.data()$get.mails() + rownames(mail.data) = 1:nrow(mail.data) expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") }) diff --git a/util-data.R b/util-data.R index 5085cc00..d65247c3 100644 --- a/util-data.R +++ b/util-data.R @@ -510,7 +510,6 @@ ProjectData = R6::R6Class("ProjectData", if(is.null(private$issues)) { private$issues = read.issues(self$get.data.path.issues()) } - private$extract.timestamps(source = "issues") return(private$issues) diff --git a/util-networks.R b/util-networks.R index 57a31b1e..88b886bc 100644 --- a/util-networks.R +++ b/util-networks.R @@ -456,6 +456,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$network.conf$update.value(entry, value) }, + #' Get the project data Object of the NetworkBuilder. + #' This Method is mainly used for testing purposes at the moment. 
+ #' + #' @return the project data object of the NetworkBuilder get.project.data = function() { return(private$proj.data) }, From 4c2bd70cce97a29bdb7f7d645c2ceda8337d0a3e Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Tue, 10 Oct 2017 16:18:20 +0200 Subject: [PATCH 15/40] Minor changes to metrics behaviour --- metrics.R | 31 ++++++++++++---------- plot-metrics.R | 70 ------------------------------------------------- util-networks.R | 1 + 3 files changed, 18 insertions(+), 84 deletions(-) delete mode 100644 plot-metrics.R diff --git a/metrics.R b/metrics.R index 66cf69c5..7c05ce27 100644 --- a/metrics.R +++ b/metrics.R @@ -1,15 +1,15 @@ requireNamespace("igraph") -metrics.hub.indegree = function(network, project){ - degrees = igraph::degree(network, mode = c("in")) +metrics.hub.degree = function(network, project){ + degrees = igraph::degree(network, mode = c("total")) vertex = which.max(degrees) - df = data.frame("name" = names(vertex), "degree" = unname(vertex), "project" = project) + df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "project" = project) return(df) } -metrics.avg.outdegree = function(network, project) { - outdegrees = igraph::degree(network, mode = c("out")) - avg = mean(outdegrees) +metrics.avg.degree = function(network, project) { + degrees = igraph::degree(network, mode = c("total")) + avg = mean(degrees) df = data.frame("project" = project, "avg.degree" = avg) return(df) } @@ -25,12 +25,16 @@ metrics.density = function(network, project) { } metrics.avg.pathlength = function(network, project) { - return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = TRUE, unconnected = TRUE))) + return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) } metrics.clustering.coeff = function(network, project) { - local.cc = igraph::transitivity(network, type = "local", vids = NULL) - cc = mean(local.cc, na.rm = 
TRUE) + cc = igraph::transitivity(network, type = "localaverage", vids = NULL) + return(data.frame("project" = project, "clustering.coeff" = cc)) +} + +metrics.clustering.coeff.global = function(network, project) { + cc = igraph::transitivity(network, type = "global", vids = NULL) return(data.frame("project" = project, "clustering.coeff" = cc)) } @@ -48,12 +52,11 @@ metrics.amount.nodes = function(network, project) { metrics.smallworldness = function(network, project) { # construct Erdös-Renyi network with same number of nodes and edges as g - h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=TRUE) + h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) ## compute clustering coefficients g.cc = igraph::transitivity(network) h.cc = igraph::transitivity(h) - ## compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) @@ -72,7 +75,7 @@ metrics.smallworldness = function(network, project) { return (data.frame("project" = project, "smallworldness" = s.delta)) } -metrics.power.law.fitting = function(network) { +metrics.power.law.fitting = function(network, project) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -86,7 +89,7 @@ metrics.power.law.fitting = function(network) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - return(data.frame("power.law" = names(df), "value" = c(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law))) + return(data.frame("project" = project, "KS.p" = res$KS.p)) } metrics.hierarchy = function(network) { @@ -96,6 +99,6 @@ metrics.hierarchy = function(network) { degrees.without.cc = subset(degrees, 
!(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(deg = log(degrees.without.cc), cc = log(cluster.coeff))) + return(data.frame(log.deg = log(degrees.without.cc), log.cc = log(cluster.coeff))) } diff --git a/plot-metrics.R b/plot-metrics.R deleted file mode 100644 index 2779903d..00000000 --- a/plot-metrics.R +++ /dev/null @@ -1,70 +0,0 @@ - -requireNamespace("ggplot2") - -#plots the maximum indegree one or more projects as a bar diagram -metrics.plot.hub.indegree = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.avg.outdegree = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.degree, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.node.degrees = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = name)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.density = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = density, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.avg.pathlength = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.papthlength, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.clustering.coeff = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = clustering.coeff, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.modularity = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = modularity, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.amount.nodes = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = amount.nodes, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.smallworldness = function(df) { - plot = 
ggplot2::ggplot(df, ggplot2::aes(y = smallworldness, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.power.law.fitting = function(df) { - -} - -metrics.plot.hierarchy = function(df) { - pred = predict(lm(cc ~ deg, data = df)) - plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg)) + - ggplot2::geom_point(ggplot2::aes(color = deg)) + - ggplot2::geom_line(ggplot2::aes(y = pred)) - return(plot) -} - diff --git a/util-networks.R b/util-networks.R index 47ef2f73..475ca8e9 100644 --- a/util-networks.R +++ b/util-networks.R @@ -388,6 +388,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", reset.environment = function() { private$authors.network.mail = NULL private$authors.network.cochange = NULL + private$authors.network.issue = NULL private$artifacts.network.cochange = NULL private$artifacts.network.callgraph = NULL }, From 53a24601452e33d7fe6670b745ab1d2a7c42d8ec Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Tue, 24 Oct 2017 13:58:37 +0200 Subject: [PATCH 16/40] Update pasta reading method and test Signed-off-by: Christian Hechtl --- .../results/testing/test_pasta/similar-mailbox | 2 +- tests/test-read.R | 5 +++-- util-read.R | 12 ++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/codeface-data/results/testing/test_pasta/similar-mailbox b/tests/codeface-data/results/testing/test_pasta/similar-mailbox index 546e1bad..ffba64c4 100644 --- a/tests/codeface-data/results/testing/test_pasta/similar-mailbox +++ b/tests/codeface-data/results/testing/test_pasta/similar-mailbox @@ -2,4 +2,4 @@ => 5a5ec9675e98187e1e92561e1888aa6f04faa338 => 3a0ed78458b3976243db6829f63eba3eead26774 => 1143db502761379c2bfcecc2007fc34282e7ee61 - => 0a1a5c523d835459c42f33e863623138555e2526 + => 0a1a5c523d835459c42f33e863623138555e2526 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 diff --git a/tests/test-read.R b/tests/test-read.R index 2cd0b0a2..87eaead2 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -155,11 
+155,12 @@ test_that("Read and parse the pasta data.", { ## build the expected data.frame pasta.data.expected = data.frame(message.id=c("","", "","", - "","",""), + "","","", + ""), commit.hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774","1143db502761379c2bfcecc2007fc34282e7ee61", "1143db502761379c2bfcecc2007fc34282e7ee61","1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526")) + "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0")) ## check the results expect_identical(pasta.data.read, pasta.data.expected, info = "PaStA data.") diff --git a/util-read.R b/util-read.R index 72491987..a0883576 100644 --- a/util-read.R +++ b/util-read.R @@ -254,7 +254,7 @@ read.authors = function(data.path) { ## PaStA data -------------------------------------------------------------- #' Read and parse the pasta data from the 'similar-mailbox' file. -#' The form in the file is : => commit.hash. +#' The form in the file is : ... => commit.hash commit.hash2 .... #' The parsed form is a data frame with message IDs as keys and commit hashes as values. 
#' #' @param data.path the path to the pasta data @@ -291,14 +291,18 @@ read.pasta = function(data.path) { # 1) split at arrow # 2) split keys - # 3) insert all key-value pairs by iteration (works also if there is only one key) + # 3) split values + # 4) insert all key-value pairs by iteration (works also if there is only one key) line.split = unlist(strsplit(line, SEPERATOR)) keys = line.split[1] - value = line.split[2] + values = line.split[2] keys.split = unlist(strsplit(keys, KEY.SEPERATOR)) + values.split = unlist(strsplit(values, KEY.SEPERATOR)) # Transform data to data.frame - df = data.frame(message.id = keys.split, commit.hash = value) + #df = data.frame(message.id = keys.split, commit.hash = values.split) + df = merge(keys.split, values.split) + colnames(df) = c("message.id", "commit.hash") return(df) }) result.df = plyr::rbind.fill(result.list) From abfafb9bbac88b061cf80771407fa555ac7b9c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raphael=20N=C3=B6mmer?= Date: Tue, 24 Oct 2017 14:11:10 +0200 Subject: [PATCH 17/40] Add comments to network metrics. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 117 +++++++++++++++++++++++++++++++++++++++++----------- util-read.R | 1 - 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/metrics.R b/metrics.R index 7c05ce27..1ce3c874 100644 --- a/metrics.R +++ b/metrics.R @@ -1,55 +1,117 @@ requireNamespace("igraph") -metrics.hub.degree = function(network, project){ + +#' Determine the maximum degree for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the name of the vertex with the maximum degree, the degree and +#' the name of the network that this value belongs to. 
+metrics.hub.degree = function(network, name){ degrees = igraph::degree(network, mode = c("total")) vertex = which.max(degrees) - df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "project" = project) + df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "name" = name) return(df) } -metrics.avg.degree = function(network, project) { +#' Calculate the average degree of a network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the average degree of the network and the name of the network. +metrics.avg.degree = function(network, name) { degrees = igraph::degree(network, mode = c("total")) avg = mean(degrees) - df = data.frame("project" = project, "avg.degree" = avg) + df = data.frame("name" = name, "avg.degree" = avg) return(df) } +#' Calculate all node degrees for the given network +#' +#' @param network The network to be examined +#' +#' @return A dataframe containing the nodes and their respective degrees. metrics.node.degrees = function(network) { degrees = sort(igraph::degree(network, mode="total"), decreasing = TRUE) return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } -metrics.density = function(network, project) { +#' Calculate the density of the given network +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the network density and the name of the network. +metrics.density = function(network, name) { density = igraph::graph.density(network) - return(data.frame("project" = project, "density" = unname(density))) + return(data.frame("name" = name, "density" = unname(density))) } -metrics.avg.pathlength = function(network, project) { - return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) +#' Calculate the average path length for the given network. 
+#' +#' @param network The network to e examined +#' @param name The name of the network +#' +#' @return A dataframe containing the average path length and the name of the network. +metrics.avg.pathlength = function(network, name) { + return(data.frame("name" = name, "avg.pathlength" = + igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) } -metrics.clustering.coeff = function(network, project) { +#' Calculate the average local clustering coefficient for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the average local clustering coefficient and the name of the network. +metrics.clustering.coeff = function(network, name) { cc = igraph::transitivity(network, type = "localaverage", vids = NULL) - return(data.frame("project" = project, "clustering.coeff" = cc)) + return(data.frame("name" = name, "clustering.coeff" = cc)) } -metrics.clustering.coeff.global = function(network, project) { +#' Calculate the global clustering coefficient for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the global clustering coefficient of the network and the name of the network. +metrics.clustering.coeff.global = function(network, name) { cc = igraph::transitivity(network, type = "global", vids = NULL) - return(data.frame("project" = project, "clustering.coeff" = cc)) + return(data.frame("name" = name, "clustering.coeff" = cc)) } -metrics.modularity = function(network, project) { +#' Calculate the modularity metric for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the modularity value for the given network and the name of the network. 
+metrics.modularity = function(network, name) { comm = igraph::cluster_walktrap(network) mod = igraph::modularity(network, igraph::membership(comm)) - return(data.frame("project" = project, "modularity" = mod)) + return(data.frame("name" = name, "modularity" = mod)) } -metrics.amount.nodes = function(network, project) { - return(data.frame("project" = project, "amount.nodes" = igraph::vcount(network))) +#' Count the number of nodes for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the number of nodes in the network and the name of the network. +metrics.amount.nodes = function(network, name) { + return(data.frame("name" = name, "amount.nodes" = igraph::vcount(network))) } -# requires simplified network -metrics.smallworldness = function(network, project) { +#' Calculate the smallworldness value for the given network. +#' This metric requires a simplified network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the smallworldness value of the network and the name of the network. +metrics.smallworldness = function(network, name) { # construct Erdös-Renyi network with same number of nodes and edges as g h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) @@ -72,10 +134,16 @@ metrics.smallworldness = function(network, project) { # if s.delta > 1, then the network is a small-world network #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - return (data.frame("project" = project, "smallworldness" = s.delta)) + return (data.frame("name" = name, "smallworldness" = s.delta)) } -metrics.power.law.fitting = function(network, project) { +#' Determine scale freeness of a network using the power law fitting method. 
+#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the scale freeness value of the network and the name of the network. +metrics.scale.freeness = function(network, name) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -89,16 +157,19 @@ metrics.power.law.fitting = function(network, project) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - return(data.frame("project" = project, "KS.p" = res$KS.p)) + return(data.frame("name" = name, "KS.p" = res$KS.p)) } +#' Calculate the hierarchy for a network +#' +#' @param network The network to be examined +#' +#' @return A dataframe containing the logarithm of the node degree and the logarithm of the local clustering coefficient for each node. metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) - degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.cc), log.cc = log(cluster.coeff))) } diff --git a/util-read.R b/util-read.R index df9b76a5..f7066e95 100644 --- a/util-read.R +++ b/util-read.R @@ -348,7 +348,6 @@ read.issues = function(data.path) { issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) ## convert dates and sort by 'date' column - print(issue.data) issue.data[["date"]] = as.POSIXct(issue.data[["date"]]) issue.data[["creation.date"]] = as.POSIXct(issue.data[["creation.date"]]) issue.data[["closing.date"]][ issue.data[["closing.date"]] == "" ] = NA From 7a19d354c2fd99d79117afc33b35a352039ff7df Mon Sep 17 00:00:00 2001 From: Claus Hunsen 
Date: Thu, 2 Nov 2017 14:55:57 +0100 Subject: [PATCH 18/40] Fix issue-reading functionality and corresponding tests Fix "author.email". Introduce "ref.name" column in tests (WIP and temporary fix). Add TODO item for fixing the test file "issues.list". Signed-off-by: Claus Hunsen Signed-off-by: Christian Hechtl --- .../results/testing/test_issues/issues.list | 72 +++++++++---------- tests/test-read.R | 9 ++- util-read.R | 2 +- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/tests/codeface-data/results/testing/test_issues/issues.list b/tests/codeface-data/results/testing/test_issues/issues.list index 9c6a939a..15ce1e0e 100644 --- a/tests/codeface-data/results/testing/test_issues/issues.list +++ b/tests/codeface-data/results/testing/test_issues/issues.list @@ -1,36 +1,36 @@ -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";11;"Karl";"karl@example.org";"2013-04-21 23:52:09";"created" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";11;"Karl";"karl@example.org";"2013-05-05 23:28:57";"commented" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";1;"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"referenced" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";1;"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"merged" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";1;"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"closed" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";11;"Karl";"karl@example.org";"2013-06-01 22:37:03";"head_ref_deleted" -2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";1342;"Thomas";"thomas@example.org";"2016-07-19 10:47:25";"referenced" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";15;"udo";"udo@example.org";"2016-04-17 02:07:37";"mentioned" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";15;"udo";"udo@example.org";"2016-04-17 02:07:37";"subscribed" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";1350;"Thomas";"thomas@example.org";"2016-07-14 
02:03:14";"commented" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";"commented" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"mentioned" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"subscribed" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";1350;"Thomas";"thomas@example.org";"2016-07-15 08:37:57";"commented" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"mentioned" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"subscribed" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";1;"Olaf";"olaf@example.org";"2016-07-27 22:25:25";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1342;"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"mentioned" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1342;"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"subscribed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";"created" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";"renamed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-10-05 01:07:46";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";"commented" 
-51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"merged" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"closed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-12-07 15:37:21";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"created" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";1;"Max";"max@example.org";"2017-05-23 12:32:21";"merged" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";1;"Max";"max@example.org";"2017-05-23 12:32:21";"closed" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";1;"Max";"max@example.org";"2017-05-23 12:32:39";"commented" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-04-21 23:52:09";;"created" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";;"commented" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"Olaf";"referenced" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"merged" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 
20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"closed" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-06-01 22:37:03";;"head_ref_deleted" +2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";"Thomas";"thomas@example.org";"2016-07-19 10:47:25";;"referenced" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";;"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";;"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-14 02:03:14";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-15 08:37:57";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Olaf";"olaf@example.org";"2016-07-27 22:25:25";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"mentioned" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"subscribed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";;"created" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";;"renamed" 
+51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-10-05 01:07:46";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"merged" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"closed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:21";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"created" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";;"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";;"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"merged" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"closed" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:39";;"commented" diff --git 
a/tests/test-read.R b/tests/test-read.R index 87eaead2..3419f54d 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -167,6 +167,8 @@ test_that("Read and parse the pasta data.", { }) test_that("Read and parse the issue data.", { + ## FIXME @Roger1995: update issues.list with a more recent content! + ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) @@ -179,9 +181,10 @@ test_that("Read and parse the issue data.", { creation.date=as.POSIXct(rep(c("2013-04-21 23:52:09","2016-04-17 02:06:38","2016-07-12 15:59:25","2016-04-17 02:06:38","2013-04-21 23:52:09","2016-04-17 02:06:38","2016-07-12 15:59:25","2016-12-07 15:53:02"), c(6,2,5,5,1,3,8,6))), closing.date=as.POSIXct(rep(c("2013-05-25 20:02:08",NA,"2016-12-07 15:37:02",NA,"2014-05-25 20:02:08",NA,"2016-12-07 15:37:02","2017-05-23 12:32:21"), c(6,2,5,5,1,3,8,6))), is.pull.request=rep(c(TRUE,FALSE,TRUE,FALSE,TRUE,FALSE,TRUE,TRUE), c(6,2,5,5,1,3,8,6)), - author.name=c("Karl","Karl","Olaf","Olaf","Olaf","Karl","udo","udo","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Olaf","Claus Hunsen","Olaf","Claus Hunsen","Claus Hunsen","Olaf","Olaf","Olaf","Claus Hunsen","Claus Hunsen","Claus Hunsen","Claus Hunsen","Max","Max","Max"), - 
author.email=c("karl@example.org","karl@example.org","olaf@example.org","olaf@example.org","olaf@example.org","karl@example.org","udo@example.org","udo@example.org","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","olaf@example.org","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","max@example.org","max@example.org","max@example.org"), - date=as.POSIXct(c("2013-04-21 23:52:09","2013-05-05 23:28:57","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-06-01 22:37:03","2016-04-17 02:07:37","2016-04-17 02:07:37","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 16:03:23","2016-07-12 16:05:47","2016-07-14 02:03:14","2016-07-14 17:42:52","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-19 10:47:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-08-31 18:21:48","2016-10-05 01:07:46","2016-10-13 15:33:56","2016-12-06 14:03:42","2016-12-07 15:37:02","2016-12-07 15:37:02","2016-12-07 15:37:21","2016-12-07 15:53:02","2016-12-07 15:53:02","2017-02-20 22:25:41","2017-03-02 17:30:10","2017-05-23 12:32:21","2017-05-23 12:32:21","2017-05-23 12:32:39")), + author.name=c("Karl","Karl","Karl","Olaf","Olaf","Karl","udo","udo","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Olaf","Claus Hunsen","Olaf","Claus Hunsen","Claus Hunsen","Olaf","Olaf","Olaf","Claus Hunsen","Claus Hunsen","Claus 
Hunsen","Claus Hunsen","Max","Max","Max"), + author.email=c("karl@example.org","karl@example.org","karl@example.org","olaf@example.org","olaf@example.org","karl@example.org","udo@example.org","udo@example.org","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","olaf@example.org","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","max@example.org","max@example.org","max@example.org"), + date=as.POSIXct(c("2013-04-21 23:52:09","2013-05-05 23:28:57","2013-05-05 23:28:57","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-06-01 22:37:03","2016-04-17 02:07:37","2016-04-17 02:07:37","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 16:03:23","2016-07-12 16:05:47","2016-07-14 02:03:14","2016-07-14 17:42:52","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-19 10:47:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-08-31 18:21:48","2016-10-05 01:07:46","2016-10-13 15:33:56","2016-12-06 14:03:42","2016-12-07 15:37:02","2016-12-07 15:37:02","2016-12-07 15:37:21","2016-12-07 15:53:02","2016-12-07 15:53:02","2017-02-20 22:25:41","2017-03-02 17:30:10","2017-05-23 12:32:21","2017-05-23 12:32:21","2017-05-23 12:32:39")), + ref.name=c(rep("", 2), "Olaf", rep("", 33)), 
event.name=c("created","commented","referenced","merged","closed","head_ref_deleted","mentioned","subscribed","mentioned","subscribed","created","renamed","commented","commented","commented","mentioned","subscribed","commented","referenced","mentioned","subscribed","commented","commented","commented","commented","commented","merged","closed","commented","commented","created","commented","commented","merged","closed","commented")) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( diff --git a/util-read.R b/util-read.R index a077f73a..59032a41 100644 --- a/util-read.R +++ b/util-read.R @@ -340,7 +340,7 @@ read.issues = function(data.path) { ## set proper column names colnames(issue.data) = c( "issue.id", "issue.state", "creation.date", "closing.date", "is.pull.request", # issue information - "author.name", "author.mail", # author information + "author.name", "author.email", # author information "date", # the date "ref.name", "event.name" # the event describing the row's entry ) From c0e0f2a2f44252eabbd4ccbd1e9739dd7762348b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raphael=20N=C3=B6mmer?= Date: Sat, 2 Dec 2017 16:13:03 +0100 Subject: [PATCH 19/40] Various minor changes to metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 137 ++++++++++++++++++++++++++---------------------------- 1 file changed, 66 insertions(+), 71 deletions(-) diff --git a/metrics.R b/metrics.R index 1ce3c874..9315f38b 100644 --- a/metrics.R +++ b/metrics.R @@ -1,31 +1,41 @@ +## (c) Thomas Bock, February 2015 +## bockthom@fim.uni-passau.de +## (c) Raphael Nömmer, 2017 +## noemmer@fim.uni-passau.de + + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Libraries --------------------------------------------------------------- + requireNamespace("igraph") +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Metric functions 
-------------------------------------------------------- + #' Determine the maximum degree for the given network. #' #' @param network The network to be examined -#' @param name The name of the network +#' @param mode The mode to be used for determining the degrees. #' -#' @return A dataframe containing the name of the vertex with the maximum degree, the degree and -#' the name of the network that this value belongs to. -metrics.hub.degree = function(network, name){ - degrees = igraph::degree(network, mode = c("total")) +#' @return A dataframe containing the name of the vertex with with maximum degree its degree. +metrics.hub.degree = function(network, mode){ + degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) - df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "name" = name) + df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) return(df) } #' Calculate the average degree of a network. #' #' @param network The network to be examined -#' @param name The name of the network +#' @param mode The mode to be used for determining the degrees. #' -#' @return A dataframe containing the average degree of the network and the name of the network. -metrics.avg.degree = function(network, name) { - degrees = igraph::degree(network, mode = c("total")) +#' @return The average degree of the nodes in the network. +metrics.avg.degree = function(network, mode) { + degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) - df = data.frame("name" = name, "avg.degree" = avg) - return(df) + return(avg) } #' Calculate all node degrees for the given network @@ -38,87 +48,74 @@ metrics.node.degrees = function(network) { return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } -#' Calculate the density of the given network +#' Calculate the density of the given network. 
#' -#' @param network The network to be examined -#' @param name The name of the network +#' @param network The network to be examined. #' -#' @return A dataframe containing the network density and the name of the network. -metrics.density = function(network, name) { +#' @return The density of the network. +metrics.density = function(network) { density = igraph::graph.density(network) - return(data.frame("name" = name, "density" = unname(density))) + return(density) } #' Calculate the average path length for the given network. #' -#' @param network The network to e examined -#' @param name The name of the network +#' @param network The network to be examined. +#' @param directed Wehther the given network is directed or undirected. +#' @param unconnected Whether all nodes of the network are connected. #' -#' @return A dataframe containing the average path length and the name of the network. -metrics.avg.pathlength = function(network, name) { - return(data.frame("name" = name, "avg.pathlength" = - igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) +#' @return The average pathlength of the given network. +metrics.avg.pathlength = function(network, directed, unconnected) { + avg.pathlength = igraph::average.path.length(network, directed = directed, unconnected = unconnected) + return(avg.pathlength) } #' Calculate the average local clustering coefficient for the given network. #' -#' @param network The network to be examined -#' @param name The name of the network -#' -#' @return A dataframe containing the average local clustering coefficient and the name of the network. -metrics.clustering.coeff = function(network, name) { - cc = igraph::transitivity(network, type = "localaverage", vids = NULL) - return(data.frame("name" = name, "clustering.coeff" = cc)) -} - -#' Calculate the global clustering coefficient for the given network. 
-#' -#' @param network The network to be examined -#' @param name The name of the network +#' @param network The network to be examined. +#' @param cc.type The type of cluserting coefficient to be calculated, i.e. global or local. #' -#' @return A dataframe containing the global clustering coefficient of the network and the name of the network. -metrics.clustering.coeff.global = function(network, name) { - cc = igraph::transitivity(network, type = "global", vids = NULL) - return(data.frame("name" = name, "clustering.coeff" = cc)) +#' @return The average local clustering coefficient of the network. +metrics.clustering.coeff = function(network, cc.type) { + cc = igraph::transitivity(network, type = cc.type, vids = NULL) + return(cc) } #' Calculate the modularity metric for the given network. #' #' @param network The network to be examined -#' @param name The name of the network +#' @param community.detection.algorithm The algorithm to be used for the detection of communities which +#' is required for the calculation of the clustering coefficient. #' -#' @return A dataframe containing the modularity value for the given network and the name of the network. -metrics.modularity = function(network, name) { - comm = igraph::cluster_walktrap(network) +#' @return The modularity value for the given network. +metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { + comm = community.detection.algorithm(network) mod = igraph::modularity(network, igraph::membership(comm)) return(data.frame("name" = name, "modularity" = mod)) } -#' Count the number of nodes for the given network. +## This function determines whether a network can be considered a +## small-world network based on a quantitative categorical decision. +## +## The procedure used in this function is based on the work "Network +## 'Small-World-Ness': A Quantitative Method for Determining Canonical +## Network Equivalence" by Mark D. Humphries and Kevin Gurney [1]. 
+## [1] http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0002051 +## +## The algorithm relies on the Erdös-Renyi random network with the same number +## of nodes and edges as the given network. #' #' @param network The network to be examined -#' @param name The name of the network #' -#' @return A dataframe containing the number of nodes in the network and the name of the network. -metrics.amount.nodes = function(network, name) { - return(data.frame("name" = name, "amount.nodes" = igraph::vcount(network))) -} - -#' Calculate the smallworldness value for the given network. -#' This metric requires a simplified network. -#' -#' @param network The network to be examined -#' @param name The name of the network -#' -#' @return A dataframe containing the smallworldness value of the network and the name of the network. -metrics.smallworldness = function(network, name) { +#' @return The smallworldness value of the network. +metrics.smallworldness = function(network) { # construct Erdös-Renyi network with same number of nodes and edges as g h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) ## compute clustering coefficients - g.cc = igraph::transitivity(network) - h.cc = igraph::transitivity(h) + g.cc = igraph::transitivity(network, type = 'global') + h.cc = igraph::transitivity(h, type = 'global') ## compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) @@ -134,16 +131,15 @@ metrics.smallworldness = function(network, name) { # if s.delta > 1, then the network is a small-world network #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - return (data.frame("name" = name, "smallworldness" = s.delta)) + return ("smallworldness" = s.delta) } #' Determine scale freeness of a network using the power law fitting method. 
#' #' @param network The network to be examined -#' @param name The name of the network #' -#' @return A dataframe containing the scale freeness value of the network and the name of the network. -metrics.scale.freeness = function(network, name) { +#' @return A dataframe containing the different values, connected to scale-freeness. +metrics.scale.freeness = function(network) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -157,10 +153,10 @@ metrics.scale.freeness = function(network, name) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - return(data.frame("name" = name, "KS.p" = res$KS.p)) + return(df) } -#' Calculate the hierarchy for a network +#' Calculate the hierarchy for a network. #' #' @param network The network to be examined #' @@ -168,8 +164,7 @@ metrics.scale.freeness = function(network, name) { metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) - degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.cc), log.cc = log(cluster.coeff))) + degrees.without.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + return(data.frame(log.deg = log(degrees.without.without.cluster.coeff), log.cc = log(cluster.coeff))) } From 894feeb6afd2bdc53bf9f15cd99ac9b3bb9c1a3d Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Sat, 2 Dec 2017 20:40:12 +0100 Subject: [PATCH 20/40] Minor changes to metrics.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 52 
++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/metrics.R b/metrics.R index 9315f38b..c50c20d3 100644 --- a/metrics.R +++ b/metrics.R @@ -19,7 +19,7 @@ requireNamespace("igraph") #' @param mode The mode to be used for determining the degrees. #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. -metrics.hub.degree = function(network, mode){ +metrics.hub.degree = function(network, modec = c("total", "in", "out")){ degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) @@ -32,7 +32,7 @@ metrics.hub.degree = function(network, mode){ #' @param mode The mode to be used for determining the degrees. #' #' @return The average degree of the nodes in the network. -metrics.avg.degree = function(network, mode) { +metrics.avg.degree = function(network, mode = c("total", "in", "out")) { degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) return(avg) @@ -41,10 +41,17 @@ metrics.avg.degree = function(network, mode) { #' Calculate all node degrees for the given network #' #' @param network The network to be examined +#' @param sort Whether the resulting dataframe is to be sorted by the node degree +#' @param sort.decreasing If sorting is active, this says whether the dataframe is to be sorted +#' in descending or ascending order. #' #' @return A dataframe containing the nodes and their respective degrees. 
-metrics.node.degrees = function(network) { - degrees = sort(igraph::degree(network, mode="total"), decreasing = TRUE) +metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { + if(sort) { + degrees = sort(igraph::degree(network, mode="total"), decreasing = sort.decreasing) + } else { + igraph::degree(network, mode="total") + } return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -73,10 +80,10 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' Calculate the average local clustering coefficient for the given network. #' #' @param network The network to be examined. -#' @param cc.type The type of cluserting coefficient to be calculated, i.e. global or local. +#' @param cc.type The type of cluserting coefficient to be calculated. #' -#' @return The average local clustering coefficient of the network. -metrics.clustering.coeff = function(network, cc.type) { +#' @return The clustering coefficient of the network. +metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { cc = igraph::transitivity(network, type = cc.type, vids = NULL) return(cc) } @@ -94,18 +101,18 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c return(data.frame("name" = name, "modularity" = mod)) } -## This function determines whether a network can be considered a -## small-world network based on a quantitative categorical decision. -## -## The procedure used in this function is based on the work "Network -## 'Small-World-Ness': A Quantitative Method for Determining Canonical -## Network Equivalence" by Mark D. Humphries and Kevin Gurney [1]. -## [1] http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0002051 -## -## The algorithm relies on the Erdös-Renyi random network with the same number -## of nodes and edges as the given network. 
+#' This function determines whether a network can be considered a +#' small-world network based on a quantitative categorical decision. #' -#' @param network The network to be examined +#' The procedure used in this function is based on the work "Network +#' 'Small-World-Ness': A Quantitative Method for Determining Canonical +#' Network Equivalence" by Mark D. Humphries and Kevin Gurney [1]. +#' [1] http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0002051 +#' +#' The algorithm relies on the Erdös-Renyi random network with the same number +#' of nodes and edges as the given network. +#' +#' @param network The network to be examined. This network needs to be simplified for the calculation to work. #' #' @return The smallworldness value of the network. metrics.smallworldness = function(network) { @@ -113,14 +120,14 @@ metrics.smallworldness = function(network) { # construct Erdös-Renyi network with same number of nodes and edges as g h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) - ## compute clustering coefficients + # compute clustering coefficients g.cc = igraph::transitivity(network, type = 'global') h.cc = igraph::transitivity(h, type = 'global') - ## compute average shortest-path length + # compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) - ## binary decision + # binary decision # intermediate variables gamma = g.cc / h.cc lambda = g.l / h.l @@ -164,7 +171,8 @@ metrics.scale.freeness = function(network) { metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) - degrees.without.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + 
cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) return(data.frame(log.deg = log(degrees.without.without.cluster.coeff), log.cc = log(cluster.coeff))) } From ea0a252f14cae4637475ce5df30698d4135ac39e Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Sat, 2 Dec 2017 21:18:14 +0100 Subject: [PATCH 21/40] Update issues.list test file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- .../results/testing/test_issues/issues.list | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tests/codeface-data/results/testing/test_issues/issues.list b/tests/codeface-data/results/testing/test_issues/issues.list index 15ce1e0e..898bec9a 100644 --- a/tests/codeface-data/results/testing/test_issues/issues.list +++ b/tests/codeface-data/results/testing/test_issues/issues.list @@ -1,36 +1,36 @@ -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-04-21 23:52:09";;"created" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";;"commented" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"Olaf";"referenced" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"merged" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"closed" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-06-01 22:37:03";;"head_ref_deleted" -2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";"Thomas";"thomas@example.org";"2016-07-19 10:47:25";;"referenced" -48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";;"mentioned" -48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 
02:07:37";;"subscribed" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-04-21 23:52:09";"";"created" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"";"commented" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"";"referenced" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"";"merged" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"";"closed" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-06-01 22:37:03";"";"head_ref_deleted" +2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";"Thomas";"thomas@example.org";"2016-07-19 10:47:25";"";"referenced" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";"Karl";"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";"Karl";"subscribed" 48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-14 02:03:14";;"commented" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";;"commented" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"mentioned" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"subscribed" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-15 08:37:57";;"commented" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"mentioned" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"subscribed" -48;"OPEN";"2016-04-17 
02:06:38";;"false";"Olaf";"olaf@example.org";"2016-07-27 22:25:25";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"mentioned" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"subscribed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";;"created" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";;"renamed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-10-05 01:07:46";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"merged" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"closed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:21";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"created" -57;"CLOSED";"2016-12-07 
15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";;"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";;"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"merged" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"closed" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:39";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";"";"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"Thomas";"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"Thomas";"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-15 08:37:57";"";"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"udo";"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"udo";"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Olaf";"olaf@example.org";"2016-07-27 22:25:25";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"Claus Hunsen";"mentioned" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"Claus Hunsen";"subscribed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";"";"created" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus 
Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";"";"renamed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-10-05 01:07:46";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"";"merged" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"";"closed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:21";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"";"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"";"created" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";"";"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";"";"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";"";"merged" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";"";"closed" +57;"CLOSED";"2016-12-07 
15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:39";"";"commented" From c8de29596686593f6b5bda4c9f2fd801209a548c Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Sat, 2 Dec 2017 21:33:39 +0100 Subject: [PATCH 22/40] Change issue reading test according to issues.list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- tests/test-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-read.R b/tests/test-read.R index 3419f54d..4decebd1 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -184,7 +184,7 @@ test_that("Read and parse the issue data.", { author.name=c("Karl","Karl","Karl","Olaf","Olaf","Karl","udo","udo","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Olaf","Claus Hunsen","Olaf","Claus Hunsen","Claus Hunsen","Olaf","Olaf","Olaf","Claus Hunsen","Claus Hunsen","Claus Hunsen","Claus Hunsen","Max","Max","Max"), author.email=c("karl@example.org","karl@example.org","karl@example.org","olaf@example.org","olaf@example.org","karl@example.org","udo@example.org","udo@example.org","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","olaf@example.org","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","max@example.org","max@example.org","max@example.org"), date=as.POSIXct(c("2013-04-21 23:52:09","2013-05-05 23:28:57","2013-05-05 
23:28:57","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-06-01 22:37:03","2016-04-17 02:07:37","2016-04-17 02:07:37","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 16:03:23","2016-07-12 16:05:47","2016-07-14 02:03:14","2016-07-14 17:42:52","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-19 10:47:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-08-31 18:21:48","2016-10-05 01:07:46","2016-10-13 15:33:56","2016-12-06 14:03:42","2016-12-07 15:37:02","2016-12-07 15:37:02","2016-12-07 15:37:21","2016-12-07 15:53:02","2016-12-07 15:53:02","2017-02-20 22:25:41","2017-03-02 17:30:10","2017-05-23 12:32:21","2017-05-23 12:32:21","2017-05-23 12:32:39")), - ref.name=c(rep("", 2), "Olaf", rep("", 33)), + ref.name=c(rep("", 7), rep("Karl", 2), rep("", 2), rep("Thomas", 2), "", rep("udo", 2), "", rep("Claus Hunsen", 2), rep("", 17)), event.name=c("created","commented","referenced","merged","closed","head_ref_deleted","mentioned","subscribed","mentioned","subscribed","created","renamed","commented","commented","commented","mentioned","subscribed","commented","referenced","mentioned","subscribed","commented","commented","commented","commented","commented","merged","closed","commented","commented","created","commented","commented","merged","closed","commented")) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( From 9672a708fba12df2b478495cc3758611a9c3a0a2 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 16:26:42 +0100 Subject: [PATCH 23/40] Rename metrics, add parameter check in metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-init.R | 1 + metrics.R => util-metrics.R | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) rename metrics.R => util-metrics.R (96%) diff --git a/util-init.R b/util-init.R index dcd270df..21aa535a 100644 --- a/util-init.R +++ b/util-init.R @@ -23,3 
+23,4 @@ source("util-motifs.R") source("util-bulk.R") source("util-plot.R") source("util-core-peripheral.R") +source("util-metrics.R") diff --git a/metrics.R b/util-metrics.R similarity index 96% rename from metrics.R rename to util-metrics.R index c50c20d3..00893a1f 100644 --- a/metrics.R +++ b/util-metrics.R @@ -19,7 +19,8 @@ requireNamespace("igraph") #' @param mode The mode to be used for determining the degrees. #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. -metrics.hub.degree = function(network, modec = c("total", "in", "out")){ +metrics.hub.degree = function(network, mode = c("total", "in", "out")){ + match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) @@ -33,6 +34,7 @@ metrics.hub.degree = function(network, modec = c("total", "in", "out")){ #' #' @return The average degree of the nodes in the network. metrics.avg.degree = function(network, mode = c("total", "in", "out")) { + match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) return(avg) @@ -84,6 +86,7 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' #' @return The clustering coefficient of the network. 
metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { + match.arg(cc.type) cc = igraph::transitivity(network, type = cc.type, vids = NULL) return(cc) } @@ -173,6 +176,6 @@ metrics.hierarchy = function(network) { cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.without.cluster.coeff), log.cc = log(cluster.coeff))) + return(data.frame(log.deg = log(degrees.without.cluster.coeff), log.cc = log(cluster.coeff))) } From b433115f7d70ddb776eac33472dcdc09aaf49461 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 19:25:54 +0100 Subject: [PATCH 24/40] Rename util-metrics.R to util-network-metrics.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-metrics.R => util-network-metrics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename util-metrics.R => util-network-metrics.R (98%) diff --git a/util-metrics.R b/util-network-metrics.R similarity index 98% rename from util-metrics.R rename to util-network-metrics.R index 00893a1f..9e0affc8 100644 --- a/util-metrics.R +++ b/util-network-metrics.R @@ -150,7 +150,7 @@ metrics.smallworldness = function(network) { #' #' @return A dataframe containing the different values, connected to scale-freeness. 
metrics.scale.freeness = function(network) { - v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) + v.degree <- sort(igraph::degree(network, mode="total"), decreasing=TRUE) ## Power-law fiting ## (from Mitchell Joblin , Siemens AG, 2012, 2013) From 187d220ebc28c71879d9181fa2077467fc6a3302 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 19:29:41 +0100 Subject: [PATCH 25/40] Adjust documentation of metrics to follow guidelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-network-metrics.R | 43 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/util-network-metrics.R b/util-network-metrics.R index 9e0affc8..55331fe0 100644 --- a/util-network-metrics.R +++ b/util-network-metrics.R @@ -15,8 +15,8 @@ requireNamespace("igraph") #' Determine the maximum degree for the given network. #' -#' @param network The network to be examined -#' @param mode The mode to be used for determining the degrees. +#' @param network the network to be examined +#' @param mode the mode to be used for determining the degrees #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. metrics.hub.degree = function(network, mode = c("total", "in", "out")){ @@ -29,8 +29,8 @@ metrics.hub.degree = function(network, mode = c("total", "in", "out")){ #' Calculate the average degree of a network. #' -#' @param network The network to be examined -#' @param mode The mode to be used for determining the degrees. +#' @param network the network to be examined +#' @param mode the mode to be used for determining the degrees #' #' @return The average degree of the nodes in the network. 
metrics.avg.degree = function(network, mode = c("total", "in", "out")) { @@ -42,10 +42,10 @@ metrics.avg.degree = function(network, mode = c("total", "in", "out")) { #' Calculate all node degrees for the given network #' -#' @param network The network to be examined -#' @param sort Whether the resulting dataframe is to be sorted by the node degree -#' @param sort.decreasing If sorting is active, this says whether the dataframe is to be sorted -#' in descending or ascending order. +#' @param network the network to be examined +#' @param sort whether the resulting dataframe is to be sorted by the node degree +#' @param sort.decreasing if sorting is active, this says whether the dataframe is to be sorted +#' in descending or ascending order #' #' @return A dataframe containing the nodes and their respective degrees. metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { @@ -59,7 +59,7 @@ metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { #' Calculate the density of the given network. #' -#' @param network The network to be examined. +#' @param network the network to be examined #' #' @return The density of the network. metrics.density = function(network) { @@ -69,9 +69,9 @@ metrics.density = function(network) { #' Calculate the average path length for the given network. #' -#' @param network The network to be examined. -#' @param directed Wehther the given network is directed or undirected. -#' @param unconnected Whether all nodes of the network are connected. +#' @param network the network to be examined +#' @param directed wehther the given network is directed or undirected +#' @param unconnected whether all nodes of the network are connected #' #' @return The average pathlength of the given network. 
metrics.avg.pathlength = function(network, directed, unconnected) { @@ -81,8 +81,8 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' Calculate the average local clustering coefficient for the given network. #' -#' @param network The network to be examined. -#' @param cc.type The type of cluserting coefficient to be calculated. +#' @param network the network to be examined +#' @param cc.type the type of cluserting coefficient to be calculated #' #' @return The clustering coefficient of the network. metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { @@ -93,9 +93,9 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar #' Calculate the modularity metric for the given network. #' -#' @param network The network to be examined -#' @param community.detection.algorithm The algorithm to be used for the detection of communities which -#' is required for the calculation of the clustering coefficient. +#' @param network the network to be examined +#' @param community.detection.algorithm the algorithm to be used for the detection of communities which +#' is required for the calculation of the clustering coefficient #' #' @return The modularity value for the given network. metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { @@ -115,7 +115,7 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c #' The algorithm relies on the Erdös-Renyi random network with the same number #' of nodes and edges as the given network. #' -#' @param network The network to be examined. This network needs to be simplified for the calculation to work. +#' @param network the network to be examined. This network needs to be simplified for the calculation to work #' #' @return The smallworldness value of the network. 
metrics.smallworldness = function(network) { @@ -146,7 +146,7 @@ metrics.smallworldness = function(network) { #' Determine scale freeness of a network using the power law fitting method. #' -#' @param network The network to be examined +#' @param network the network to be examined #' #' @return A dataframe containing the different values, connected to scale-freeness. metrics.scale.freeness = function(network) { @@ -168,9 +168,10 @@ metrics.scale.freeness = function(network) { #' Calculate the hierarchy for a network. #' -#' @param network The network to be examined +#' @param network the network to be examined #' -#' @return A dataframe containing the logarithm of the node degree and the logarithm of the local clustering coefficient for each node. +#' @return A dataframe containing the logarithm of the node degree and the logarithm +#' of the local clustering coefficient for each node. metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) From ddb93be9717d8349b45b794e98f5b5f015f8ae2f Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 23:33:39 +0100 Subject: [PATCH 26/40] Rename metrics file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-network-metrics.R => util-networks-metrics.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename util-network-metrics.R => util-networks-metrics.R (100%) diff --git a/util-network-metrics.R b/util-networks-metrics.R similarity index 100% rename from util-network-metrics.R rename to util-networks-metrics.R From d4b53df20e6d2d89a0004810716cd749e7d57967 Mon Sep 17 00:00:00 2001 From: Raphael Date: Wed, 6 Dec 2017 23:41:17 +0100 Subject: [PATCH 27/40] Update filename in util-init.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-init.R | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/util-init.R b/util-init.R index 21aa535a..0ab03439 100644 --- a/util-init.R +++ b/util-init.R @@ -23,4 +23,4 @@ source("util-motifs.R") source("util-bulk.R") source("util-plot.R") source("util-core-peripheral.R") -source("util-metrics.R") +source("util-networks-metrics.R") From 52dd2e314728ca32ebad2ae856b7b3b3dc290837 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Mon, 11 Dec 2017 14:41:50 +0100 Subject: [PATCH 28/40] Fix indentation to avoid merge conflict To avoid a merge conflict in PR #78, we fix the indentation of a statement in the file 'util-read.R'. Signed-off-by: Claus Hunsen --- util-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-read.R b/util-read.R index 59032a41..0a1db07b 100644 --- a/util-read.R +++ b/util-read.R @@ -328,7 +328,7 @@ read.issues = function(data.path) { ## read issues from disk [can be empty] issue.data = try(read.table(filepath, header = FALSE, sep = ";", strip.white = TRUE, - encoding = "UTF-8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the list of commits is empty if (inherits(issue.data, 'try-error')) { From 7bfbe8403d6fdfb76d8856f3c2885028958ff12d Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Mon, 11 Dec 2017 16:24:02 +0100 Subject: [PATCH 29/40] Always add e-mail addresses in author data We now always have e-mail-address data available for authors, independent of the real data source containing those. If there is no data available, we add NAs. This is a follow-up for issue #69 and PR #71. 
Signed-off-by: Claus Hunsen --- util-read.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/util-read.R b/util-read.R index b67099ec..7578eb44 100644 --- a/util-read.R +++ b/util-read.R @@ -229,15 +229,15 @@ read.authors = function(data.path) { stop("Stopped due to missing authors.") } + ## if there is no third column, we need to add e-mail-address dummy data (NAs) + if (ncol(authors.df) != 3) { + authors.df[3] = NA + } + ## set proper column names based on Codeface extraction: ## ## SELECT a.name AS authorName, a.email1, m.creationDate, m.subject, m.threadId - cols.names = c("author.id", "author.name") - ## if there is a third column, we have e-mail-address data available - if (ncol(authors.df) == 3) { - cols.names = c(cols.names, "author.email") - } - colnames(authors.df) = cols.names + colnames(authors.df) = c("author.id", "author.name", "author.email") ## store the ID--author mapping logging::logdebug("read.authors: finished.") From a803425e6bdb54c1654fb9de1f9375499e3aa829 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 09:57:51 +0100 Subject: [PATCH 30/40] Change 'commits.raw' to 'commits' For easier internal use of data-source names (i.e., "commits", "issues", and "mails"), the data item containing the commit data is now called "commits" -- and not "commits.raw" anymore. All corresponding methods and method calls are renamed accordingly. This change will make it easier to handle data sources by their specific name, e.g., when performing a parameterizable subset 'proj.data[[data.source.name]]'. Note: The methods 'ProjectData$get.commits.raw()' and 'ProjectData$set.commits.raw()' are still there for compatibility reasons. They are now mere delegates to the new methods. 
Signed-off-by: Claus Hunsen --- test.R | 4 +- tests/test-data-cut.R | 2 +- tests/test-networks-cut.R | 2 +- tests/test-read.R | 2 +- tests/test-split.R | 106 +++++++++++++++++++------------------- util-core-peripheral.R | 2 +- util-data.R | 61 ++++++++++++++++------ util-networks.R | 2 +- util-read.R | 16 ++++-- util-split.R | 8 +-- 10 files changed, 121 insertions(+), 84 deletions(-) diff --git a/test.R b/test.R index 19ff793c..ea455e38 100644 --- a/test.R +++ b/test.R @@ -63,7 +63,7 @@ x = NetworkBuilder$new(project.data = x.data, network.conf = net.conf) ## * Data retrieval -------------------------------------------------------- -# x.data$get.commits.raw() +# x.data$get.commits() # x.data$get.synchronicity() # x.data$get.author2artifact() # x.data$get.commits.filtered() @@ -107,7 +107,7 @@ y = NetworkBuilder$new(project.data = y.data, network.conf = net.conf) ## * Data retrieval -------------------------------------------------------- -# y.data$get.commits.raw() +# y.data$get.commits() # y.data$get.synchronicity() # y.data$get.author2artifact() # y.data$get.commits.filtered() diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index 0cf8420c..3b3f461d 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -52,7 +52,7 @@ test_that("Cut commit and mail data to same date range.", { subject=c("Re: Fw: busybox 2 tab"), thread=sprintf("", c(9))) - commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.raw() + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits() rownames(commit.data) = 1:nrow(commit.data) mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails() diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index c7c5e4e3..9d7985e1 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -54,7 +54,7 @@ test_that("Cut commit and mail data to same date range.", { subject=c("Re: Fw: busybox 2 tab"), thread=sprintf("", 
c(9))) - commit.data = x$get.project.data()$get.commits.raw() + commit.data = x$get.project.data()$get.commits() rownames(commit.data) = 1:nrow(commit.data) mail.data = x$get.project.data()$get.mails() diff --git a/tests/test-read.R b/tests/test-read.R index 0fe3001c..6363c571 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -22,7 +22,7 @@ test_that("Read the raw commit data.", { proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) ## read the actual data - commit.data.read = read.commits.raw(proj.conf$get.value("datapath"), proj.conf$get.value("artifact")) + commit.data.read = read.commits(proj.conf$get.value("datapath"), proj.conf$get.value("artifact")) ## build the expected data.frame commit.data.expected = data.frame(commit.id=sprintf("", c(32712,32712,32713,32713,32710,32710,32714,32711,32711)), diff --git a/tests/test-split.R b/tests/test-split.R index 9f34aa0e..2f2ac965 100644 --- a/tests/test-split.R +++ b/tests/test-split.R @@ -46,7 +46,7 @@ test_that("Split a data object time-based (split.basis == 'commits').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -68,10 +68,10 @@ test_that("Split a data object time-based (split.basis == 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits.raw[1:4, ], + commits = list( + "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits[1:4, ], "2016-07-12 16:01:59-2016-07-12 16:04:59" = data.frame(), - "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits.raw[5:9, ] + "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits[5:9, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:01:59" = data.frame(), @@ -95,7 +95,7 @@ test_that("Split a data object 
time-based (split.basis == 'commits').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -119,7 +119,7 @@ test_that("Split a data object time-based (split.basis == 'mails').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -142,11 +142,11 @@ test_that("Split a data object time-based (split.basis == 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2004-10-09 18:38:13-2007-10-09 18:38:13" = data.frame(), "2007-10-09 18:38:13-2010-10-09 18:38:13" = data.frame(), "2010-10-09 18:38:13-2013-10-09 18:38:13" = data.frame(), - "2013-10-09 18:38:13-2016-07-12 16:05:38" = data$commits.raw[1:4, ] + "2013-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:4, ] ), mails = list( "2004-10-09 18:38:13-2007-10-09 18:38:13" = data$mails[rownames(data$mails) %in% 1:2, ], @@ -174,7 +174,7 @@ test_that("Split a data object time-based (split.basis == 'mails').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -198,7 +198,7 @@ test_that("Split a data object time-based (split.basis == 'issues').", { ## data object project.data = 
ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -220,9 +220,9 @@ test_that("Split a data object time-based (split.basis == 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2013-04-21 23:52:09-2015-04-21 23:52:09" = data.frame(), - "2015-04-21 23:52:09-2017-04-21 23:52:09" = data$commits.raw, + "2015-04-21 23:52:09-2017-04-21 23:52:09" = data$commits, "2017-04-21 23:52:09-2017-05-23 12:32:40" = data.frame() ), mails = list( @@ -247,7 +247,7 @@ test_that("Split a data object time-based (split.basis == 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -271,7 +271,7 @@ test_that("Split a data object time-based (bins == ... ).", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -291,8 +291,8 @@ test_that("Split a data object time-based (bins == ... 
).", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$commits.raw + commits = list( + "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$commits ), mails = list( "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$mails[rownames(data$mails) %in% 13:17, ] @@ -308,7 +308,7 @@ test_that("Split a data object time-based (bins == ... ).", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -332,7 +332,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -354,10 +354,10 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits.raw[1:4, ], - "2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits.raw[5:7, ], - "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits.raw[8:9, ] + commits = list( + "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits[1:4, ], + "2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits[5:7, ], + "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits[8:9, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$mails[rownames(data$mails) %in% 16:17, ], @@ -381,7 +381,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ) ) results.data = 
list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -394,7 +394,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## ## split data - results = split.data.activity.based(project.data, activity.amount = nrow(data$commits.raw) + 10, + results = split.data.activity.based(project.data, activity.amount = nrow(data$commits) + 10, activity.type = "commits", sliding.window = FALSE) ## check time ranges @@ -406,8 +406,8 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$commits.raw + commits = list( + "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$commits ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$mails[rownames(data$mails) %in% 16:17, ] @@ -423,7 +423,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -449,9 +449,9 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$commits.raw[1:6, ], - "2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits.raw[7:9, ] + commits = list( + "2016-07-12 
15:58:59-2016-07-12 16:06:10" = data$commits[1:6, ], + "2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits[7:9, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$mails[rownames(data$mails) %in% 16:17, ], @@ -471,7 +471,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -482,7 +482,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## too large number of windows expect_error( - split.data.activity.based(project.data, activity.type = "commits", number.windows = nrow(project.data$get.commits.raw()) + 10), + split.data.activity.based(project.data, activity.type = "commits", number.windows = nrow(project.data$get.commits()) + 10), info = "Error expected (number.windows) (1)." 
) @@ -507,7 +507,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -532,12 +532,12 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2004-10-09 18:38:13-2010-07-12 11:05:35" = data.frame(), "2010-07-12 11:05:35-2010-07-12 12:05:41" = data.frame(), "2010-07-12 12:05:41-2010-07-12 12:05:44" = data.frame(), "2010-07-12 12:05:44-2016-07-12 15:58:40" = data.frame(), - "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits.raw[1:4, ], + "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits[1:4, ], "2016-07-12 16:05:37-2016-07-12 16:05:38" = data.frame() ), mails = list( @@ -574,7 +574,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -599,8 +599,8 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits.raw[1:4, ] + commits = list( + "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:4, ] ), mails = list( "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$mails @@ -616,7 +616,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ) ) 
results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -642,9 +642,9 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2004-10-09 18:38:13-2010-07-12 12:05:43" = data.frame(), - "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits.raw[1:4, ] + "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits[1:4, ] ), mails = list( "2004-10-09 18:38:13-2010-07-12 12:05:43" = data$mails[rownames(data$mails) %in% 1:8, ], @@ -664,7 +664,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -699,7 +699,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -722,9 +722,9 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits.raw[1:6, ], - "2016-07-12 16:05:47-2016-08-31 
18:21:48" = data$commits.raw[7:9, ], + commits = list( + "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits[1:6, ], + "2016-07-12 16:05:47-2016-08-31 18:21:48" = data$commits[7:9, ], "2016-08-31 18:21:48-2017-02-20 22:25:41" = data.frame(), "2017-02-20 22:25:41-2017-05-23 12:32:40" = data.frame() ), @@ -754,7 +754,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -779,8 +779,8 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2013-04-21 23:52:09-2017-05-23 12:32:40" = data$commits.raw + commits = list( + "2013-04-21 23:52:09-2017-05-23 12:32:40" = data$commits ), mails = list( "2013-04-21 23:52:09-2017-05-23 12:32:40" = data$mails[rownames(data$mails) %in% 14:17, ] @@ -796,7 +796,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -822,8 +822,8 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2013-04-21 23:52:09-2016-07-27 22:25:25" = data$commits.raw, + commits = list( + "2013-04-21 23:52:09-2016-07-27 22:25:25" = data$commits, 
"2016-07-27 22:25:25-2017-05-23 12:32:40" = data.frame() ), mails = list( @@ -844,7 +844,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 0e14435f..a66b0323 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -968,7 +968,7 @@ get.commit.data = function(range.data, columns = c("author.name", "author.email" logging::logdebug("get.commit.data: starting.") ## Get commit data - commits.df = range.data$get.commits.raw() + commits.df = range.data$get.commits() ## In case no commit data is available, return NA if(nrow(commits.df) == 0) { diff --git a/util-data.R b/util-data.R index f28f4c57..0d210fef 100644 --- a/util-data.R +++ b/util-data.R @@ -15,9 +15,9 @@ requireNamespace("R6") # for R6 classes requireNamespace("logging") # for logging requireNamespace("parallel") # for parallel computation -## / / / / / / / / / / / / / / -## Constant -## + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Constants --------------------------------------------------------------- ## mapping of relation to data source RELATION.TO.DATASOURCE = list( @@ -46,7 +46,7 @@ ProjectData = R6::R6Class("ProjectData", ## commits and commit data commits.filtered = NULL, # data.frame commits.filtered.empty = NULL, #data.frame - commits.raw = NULL, # data.frame + commits = NULL, # data.frame artifacts = NULL, # list synchronicity = NULL, # data.frame pasta = NULL, # data.frame @@ -107,7 +107,7 @@ ProjectData = R6::R6Class("ProjectData", } ## get raw commit data - commit.data = 
self$get.commits.raw() + commit.data = self$get.commits() ## break if the list of commits is empty if (nrow(commit.data) == 0) { @@ -182,7 +182,7 @@ ProjectData = R6::R6Class("ProjectData", self$get.mails() } if("commits" %in% data.sources) { - self$get.commits.raw() + self$get.commits() } if("issues" %in% data.sources) { self$get.issues() @@ -202,8 +202,8 @@ ProjectData = R6::R6Class("ProjectData", private$data.timestamps$mails = c(min(private$mails$date), max(private$mails$date)) } else if(source == "commits") { - private$data.timestamps$commits = c(min(private$commits.raw$date), - max(private$commits.raw$date)) + private$data.timestamps$commits = c(min(private$commits$date), + max(private$commits$date)) } else if(source == "issues") { private$data.timestamps$issues = c(min(private$issues$date), @@ -245,7 +245,7 @@ ProjectData = R6::R6Class("ProjectData", reset.environment = function() { private$commits.filtered = NULL private$commits.filtered.empty = NULL - private$commits.raw = NULL + private$commits = NULL private$artifacts = NULL private$synchronicity = NULL private$mails = NULL @@ -332,6 +332,9 @@ ProjectData = R6::R6Class("ProjectData", return(data.path) }, + #' Get the absolute path to the result folder for issue data. + #' + #' @return the path to the issue data get.data.path.issues = function() { data.path = private$project.conf$get.value("datapath.issues") return(data.path) @@ -373,28 +376,47 @@ ProjectData = R6::R6Class("ProjectData", #' If it doesn´t already exist call the read method first. 
#' #' @return the list of commits - get.commits.raw = function() { + get.commits = function() { logging::loginfo("Getting raw commit data.") ## if commits are not read already, do this - if (is.null(private$commits.raw)) { - private$commits.raw = read.commits.raw( + if (is.null(private$commits)) { + private$commits = read.commits( self$get.data.path(), private$project.conf$get.value("artifact") ) } private$extract.timestamps(source = "commits") - return(private$commits.raw) + return(private$commits) + }, + + #' Get the complete list of commits. + #' If it doesn´t already exist call the read method first. + #' + #' Note: This is just a delegate for \code{ProjectData$get.commits()}. + #' + #' @return the list of commits + get.commits.raw = function() { + return(self$get.commits()) }, #' Set the commit list of the project to a new one. #' #' @param data the new list of commits - set.commits.raw = function(data) { + set.commits = function(data) { logging::loginfo("Setting raw commit data.") if (is.null(data)) data = data.frame() - private$commits.raw = data + private$commits = data + }, + + #' Set the commit list of the project to a new one. + #' + #' Note: This is just a delegate for \code{ProjectData$set.commits(data)}. + #' + #' @param data the new list of commits + set.commits.raw = function(data) { + self$set.commits(data) }, #' Get the synchronicity data. @@ -694,7 +716,9 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, - + #' Map the corresponding authors to each issue and return the list. + #' + #' @return the list of authors for each issue get.issue2author = function() { logging::loginfo("Getting issue--author data") @@ -703,6 +727,9 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, + #' Map the corresponding issues to each author and return the list. 
+ #' + #' @return the list of issues for each author get.author2issue = function() { logging::loginfo("Getting author--issue data") @@ -718,7 +745,7 @@ ProjectData = R6::R6Class("ProjectData", logging::loginfo("Getting author--commit data.") ## store the authors per artifact - mylist = get.key.to.value.from.df(self$get.commits.raw(), "author.name", "hash") + mylist = get.key.to.value.from.df(self$get.commits(), "author.name", "hash") mylist = parallel::mclapply(mylist, unique) return(mylist) diff --git a/util-networks.R b/util-networks.R index c20b3656..2e9cfd60 100644 --- a/util-networks.R +++ b/util-networks.R @@ -582,7 +582,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## remove vertices that are not committers if wanted if (private$network.conf$get.value("author.only.committers")) { - committers = unique(private$proj.data$get.commits.raw()[["author.name"]]) + committers = unique(private$proj.data$get.commits()[["author.name"]]) authors = igraph::get.vertex.attribute(u, "name", igraph::V(u)[ type == TYPE.AUTHOR ]) authors.to.remove = setdiff(authors, committers) u = igraph::delete.vertices(u, authors.to.remove) diff --git a/util-read.R b/util-read.R index 7578eb44..50b646b5 100644 --- a/util-read.R +++ b/util-read.R @@ -24,8 +24,8 @@ requireNamespace("digest") # for sha1 hashing of IDs #' @param artifact the artifact whichs commits are read #' #' @return the read commits -read.commits.raw = function(data.path, artifact) { - logging::logdebug("read.commits.raw: starting.") +read.commits = function(data.path, artifact) { + logging::logdebug("read.commits: starting.") file = file.path(data.path, "commits.list") @@ -91,10 +91,20 @@ read.commits.raw = function(data.path, artifact) { commit.data[["commit.id"]] = sprintf("", commit.data[["commit.id"]]) ## store the commit data - logging::logdebug("read.commits.raw: finished.") + logging::logdebug("read.commits: finished.") return(commit.data) } +#' Read the commits from the 'commits.list' file. 
+#' +#' @param data.path the path to the commit list +#' @param artifact the artifact whose commits are read +#' +#' @return the read commits +read.commits.raw = function(data.path, artifact) { +    return(read.commits(data.path = data.path, artifact = artifact)) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Synchronicity data ------------------------------------------------------ diff --git a/util-split.R b/util-split.R index 9ca3b677..76b644dd 100644 --- a/util-split.R +++ b/util-split.R @@ -40,7 +40,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = split.basis = c("commits", "mails", "issues"), sliding.window = FALSE) { ## get actual raw data data = list( - commits = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues() ) @@ -110,7 +110,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = ## set data ## 1) commits - cf.range.data$set.commits.raw(df.list[["commits"]]) + cf.range.data$set.commits(df.list[["commits"]]) ## 2) mails cf.range.data$set.mails(df.list[["mails"]]) ## 3) issues @@ -203,7 +203,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits", ## get actual raw data data = list( - commits = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues() ) @@ -280,7 +280,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits", ## clone the project data and update raw data to split it again project.data.clone = project.data$clone() - project.data.clone$set.commits.raw(data[["commits"]]) + project.data.clone$set.commits(data[["commits"]]) project.data.clone$set.mails(data[["mails"]]) ## split data for sliding windows From 886d318de5a68a5054fa4c4d9c48642d66bee29c Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 17:44:37 
+0100 Subject: [PATCH 31/40] Refactor cutting mechanism in data classes After the landing of commit a803425e6bdb54c1654fb9de1f9375499e3aa829, the general code for the data cutting mechanism is streamlined: 1) The 'ProjectData$data.timestamps' attribute is now transposed -- to map data sources per line to their respective timestamps in the columns. This way is more intuitive and better for later access. 2) All related methods are adapted accordingly. 3) The amount of inline documentation is increased significantly. Signed-off-by: Claus Hunsen Reviewed-by: Thomas Bock --- util-data.R | 112 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 40 deletions(-) diff --git a/util-data.R b/util-data.R index 0d210fef..9d0b6a6e 100644 --- a/util-data.R +++ b/util-data.R @@ -173,21 +173,16 @@ ProjectData = R6::R6Class("ProjectData", return(data) }, + ## * * timestamps ------------------------------------------- + #' Call the getters of the specified data sources in order to #' initialize the sources and extract the timestamps. 
#' #' @param data.sources the data sources to be prepated prepare.timestamps = function(data.sources) { - if("mails" %in% data.sources) { - self$get.mails() - } - if("commits" %in% data.sources) { - self$get.commits() - } - if("issues" %in% data.sources) { - self$get.issues() + for(source in data.sources) { + self[[ paste0("get.", source) ]]() } - }, #' Extract the earliest and the latest date from the specified data source @@ -195,21 +190,38 @@ ProjectData = R6::R6Class("ProjectData", #' #' @param source the specified data source extract.timestamps = function(source) { + ## initialize data structure for timestamp if(is.null(private$data.timestamps)) { - private$data.timestamps = data.frame(row.names = c("start", "end")) + private$data.timestamps = data.frame(start = numeric(0), end = numeric(0)) } - if(source == "mails") { - private$data.timestamps$mails = c(min(private$mails$date), - max(private$mails$date)) - } else if(source == "commits") { - private$data.timestamps$commits = c(min(private$commits$date), - max(private$commits$date)) - - } else if(source == "issues") { - private$data.timestamps$issues = c(min(private$issues$date), - max(private$issues$date)) + ## collect minimum and maximum date for data source + ## 1) if we have data available + if (nrow(private[[source]]) > 0) { + source.date.min = min(private[[source]][, "date"]) + source.date.max = max(private[[source]][, "date"]) + } + ## NAs otherwise + else { + source.date.min = NA + source.date.max = NA } + + ## remove old line if existing + private$data.timestamps = subset( + private$data.timestamps, + !(rownames(private$data.timestamps) == source) + ) + + ## store the data in the timestamp data set + private$data.timestamps = rbind( + private$data.timestamps, + data.frame( + start = source.date.min, + end = source.date.max, + row.names = source + ) + ) } ), @@ -569,30 +581,42 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, + ## * * data cutting 
----------------------------------------- + #' Get the timestamps (earliest and latest date) of the specified data sources. - #' If 'simple' is TRUE return the overall latest start and earliest end date + #' If 'simple' is TRUE, return the overall latest start and earliest end date #' in order to cut the specified data sources to the same date ranges. #' + #' If there are no actual data available for a data source, the result indicates NA + #' #' @param data.sources the specified data sources #' @param simple whether or not the timestamps get simplified #' - #' @return a data.frame with the timestamps + #' @return a data.frame with the timestamps of each data source as columns "start" and "end", + #' with the data source as corresponding row name get.data.timestamps = function(data.sources = c("mails", "commits", "issues"), simple = FALSE) { - data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) + ## check arguments + data.sources = match.arg(arg = data.sources, several.ok = TRUE) + + ## read all data sources and prepare list of timestamps private$prepare.timestamps(data.sources = data.sources) - if(simple == FALSE) { - timestamps = subset(private$data.timestamps, select = data.sources) - return(timestamps) - } else { - subset.timestamps = private$data.timestamps[data.sources] - timestamps.buffer = data.frame(max = apply(subset.timestamps, 1, max), - min = apply(subset.timestamps, 1, min)) - timestamps = data.frame(start = timestamps.buffer["start", "max"], - end = timestamps.buffer["end", "min"]) - return(timestamps) + ## get the needed subset of timestamp data + subset.timestamps = private$data.timestamps[data.sources, ] + + ## get the proper subset of timestamps for returning + if(simple) { + ## get minima and maxima across data sources (rows) + timestamps = data.frame( + start = max(subset.timestamps[, "start"], na.rm = TRUE), + end = min(subset.timestamps[, "end"], na.rm = TRUE) + ) + } else { + ## select 
the complete raw data + timestamps = subset.timestamps } + return(timestamps) }, #' Cut the specified data sources to the same date range depending on the extracted @@ -602,14 +626,22 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return a list of the cut data.sources get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { - data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) - timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) - timestamps.vector = c(timestamps$start, timestamps$end) - if(timestamps$start > timestamps$end) { - logging::logwarn("The datasources don't overlap. The result will be empty.") + ## check arguments + data.sources = match.arg(arg = data.sources, several.ok = TRUE) + + ## get the timestamp data as vector + timestamps.df = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) + timestamps = c(start = timestamps.df[, "start"], end = timestamps.df[, "end"]) + + ## check consistency + if(timestamps["start"] > timestamps["end"]) { + logging::logwarn("The datasources don't overlap. The result will be empty!") } - result = split.data.time.based(self, bins = timestamps.vector) - return(result[[1]]) + + ## split data based on the timestamps and get the single result + result = split.data.time.based(self, bins = timestamps)[[1]] + + return(result) }, #' Get single pasta items. From 45ec73ff2e78e0c7963c63a8231e8f90dc4377bc Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:03:09 +0100 Subject: [PATCH 32/40] Format networks-metrics module This patch only applies some more readable code formatting to the networks-metrics module. 
Signed-off-by: Claus Hunsen --- util-networks-metrics.R | 56 +++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 55331fe0..43638863 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -9,18 +9,18 @@ requireNamespace("igraph") + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Metric functions -------------------------------------------------------- - #' Determine the maximum degree for the given network. #' #' @param network the network to be examined #' @param mode the mode to be used for determining the degrees #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. -metrics.hub.degree = function(network, mode = c("total", "in", "out")){ - match.arg(mode) +metrics.hub.degree = function(network, mode = c("total", "in", "out")) { + mode = match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) @@ -34,7 +34,7 @@ metrics.hub.degree = function(network, mode = c("total", "in", "out")){ #' #' @return The average degree of the nodes in the network. 
metrics.avg.degree = function(network, mode = c("total", "in", "out")) { - match.arg(mode) + mode = match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) return(avg) @@ -44,15 +44,15 @@ metrics.avg.degree = function(network, mode = c("total", "in", "out")) { #' #' @param network the network to be examined #' @param sort whether the resulting dataframe is to be sorted by the node degree -#' @param sort.decreasing if sorting is active, this says whether the dataframe is to be sorted -#' in descending or ascending order +#' @param sort.decreasing if sorting is active, this says whether the dataframe is to be +#' sorted in descending or ascending order #' #' @return A dataframe containing the nodes and their respective degrees. metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { if(sort) { - degrees = sort(igraph::degree(network, mode="total"), decreasing = sort.decreasing) + degrees = sort(igraph::degree(network, mode = "total"), decreasing = sort.decreasing) } else { - igraph::degree(network, mode="total") + igraph::degree(network, mode = "total") } return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -70,7 +70,7 @@ metrics.density = function(network) { #' Calculate the average path length for the given network. #' #' @param network the network to be examined -#' @param directed wehther the given network is directed or undirected +#' @param directed whether to consider directed paths in directed networks #' @param unconnected whether all nodes of the network are connected #' #' @return The average pathlength of the given network. @@ -86,7 +86,7 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' #' @return The clustering coefficient of the network. 
metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { - match.arg(cc.type) + cc.type = match.arg(cc.type) cc = igraph::transitivity(network, type = cc.type, vids = NULL) return(cc) } @@ -94,8 +94,8 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar #' Calculate the modularity metric for the given network. #' #' @param network the network to be examined -#' @param community.detection.algorithm the algorithm to be used for the detection of communities which -#' is required for the calculation of the clustering coefficient +#' @param community.detection.algorithm the algorithm to be used for the detection of communities +#' which is required for the calculation of the clustering coefficient #' #' @return The modularity value for the given network. metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { @@ -115,17 +115,24 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c #' The algorithm relies on the Erdös-Renyi random network with the same number #' of nodes and edges as the given network. #' -#' @param network the network to be examined. This network needs to be simplified for the calculation to work +#' To check the result value \code{s.delta} for a binary (true/false) decision on smallworldness, +#' do this: \code{is.smallworld = s.delta > 1}. +#' +#' Important: The given network needs to be simplified for the calculation to work! +#' +#' @param network the simplified network to be examined #' #' @return The smallworldness value of the network. 
metrics.smallworldness = function(network) { - # construct Erdös-Renyi network with same number of nodes and edges as g - h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) + h = igraph::erdos.renyi.game(n = igraph::vcount(network), + p.or.m = igraph::ecount(network), + type = "gnm", + directed = FALSE) # compute clustering coefficients - g.cc = igraph::transitivity(network, type = 'global') - h.cc = igraph::transitivity(h, type = 'global') + g.cc = igraph::transitivity(network, type = "global") + h.cc = igraph::transitivity(h, type = "global") # compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) @@ -138,9 +145,8 @@ metrics.smallworldness = function(network) { # indicator s.delta s.delta = gamma / lambda - # if s.delta > 1, then the network is a small-world network - #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - + ## if s.delta > 1, then the network is a small-world network + # is.smallworld = s.delta > 1 return ("smallworldness" = s.delta) } @@ -150,11 +156,11 @@ metrics.smallworldness = function(network) { #' #' @return A dataframe containing the different values, connected to scale-freeness. 
metrics.scale.freeness = function(network) { - v.degree <- sort(igraph::degree(network, mode="total"), decreasing=TRUE) + v.degree = sort(igraph::degree(network, mode = "total"), decreasing = TRUE) ## Power-law fiting - ## (from Mitchell Joblin , Siemens AG, 2012, 2013) - p.fit = igraph::power.law.fit(v.degree, implementation="plfit") + ## (by Mitchell Joblin , Siemens AG, 2012, 2013) + p.fit = igraph::power.law.fit(v.degree, implementation = "plfit") param.names = c("alpha", "xmin", "KS.p") res = list() res[param.names] = p.fit[param.names] @@ -162,7 +168,7 @@ metrics.scale.freeness = function(network) { ## Check percent of vertices under power-law res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) - df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) + df = data.frame(res$alpha, res$xmin, res$KS.p, res$num.power.law, res$percent.power.law) return(df) } @@ -173,7 +179,7 @@ metrics.scale.freeness = function(network) { #' @return A dataframe containing the logarithm of the node degree and the logarithm #' of the local clustering coefficient for each node. metrics.hierarchy = function(network) { - degrees = igraph::degree(network, mode="total") + degrees = igraph::degree(network, mode = "total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) From 21941bdfd62cf61dbdc61f296f4857aae48475a3 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:08:29 +0100 Subject: [PATCH 33/40] Fix small bugs in network metrics In this patch, we fix two minor bugs in the network metrics 'node.degree' and 'modularity'. In the first, the unsorted result was not assigned correctly to the return value. 
In the latter, the single modularity value does not need a name (which was also an undefined variable). Props to @ecklbarb for reporting these two mistakes. Signed-off-by: Claus Hunsen --- util-networks-metrics.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 43638863..2d62562a 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -52,7 +52,7 @@ metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { if(sort) { degrees = sort(igraph::degree(network, mode = "total"), decreasing = sort.decreasing) } else { - igraph::degree(network, mode = "total") + degrees = igraph::degree(network, mode = "total") } return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -101,7 +101,7 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { comm = community.detection.algorithm(network) mod = igraph::modularity(network, igraph::membership(comm)) - return(data.frame("name" = name, "modularity" = mod)) + return("modularity" = mod) } #' This function determines whether a network can be considered a From af2b3b626a7a84d0204902e27c95e9611dbc1029 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:38:46 +0100 Subject: [PATCH 34/40] Update README file Fix mistakes and add 'unify.date.ranges' documentation. Signed-off-by: Claus Hunsen --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9968f022..fac1fdf0 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. 
- issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. - * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the network configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). + * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). - `simplify` * Perform edge contraction to retrieve a simplified network * [`TRUE`, *`FALSE`*] @@ -132,11 +132,14 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * The upper bound for total amount of edges to build for a subset of the data, i.e., not building any edges for the subset exceeding the limit * any positive integer * **Example**: The amount of `mail`-based directed edges in an author network for one thread with 100 authors is 5049. - A value of 5000 for `skip.threshold` would lead to the omission of this thread from the network. + A value of 5000 for `skip.threshold` (as it is smaller than 5049) would lead to the omission of this thread from the network. +- `unify.date.ranges` + * Cut the data sources to the largest start date and the smallest end date across all data sources + * **Note**: This parameter does not affect the original data object, but rather creates a clone. + * [`TRUE`, *`FALSE`*] The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. -You can also update the object at any time, but as soon as you do so, all -cached data of the data object are reset and have to be rebuilt. 
+You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. For more examples, please look in the file `test.R`. From 1b86b1d5a14cdf2985d55690c123b135acbddd1a Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:40:57 +0100 Subject: [PATCH 35/40] Re-order NetworkConf and ProjectConf Now, in the README file and the configuration module, we have 'ProjectConf' documentation first and 'NetworkConf' documentation second. Signed-off-by: Claus Hunsen --- README.md | 122 +++++----- util-conf.R | 640 ++++++++++++++++++++++++++-------------------------- 2 files changed, 381 insertions(+), 381 deletions(-) diff --git a/README.md b/README.md index fac1fdf0..11d9eb82 100644 --- a/README.md +++ b/README.md @@ -82,67 +82,6 @@ For examples on how to use both classes and how to build networks with them, ple ## Configuration Classes -### NetworkConf - -In this section, we give an overview on the parameters of the `NetworkConf` class and their meaning. - -All parameters can be retrieved with the method `NetworkConf$get.variable(...)`, by passing one parameter name as method parameter. -Updates to the parameters can be done by calling `NetworkConf$update.variables(...)` and passing a list of parameter names and their respective values. - -**Note**: Default values are shown in *italics*. - -- `author.relation` - * The relation among authors, encoded as edges in an author network - * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! 
- * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] -- `author.directed` - * The (time-based) directedness of edges in an author network - * [`TRUE`, *`FALSE`*] -- `author.all.authors` - * Denotes whether all available authors (from all analyses and data sources) shall be added to the network as a basis - * **Note**: Depending on the chosen author relation, there may be isolates then - * [`TRUE`, *`FALSE`*] -- `author.only.committers` - * Remove all authors from an author network (including bipartite and multi networks) who are not present in an author network constructed with `artifact.relation` as relation, i.e., all authors that have no biparite relations in a bipartite/multi network are removed. - * [`TRUE`, *`FALSE`*] -- `artifact.relation` - * The relation among artifacts, encoded as edges in an artifact network - * **Note**: This relation configures also the author--artifact relation in bipartite and multi networks! - * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] -- `artifact.directed` - * The (time-based) directedness of edges in an artifact network - * **Note**: This parameter does not take effect for now, as the co-change relation is always undirected, while the call-graph relation is always directed. 
- * [`TRUE`, *`FALSE`*] -- `edge.attributes` - * The list of edge-attribute names and information - * a subset of the following as a single vector: - - timestamp information: *`"date"`* - - author information: `"author.name"`, `"author.email"` - - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` - - commit information: *`"hash"`*, *`"file"`*, *`"artifact.type"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` - - PaStA information: `"pasta"`, - - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` - * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. - * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. - * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). -- `simplify` - * Perform edge contraction to retrieve a simplified network - * [`TRUE`, *`FALSE`*] -- `skip.threshold` - * The upper bound for total amount of edges to build for a subset of the data, i.e., not building any edges for the subset exceeding the limit - * any positive integer - * **Example**: The amount of `mail`-based directed edges in an author network for one thread with 100 authors is 5049. - A value of 5000 for `skip.threshold` (as it is smaller than 5049) would lead to the omission of this thread from the network. -- `unify.date.ranges` - * Cut the data sources to the largest start date and the smallest end date across all data sources - * **Note**: This parameter does not affect the original data object, but rather creates a clone. 
- * [`TRUE`, *`FALSE`*] - -The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. -You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. - -For more examples, please look in the file `test.R`. - ## ProjectConf In this section, we give an overview on the parameters of the `ProjectConf` class and their meaning. @@ -247,6 +186,67 @@ There is no way to update the entries, except for the revision-based parameters. * [`TRUE`, *`FALSE`*] * **Note**: To include PaStA-based edge attributes, you need to give the `"pasta"` edge attribute for `edge.attributes`. +### NetworkConf + +In this section, we give an overview on the parameters of the `NetworkConf` class and their meaning. + +All parameters can be retrieved with the method `NetworkConf$get.variable(...)`, by passing one parameter name as method parameter. +Updates to the parameters can be done by calling `NetworkConf$update.variables(...)` and passing a list of parameter names and their respective values. + +**Note**: Default values are shown in *italics*. + +- `author.relation` + * The relation among authors, encoded as edges in an author network + * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! 
+ * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] +- `author.directed` + * The (time-based) directedness of edges in an author network + * [`TRUE`, *`FALSE`*] +- `author.all.authors` + * Denotes whether all available authors (from all analyses and data sources) shall be added to the network as a basis + * **Note**: Depending on the chosen author relation, there may be isolates then + * [`TRUE`, *`FALSE`*] +- `author.only.committers` + * Remove all authors from an author network (including bipartite and multi networks) who are not present in an author network constructed with `artifact.relation` as relation, i.e., all authors that have no biparite relations in a bipartite/multi network are removed. + * [`TRUE`, *`FALSE`*] +- `artifact.relation` + * The relation among artifacts, encoded as edges in an artifact network + * **Note**: This relation configures also the author--artifact relation in bipartite and multi networks! + * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] +- `artifact.directed` + * The (time-based) directedness of edges in an artifact network + * **Note**: This parameter does not take effect for now, as the co-change relation is always undirected, while the call-graph relation is always directed. 
+ * [`TRUE`, *`FALSE`*] +- `edge.attributes` + * The list of edge-attribute names and information + * a subset of the following as a single vector: + - timestamp information: *`"date"`* + - author information: `"author.name"`, `"author.email"` + - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` + - commit information: *`"hash"`*, *`"file"`*, *`"artifact.type"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` + - PaStA information: `"pasta"`, + - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` + * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. + * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. + * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). +- `simplify` + * Perform edge contraction to retrieve a simplified network + * [`TRUE`, *`FALSE`*] +- `skip.threshold` + * The upper bound for total amount of edges to build for a subset of the data, i.e., not building any edges for the subset exceeding the limit + * any positive integer + * **Example**: The amount of `mail`-based directed edges in an author network for one thread with 100 authors is 5049. + A value of 5000 for `skip.threshold` (as it is smaller than 5049) would lead to the omission of this thread from the network. +- `unify.date.ranges` + * Cut the data sources to the largest start date and the smallest end date across all data sources + * **Note**: This parameter does not affect the original data object, but rather creates a clone. 
+ * [`TRUE`, *`FALSE`*] + +The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. +You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. + +For more examples, please look in the file `test.R`. + ## File overview diff --git a/util-conf.R b/util-conf.R index 3d50f61f..9ad2283d 100644 --- a/util-conf.R +++ b/util-conf.R @@ -299,6 +299,326 @@ Conf = R6::R6Class("Conf", ) +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## ProjectConf ------------------------------------------------------------- + +ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, + + ## * private ----------------------------------------------------------- + + private = list( + + ## * * project info ------------------------------------------------ + + data = NULL, # character + selection.process = NULL, # character + casestudy = NULL, # character + artifact = NULL, # character + + ## * * attributes --------------------------------------------------- + + attributes = list( + artifact.filter.base = list( + default = TRUE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), + synchronicity = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), + synchronicity.time.window = list( + default = 5, + type = "numeric", + allowed = c(1, 5, 10, 15), + allowed.number = 1 + ), + pasta = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ) + ), + + ## * * revisions and ranges ---------------------------------------- + + #' Change the revision names to a equal name standard. + #' + #' @param ranges the list of ranges to be postprocessed + #' + #' @return the postprocessed ranges + postprocess.revision.list = function(ranges) { + # remove names ,e.g. 
"version", from release cycle names + casestudy = private$casestudy + to.remove = c( + "version-", "v-","version_", "v_","version", "v", + paste0(casestudy, "-"), paste0(casestudy,"-"), + paste0(casestudy, "_"), paste0(casestudy,"_"), + casestudy, casestudy + ) + + # run gsub for all pattern + ranges = tolower(ranges) + for (string in to.remove) { + ranges = gsub(string, "", ranges) + } + + # return simplified list of ranges + return(ranges) + }, + + #' Change the revision names of callgraph data to a equal name standard. + #' + #' @param r list of revisions to be postprocessed + #' + #' @return list of postprocessed revisions + postprocess.revision.list.for.callgraph.data = function(r) { + r = gsub("version-", "", r) # remove version prefix (SQLite) + r = gsub("OpenSSL_", "", r) # remove name prefix (OpenSSL) + r = gsub("\\.", "_", r) # replace dots by underscores + return(r) + }, + + ## * * path construction ------------------------------------------- + + subfolder.configurations = "configurations", + subfolder.results = "results", + + #' Construct and return the path to the configuration folder of Codeface. + #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' + #' @return the path to the configuration folder + get.configurations.folder = function(data, selection.process) { + return(file.path(data, private$subfolder.configurations, selection.process)) + + }, + + #' Construct and return the path to a Codeface configuration. 
+ #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' @param casestudy the current casestudy + #' @param tagging the current tagging ('feature', 'proximity') + #' + #' @return the path to the configuration + construct.conf.path = function(data, selection.process, casestudy, tagging) { + ## construct the base name of the configuration + conf.basename = paste(casestudy, "_", tagging, ".conf", sep = "") + ## construct complete path + conf.file = file.path(private$get.configurations.folder(data, selection.process), conf.basename) + ## return path to config file + return(conf.file) + }, + + #' Construct and return the path to the results folder of Codeface. + #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' @param casestudy the current casestudy + #' @param suffix the suffix of the casestudy's results folder + #' @param subfolder an optional subfolder + #' + #' @return the path to the results folder + #' (i.e., "{data}/{selection.process}/{casestudy}_{suffix}[/{subfolder}]") + get.results.folder = function(data, selection.process, casestudy, suffix, subfolder = NULL) { + path = file.path(data, private$subfolder.results, selection.process, paste(casestudy, suffix, sep = "_")) + if (!is.null(subfolder)) { + path = file.path(path, subfolder) + } + return(path) + } + + ), + + ## * public ------------------------------------------------------------ + + public = list( + + #' Constructor of the class. 
+ #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' @param casestudy the current casestudy + #' @param artifact the artifact to study ('feature','function','file') + initialize = function(data, selection.process, casestudy, artifact = "feature") { + super$initialize() + + if (!missing(data) && is.character(data)) { + private$data <- data + } + if (!missing(selection.process) && is.character(selection.process)) { + private$selection.process <- selection.process + } + if (!missing(casestudy) && is.character(casestudy)) { + private$casestudy <- casestudy + } + if (!missing(artifact) && is.character(artifact)) { + private$artifact <- artifact + } + + logging::loginfo("Construct configuration: starting.") + + ## convert artifact to tagging + tagging = ARTIFACT.TO.TAGGING[[ artifact ]] + if (is.null(tagging)) { + logging::logerror("Artifact '%s' cannot be converted to a proper Codeface tagging! 
Stopping...", artifact) + stop("Stopped due to wrong configuration parameters!") + } + ## construct file name for configuration + conf.file = private$construct.conf.path(data, selection.process, casestudy, tagging) + + ## load case-study confuration from given file + logging::loginfo("Attempting to load configuration file: %s", conf.file) + conf = yaml::yaml.load_file(conf.file) + + ## store basic information + conf$selection.process = selection.process + conf$casestudy = casestudy + + ## store artifact in configuration + conf$artifact = artifact + conf$artifact.short = ARTIFACT.TO.ABBREVIATION[[ conf$artifact ]] + conf$artifact.codeface = ARTIFACT.CODEFACE[[ conf$artifact ]] + ## store path to actual Codeface data + conf$datapath = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) + ## store path to call graphs + conf$datapath.callgraph = private$get.results.folder(data, selection.process, casestudy, "callgraphs") + ## store path to synchronicity data + conf$datapath.synchronicity = private$get.results.folder(data, selection.process, casestudy, "synchronicity") + ## store path to pasta data + conf$datapath.pasta = private$get.results.folder(data, selection.process, casestudy, "pasta") + ## store path to issue data + conf$datapath.issues = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) + + ## READ REVISIONS META-DATA + + ## read revisions file + revisions.file = file.path(conf$datapath, "revisions.list") + revisions.df <- try(read.table(revisions.file, header = FALSE, sep = ";", strip.white = TRUE, + encoding = "UTF-8"), silent = TRUE) + ## break if the list of revisions is empty or any other error occurs + if (inherits(revisions.df, 'try-error')) { + logging::logerror("There are no revisions available for the current casestudy.") + logging::logerror("Attempted to load following file: %s", revisions.file) + stop("Stopped due to missing revisions.") + } + ## convert columns 
accordingly + revisions.cols = c(revision = "as.character", date = "as.POSIXct") + for (i in 1:ncol(revisions.df)) { + revisions.df[i] = do.call(c, lapply(revisions.df[[i]], revisions.cols[i])) + colnames(revisions.df)[i] = names(revisions.cols)[i] + } + revisions = revisions.df[["revision"]] + revisions.dates = revisions.df[["date"]] + if (!is.null(revisions.dates)) names(revisions.dates) = revisions + conf[["revisions"]] = NULL + + ## change structure of values (i.e., insert 'default' sublists) + conf = lapply(conf, function(entry) { + return(list(value = entry, updatable = FALSE)) + }) + + ## SAVE FULL CONFIGURATION OBJECT + private$attributes = c(conf, private$attributes) + + ## construct and save revisions and ranges + ## (this has to be done after storing conf due to the needed access to the conf object) + self$set.revisions(revisions, revisions.dates) + + # ## logging + # self$print(allowed = TRUE) + + logging::loginfo("Construct configuration: finished.") + }, + + ## * * helper methods ---------------------------------------------- + + #' Get the corresponding callgraph revision for the given range. + #' + #' @param range the range for the callgraph revisions + #' + #' @return the callgraph revisions + get.callgraph.revision.from.range = function(range) { + idx = which(self$get.value("ranges") == range) + rev = self$get.value("revisions.callgraph")[idx + 1] + return(rev) + }, + + ## * * updating revisions and splitting information ---------------- + + #' Set the revisions and ranges for the study. 
+ #' + #' @param revisions the revisions of the study + #' @param revisions.dates the revision dates of the study + #' @param sliding.window whether sliding window splitting is enabled or not + #' default: 'FALSE' + set.revisions = function(revisions, revisions.dates, sliding.window = FALSE) { + ## construct revisions for call-graph data + revisions.callgraph = private$postprocess.revision.list.for.callgraph.data(revisions) + + ## assemble revision data + rev.data = list( + revisions = revisions, + revisions.dates = revisions.dates, + revisions.callgraph = revisions.callgraph, + ranges = construct.ranges(revisions, sliding.window = sliding.window), + ranges.callgraph = construct.ranges(revisions.callgraph, sliding.window = sliding.window) + ) + ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) + rev.data = lapply(rev.data, function(entry) { + return(list(value = entry, updatable = FALSE)) + }) + + ## insert new values (update if needed) + for (name in names(rev.data)) { + private[["attributes"]][[name]] = rev.data[[name]] + } + }, + + #' Update the information on revisions and ranges regarding splitting. 
+ #' + #' @param type either "time-based" or "activity-based", depending on splitting function + #' @param length the string given to time-based splitting (e.g., "3 months") or the activity + #' amount given to acitivity-based splitting + #' @param basis the data used as basis for splitting (either "commits", "mails", or "issues") + #' @param sliding.window whether sliding window splitting is enabled or not [default: FALSE] + #' @param revisions the revisions of the study + #' @param revisions.dates the revision dates of the study + set.splitting.info = function(type, length, basis, sliding.window, revisions, revisions.dates) { + ## assemble splitting information + split.info = list( + ## basic slpitting information + split.type = type, + split.length = length, + split.basis = basis, + split.sliding.window = sliding.window, + ## splitting information on ranges + split.revisions = revisions, + split.revisions.dates = revisions.dates, + split.ranges = construct.ranges(revisions, sliding.window = sliding.window) + + ) + ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) + split.info = lapply(split.info, function(entry) { + return(list(value = entry, updatable = FALSE)) + }) + + ## insert new values (update if needed) + for (name in names(split.info)) { + private[["attributes"]][[name]] = split.info[[name]] + } + } + + ) +) + + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## NetworkConf ------------------------------------------------------------- @@ -425,326 +745,6 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, ) -## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / -## ProjectConf ------------------------------------------------------------- - -ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, - - ## * private ----------------------------------------------------------- - - private = list( - - ## * * project info 
------------------------------------------------ - - data = NULL, # character - selection.process = NULL, # character - casestudy = NULL, # character - artifact = NULL, # character - - ## * * attributes --------------------------------------------------- - - attributes = list( - artifact.filter.base = list( - default = TRUE, - type = "logical", - allowed = c(TRUE, FALSE), - allowed.number = 1 - ), - synchronicity = list( - default = FALSE, - type = "logical", - allowed = c(TRUE, FALSE), - allowed.number = 1 - ), - synchronicity.time.window = list( - default = 5, - type = "numeric", - allowed = c(1, 5, 10, 15), - allowed.number = 1 - ), - pasta = list( - default = FALSE, - type = "logical", - allowed = c(TRUE, FALSE), - allowed.number = 1 - ) - ), - - ## * * revisions and ranges ---------------------------------------- - - #' Change the revision names to a equal name standard. - #' - #' @param ranges the list of ranges to be postprocessed - #' - #' @return the postprocessed ranges - postprocess.revision.list = function(ranges) { - # remove names ,e.g. "version", from release cycle names - casestudy = private$casestudy - to.remove = c( - "version-", "v-","version_", "v_","version", "v", - paste0(casestudy, "-"), paste0(casestudy,"-"), - paste0(casestudy, "_"), paste0(casestudy,"_"), - casestudy, casestudy - ) - - # run gsub for all pattern - ranges = tolower(ranges) - for (string in to.remove) { - ranges = gsub(string, "", ranges) - } - - # return simplified list of ranges - return(ranges) - }, - - #' Change the revision names of callgraph data to a equal name standard. 
- #' - #' @param r list of revisions to be postprocessed - #' - #' @return list of postprocessed revisions - postprocess.revision.list.for.callgraph.data = function(r) { - r = gsub("version-", "", r) # remove version prefix (SQLite) - r = gsub("OpenSSL_", "", r) # remove name prefix (OpenSSL) - r = gsub("\\.", "_", r) # replace dots by underscores - return(r) - }, - - ## * * path construction ------------------------------------------- - - subfolder.configurations = "configurations", - subfolder.results = "results", - - #' Construct and return the path to the configuration folder of Codeface. - #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' - #' @return the path to the configuration folder - get.configurations.folder = function(data, selection.process) { - return(file.path(data, private$subfolder.configurations, selection.process)) - - }, - - #' Construct and return the path to a Codeface configuration. - #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' @param casestudy the current casestudy - #' @param tagging the current tagging ('feature', 'proximity') - #' - #' @return the path to the configuration - construct.conf.path = function(data, selection.process, casestudy, tagging) { - ## construct the base name of the configuration - conf.basename = paste(casestudy, "_", tagging, ".conf", sep = "") - ## construct complete path - conf.file = file.path(private$get.configurations.folder(data, selection.process), conf.basename) - ## return path to config file - return(conf.file) - }, - - #' Construct and return the path to the results folder of Codeface. 
- #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' @param casestudy the current casestudy - #' @param suffix the suffix of the casestudy's results folder - #' @param subfolder an optional subfolder - #' - #' @return the path to the results folder - #' (i.e., "{data}/{selection.process}/{casestudy}_{suffix}[/{subfolder}]") - get.results.folder = function(data, selection.process, casestudy, suffix, subfolder = NULL) { - path = file.path(data, private$subfolder.results, selection.process, paste(casestudy, suffix, sep = "_")) - if (!is.null(subfolder)) { - path = file.path(path, subfolder) - } - return(path) - } - - ), - - ## * public ------------------------------------------------------------ - - public = list( - - #' Constructor of the class. - #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' @param casestudy the current casestudy - #' @param artifact the artifact to study ('feature','function','file') - initialize = function(data, selection.process, casestudy, artifact = "feature") { - super$initialize() - - if (!missing(data) && is.character(data)) { - private$data <- data - } - if (!missing(selection.process) && is.character(selection.process)) { - private$selection.process <- selection.process - } - if (!missing(casestudy) && is.character(casestudy)) { - private$casestudy <- casestudy - } - if (!missing(artifact) && is.character(artifact)) { - private$artifact <- artifact - } - - logging::loginfo("Construct configuration: starting.") - - ## convert artifact to tagging - tagging = ARTIFACT.TO.TAGGING[[ artifact ]] - if (is.null(tagging)) { - logging::logerror("Artifact '%s' cannot be converted to a proper Codeface tagging! 
Stopping...", artifact) - stop("Stopped due to wrong configuration parameters!") - } - ## construct file name for configuration - conf.file = private$construct.conf.path(data, selection.process, casestudy, tagging) - - ## load case-study confuration from given file - logging::loginfo("Attempting to load configuration file: %s", conf.file) - conf = yaml::yaml.load_file(conf.file) - - ## store basic information - conf$selection.process = selection.process - conf$casestudy = casestudy - - ## store artifact in configuration - conf$artifact = artifact - conf$artifact.short = ARTIFACT.TO.ABBREVIATION[[ conf$artifact ]] - conf$artifact.codeface = ARTIFACT.CODEFACE[[ conf$artifact ]] - ## store path to actual Codeface data - conf$datapath = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) - ## store path to call graphs - conf$datapath.callgraph = private$get.results.folder(data, selection.process, casestudy, "callgraphs") - ## store path to synchronicity data - conf$datapath.synchronicity = private$get.results.folder(data, selection.process, casestudy, "synchronicity") - ## store path to pasta data - conf$datapath.pasta = private$get.results.folder(data, selection.process, casestudy, "pasta") - ## store path to issue data - conf$datapath.issues = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) - - ## READ REVISIONS META-DATA - - ## read revisions file - revisions.file = file.path(conf$datapath, "revisions.list") - revisions.df <- try(read.table(revisions.file, header = FALSE, sep = ";", strip.white = TRUE, - encoding = "UTF-8"), silent = TRUE) - ## break if the list of revisions is empty or any other error occurs - if (inherits(revisions.df, 'try-error')) { - logging::logerror("There are no revisions available for the current casestudy.") - logging::logerror("Attempted to load following file: %s", revisions.file) - stop("Stopped due to missing revisions.") - } - ## convert columns 
accordingly - revisions.cols = c(revision = "as.character", date = "as.POSIXct") - for (i in 1:ncol(revisions.df)) { - revisions.df[i] = do.call(c, lapply(revisions.df[[i]], revisions.cols[i])) - colnames(revisions.df)[i] = names(revisions.cols)[i] - } - revisions = revisions.df[["revision"]] - revisions.dates = revisions.df[["date"]] - if (!is.null(revisions.dates)) names(revisions.dates) = revisions - conf[["revisions"]] = NULL - - ## change structure of values (i.e., insert 'default' sublists) - conf = lapply(conf, function(entry) { - return(list(value = entry, updatable = FALSE)) - }) - - ## SAVE FULL CONFIGURATION OBJECT - private$attributes = c(conf, private$attributes) - - ## construct and save revisions and ranges - ## (this has to be done after storing conf due to the needed access to the conf object) - self$set.revisions(revisions, revisions.dates) - - # ## logging - # self$print(allowed = TRUE) - - logging::loginfo("Construct configuration: finished.") - }, - - ## * * helper methods ---------------------------------------------- - - #' Get the corresponding callgraph revision for the given range. - #' - #' @param range the range for the callgraph revisions - #' - #' @return the callgraph revisions - get.callgraph.revision.from.range = function(range) { - idx = which(self$get.value("ranges") == range) - rev = self$get.value("revisions.callgraph")[idx + 1] - return(rev) - }, - - ## * * updating revisions and splitting information ---------------- - - #' Set the revisions and ranges for the study. 
- #' - #' @param revisions the revisions of the study - #' @param revisions.dates the revision dates of the study - #' @param sliding.window whether sliding window splitting is enabled or not - #' default: 'FALSE' - set.revisions = function(revisions, revisions.dates, sliding.window = FALSE) { - ## construct revisions for call-graph data - revisions.callgraph = private$postprocess.revision.list.for.callgraph.data(revisions) - - ## assemble revision data - rev.data = list( - revisions = revisions, - revisions.dates = revisions.dates, - revisions.callgraph = revisions.callgraph, - ranges = construct.ranges(revisions, sliding.window = sliding.window), - ranges.callgraph = construct.ranges(revisions.callgraph, sliding.window = sliding.window) - ) - ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) - rev.data = lapply(rev.data, function(entry) { - return(list(value = entry, updatable = FALSE)) - }) - - ## insert new values (update if needed) - for (name in names(rev.data)) { - private[["attributes"]][[name]] = rev.data[[name]] - } - }, - - #' Update the information on revisions and ranges regarding splitting. 
- #' - #' @param type either "time-based" or "activity-based", depending on splitting function - #' @param length the string given to time-based splitting (e.g., "3 months") or the activity - #' amount given to acitivity-based splitting - #' @param basis the data used as basis for splitting (either "commits", "mails", or "issues") - #' @param sliding.window whether sliding window splitting is enabled or not [default: FALSE] - #' @param revisions the revisions of the study - #' @param revisions.dates the revision dates of the study - set.splitting.info = function(type, length, basis, sliding.window, revisions, revisions.dates) { - ## assemble splitting information - split.info = list( - ## basic slpitting information - split.type = type, - split.length = length, - split.basis = basis, - split.sliding.window = sliding.window, - ## splitting information on ranges - split.revisions = revisions, - split.revisions.dates = revisions.dates, - split.ranges = construct.ranges(revisions, sliding.window = sliding.window) - - ) - ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) - split.info = lapply(split.info, function(entry) { - return(list(value = entry, updatable = FALSE)) - }) - - ## insert new values (update if needed) - for (name in names(split.info)) { - private[["attributes"]][[name]] = split.info[[name]] - } - } - - ) -) - - ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper functions -------------------------------------------------------- From 61fc7717334422e44ce6f1b5d15e9d17cc587bc3 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:44:28 +0100 Subject: [PATCH 36/40] Move PaStA method in data class This is just a code movement of the method 'get.pasta.items' for better structure in the 'ProjectData' class. 
Signed-off-by: Claus Hunsen --- util-data.R | 60 ++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/util-data.R b/util-data.R index 9d0b6a6e..2c451900 100644 --- a/util-data.R +++ b/util-data.R @@ -581,6 +581,36 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, + #' Get single pasta items. + #' For a given 'message.id', the associated 'commit.hash' is returned. + #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. + #' + #' @param message.id the message ID to get the corresponding commit hash + #' @param commit.hash the commit hash to get the corresponding message ID + #' + #' @return the selected pasta data + get.pasta.items = function(message.id = NULL, commit.hash = NULL) { + logging::loginfo("Getting pasta items started.") + #if neither message.id nor commit.hash are specified break the code + if(is.null(message.id) && is.null(commit.hash)) { + logging::logwarn("Neither message.id nor commit.hash specified.") + return() + } + + ## get pasta data + self$get.pasta() + + ## if a message.id is given just return the attached list of commit hashes + ## else gather all message.ids which contain the given commit.hash and return them + if(!is.null(message.id)) { + result = private$pasta[private$pasta[["message.id"]] == message.id, "commit.hash"] + return(result) + } else { + result = private$pasta[private$pasta[["commit.hash"]] == commit.hash, "message.id"] + return(result) + } + }, + ## * * data cutting ----------------------------------------- #' Get the timestamps (earliest and latest date) of the specified data sources. @@ -644,36 +674,6 @@ ProjectData = R6::R6Class("ProjectData", return(result) }, - #' Get single pasta items. - #' For a given 'message.id', the associated 'commit.hash' is returned. - #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. 
- #' - #' @param message.id the message ID to get the corresponding commit hash - #' @param commit.hash the commit hash to get the corresponding message ID - #' - #' @return the selected pasta data - get.pasta.items = function(message.id = NULL, commit.hash = NULL) { - logging::loginfo("Getting pasta items started.") - #if neither message.id nor commit.hash are specified break the code - if(is.null(message.id) && is.null(commit.hash)) { - logging::logwarn("Neither message.id nor commit.hash specified.") - return() - } - - ## get pasta data - self$get.pasta() - - ## if a message.id is given just return the attached list of commit hashes - ## else gather all message.ids which contain the given commit.hash and return them - if(!is.null(message.id)) { - result = private$pasta[private$pasta[["message.id"]] == message.id, "commit.hash"] - return(result) - } else { - result = private$pasta[private$pasta[["commit.hash"]] == commit.hash, "message.id"] - return(result) - } - }, - ## * * processed data ---------------------------------------------- #' Map the corresponding authors to each artifact and return the list. From a67ea247855bfccd7e8338085a53b481be347cfe Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 11:14:17 +0100 Subject: [PATCH 37/40] Update return values for metrics In the metrics module, single-value returns are now named vectors. The name is the metrics name as used in the respective function definition. Additionally, the column names for the scale-freeness metric is changed to not include 'res.' at each column name's beginning. 
Signed-off-by: Claus Hunsen --- util-networks-metrics.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 2d62562a..b6b4e248 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -37,7 +37,7 @@ metrics.avg.degree = function(network, mode = c("total", "in", "out")) { mode = match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) - return(avg) + return(c(avg.degree = avg)) } #' Calculate all node degrees for the given network @@ -64,7 +64,7 @@ metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { #' @return The density of the network. metrics.density = function(network) { density = igraph::graph.density(network) - return(density) + return(c(density = density)) } #' Calculate the average path length for the given network. @@ -76,7 +76,7 @@ metrics.density = function(network) { #' @return The average pathlength of the given network. metrics.avg.pathlength = function(network, directed, unconnected) { avg.pathlength = igraph::average.path.length(network, directed = directed, unconnected = unconnected) - return(avg.pathlength) + return(c(avg.pathlength = avg.pathlength)) } #' Calculate the average local clustering coefficient for the given network. @@ -88,7 +88,7 @@ metrics.avg.pathlength = function(network, directed, unconnected) { metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { cc.type = match.arg(cc.type) cc = igraph::transitivity(network, type = cc.type, vids = NULL) - return(cc) + return(c(clustering = cc)) } #' Calculate the modularity metric for the given network. 
@@ -101,7 +101,7 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { comm = community.detection.algorithm(network) mod = igraph::modularity(network, igraph::membership(comm)) - return("modularity" = mod) + return(c(modularity = mod)) } #' This function determines whether a network can be considered a @@ -147,7 +147,7 @@ metrics.smallworldness = function(network) { ## if s.delta > 1, then the network is a small-world network # is.smallworld = s.delta > 1 - return ("smallworldness" = s.delta) + return (c(smallworldness = s.delta)) } #' Determine scale freeness of a network using the power law fitting method. @@ -168,7 +168,7 @@ metrics.scale.freeness = function(network) { ## Check percent of vertices under power-law res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) - df = data.frame(res$alpha, res$xmin, res$KS.p, res$num.power.law, res$percent.power.law) + df = as.data.frame(res, row.names = "scale.freeness") return(df) } From 830d047471cf9fc008ae0431fdd36985739d9f17 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 15:12:38 +0100 Subject: [PATCH 38/40] Update README file - Add missing files and their respective descriptions. - Update how-to section's code snippet. - Fix some typos. - Fix the indentation of the 'ProjectConf' sections. - Add proper links to intra-document sections. - Add syntax highlighting for multi-line code snippets. Props to @bockthom for mentioning some of the points in PR #78. 
Signed-off-by: Claus Hunsen --- README.md | 67 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 11d9eb82..2081dca0 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,13 @@ The network library `codeface-extraction-r` can be used to construct analyzable ### Submodule Please insert the project into yours by use of [git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules). -Furthermore, the file `install.R` installs all needed R packages (see below) into your R library. +Furthermore, the file `install.R` installs all needed R packages (see [below](#needed-r-packages)) into your R library. Although, the use of of [packrat](https://rstudio.github.io/packrat/) with your project is recommended. This library is written in a way to not interfere with the loading order of your project's `R` packages (i.e., `library()` calls), so that the library does not lead to masked definitions. To initialize the library in your project, you need to source all files of the library in your project using the following command: -``` +```R source("path/to/util-init.R", chdir = TRUE) ``` It may lead to unpredictable behavior, when you do not do this, as we need to set some system and environment variables to ensure correct behavior of all functionality (e.g., parsing timestamps in the correct timezone and reading files from disk using the correct encoding). @@ -40,7 +40,7 @@ It may lead to unpredictable behavior, when you do not do this, as we need to se In this section, we give a short example on how to initialize all needed objects and build a bipartite network. For more examples, please see the file `test.R`. 
-``` +```R CF.DATA = "/path/to/codeface-data" # path to codeface data CF.SELECTION.PROCESS = "threemonth" # releases, threemonth(, testing) @@ -57,39 +57,44 @@ net.conf = NetworkConf$new() ## update the values of the NetworkConf object to the specific needs net.conf$update.values(list(author.relation = AUTHOR.RELATION, - artifact.relation = ARTIFACT.RELATION)) + artifact.relation = ARTIFACT.RELATION, + simplify = TRUE)) ## get ranges information from project configuration -ranges = proj.conf$get.entry(entry.name = "ranges") +ranges = proj.conf$get.entry("ranges") ## create data object which actually holds and handles data -cf.data = ProjectData$new(proj.conf, net.conf) +data = ProjectData$new(proj.conf) + +## create network builder to construct networks from the given data object +netbuilder = NetworkBuilder$new(data, net.conf) ## create and get the bipartite network ## (construction configured by net.conf's "artifact.relation") -bpn = cf.data$get.bipartite.network() +bpn = netbuilder$get.bipartite.network() ## plot the retrieved network -plot.bipartite.network(bpn) +plot.network(bpn) + ``` There are two different classes of configuration objects in this library: -- the `ProjectConf` class, which determines all configuration parameters needed for the configured project (mainly data paths) and -- the `NetworkConf` class, which is used for all configuration parameters concerning data retrieval and network construction. +- the `ProjectConf` class which determines all configuration parameters needed for the configured project (mainly data paths) and +- the `NetworkConf` class which is used for all configuration parameters concerning data retrieval and network construction. You can find an overview on all the parameters in these classes below in this file. For examples on how to use both classes and how to build networks with them, please look in the file `test.R`. 
## Configuration Classes -## ProjectConf +### ProjectConf In this section, we give an overview on the parameters of the `ProjectConf` class and their meaning. All parameters can be retrieved with the method `ProjectConf$get.entry(...)`, by passing one parameter name as method parameter. There is no way to update the entries, except for the revision-based parameters. -### Basic Information +#### Basic Information - `project` * The project name from the Codeface analysis @@ -103,7 +108,7 @@ There is no way to update the entries, except for the revision-based parameters. - `mailinglists` * A list of the mailinglists of the project containing their name, type and source -### Artifact-Related Information +#### Artifact-Related Information - `artifact` * The artifact of the project used for all data retrievals @@ -117,9 +122,9 @@ There is no way to update the entries, except for the revision-based parameters. * The Codeface tagging parameter for the project, based on the `artifact` parameter * Either `"proximity"` or `"feature"` -### Revision-Related Information +#### Revision-Related Information -**Note**: This data is updated after performing a data-based splitting (i.e., by calling the functions `split.data.*`). +**Note**: This data is updated after performing a data-based splitting (i.e., by calling the functions `split.data.*(...)`). **Note**: These parameters can be updated using the method `ProjectConf$set.splitting.info()`, but you should *not* do that manually! - `revisions` @@ -134,7 +139,7 @@ There is no way to update the entries, except for the revision-based parameters. - `ranges.callgraph` * The revision ranges based on the list `revisions.callgraph` -### Data Paths +#### Data Paths - `datapath` * The data path to the Codeface results folder of this project @@ -145,9 +150,9 @@ There is no way to update the entries, except for the revision-based parameters. 
- `datapath.pasta` * The data path to the pasta data -### Splitting Information +#### Splitting Information -**Note**: This data is added to the `ProjectConf` object only after performing a data-based splitting (by calling the functions `split.data.*`). +**Note**: This data is added to the `ProjectConf` object only after performing a data-based splitting (by calling the functions `split.data.*(...)`). **Note**: These parameters can be updated using the method `ProjectConf$set.splitting.info()`, but you should *not* do that manually! - `split.type` @@ -165,13 +170,13 @@ There is no way to update the entries, except for the revision-based parameters. - `split.ranges` * The ranges constructed from `split.revisions` (either in sliding-window manner or not, depending on `split.sliding.window`) -### Data-Retrieval-Related Parameters (Configurable!) +#### (Configurable) Data-Retrieval-Related Parameters **Note**: These parameters can be configured using the method `ProjectConf$update.values()`. - `artifact.filter.base` - Remove all artifact information regarding the base artifact - (`Base_Feature` or `File_Level` for features and functions, respectively, as artifacts) + (`"Base_Feature"` or `"File_Level"` for features and functions, respectively, as artifacts) - [*`TRUE`*, `FALSE`] - `synchronicity` * Read and add synchronicity data to commits and co-change-based networks @@ -228,7 +233,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. 
- * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). + * **Note**: For the edge attributes `"pasta"` and `"synchronicity"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). - `simplify` * Perform edge contraction to retrieve a simplified network * [`TRUE`, *`FALSE`*] @@ -250,26 +255,32 @@ For more examples, please look in the file `test.R`. ## File overview +- `util-init.R` + * Initialization file that can be used by other analysis projects (see Section [*Submodule*](#submodule)) - `util-conf.R` * The configuration classes of the project +- `util-read.R` + * Functionality to read data file from disk - `util-data.R` * All representations of the data classes -- `util-plot.R` - * Everything needed for plotting networks -- `util-misc.R` - * Helper functions and also legacy functions, both needed in the other files +- `util-networks.R` + * The `NetworkBuilder` class and all corresponding helper functions to construct networks - `util-split.R` * Splitting functionality for data objects and networks (time-based and activity-based, using arbitrary ranges) - `util-motifs.R` * Functionality for the identifaction of network motifs (subgraph patterns) - `util-bulk.R` * Collection functionality for the different network types (using Codeface revision ranges) +- `util-plot.R` + * Everything needed for plotting networks - `util-core-peripheral.R` * Author classification (core and peripheral) and related functions -- `util-init.R` - * Initialization file that can be used by other analysis projects (see Section *Submodule*) +- `util-networks-metrics.R` + * A set of network-metric functions +- `util-misc.R` + * Helper functions and also legacy functions, both needed in the other files - `test.R` - * Showcase file (see Section *How-To*) + * Showcase file (see Section also 
[*How-To*](#how-to)) - `tests.R` * Test suite (running all tests in `tests/` subfolder) From 389a63ba4f43d74020f01478c04a88eda1b6b344 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 15:14:38 +0100 Subject: [PATCH 39/40] Fix and update function documentation in reading module Fix typos in the roxygen documentation of some of the reading functions. Remove wrong documentation for the issue-reading function. Props to @bockthom for pointing this out. Signed-off-by: Claus Hunsen --- util-read.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/util-read.R b/util-read.R index 50b646b5..e1b392a6 100644 --- a/util-read.R +++ b/util-read.R @@ -21,7 +21,7 @@ requireNamespace("digest") # for sha1 hashing of IDs #' Read the commits from the 'commits.list' file. #' #' @param data.path the path to the commit list -#' @param artifact the artifact whichs commits are read +#' @param artifact the artifact whose commits are read #' #' @return the read commits read.commits = function(data.path, artifact) { @@ -98,7 +98,9 @@ read.commits = function(data.path, artifact) { #' Read the commits from the 'commits.list' file. #' #' @param data.path the path to the commit list -#' @param artifact the artifact whichs commits are read +#' @param artifact the artifact whose commits are read +#' +#' Note: This is just a delegate for \code{read.commits(data.path, artifact)}. #' #' @return the read commits read.commits.raw = function(data.path, artifact) { @@ -114,7 +116,7 @@ read.commits.raw = function(data.path, artifact) { #' where artifact and time.window are the given variables. 
#' #' @param data.path the path to the synchronicity data -#' @param artifact the artifact whichs synchronicity data get read +#' @param artifact the artifact whose synchronicity data get read #' @param time.window the time window of the data to be read #' #' @return the read synchronicity data @@ -320,11 +322,10 @@ read.pasta = function(data.path) { ## Issue data -------------------------------------------------------------- #' Read and parse the issue data from the 'issues.list' file. -#' The parsed format is a data frame with message IDs as keys and commit hashes as values. #' -#' @param data.path the path to the pasta data +#' @param data.path the path to the issue data #' -#' @return the read and parsed pasta data +#' @return the read and parsed issue data read.issues = function(data.path) { logging::logdebug("read.issues: starting.") From caa38c1a28a4f12b7e59792770e8413ef17068c2 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 15:17:53 +0100 Subject: [PATCH 40/40] Rename showcase file to 'showcase.R' For better comprehensibility, the showcase file is renamed to 'showcase.R', as the name 'test.R' was misleading regarding the tests. Signed-off-by: Claus Hunsen --- README.md | 8 ++++---- test.R => showcase.R | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename test.R => showcase.R (100%) diff --git a/README.md b/README.md index 2081dca0..25a8f64e 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ It may lead to unpredictable behavior, when you do not do this, as we need to se ## How-To In this section, we give a short example on how to initialize all needed objects and build a bipartite network. -For more examples, please see the file `test.R`. +For more examples, please see the file `showcase.R`. 
```R CF.DATA = "/path/to/codeface-data" # path to codeface data @@ -83,7 +83,7 @@ There are two different classes of configuration objects in this library: - the `NetworkConf` class which is used for all configuration parameters concerning data retrieval and network construction. You can find an overview on all the parameters in these classes below in this file. -For examples on how to use both classes and how to build networks with them, please look in the file `test.R`. +For examples on how to use both classes and how to build networks with them, please look in the file `showcase.R`. ## Configuration Classes @@ -250,7 +250,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. -For more examples, please look in the file `test.R`. +For more examples, please look in the file `showcase.R`. ## File overview @@ -279,7 +279,7 @@ For more examples, please look in the file `test.R`. * A set of network-metric functions - `util-misc.R` * Helper functions and also legacy functions, both needed in the other files -- `test.R` +- `showcase.R` * Showcase file (see Section also [*How-To*](#how-to)) - `tests.R` * Test suite (running all tests in `tests/` subfolder) diff --git a/test.R b/showcase.R similarity index 100% rename from test.R rename to showcase.R