From 83bfc0e55c584926406bc9587afb00b9baa1baff Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 23 Aug 2017 20:00:23 +0200 Subject: [PATCH 01/40] Introduce file with network metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 metrics.R diff --git a/metrics.R b/metrics.R new file mode 100644 index 00000000..e9b00c9e --- /dev/null +++ b/metrics.R @@ -0,0 +1,33 @@ + +requireNamespace("igraph") + + + + + +hub.indegree = function(network){ + degrees = igraph::degree(network, mode = c("in")) + vertex = which.max(degrees) + return(igraph::V(network)[vertex]) +} + +density = function(network) { + density = igraph::graph.density(network) + return(density) +} + +avg.outdegree = function(network) { + outdegrees = igraph::degree(network, mode = c("out")) + avg = mean(outdegrees) + return(avg) +} + +avg.pathlength = function(network) { + lengths = igraph::shortest.paths(network, V(network), mode = "out", weights = NA) + lengths = unname(lengths + + + + + +} From 76014b2b8e5d11f484cb05b53f0f0932fe875768 Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 23 Aug 2017 23:45:06 +0200 Subject: [PATCH 02/40] Add more network metrics, change issue data folder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 43 +++++++++++++++++++++++++++++++++++++++++-- util-conf.R | 7 +++---- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/metrics.R b/metrics.R index e9b00c9e..91acedac 100644 --- a/metrics.R +++ b/metrics.R @@ -23,11 +23,50 @@ avg.outdegree = function(network) { } avg.pathlength = function(network) { - lengths = igraph::shortest.paths(network, V(network), mode = "out", weights = NA) - lengths = unname(lengths + return(igraph::average.path.length(network, directed = TRUE, unconnected = FALSE)) +} + 
+clustering.coeff = function(network) { + local.cc = igraph::transitivity(network, type = "local", vids = NULL) + cc = mean(local.cc, na.rm = TRUE) + return(cc) +} + +# Not sure if this is correct +modularity = function(network) { + comm = igraph::cluster_walktrap(network) + mod = igraph::modularity(network, igraph::membership(comm)) + return(mod) +} + +smallworldness = function(network) { + +} + + +determine.smallworldness = function(g) { + + # construct Erdös-Renyi network with same number of nodes and edges as g + h = erdos.renyi.game(n=vcount(g), p.or.m=ecount(g), type="gnm", directed=TRUE) + + ## compute clustering coefficients + g.cc = transitivity(g) + h.cc = transitivity(h) + ## compute average shortest-path length + g.l = average.path.length(g) + h.l = average.path.length(h) + ## binary decision + # intermediate variables + gamma = g.cc / h.cc + lambda = g.l / h.l + # indicator s.delta + s.delta = gamma / lambda + # if s.delta > 1, then the network is a small-world network + #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) + return (s.delta) } diff --git a/util-conf.R b/util-conf.R index 17e73d29..8db89d78 100644 --- a/util-conf.R +++ b/util-conf.R @@ -265,8 +265,8 @@ ProjectConf = R6::R6Class("ProjectConf", #' @param casestudy the current casestudy #' #' @return the path to the issues folder - get.issues.folder = function(data, selection.process, casestudy) { - return(file.path(data, private$subfolder.results, selection.process, paste(casestudy, "issues", sep = "_"))) + get.issues.folder = function(data, selection.process, project) { + return(file.path(data, private$subfolder.results, selection.process, project)) }, #' Construct and return the path to a Codeface configuration. 
@@ -317,7 +317,6 @@ ProjectConf = R6::R6Class("ProjectConf", conf$artifact = artifact conf$artifact.short = ARTIFACT.TO.ABBREVIATION[[ conf$artifact ]] conf$artifact.codeface = ARTIFACT.CODEFACE[[ conf$artifact ]] - ## store path to actual Codeface data conf$datapath = private$get.results.folder(data, selection.process, conf[["project"]], tagging) ## store path to call graphs @@ -327,7 +326,7 @@ ProjectConf = R6::R6Class("ProjectConf", ## store path to pasta data conf$datapath.pasta = private$get.pasta.folder(data, selection.process, casestudy) ## store path to issue data - conf$datapath.issues = private$get.issues.folder(data, selection.process, casestudy) + conf$datapath.issues = private$get.issues.folder(data, selection.process, conf[["project"]]) ## READ REVISIONS META-DATA From ba08a24a1ba789f7cd4a7b07600bfc483a65356b Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Thu, 24 Aug 2017 09:48:20 +0200 Subject: [PATCH 03/40] Add namespace requirements to smallworldness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/metrics.R b/metrics.R index 91acedac..9c288ccd 100644 --- a/metrics.R +++ b/metrics.R @@ -40,22 +40,23 @@ modularity = function(network) { } smallworldness = function(network) { - + smallworldness <- determine.smallworldness(network) # smallworldness(nw.data$nw) # + return(smallworldness) } determine.smallworldness = function(g) { # construct Erdös-Renyi network with same number of nodes and edges as g - h = erdos.renyi.game(n=vcount(g), p.or.m=ecount(g), type="gnm", directed=TRUE) + h = igraph::erdos.renyi.game(n=igraph::vcount(g), p.or.m=igraph::gsize(g), type="gnm", directed=TRUE) ## compute clustering coefficients - g.cc = transitivity(g) - h.cc = transitivity(h) + g.cc = igraph::transitivity(g) + h.cc = igraph::transitivity(h) ## compute average shortest-path length - g.l = 
average.path.length(g) - h.l = average.path.length(h) + g.l = igraph::average.path.length(g) + h.l = igraph::average.path.length(h) ## binary decision # intermediate variables From 6adabaf58acbeecccfb27ad56d1b42a925b3144a Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Thu, 31 Aug 2017 09:52:57 +0200 Subject: [PATCH 04/40] Continue implementing metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/metrics.R b/metrics.R index 9c288ccd..cf50daf1 100644 --- a/metrics.R +++ b/metrics.R @@ -39,6 +39,7 @@ modularity = function(network) { return(mod) } +# requires simplified network smallworldness = function(network) { smallworldness <- determine.smallworldness(network) # smallworldness(nw.data$nw) # return(smallworldness) @@ -71,3 +72,37 @@ determine.smallworldness = function(g) { return (s.delta) } + + +amount.nodes = function(network) { + return(igraph::vcount(network)) +} + +power.law.fitting = function(network) { + v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) + + ## Power-law fiting + ## (from Mitchell Joblin , Siemens AG, 2012, 2013) + p.fit = igraph::power.law.fit(v.degree, implementation="plfit") + param.names = c("alpha", "xmin", "KS.p") + res = list() + res[param.names] = p.fit[param.names] + + ## Check percent of vertices under power-law + res$num.power.law = length(which(v.degree >= res$xmin)) + res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) + + return(cbind(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law)) +} + +generate.hierarchy = function(network) { + degrees = igraph::degree(network, mode="total") + cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) + + degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + cluster.coeff = subset(cluster.coeff, 
!(is.nan(cluster.coeff) | cluster.coeff == 0)) + + names.of.points = row.names(as.data.frame(degrees.without.cc)) +} + + From aeb456f4e6bca8ddca8516950d5ea4ce008f2c03 Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 6 Sep 2017 15:26:44 +0200 Subject: [PATCH 05/40] Add more metrics for network analysis. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/metrics.R b/metrics.R index cf50daf1..f2cf160b 100644 --- a/metrics.R +++ b/metrics.R @@ -1,19 +1,11 @@ requireNamespace("igraph") - - - - hub.indegree = function(network){ degrees = igraph::degree(network, mode = c("in")) vertex = which.max(degrees) - return(igraph::V(network)[vertex]) -} - -density = function(network) { - density = igraph::graph.density(network) - return(density) + node = igraph::V(network)[vertex] + return(node) } avg.outdegree = function(network) { @@ -22,6 +14,15 @@ avg.outdegree = function(network) { return(avg) } +node.degrees = function(network) { + return(igraph::degree(network, mode="total")) +} + +density = function(network) { + density = igraph::graph.density(network) + return(density) +} + avg.pathlength = function(network) { return(igraph::average.path.length(network, directed = TRUE, unconnected = FALSE)) } @@ -32,16 +33,19 @@ clustering.coeff = function(network) { return(cc) } -# Not sure if this is correct modularity = function(network) { comm = igraph::cluster_walktrap(network) mod = igraph::modularity(network, igraph::membership(comm)) return(mod) } +amount.nodes = function(network) { + return(igraph::vcount(network)) +} + # requires simplified network smallworldness = function(network) { - smallworldness <- determine.smallworldness(network) # smallworldness(nw.data$nw) # + smallworldness <- determine.smallworldness(network) return(smallworldness) } @@ -73,11 +77,6 @@ determine.smallworldness = 
function(g) { return (s.delta) } - -amount.nodes = function(network) { - return(igraph::vcount(network)) -} - power.law.fitting = function(network) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) @@ -102,7 +101,6 @@ generate.hierarchy = function(network) { degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - names.of.points = row.names(as.data.frame(degrees.without.cc)) + return(data.frame(x = log(degrees.without.cc), y = cluster.coeff)) } - From c1ad7dc17eea14078a60dc6426349403d3f8e1b3 Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 6 Sep 2017 21:08:24 +0200 Subject: [PATCH 06/40] Indroduce first plot, fix error when loading issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 2 +- plot-metrics.R | 10 ++++++++++ util-conf.R | 2 +- util-read.R | 10 ++++------ 4 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 plot-metrics.R diff --git a/metrics.R b/metrics.R index f2cf160b..8a4f20f6 100644 --- a/metrics.R +++ b/metrics.R @@ -101,6 +101,6 @@ generate.hierarchy = function(network) { degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(x = log(degrees.without.cc), y = cluster.coeff)) + return(data.frame(deg = log(degrees.without.cc), cc = cluster.coeff)) } diff --git a/plot-metrics.R b/plot-metrics.R new file mode 100644 index 00000000..86591bf9 --- /dev/null +++ b/plot-metrics.R @@ -0,0 +1,10 @@ + +requireNamespace("ggplot2") + + +plot.hierarchy = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg, color = deg)) + + ggplot2::geom_point() + + ggplot2::geom_smooth() + return(plot) +} diff --git a/util-conf.R b/util-conf.R index 8739f456..e1c86133 100644 --- 
a/util-conf.R +++ b/util-conf.R @@ -516,6 +516,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, #' @return the path to the configuration folder get.configurations.folder = function(data, selection.process) { return(file.path(data, private$subfolder.configurations, selection.process)) + }, #' Construct and return the path to a Codeface configuration. @@ -589,7 +590,6 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, logging::logerror("Artifact '%s' cannot be converted to a proper Codeface tagging! Stopping...", artifact) stop("Stopped due to wrong configuration parameters!") } - ## construct file name for configuration conf.file = private$construct.conf.path(data, selection.process, casestudy, tagging) diff --git a/util-read.R b/util-read.R index 72491987..cee5fceb 100644 --- a/util-read.R +++ b/util-read.R @@ -336,14 +336,11 @@ read.issues = function(data.path) { ## set proper column names colnames(issue.data) = c( "issue.id", "issue.state", "creation.date", "closing.date", "is.pull.request", # issue information - "author.id", "author.name", "author.email", # author information + "author.name", "author.mail", # author information "date", # the date - "event.name" # the event describing the row's entry + "ref.name", "event.name" # the event describing the row's entry ) - ## remove unneeded columns from data - issue.data["author.id"] = NULL - ## set pattern for issue ID for better recognition issue.data[["issue.id"]] = sprintf("", issue.data[["issue.id"]]) @@ -351,9 +348,10 @@ read.issues = function(data.path) { issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) ## convert dates and sort by 'date' column + print(issue.data) issue.data[["date"]] = as.POSIXct(issue.data[["date"]]) issue.data[["creation.date"]] = as.POSIXct(issue.data[["creation.date"]]) - issue.data[["closing.date"]][ issue.data[["closing.date"]] == "null" ] = NA + issue.data[["closing.date"]][ issue.data[["closing.date"]] == "" ] = NA 
issue.data[["closing.date"]] = as.POSIXct(issue.data[["closing.date"]]) issue.data = issue.data[order(issue.data[["date"]], decreasing = FALSE), ] # sort! From 545850aeae438a872838c7ecbd0ef13e0ca0f5ab Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Mon, 11 Sep 2017 15:27:50 +0200 Subject: [PATCH 07/40] Introduce handling of incomplete ranges Add functionality to cut data sources to the same date ranges Add parameter in NetworkConf for that purpose Add cutting functionalities in the NetworkBuilder fixes #38 Signed-off-by: Christian Hechtl --- util-conf.R | 6 ++ util-data.R | 150 +++++++++++++++++++++++++++++++++++++++++++++++- util-networks.R | 56 ++++++++++++++++++ 3 files changed, 210 insertions(+), 2 deletions(-) diff --git a/util-conf.R b/util-conf.R index 6b625917..d5493a5c 100644 --- a/util-conf.R +++ b/util-conf.R @@ -385,6 +385,12 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, type = "numeric", allowed = Inf, allowed.number = 1 + ), + unify.date.ranges = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 ) ) diff --git a/util-data.R b/util-data.R index 38a319c4..7b5bba31 100644 --- a/util-data.R +++ b/util-data.R @@ -13,6 +13,18 @@ requireNamespace("R6") # for R6 classes requireNamespace("logging") # for logging requireNamespace("parallel") # for parallel computation +## / / / / / / / / / / / / / / +## Constant +## + +## mapping of relation to data source +RELATION.TO.DATASOURCE = list( + "cochange" = "commits", + "callgraph" = "commits", + "mail" = "mails", + "issue" = "issues" +) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## ProjectData ------------------------------------------------------------- @@ -40,8 +52,10 @@ ProjectData = R6::R6Class("ProjectData", mails = NULL, # data.frame ## authors authors = NULL, # list - ##issues + ## issues issues = NULL, #data.frame + ## timestamps of mail, issue and commit data + data.timestamps = NULL, #data.frame ## * * 
filtering commits ------------------------------------------- @@ -155,6 +169,45 @@ ProjectData = R6::R6Class("ProjectData", } } return(data) + }, + + #' Call the getters of the specified data sources in order to + #' initialize the sources and extract the timestamps. + #' + #' @param data.sources the data sources to be prepated + prepare.timestamps = function(data.sources) { + if("mails" %in% data.sources) { + self$get.mails() + } + if("commits" %in% data.sources) { + self$get.commits.raw() + } + if("issues" %in% data.sources) { + self$get.issues() + } + + }, + + #' Extract the earliest and the latest date from the specified data source + #' and store it to the timestamps data.frame. + #' + #' @param source the specified data source + extract.timestamps = function(source) { + if(is.null(private$data.timestamps)) { + private$data.timestamps = data.frame(row.names = c("start", "end")) + } + if(source == "mails") { + private$data.timestamps$mails = c(as.POSIXct(min(private$mails$date)), + as.POSIXct(max(private$mails$date))) + } else if(source == "commits") { + private$data.timestamps$commits = c(as.POSIXct(min(private$commits.raw$date)), + as.POSIXct(max(private$commits.raw$date))) + + } else if(source == "issues") { + private$data.timestamps$issues = c(as.POSIXct(min(private$issues$creation.date)), + as.POSIXct(max(private$issues$creation.date))) + + } } ), @@ -196,6 +249,7 @@ ProjectData = R6::R6Class("ProjectData", private$mails = NULL private$authors = NULL private$pasta = NULL + private$data.timestamps = NULL }, ## * * configuration ----------------------------------------------- @@ -298,6 +352,10 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered.empty) }, + set.commits.filtered.empty = function(data) { + private$commits.filtered.empty = data + }, + #' Get the list of commits without the base artifact. #' If it doesn´t already exist call the filter method. 
#' @@ -313,6 +371,10 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered) }, + set.commits.filtered = function(data) { + private$commits.filtered = data + }, + #' Get the complete list of commits. #' If it doesn´t already exist call the read method first. #' @@ -327,6 +389,7 @@ ProjectData = R6::R6Class("ProjectData", private$project.conf$get.value("artifact") ) } + private$extract.timestamps(source = "commits") return(private$commits.raw) }, @@ -407,6 +470,7 @@ ProjectData = R6::R6Class("ProjectData", private$mails = private$add.pasta.data(private$mails) } } + private$extract.timestamps(source = "mails") return(private$mails) }, @@ -487,6 +551,88 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, + set.artifacts = function(artifacts) { + logging::loginfo("Setting artifact data.") + private$artifacts = artifacts + }, + + ## get the list of issues + get.issues = function() { + logging::loginfo("Getting issue data") + + ## if issues have not been read yet do this + if(is.null(private$issues)) { + private$issues = read.issues(self$get.data.path.issues()) + } + private$extract.timestamps(source = "issues") + + return(private$issues) + }, + + #' Set the issue data to the given new data. + #' + #' @param issues the given new data + set.issues = function(issues) { + logging::loginfo("Setting issue data.") + private$issues = issues + }, + + #' Get the timestamps (earliest and latest date) of the specified data sources. + #' If 'simple' is TRUE return the overall latest start and earliest end date + #' in order to cut the specified data sources to the same date ranges. 
+ #' + #' @param data.sources the specified data sources + #' @param simple whether or not the timestamps get simplified + #' + #' @return a data.frame with the timestamps + get.data.timestamps = function(data.sources = c("mails", "commits", "issues"), simple = FALSE) { + private$prepare.timestamps(data.sources = data.sources) + if(is.null(private$data.timestamps)) { + logging::logwarn("No timestamps available.") + return(data.frame()) + } else if(simple == FALSE) { + timestamps = subset(private$data.timestamps, select = data.sources) + return(timestamps) + } else { + subset.timestamps = private$data.timestamps[data.sources] + timestamps.buffer = data.frame(max = apply(subset.timestamps,1,max), + min = apply(subset.timestamps,1,min)) + timestamps = data.frame(start = timestamps.buffer["start", "max"], + end = timestamps.buffer["end", "min"]) + + return(timestamps) + } + + }, + + #' Cut the specified data sources to the same date range depending on the extracted + #' timestamps. + #' + #' @param data.sources the specified data sources + #' + #' @return a list of the cut data.sources + get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { + timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) + result = list() + if("mails" %in% data.sources) { + mails.cut = self$get.mails()[which(private$mails$date >= timestamps$start),] + mails.cut = mails.cut[which(mails.cut$date <= timestamps$end),] + result[["mails"]] = mails.cut + } + if("commits" %in% data.sources) { + commits.cut = self$get.commits.raw()[which(private$commits.raw$date >= timestamps$start),] + commits.cut = commits.cut[which(commits.cut$date <= timestamps$end),] + result[["commits"]] = commits.cut + } + if("issues" %in% data.sources) { + issues.cut = self$get.issues()[which(private$issues$creation.date >= timestamps$start),] + issues.cut = issues.cut[which(issues.cut$creation.date <= timestamps$end),] + result[["issues"]] = issues.cut + } + + 
return(result) + }, + #' Get single pasta items. #' For a given 'message.id', the associated 'commit.hash' is returned. #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. @@ -736,7 +882,7 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, return(private$revision.callgraph) } - ) + ) ) diff --git a/util-networks.R b/util-networks.R index 47ef2f73..68fba05b 100644 --- a/util-networks.R +++ b/util-networks.R @@ -60,6 +60,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## * * data and configuration -------------------------------------- proj.data = NULL, + proj.data.original = NULL, network.conf = NULL, ## * * network caching --------------------------------------------- @@ -72,6 +73,50 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph + ## * * data cutting --------------------------------------------- + + #' Clone the current data object and replace the specified + #' data sources by the cut ones + #' + #' @param cut.data the cut data sources + #' + #' @return the clone + clone.data = function(cut.data) { + clone = private$proj.data$clone() + if("mails" %in% names(cut.data)) { + clone$set.mails(cut.data$mails) + } + if("commits" %in% names(cut.data)) { + clone$set.commits.raw(cut.data$commits) + clone$set.commits.filtered(NULL) + clone$set.commits.filtered.empty(NULL) + } + if("issues" %in% names(cut.data)) { + clone$set.issues(cut.data$issues) + } + return(clone) + }, + + #' Cut the data sources of the data object to the same date ranges. + cut.data.to.same.timestamps = function() { + cut.data = private$proj.data$get.data.cut.to.same.date(data.sources = private$get.data.sources()) + clone = private$clone.data(cut.data = cut.data) + private$proj.data.original = private$proj.data + private$proj.data = clone + }, + + #' Determine which data sources should be cut depending on the artifact and author relation. 
+ #' + #' @return the data sources to be cut + get.data.sources = function() { + author.relation = private$network.conf$get.variable("author.relation") + artifact.relation = private$network.conf$get.variable("artifact.relation") + data.sources = c(RELATION.TO.DATASOURCE[[author.relation]]) + data.sources = c(data.sources, RELATION.TO.DATASOURCE[[artifact.relation]]) + data.sources = unique(data.sources) + return(data.sources) + }, + ## * * author networks --------------------------------------------- #' Get the co-change-based author relation as network. @@ -379,6 +424,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (class(self)[1] == "ProjectData") logging::loginfo("Initialized data object %s", self$get.class.name()) + + if(private$network.conf$get.variable("unify.date.ranges")) { + private$cut.data.to.same.timestamps() + } }, ## * * resetting environment --------------------------------------- @@ -390,6 +439,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$authors.network.cochange = NULL private$artifacts.network.cochange = NULL private$artifacts.network.callgraph = NULL + if(!is.null(private$proj.data.original)) { + private$proj.data = private$proj.data.original + private$proj.data.original = NULL + if(private$network.conf$get.variable("unify.date.ranges")) { + private$cut.data.to.same.timestamps() + } + } }, ## * * configuration ----------------------------------------------- From 5921070b874ba1aada50d429481baea37bef5cbe Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Mon, 11 Sep 2017 15:52:57 +0200 Subject: [PATCH 08/40] Adjust getter of NetworkConf to new type Signed-off-by: Christian Hechtl --- util-networks.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/util-networks.R b/util-networks.R index 68fba05b..dddfc049 100644 --- a/util-networks.R +++ b/util-networks.R @@ -109,8 +109,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @return the data sources to be cut get.data.sources = function() { - 
author.relation = private$network.conf$get.variable("author.relation") - artifact.relation = private$network.conf$get.variable("artifact.relation") + author.relation = private$network.conf$get.value("author.relation") + artifact.relation = private$network.conf$get.value("artifact.relation") data.sources = c(RELATION.TO.DATASOURCE[[author.relation]]) data.sources = c(data.sources, RELATION.TO.DATASOURCE[[artifact.relation]]) data.sources = unique(data.sources) @@ -425,7 +425,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (class(self)[1] == "ProjectData") logging::loginfo("Initialized data object %s", self$get.class.name()) - if(private$network.conf$get.variable("unify.date.ranges")) { + if(private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() } }, @@ -442,7 +442,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if(!is.null(private$proj.data.original)) { private$proj.data = private$proj.data.original private$proj.data.original = NULL - if(private$network.conf$get.variable("unify.date.ranges")) { + if(private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() } } From 7187e1f015e2434ebf5376775721513b23177e4a Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Wed, 13 Sep 2017 22:49:55 +0200 Subject: [PATCH 09/40] Add plot functions for metrics and change encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 64 +++++++++++++++++++++++--------------------------- plot-metrics.R | 61 ++++++++++++++++++++++++++++++++++++++++++++++- util-conf.R | 2 +- util-read.R | 8 +++---- 4 files changed, 95 insertions(+), 40 deletions(-) diff --git a/metrics.R b/metrics.R index 8a4f20f6..a0f3aefb 100644 --- a/metrics.R +++ b/metrics.R @@ -1,66 +1,61 @@ - requireNamespace("igraph") -hub.indegree = function(network){ +metrics.hub.indegree = function(network, project){ degrees = igraph::degree(network, mode = c("in")) 
vertex = which.max(degrees) - node = igraph::V(network)[vertex] - return(node) + df = data.frame("name" = names(vertex), "degree" = unname(vertex), "project" = project) + return(df) } -avg.outdegree = function(network) { +metrics.avg.outdegree = function(network, project) { outdegrees = igraph::degree(network, mode = c("out")) avg = mean(outdegrees) - return(avg) + df = data.frame("project" = project, "avg.degree" = avg) + return(df) } -node.degrees = function(network) { - return(igraph::degree(network, mode="total")) +metrics.node.degrees = function(network) { + degrees = igraph::degree(network, mode="total") + return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } -density = function(network) { +metrics.density = function(network, project) { density = igraph::graph.density(network) - return(density) + return(data.frame("project" = project, "density" = unname(density))) } -avg.pathlength = function(network) { - return(igraph::average.path.length(network, directed = TRUE, unconnected = FALSE)) +metrics.avg.pathlength = function(network, project) { + return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = TRUE, unconnected = FALSE))) } -clustering.coeff = function(network) { +metrics.clustering.coeff = function(network, project) { local.cc = igraph::transitivity(network, type = "local", vids = NULL) cc = mean(local.cc, na.rm = TRUE) - return(cc) + return(data.frame("project" = project, "clustering.coeff" = cc)) } -modularity = function(network) { +metrics.modularity = function(network, project) { comm = igraph::cluster_walktrap(network) mod = igraph::modularity(network, igraph::membership(comm)) - return(mod) + return(data.frame("project" = project, "modularity" = mod)) } -amount.nodes = function(network) { - return(igraph::vcount(network)) +metrics.amount.nodes = function(network, project) { + return(data.frame("project" = project, "amount.nodes" = igraph::vcount(network))) } # requires simplified 
network -smallworldness = function(network) { - smallworldness <- determine.smallworldness(network) - return(smallworldness) -} - - -determine.smallworldness = function(g) { +metrics.smallworldness = function(network, project) { # construct Erdös-Renyi network with same number of nodes and edges as g - h = igraph::erdos.renyi.game(n=igraph::vcount(g), p.or.m=igraph::gsize(g), type="gnm", directed=TRUE) + h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=TRUE) ## compute clustering coefficients - g.cc = igraph::transitivity(g) + g.cc = igraph::transitivity(network) h.cc = igraph::transitivity(h) ## compute average shortest-path length - g.l = igraph::average.path.length(g) + g.l = igraph::average.path.length(network) h.l = igraph::average.path.length(h) ## binary decision @@ -74,10 +69,10 @@ determine.smallworldness = function(g) { # if s.delta > 1, then the network is a small-world network #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - return (s.delta) + return (data.frame("project" = project, "smallworldness" = s.delta)) } -power.law.fitting = function(network) { +metrics.power.law.fitting = function(network) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -90,17 +85,18 @@ power.law.fitting = function(network) { ## Check percent of vertices under power-law res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) - - return(cbind(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law)) + df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) + browser() + return(data.frame("power.law" = names(df), "value" = c(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law))) } -generate.hierarchy = function(network) { +metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = 
igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(deg = log(degrees.without.cc), cc = cluster.coeff)) + return(data.frame(deg = log(degrees.without.cc), cc = log(cluster.coeff))) } diff --git a/plot-metrics.R b/plot-metrics.R index 86591bf9..31e65b80 100644 --- a/plot-metrics.R +++ b/plot-metrics.R @@ -1,10 +1,69 @@ requireNamespace("ggplot2") +#plots the maximum indegree one or more projects as a bar diagram +metrics.plot.hub.indegree = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.avg.outdegree = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.degree, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.node.degrees = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = name)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.density = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = density, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.avg.pathlength = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.papthlength, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.clustering.coeff = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = clustering.coeff, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.modularity = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = modularity, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.amount.nodes = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = amount.nodes, x = project)) + + ggplot2::geom_bar(stat="identity") + 
return(plot) +} -plot.hierarchy = function(df) { +metrics.plot.smallworldness = function(df) { + plot = ggplot2::ggplot(df, ggplot2::aes(y = smallworldness, x = project)) + + ggplot2::geom_bar(stat="identity") + return(plot) +} + +metrics.plot.power.law.fitting = function(df) { + +} + +metrics.plot.hierarchy = function(df) { plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg, color = deg)) + ggplot2::geom_point() + ggplot2::geom_smooth() return(plot) } + diff --git a/util-conf.R b/util-conf.R index e1c86133..4e4c67bb 100644 --- a/util-conf.R +++ b/util-conf.R @@ -621,7 +621,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, ## read revisions file revisions.file = file.path(conf$datapath, "revisions.list") revisions.df <- try(read.table(revisions.file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## break if the list of revisions is empty or any other error occurs if (inherits(revisions.df, 'try-error')) { logging::logerror("There are no revisions available for the current casestudy.") diff --git a/util-read.R b/util-read.R index cee5fceb..df9b76a5 100644 --- a/util-read.R +++ b/util-read.R @@ -39,7 +39,7 @@ read.commits.raw = function(data.path, artifact) { ## read data.frame from disk (as expected from save.list.to.file) [can be empty] commit.data <- try(read.table(file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the list of commits is empty if (inherits(commit.data, 'try-error')) { @@ -164,7 +164,7 @@ read.mails = function(data.path) { ## read data.frame from disk (as expected from save.list.to.file) [can be empty] mail.data <- try(read.table(file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the 
list of mails is empty if (inherits(mail.data, 'try-error')) { @@ -228,7 +228,7 @@ read.authors = function(data.path) { ## read data.frame from disk (as expected from save.list.to.file) [can be empty] authors.df <- try(read.table(file, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## break if the list of authors is empty if (inherits(authors.df, 'try-error')) { @@ -324,7 +324,7 @@ read.issues = function(data.path) { ## read issues from disk [can be empty] issue.data = try(read.table(filepath, header = FALSE, sep = ";", strip.white = TRUE, - fileEncoding = "latin1", encoding = "utf8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the list of commits is empty if (inherits(issue.data, 'try-error')) { From 3e616d8610f5f2b5c0084373dfda9230bdcfaacb Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Thu, 21 Sep 2017 00:31:34 +0200 Subject: [PATCH 10/40] Visual changes to hierarchy plot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-Off-By: Raphael Nömmer --- metrics.R | 9 ++++----- plot-metrics.R | 7 ++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metrics.R b/metrics.R index a0f3aefb..66cf69c5 100644 --- a/metrics.R +++ b/metrics.R @@ -15,7 +15,7 @@ metrics.avg.outdegree = function(network, project) { } metrics.node.degrees = function(network) { - degrees = igraph::degree(network, mode="total") + degrees = sort(igraph::degree(network, mode="total"), decreasing = TRUE) return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -25,7 +25,7 @@ metrics.density = function(network, project) { } metrics.avg.pathlength = function(network, project) { - return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = TRUE, unconnected = FALSE))) + return(data.frame("project" = project, "avg.pathlength" = 
igraph::average.path.length(network, directed = TRUE, unconnected = TRUE))) } metrics.clustering.coeff = function(network, project) { @@ -55,8 +55,8 @@ metrics.smallworldness = function(network, project) { h.cc = igraph::transitivity(h) ## compute average shortest-path length - g.l = igraph::average.path.length(network) - h.l = igraph::average.path.length(h) + g.l = igraph::average.path.length(network, unconnected = TRUE) + h.l = igraph::average.path.length(h, unconnected = TRUE) ## binary decision # intermediate variables @@ -86,7 +86,6 @@ metrics.power.law.fitting = function(network) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - browser() return(data.frame("power.law" = names(df), "value" = c(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law))) } diff --git a/plot-metrics.R b/plot-metrics.R index 31e65b80..2779903d 100644 --- a/plot-metrics.R +++ b/plot-metrics.R @@ -61,9 +61,10 @@ metrics.plot.power.law.fitting = function(df) { } metrics.plot.hierarchy = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg, color = deg)) + - ggplot2::geom_point() + - ggplot2::geom_smooth() + pred = predict(lm(cc ~ deg, data = df)) + plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg)) + + ggplot2::geom_point(ggplot2::aes(color = deg)) + + ggplot2::geom_line(ggplot2::aes(y = pred)) return(plot) } From 90cdc092dd11db9f4ee57fd1d1e36a4b354d5edf Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 21 Sep 2017 13:51:46 +0200 Subject: [PATCH 11/40] Rebuild cutting mechanism for incomplete Ranges The cutting is replaced by the already existing splitting mechanism Signed-off-by: Christian Hechtl --- util-data.R | 79 +++++++++---------------------------------------- util-networks.R | 40 +++++-------------------- 2 files changed, 21 insertions(+), 98 deletions(-) diff --git 
a/util-data.R b/util-data.R index 7b5bba31..220602d1 100644 --- a/util-data.R +++ b/util-data.R @@ -197,15 +197,15 @@ ProjectData = R6::R6Class("ProjectData", private$data.timestamps = data.frame(row.names = c("start", "end")) } if(source == "mails") { - private$data.timestamps$mails = c(as.POSIXct(min(private$mails$date)), - as.POSIXct(max(private$mails$date))) + private$data.timestamps$mails = c(min(private$mails$date), + max(private$mails$date)) } else if(source == "commits") { - private$data.timestamps$commits = c(as.POSIXct(min(private$commits.raw$date)), - as.POSIXct(max(private$commits.raw$date))) + private$data.timestamps$commits = c(min(private$commits.raw$date), + max(private$commits.raw$date)) } else if(source == "issues") { - private$data.timestamps$issues = c(as.POSIXct(min(private$issues$creation.date)), - as.POSIXct(max(private$issues$creation.date))) + private$data.timestamps$issues = c(min(private$issues$date), + max(private$issues$date)) } } @@ -352,10 +352,6 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered.empty) }, - set.commits.filtered.empty = function(data) { - private$commits.filtered.empty = data - }, - #' Get the list of commits without the base artifact. #' If it doesn´t already exist call the filter method. #' @@ -371,10 +367,6 @@ ProjectData = R6::R6Class("ProjectData", return(private$commits.filtered) }, - set.commits.filtered = function(data) { - private$commits.filtered = data - }, - #' Get the complete list of commits. #' If it doesn´t already exist call the read method first. 
#' @@ -551,32 +543,6 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, - set.artifacts = function(artifacts) { - logging::loginfo("Setting artifact data.") - private$artifacts = artifacts - }, - - ## get the list of issues - get.issues = function() { - logging::loginfo("Getting issue data") - - ## if issues have not been read yet do this - if(is.null(private$issues)) { - private$issues = read.issues(self$get.data.path.issues()) - } - private$extract.timestamps(source = "issues") - - return(private$issues) - }, - - #' Set the issue data to the given new data. - #' - #' @param issues the given new data - set.issues = function(issues) { - logging::loginfo("Setting issue data.") - private$issues = issues - }, - #' Get the timestamps (earliest and latest date) of the specified data sources. #' If 'simple' is TRUE return the overall latest start and earliest end date #' in order to cut the specified data sources to the same date ranges. @@ -586,17 +552,15 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return a data.frame with the timestamps get.data.timestamps = function(data.sources = c("mails", "commits", "issues"), simple = FALSE) { + data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) private$prepare.timestamps(data.sources = data.sources) - if(is.null(private$data.timestamps)) { - logging::logwarn("No timestamps available.") - return(data.frame()) - } else if(simple == FALSE) { + if(simple == FALSE) { timestamps = subset(private$data.timestamps, select = data.sources) return(timestamps) } else { subset.timestamps = private$data.timestamps[data.sources] - timestamps.buffer = data.frame(max = apply(subset.timestamps,1,max), - min = apply(subset.timestamps,1,min)) + timestamps.buffer = data.frame(max = apply(subset.timestamps, 1, max), + min = apply(subset.timestamps, 1, min)) timestamps = data.frame(start = timestamps.buffer["start", "max"], end = timestamps.buffer["end", "min"]) @@ 
-612,25 +576,11 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return a list of the cut data.sources get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { + data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) - result = list() - if("mails" %in% data.sources) { - mails.cut = self$get.mails()[which(private$mails$date >= timestamps$start),] - mails.cut = mails.cut[which(mails.cut$date <= timestamps$end),] - result[["mails"]] = mails.cut - } - if("commits" %in% data.sources) { - commits.cut = self$get.commits.raw()[which(private$commits.raw$date >= timestamps$start),] - commits.cut = commits.cut[which(commits.cut$date <= timestamps$end),] - result[["commits"]] = commits.cut - } - if("issues" %in% data.sources) { - issues.cut = self$get.issues()[which(private$issues$creation.date >= timestamps$start),] - issues.cut = issues.cut[which(issues.cut$creation.date <= timestamps$end),] - result[["issues"]] = issues.cut - } - - return(result) + timestamps.vector = c(timestamps$start, timestamps$end) + result = split.data.time.based(self, bins = timestamps.vector) + return(result[[1]]) }, #' Get single pasta items. 
@@ -790,7 +740,6 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) } - ) ) diff --git a/util-networks.R b/util-networks.R index dddfc049..a31fb3b4 100644 --- a/util-networks.R +++ b/util-networks.R @@ -75,34 +75,11 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## * * data cutting --------------------------------------------- - #' Clone the current data object and replace the specified - #' data sources by the cut ones - #' - #' @param cut.data the cut data sources - #' - #' @return the clone - clone.data = function(cut.data) { - clone = private$proj.data$clone() - if("mails" %in% names(cut.data)) { - clone$set.mails(cut.data$mails) - } - if("commits" %in% names(cut.data)) { - clone$set.commits.raw(cut.data$commits) - clone$set.commits.filtered(NULL) - clone$set.commits.filtered.empty(NULL) - } - if("issues" %in% names(cut.data)) { - clone$set.issues(cut.data$issues) - } - return(clone) - }, #' Cut the data sources of the data object to the same date ranges. cut.data.to.same.timestamps = function() { cut.data = private$proj.data$get.data.cut.to.same.date(data.sources = private$get.data.sources()) - clone = private$clone.data(cut.data = cut.data) - private$proj.data.original = private$proj.data - private$proj.data = clone + private$proj.data = cut.data }, #' Determine which data sources should be cut depending on the artifact and author relation. 
@@ -111,9 +88,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.data.sources = function() { author.relation = private$network.conf$get.value("author.relation") artifact.relation = private$network.conf$get.value("artifact.relation") - data.sources = c(RELATION.TO.DATASOURCE[[author.relation]]) - data.sources = c(data.sources, RELATION.TO.DATASOURCE[[artifact.relation]]) - data.sources = unique(data.sources) + data.sources = unique(c(RELATION.TO.DATASOURCE[[author.relation]], + RELATION.TO.DATASOURCE[[artifact.relation]])) return(data.sources) }, @@ -417,6 +393,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @param network.conf the network configuration initialize = function(project.data, network.conf) { private$proj.data = project.data + private$proj.data.original = project.data if(!missing(network.conf) && "NetworkConf" %in% class(network.conf)) { private$network.conf = network.conf @@ -439,12 +416,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$authors.network.cochange = NULL private$artifacts.network.cochange = NULL private$artifacts.network.callgraph = NULL - if(!is.null(private$proj.data.original)) { - private$proj.data = private$proj.data.original - private$proj.data.original = NULL - if(private$network.conf$get.value("unify.date.ranges")) { - private$cut.data.to.same.timestamps() - } + private$proj.data = private$proj.data.original + if(private$network.conf$get.value("unify.date.ranges")) { + private$cut.data.to.same.timestamps() } }, From 4be6f2fc9eccb477ea8973f48d19e5aaea47fce3 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 4 Oct 2017 13:01:21 +0200 Subject: [PATCH 12/40] Fix minor bugs in cutting mechanism The timestamps are now extracted when the issue getter is called A warning message is printed when the data sources don't overlap Add project data getter in the NetworkBuilder for testing reasons Signed-off-by: Christian Hechtl --- util-data.R | 8 +++++++- util-networks.R | 4 ++++ 2 files changed, 11 insertions(+), 1 
deletion(-) diff --git a/util-data.R b/util-data.R index 220602d1..5085cc00 100644 --- a/util-data.R +++ b/util-data.R @@ -510,6 +510,9 @@ ProjectData = R6::R6Class("ProjectData", if(is.null(private$issues)) { private$issues = read.issues(self$get.data.path.issues()) } + + private$extract.timestamps(source = "issues") + return(private$issues) }, @@ -579,6 +582,9 @@ ProjectData = R6::R6Class("ProjectData", data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) timestamps.vector = c(timestamps$start, timestamps$end) + if(timestamps$start > timestamps$end) { + logging::logwarn("The datasources don't overlap. The result will be empty.") + } result = split.data.time.based(self, bins = timestamps.vector) return(result[[1]]) }, @@ -831,7 +837,7 @@ RangeData = R6::R6Class("RangeData", inherit = ProjectData, return(private$revision.callgraph) } - ) + ) ) diff --git a/util-networks.R b/util-networks.R index a31fb3b4..57a31b1e 100644 --- a/util-networks.R +++ b/util-networks.R @@ -456,6 +456,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$network.conf$update.value(entry, value) }, + get.project.data = function() { + return(private$proj.data) + }, + #' Update the network configuration based on the given list #' of values and reset the environment afterwards #' From 9373d0172a29349e587f6cfec850ba59f6a792b6 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Wed, 4 Oct 2017 13:02:11 +0200 Subject: [PATCH 13/40] Add tests for the cutting mechanism on data and network side Signed-off-by: Christian Hechtl --- tests/test-data-cut.R | 51 +++++++++++++++++++++++++++++++++++++ tests/test-networks-cut.R | 53 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 tests/test-data-cut.R create mode 100644 tests/test-networks-cut.R diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R new file mode 
100644 index 00000000..07182c0c --- /dev/null +++ b/tests/test-data-cut.R @@ -0,0 +1,51 @@ +## (c) Christian Hechtl, 2017 +## hechtl@fim.uni-passau.de + + +context("Cutting functionality on ProjectData side.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Cut commit and mail data to same date range.", { + + ## configurations + + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + data.sources = c("mails", "commits") + + ## construct objects + + x.data = ProjectData$new(proj.conf) + + commit.data.expected = data.frame(commit.id=sprintf("", c(32712,32712,32713,32713)), + date=as.POSIXct(c("2016-07-12 15:58:59","2016-07-12 15:58:59","2016-07-12 16:00:45", + "2016-07-12 16:00:45")), + author.name=c("Claus Hunsen","Claus Hunsen","Olaf","Olaf"), + author.email=c("hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org", + "olaf@example.org"), + hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338","5a5ec9675e98187e1e92561e1888aa6f04faa338"), + changed.files=as.integer(c(1,1,1,1)), + added.lines=as.integer(c(1,1,1,1)), + deleted.lines=as.integer(c(1,1,0,0)), + diff.size=as.integer(c(2,2,1,1)), + file=c("test.c","test.c","test.c","test.c"), + artifact=c("A","defined(A)","A","defined(A)"), + artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), + artifact.diff.size=as.integer(c(1,1,1,1))) + + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.raw() + + expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + +}) diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R new file mode 100644 index 
00000000..9f7007c2 --- /dev/null +++ b/tests/test-networks-cut.R @@ -0,0 +1,53 @@ +## (c) Christian Hechtl, 2017 +## hechtl@fim.uni-passau.de + + +context("Cutting functionality on NetworkBuilder side.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + +test_that("Cut commit and mail data to same date range.", { + + ## configurations + + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + net.conf = NetworkConf$new() + net.conf$update.value(entry = "unify.date.ranges", value = TRUE) + + ## construct objects + + x.data = ProjectData$new(proj.conf) + x = NetworkBuilder$new(x.data, net.conf) + + commit.data.expected = data.frame(commit.id=sprintf("", c(32712,32712,32713,32713)), + date=as.POSIXct(c("2016-07-12 15:58:59","2016-07-12 15:58:59","2016-07-12 16:00:45", + "2016-07-12 16:00:45")), + author.name=c("Claus Hunsen","Claus Hunsen","Olaf","Olaf"), + author.email=c("hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org", + "olaf@example.org"), + hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338","5a5ec9675e98187e1e92561e1888aa6f04faa338"), + changed.files=as.integer(c(1,1,1,1)), + added.lines=as.integer(c(1,1,1,1)), + deleted.lines=as.integer(c(1,1,0,0)), + diff.size=as.integer(c(2,2,1,1)), + file=c("test.c","test.c","test.c","test.c"), + artifact=c("A","defined(A)","A","defined(A)"), + artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), + artifact.diff.size=as.integer(c(1,1,1,1))) + + commit.data = x$get.project.data()$get.commits.raw() + + expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + +}) From 1ec406c7dce7ad36648daf1ec36886dfc90d9ad1 Mon Sep 
17 00:00:00 2001 From: Christian Hechtl Date: Thu, 5 Oct 2017 21:58:01 +0200 Subject: [PATCH 14/40] Add documentation to new getter Add checking of mail data in the cutting tests. Signed-off-by: Christian Hechtl --- tests/test-data-cut.R | 13 +++++++++++++ tests/test-networks-cut.R | 13 +++++++++++++ util-data.R | 1 - util-networks.R | 4 ++++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index 07182c0c..0cf8420c 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -44,8 +44,21 @@ test_that("Cut commit and mail data to same date range.", { artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), artifact.diff.size=as.integer(c(1,1,1,1))) + mail.data.expected = data.frame(author.name=c("Thomas"), + author.email=c("thomas@example.org"), + message.id=c("<65a1sf31sagd684dfv31@mail.gmail.com>"), + date=as.POSIXct(c("2016-07-12 16:04:40")), + date.offset=as.integer(c(100)), + subject=c("Re: Fw: busybox 2 tab"), + thread=sprintf("", c(9))) + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.raw() + rownames(commit.data) = 1:nrow(commit.data) + + mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails() + rownames(mail.data) = 1:nrow(mail.data) expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") }) diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index 9f7007c2..c7c5e4e3 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -46,8 +46,21 @@ test_that("Cut commit and mail data to same date range.", { artifact.type=c("Feature","FeatureExpression","Feature","FeatureExpression"), artifact.diff.size=as.integer(c(1,1,1,1))) + mail.data.expected = data.frame(author.name=c("Thomas"), + author.email=c("thomas@example.org"), + message.id=c("<65a1sf31sagd684dfv31@mail.gmail.com>"), + 
date=as.POSIXct(c("2016-07-12 16:04:40")), + date.offset=as.integer(c(100)), + subject=c("Re: Fw: busybox 2 tab"), + thread=sprintf("", c(9))) + commit.data = x$get.project.data()$get.commits.raw() + rownames(commit.data) = 1:nrow(commit.data) + + mail.data = x$get.project.data()$get.mails() + rownames(mail.data) = 1:nrow(mail.data) expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") }) diff --git a/util-data.R b/util-data.R index 5085cc00..d65247c3 100644 --- a/util-data.R +++ b/util-data.R @@ -510,7 +510,6 @@ ProjectData = R6::R6Class("ProjectData", if(is.null(private$issues)) { private$issues = read.issues(self$get.data.path.issues()) } - private$extract.timestamps(source = "issues") return(private$issues) diff --git a/util-networks.R b/util-networks.R index 57a31b1e..88b886bc 100644 --- a/util-networks.R +++ b/util-networks.R @@ -456,6 +456,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$network.conf$update.value(entry, value) }, + #' Get the project data Object of the NetworkBuilder. + #' This Method is mainly used for testing purposes at the moment. 
+ #' + #' @return the project data object of the NetworkBuilder get.project.data = function() { return(private$proj.data) }, From 4c2bd70cce97a29bdb7f7d645c2ceda8337d0a3e Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Tue, 10 Oct 2017 16:18:20 +0200 Subject: [PATCH 15/40] Minor changes to metrics behaviour --- metrics.R | 31 ++++++++++++---------- plot-metrics.R | 70 ------------------------------------------------- util-networks.R | 1 + 3 files changed, 18 insertions(+), 84 deletions(-) delete mode 100644 plot-metrics.R diff --git a/metrics.R b/metrics.R index 66cf69c5..7c05ce27 100644 --- a/metrics.R +++ b/metrics.R @@ -1,15 +1,15 @@ requireNamespace("igraph") -metrics.hub.indegree = function(network, project){ - degrees = igraph::degree(network, mode = c("in")) +metrics.hub.degree = function(network, project){ + degrees = igraph::degree(network, mode = c("total")) vertex = which.max(degrees) - df = data.frame("name" = names(vertex), "degree" = unname(vertex), "project" = project) + df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "project" = project) return(df) } -metrics.avg.outdegree = function(network, project) { - outdegrees = igraph::degree(network, mode = c("out")) - avg = mean(outdegrees) +metrics.avg.degree = function(network, project) { + degrees = igraph::degree(network, mode = c("total")) + avg = mean(degrees) df = data.frame("project" = project, "avg.degree" = avg) return(df) } @@ -25,12 +25,16 @@ metrics.density = function(network, project) { } metrics.avg.pathlength = function(network, project) { - return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = TRUE, unconnected = TRUE))) + return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) } metrics.clustering.coeff = function(network, project) { - local.cc = igraph::transitivity(network, type = "local", vids = NULL) - cc = mean(local.cc, na.rm = 
TRUE) + cc = igraph::transitivity(network, type = "localaverage", vids = NULL) + return(data.frame("project" = project, "clustering.coeff" = cc)) +} + +metrics.clustering.coeff.global = function(network, project) { + cc = igraph::transitivity(network, type = "global", vids = NULL) return(data.frame("project" = project, "clustering.coeff" = cc)) } @@ -48,12 +52,11 @@ metrics.amount.nodes = function(network, project) { metrics.smallworldness = function(network, project) { # construct Erdös-Renyi network with same number of nodes and edges as g - h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=TRUE) + h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) ## compute clustering coefficients g.cc = igraph::transitivity(network) h.cc = igraph::transitivity(h) - ## compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) @@ -72,7 +75,7 @@ metrics.smallworldness = function(network, project) { return (data.frame("project" = project, "smallworldness" = s.delta)) } -metrics.power.law.fitting = function(network) { +metrics.power.law.fitting = function(network, project) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -86,7 +89,7 @@ metrics.power.law.fitting = function(network) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - return(data.frame("power.law" = names(df), "value" = c(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law))) + return(data.frame("project" = project, "KS.p" = res$KS.p)) } metrics.hierarchy = function(network) { @@ -96,6 +99,6 @@ metrics.hierarchy = function(network) { degrees.without.cc = subset(degrees, 
!(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(deg = log(degrees.without.cc), cc = log(cluster.coeff))) + return(data.frame(log.deg = log(degrees.without.cc), log.cc = log(cluster.coeff))) } diff --git a/plot-metrics.R b/plot-metrics.R deleted file mode 100644 index 2779903d..00000000 --- a/plot-metrics.R +++ /dev/null @@ -1,70 +0,0 @@ - -requireNamespace("ggplot2") - -#plots the maximum indegree one or more projects as a bar diagram -metrics.plot.hub.indegree = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.avg.outdegree = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.degree, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.node.degrees = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = degree, x = name)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.density = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = density, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.avg.pathlength = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = avg.papthlength, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.clustering.coeff = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = clustering.coeff, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.modularity = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = modularity, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.amount.nodes = function(df) { - plot = ggplot2::ggplot(df, ggplot2::aes(y = amount.nodes, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.smallworldness = function(df) { - plot = 
ggplot2::ggplot(df, ggplot2::aes(y = smallworldness, x = project)) + - ggplot2::geom_bar(stat="identity") - return(plot) -} - -metrics.plot.power.law.fitting = function(df) { - -} - -metrics.plot.hierarchy = function(df) { - pred = predict(lm(cc ~ deg, data = df)) - plot = ggplot2::ggplot(df, ggplot2::aes(y = cc, x = deg)) + - ggplot2::geom_point(ggplot2::aes(color = deg)) + - ggplot2::geom_line(ggplot2::aes(y = pred)) - return(plot) -} - diff --git a/util-networks.R b/util-networks.R index 47ef2f73..475ca8e9 100644 --- a/util-networks.R +++ b/util-networks.R @@ -388,6 +388,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", reset.environment = function() { private$authors.network.mail = NULL private$authors.network.cochange = NULL + private$authors.network.issue = NULL private$artifacts.network.cochange = NULL private$artifacts.network.callgraph = NULL }, From 53a24601452e33d7fe6670b745ab1d2a7c42d8ec Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Tue, 24 Oct 2017 13:58:37 +0200 Subject: [PATCH 16/40] Update pasta reading method and test Signed-off-by: Christian Hechtl --- .../results/testing/test_pasta/similar-mailbox | 2 +- tests/test-read.R | 5 +++-- util-read.R | 12 ++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/codeface-data/results/testing/test_pasta/similar-mailbox b/tests/codeface-data/results/testing/test_pasta/similar-mailbox index 546e1bad..ffba64c4 100644 --- a/tests/codeface-data/results/testing/test_pasta/similar-mailbox +++ b/tests/codeface-data/results/testing/test_pasta/similar-mailbox @@ -2,4 +2,4 @@ => 5a5ec9675e98187e1e92561e1888aa6f04faa338 => 3a0ed78458b3976243db6829f63eba3eead26774 => 1143db502761379c2bfcecc2007fc34282e7ee61 - => 0a1a5c523d835459c42f33e863623138555e2526 + => 0a1a5c523d835459c42f33e863623138555e2526 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 diff --git a/tests/test-read.R b/tests/test-read.R index 2cd0b0a2..87eaead2 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -155,11 
+155,12 @@ test_that("Read and parse the pasta data.", { ## build the expected data.frame pasta.data.expected = data.frame(message.id=c("","", "","", - "","",""), + "","","", + ""), commit.hash=c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0","5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774","1143db502761379c2bfcecc2007fc34282e7ee61", "1143db502761379c2bfcecc2007fc34282e7ee61","1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526")) + "0a1a5c523d835459c42f33e863623138555e2526", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0")) ## check the results expect_identical(pasta.data.read, pasta.data.expected, info = "PaStA data.") diff --git a/util-read.R b/util-read.R index 72491987..a0883576 100644 --- a/util-read.R +++ b/util-read.R @@ -254,7 +254,7 @@ read.authors = function(data.path) { ## PaStA data -------------------------------------------------------------- #' Read and parse the pasta data from the 'similar-mailbox' file. -#' The form in the file is : => commit.hash. +#' The form in the file is : ... => commit.hash commit.hash2 .... #' The parsed form is a data frame with message IDs as keys and commit hashes as values. 
#' #' @param data.path the path to the pasta data @@ -291,14 +291,18 @@ read.pasta = function(data.path) { # 1) split at arrow # 2) split keys - # 3) insert all key-value pairs by iteration (works also if there is only one key) + # 3) split values + # 4) insert all key-value pairs by iteration (works also if there is only one key) line.split = unlist(strsplit(line, SEPERATOR)) keys = line.split[1] - value = line.split[2] + values = line.split[2] keys.split = unlist(strsplit(keys, KEY.SEPERATOR)) + values.split = unlist(strsplit(values, KEY.SEPERATOR)) # Transform data to data.frame - df = data.frame(message.id = keys.split, commit.hash = value) + #df = data.frame(message.id = keys.split, commit.hash = values.split) + df = merge(keys.split, values.split) + colnames(df) = c("message.id", "commit.hash") return(df) }) result.df = plyr::rbind.fill(result.list) From abfafb9bbac88b061cf80771407fa555ac7b9c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raphael=20N=C3=B6mmer?= Date: Tue, 24 Oct 2017 14:11:10 +0200 Subject: [PATCH 17/40] Add comments to network metrics. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 117 +++++++++++++++++++++++++++++++++++++++++----------- util-read.R | 1 - 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/metrics.R b/metrics.R index 7c05ce27..1ce3c874 100644 --- a/metrics.R +++ b/metrics.R @@ -1,55 +1,117 @@ requireNamespace("igraph") -metrics.hub.degree = function(network, project){ + +#' Determine the maximum degree for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the name of the vertex with the maximum degree, the degree and +#' the name of the network that this value belongs to. 
+metrics.hub.degree = function(network, name){ degrees = igraph::degree(network, mode = c("total")) vertex = which.max(degrees) - df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "project" = project) + df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "name" = name) return(df) } -metrics.avg.degree = function(network, project) { +#' Calculate the average degree of a network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the average degree of the network and the name of the network. +metrics.avg.degree = function(network, name) { degrees = igraph::degree(network, mode = c("total")) avg = mean(degrees) - df = data.frame("project" = project, "avg.degree" = avg) + df = data.frame("name" = name, "avg.degree" = avg) return(df) } +#' Calculate all node degrees for the given network +#' +#' @param network The network to be examined +#' +#' @return A dataframe containing the nodes and their respective degrees. metrics.node.degrees = function(network) { degrees = sort(igraph::degree(network, mode="total"), decreasing = TRUE) return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } -metrics.density = function(network, project) { +#' Calculate the density of the given network +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the network density and the name of the network. +metrics.density = function(network, name) { density = igraph::graph.density(network) - return(data.frame("project" = project, "density" = unname(density))) + return(data.frame("name" = name, "density" = unname(density))) } -metrics.avg.pathlength = function(network, project) { - return(data.frame("project" = project, "avg.pathlength" = igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) +#' Calculate the average path length for the given network. 
+#' +#' @param network The network to e examined +#' @param name The name of the network +#' +#' @return A dataframe containing the average path length and the name of the network. +metrics.avg.pathlength = function(network, name) { + return(data.frame("name" = name, "avg.pathlength" = + igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) } -metrics.clustering.coeff = function(network, project) { +#' Calculate the average local clustering coefficient for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the average local clustering coefficient and the name of the network. +metrics.clustering.coeff = function(network, name) { cc = igraph::transitivity(network, type = "localaverage", vids = NULL) - return(data.frame("project" = project, "clustering.coeff" = cc)) + return(data.frame("name" = name, "clustering.coeff" = cc)) } -metrics.clustering.coeff.global = function(network, project) { +#' Calculate the global clustering coefficient for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the global clustering coefficient of the network and the name of the network. +metrics.clustering.coeff.global = function(network, name) { cc = igraph::transitivity(network, type = "global", vids = NULL) - return(data.frame("project" = project, "clustering.coeff" = cc)) + return(data.frame("name" = name, "clustering.coeff" = cc)) } -metrics.modularity = function(network, project) { +#' Calculate the modularity metric for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the modularity value for the given network and the name of the network. 
+metrics.modularity = function(network, name) { comm = igraph::cluster_walktrap(network) mod = igraph::modularity(network, igraph::membership(comm)) - return(data.frame("project" = project, "modularity" = mod)) + return(data.frame("name" = name, "modularity" = mod)) } -metrics.amount.nodes = function(network, project) { - return(data.frame("project" = project, "amount.nodes" = igraph::vcount(network))) +#' Count the number of nodes for the given network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the number of nodes in the network and the name of the network. +metrics.amount.nodes = function(network, name) { + return(data.frame("name" = name, "amount.nodes" = igraph::vcount(network))) } -# requires simplified network -metrics.smallworldness = function(network, project) { +#' Calculate the smallworldness value for the given network. +#' This metric requires a simplified network. +#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the smallworldness value of the network and the name of the network. +metrics.smallworldness = function(network, name) { # construct Erdös-Renyi network with same number of nodes and edges as g h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) @@ -72,10 +134,16 @@ metrics.smallworldness = function(network, project) { # if s.delta > 1, then the network is a small-world network #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - return (data.frame("project" = project, "smallworldness" = s.delta)) + return (data.frame("name" = name, "smallworldness" = s.delta)) } -metrics.power.law.fitting = function(network, project) { +#' Determine scale freeness of a network using the power law fitting method. 
+#' +#' @param network The network to be examined +#' @param name The name of the network +#' +#' @return A dataframe containing the scale freeness value of the network and the name of the network. +metrics.scale.freeness = function(network, name) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -89,16 +157,19 @@ metrics.power.law.fitting = function(network, project) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - return(data.frame("project" = project, "KS.p" = res$KS.p)) + return(data.frame("name" = name, "KS.p" = res$KS.p)) } +#' Calculate the hierarchy for a network +#' +#' @param network The network to be examined +#' +#' @return A dataframe containing the logarithm of the node degree and the logarithm of the local clustering coefficient for each node. metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) - degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.cc), log.cc = log(cluster.coeff))) } diff --git a/util-read.R b/util-read.R index df9b76a5..f7066e95 100644 --- a/util-read.R +++ b/util-read.R @@ -348,7 +348,6 @@ read.issues = function(data.path) { issue.data[["is.pull.request"]] = as.logical(issue.data[["is.pull.request"]]) ## convert dates and sort by 'date' column - print(issue.data) issue.data[["date"]] = as.POSIXct(issue.data[["date"]]) issue.data[["creation.date"]] = as.POSIXct(issue.data[["creation.date"]]) issue.data[["closing.date"]][ issue.data[["closing.date"]] == "" ] = NA From 7a19d354c2fd99d79117afc33b35a352039ff7df Mon Sep 17 00:00:00 2001 From: Claus Hunsen 
Date: Thu, 2 Nov 2017 14:55:57 +0100 Subject: [PATCH 18/40] Fix issue-reading functionality and corresponding tests Fix "author.email". Introduce "ref.name" column in tests (WIP and temporary fix). Add TODO item for fixing the test file "issues.list". Signed-off-by: Claus Hunsen Signed-off-by: Christian Hechtl --- .../results/testing/test_issues/issues.list | 72 +++++++++---------- tests/test-read.R | 9 ++- util-read.R | 2 +- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/tests/codeface-data/results/testing/test_issues/issues.list b/tests/codeface-data/results/testing/test_issues/issues.list index 9c6a939a..15ce1e0e 100644 --- a/tests/codeface-data/results/testing/test_issues/issues.list +++ b/tests/codeface-data/results/testing/test_issues/issues.list @@ -1,36 +1,36 @@ -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";11;"Karl";"karl@example.org";"2013-04-21 23:52:09";"created" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";11;"Karl";"karl@example.org";"2013-05-05 23:28:57";"commented" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";1;"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"referenced" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";1;"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"merged" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";1;"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"closed" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";11;"Karl";"karl@example.org";"2013-06-01 22:37:03";"head_ref_deleted" -2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";1342;"Thomas";"thomas@example.org";"2016-07-19 10:47:25";"referenced" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";15;"udo";"udo@example.org";"2016-04-17 02:07:37";"mentioned" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";15;"udo";"udo@example.org";"2016-04-17 02:07:37";"subscribed" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";1350;"Thomas";"thomas@example.org";"2016-07-14 
02:03:14";"commented" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";"commented" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"mentioned" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"subscribed" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";1350;"Thomas";"thomas@example.org";"2016-07-15 08:37:57";"commented" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"mentioned" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"subscribed" -48;"OPEN";"2016-04-17 02:06:38";"null";"false";1;"Olaf";"olaf@example.org";"2016-07-27 22:25:25";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1342;"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"mentioned" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1342;"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"subscribed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";"created" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";"renamed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-10-05 01:07:46";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";"commented" 
-51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"merged" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"closed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";1;"Olaf";"olaf@example.org";"2016-12-07 15:37:21";"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"created" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";13;"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";1;"Max";"max@example.org";"2017-05-23 12:32:21";"merged" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";1;"Max";"max@example.org";"2017-05-23 12:32:21";"closed" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";1;"Max";"max@example.org";"2017-05-23 12:32:39";"commented" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-04-21 23:52:09";;"created" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";;"commented" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"Olaf";"referenced" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"merged" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 
20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"closed" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-06-01 22:37:03";;"head_ref_deleted" +2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";"Thomas";"thomas@example.org";"2016-07-19 10:47:25";;"referenced" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";;"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";;"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-14 02:03:14";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-15 08:37:57";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Olaf";"olaf@example.org";"2016-07-27 22:25:25";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"mentioned" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"subscribed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";;"created" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";;"renamed" 
+51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-10-05 01:07:46";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"merged" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"closed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:21";;"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"created" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";;"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";;"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"merged" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"closed" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:39";;"commented" diff --git 
a/tests/test-read.R b/tests/test-read.R index 87eaead2..3419f54d 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -167,6 +167,8 @@ test_that("Read and parse the pasta data.", { }) test_that("Read and parse the issue data.", { + ## FIXME @Roger1995: update issues.list with a more recent content! + ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) @@ -179,9 +181,10 @@ test_that("Read and parse the issue data.", { creation.date=as.POSIXct(rep(c("2013-04-21 23:52:09","2016-04-17 02:06:38","2016-07-12 15:59:25","2016-04-17 02:06:38","2013-04-21 23:52:09","2016-04-17 02:06:38","2016-07-12 15:59:25","2016-12-07 15:53:02"), c(6,2,5,5,1,3,8,6))), closing.date=as.POSIXct(rep(c("2013-05-25 20:02:08",NA,"2016-12-07 15:37:02",NA,"2014-05-25 20:02:08",NA,"2016-12-07 15:37:02","2017-05-23 12:32:21"), c(6,2,5,5,1,3,8,6))), is.pull.request=rep(c(TRUE,FALSE,TRUE,FALSE,TRUE,FALSE,TRUE,TRUE), c(6,2,5,5,1,3,8,6)), - author.name=c("Karl","Karl","Olaf","Olaf","Olaf","Karl","udo","udo","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Olaf","Claus Hunsen","Olaf","Claus Hunsen","Claus Hunsen","Olaf","Olaf","Olaf","Claus Hunsen","Claus Hunsen","Claus Hunsen","Claus Hunsen","Max","Max","Max"), - 
author.email=c("karl@example.org","karl@example.org","olaf@example.org","olaf@example.org","olaf@example.org","karl@example.org","udo@example.org","udo@example.org","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","olaf@example.org","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","max@example.org","max@example.org","max@example.org"), - date=as.POSIXct(c("2013-04-21 23:52:09","2013-05-05 23:28:57","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-06-01 22:37:03","2016-04-17 02:07:37","2016-04-17 02:07:37","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 16:03:23","2016-07-12 16:05:47","2016-07-14 02:03:14","2016-07-14 17:42:52","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-19 10:47:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-08-31 18:21:48","2016-10-05 01:07:46","2016-10-13 15:33:56","2016-12-06 14:03:42","2016-12-07 15:37:02","2016-12-07 15:37:02","2016-12-07 15:37:21","2016-12-07 15:53:02","2016-12-07 15:53:02","2017-02-20 22:25:41","2017-03-02 17:30:10","2017-05-23 12:32:21","2017-05-23 12:32:21","2017-05-23 12:32:39")), + author.name=c("Karl","Karl","Karl","Olaf","Olaf","Karl","udo","udo","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Olaf","Claus Hunsen","Olaf","Claus Hunsen","Claus Hunsen","Olaf","Olaf","Olaf","Claus Hunsen","Claus Hunsen","Claus 
Hunsen","Claus Hunsen","Max","Max","Max"), + author.email=c("karl@example.org","karl@example.org","karl@example.org","olaf@example.org","olaf@example.org","karl@example.org","udo@example.org","udo@example.org","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","olaf@example.org","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","max@example.org","max@example.org","max@example.org"), + date=as.POSIXct(c("2013-04-21 23:52:09","2013-05-05 23:28:57","2013-05-05 23:28:57","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-06-01 22:37:03","2016-04-17 02:07:37","2016-04-17 02:07:37","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 16:03:23","2016-07-12 16:05:47","2016-07-14 02:03:14","2016-07-14 17:42:52","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-19 10:47:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-08-31 18:21:48","2016-10-05 01:07:46","2016-10-13 15:33:56","2016-12-06 14:03:42","2016-12-07 15:37:02","2016-12-07 15:37:02","2016-12-07 15:37:21","2016-12-07 15:53:02","2016-12-07 15:53:02","2017-02-20 22:25:41","2017-03-02 17:30:10","2017-05-23 12:32:21","2017-05-23 12:32:21","2017-05-23 12:32:39")), + ref.name=c(rep("", 2), "Olaf", rep("", 33)), 
event.name=c("created","commented","referenced","merged","closed","head_ref_deleted","mentioned","subscribed","mentioned","subscribed","created","renamed","commented","commented","commented","mentioned","subscribed","commented","referenced","mentioned","subscribed","commented","commented","commented","commented","commented","merged","closed","commented","commented","created","commented","commented","merged","closed","commented")) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( diff --git a/util-read.R b/util-read.R index a077f73a..59032a41 100644 --- a/util-read.R +++ b/util-read.R @@ -340,7 +340,7 @@ read.issues = function(data.path) { ## set proper column names colnames(issue.data) = c( "issue.id", "issue.state", "creation.date", "closing.date", "is.pull.request", # issue information - "author.name", "author.mail", # author information + "author.name", "author.email", # author information "date", # the date "ref.name", "event.name" # the event describing the row's entry ) From c0e0f2a2f44252eabbd4ccbd1e9739dd7762348b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raphael=20N=C3=B6mmer?= Date: Sat, 2 Dec 2017 16:13:03 +0100 Subject: [PATCH 19/40] Various minor changes to metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 137 ++++++++++++++++++++++++++---------------------------- 1 file changed, 66 insertions(+), 71 deletions(-) diff --git a/metrics.R b/metrics.R index 1ce3c874..9315f38b 100644 --- a/metrics.R +++ b/metrics.R @@ -1,31 +1,41 @@ +## (c) Thomas Bock, February 2015 +## bockthom@fim.uni-passau.de +## (c) Raphael Nömmer, 2017 +## noemmer@fim.uni-passau.de + + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Libraries --------------------------------------------------------------- + requireNamespace("igraph") +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Metric functions 
-------------------------------------------------------- + #' Determine the maximum degree for the given network. #' #' @param network The network to be examined -#' @param name The name of the network +#' @param mode The mode to be used for determining the degrees. #' -#' @return A dataframe containing the name of the vertex with the maximum degree, the degree and -#' the name of the network that this value belongs to. -metrics.hub.degree = function(network, name){ - degrees = igraph::degree(network, mode = c("total")) +#' @return A dataframe containing the name of the vertex with with maximum degree its degree. +metrics.hub.degree = function(network, mode){ + degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) - df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex]), "name" = name) + df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) return(df) } #' Calculate the average degree of a network. #' #' @param network The network to be examined -#' @param name The name of the network +#' @param mode The mode to be used for determining the degrees. #' -#' @return A dataframe containing the average degree of the network and the name of the network. -metrics.avg.degree = function(network, name) { - degrees = igraph::degree(network, mode = c("total")) +#' @return The average degree of the nodes in the network. +metrics.avg.degree = function(network, mode) { + degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) - df = data.frame("name" = name, "avg.degree" = avg) - return(df) + return(avg) } #' Calculate all node degrees for the given network @@ -38,87 +48,74 @@ metrics.node.degrees = function(network) { return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } -#' Calculate the density of the given network +#' Calculate the density of the given network. 
#' -#' @param network The network to be examined -#' @param name The name of the network +#' @param network The network to be examined. #' -#' @return A dataframe containing the network density and the name of the network. -metrics.density = function(network, name) { +#' @return The density of the network. +metrics.density = function(network) { density = igraph::graph.density(network) - return(data.frame("name" = name, "density" = unname(density))) + return(density) } #' Calculate the average path length for the given network. #' -#' @param network The network to e examined -#' @param name The name of the network +#' @param network The network to be examined. +#' @param directed Wehther the given network is directed or undirected. +#' @param unconnected Whether all nodes of the network are connected. #' -#' @return A dataframe containing the average path length and the name of the network. -metrics.avg.pathlength = function(network, name) { - return(data.frame("name" = name, "avg.pathlength" = - igraph::average.path.length(network, directed = FALSE, unconnected = TRUE))) +#' @return The average pathlength of the given network. +metrics.avg.pathlength = function(network, directed, unconnected) { + avg.pathlength = igraph::average.path.length(network, directed = directed, unconnected = unconnected) + return(avg.pathlength) } #' Calculate the average local clustering coefficient for the given network. #' -#' @param network The network to be examined -#' @param name The name of the network -#' -#' @return A dataframe containing the average local clustering coefficient and the name of the network. -metrics.clustering.coeff = function(network, name) { - cc = igraph::transitivity(network, type = "localaverage", vids = NULL) - return(data.frame("name" = name, "clustering.coeff" = cc)) -} - -#' Calculate the global clustering coefficient for the given network. 
-#' -#' @param network The network to be examined -#' @param name The name of the network +#' @param network The network to be examined. +#' @param cc.type The type of cluserting coefficient to be calculated, i.e. global or local. #' -#' @return A dataframe containing the global clustering coefficient of the network and the name of the network. -metrics.clustering.coeff.global = function(network, name) { - cc = igraph::transitivity(network, type = "global", vids = NULL) - return(data.frame("name" = name, "clustering.coeff" = cc)) +#' @return The average local clustering coefficient of the network. +metrics.clustering.coeff = function(network, cc.type) { + cc = igraph::transitivity(network, type = cc.type, vids = NULL) + return(cc) } #' Calculate the modularity metric for the given network. #' #' @param network The network to be examined -#' @param name The name of the network +#' @param community.detection.algorithm The algorithm to be used for the detection of communities which +#' is required for the calculation of the clustering coefficient. #' -#' @return A dataframe containing the modularity value for the given network and the name of the network. -metrics.modularity = function(network, name) { - comm = igraph::cluster_walktrap(network) +#' @return The modularity value for the given network. +metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { + comm = community.detection.algorithm(network) mod = igraph::modularity(network, igraph::membership(comm)) return(data.frame("name" = name, "modularity" = mod)) } -#' Count the number of nodes for the given network. +## This function determines whether a network can be considered a +## small-world network based on a quantitative categorical decision. +## +## The procedure used in this function is based on the work "Network +## 'Small-World-Ness': A Quantitative Method for Determining Canonical +## Network Equivalence" by Mark D. Humphries and Kevin Gurney [1]. 
+## [1] http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0002051 +## +## The algorithm relies on the Erdös-Renyi random network with the same number +## of nodes and edges as the given network. #' #' @param network The network to be examined -#' @param name The name of the network #' -#' @return A dataframe containing the number of nodes in the network and the name of the network. -metrics.amount.nodes = function(network, name) { - return(data.frame("name" = name, "amount.nodes" = igraph::vcount(network))) -} - -#' Calculate the smallworldness value for the given network. -#' This metric requires a simplified network. -#' -#' @param network The network to be examined -#' @param name The name of the network -#' -#' @return A dataframe containing the smallworldness value of the network and the name of the network. -metrics.smallworldness = function(network, name) { +#' @return The smallworldness value of the network. +metrics.smallworldness = function(network) { # construct Erdös-Renyi network with same number of nodes and edges as g h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) ## compute clustering coefficients - g.cc = igraph::transitivity(network) - h.cc = igraph::transitivity(h) + g.cc = igraph::transitivity(network, type = 'global') + h.cc = igraph::transitivity(h, type = 'global') ## compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) @@ -134,16 +131,15 @@ metrics.smallworldness = function(network, name) { # if s.delta > 1, then the network is a small-world network #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - return (data.frame("name" = name, "smallworldness" = s.delta)) + return ("smallworldness" = s.delta) } #' Determine scale freeness of a network using the power law fitting method. 
#' #' @param network The network to be examined -#' @param name The name of the network #' -#' @return A dataframe containing the scale freeness value of the network and the name of the network. -metrics.scale.freeness = function(network, name) { +#' @return A dataframe containing the different values, connected to scale-freeness. +metrics.scale.freeness = function(network) { v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) ## Power-law fiting @@ -157,10 +153,10 @@ metrics.scale.freeness = function(network, name) { res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) - return(data.frame("name" = name, "KS.p" = res$KS.p)) + return(df) } -#' Calculate the hierarchy for a network +#' Calculate the hierarchy for a network. #' #' @param network The network to be examined #' @@ -168,8 +164,7 @@ metrics.scale.freeness = function(network, name) { metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) - degrees.without.cc = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.cc), log.cc = log(cluster.coeff))) + degrees.without.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + return(data.frame(log.deg = log(degrees.without.without.cluster.coeff), log.cc = log(cluster.coeff))) } From 894feeb6afd2bdc53bf9f15cd99ac9b3bb9c1a3d Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Sat, 2 Dec 2017 20:40:12 +0100 Subject: [PATCH 20/40] Minor changes to metrics.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- metrics.R | 52 
++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/metrics.R b/metrics.R index 9315f38b..c50c20d3 100644 --- a/metrics.R +++ b/metrics.R @@ -19,7 +19,7 @@ requireNamespace("igraph") #' @param mode The mode to be used for determining the degrees. #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. -metrics.hub.degree = function(network, mode){ +metrics.hub.degree = function(network, modec = c("total", "in", "out")){ degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) @@ -32,7 +32,7 @@ metrics.hub.degree = function(network, mode){ #' @param mode The mode to be used for determining the degrees. #' #' @return The average degree of the nodes in the network. -metrics.avg.degree = function(network, mode) { +metrics.avg.degree = function(network, mode = c("total", "in", "out")) { degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) return(avg) @@ -41,10 +41,17 @@ metrics.avg.degree = function(network, mode) { #' Calculate all node degrees for the given network #' #' @param network The network to be examined +#' @param sort Whether the resulting dataframe is to be sorted by the node degree +#' @param sort.decreasing If sorting is active, this says whether the dataframe is to be sorted +#' in descending or ascending order. #' #' @return A dataframe containing the nodes and their respective degrees. 
-metrics.node.degrees = function(network) { - degrees = sort(igraph::degree(network, mode="total"), decreasing = TRUE) +metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { + if(sort) { + degrees = sort(igraph::degree(network, mode="total"), decreasing = sort.decreasing) + } else { + igraph::degree(network, mode="total") + } return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -73,10 +80,10 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' Calculate the average local clustering coefficient for the given network. #' #' @param network The network to be examined. -#' @param cc.type The type of cluserting coefficient to be calculated, i.e. global or local. +#' @param cc.type The type of cluserting coefficient to be calculated. #' -#' @return The average local clustering coefficient of the network. -metrics.clustering.coeff = function(network, cc.type) { +#' @return The clustering coefficient of the network. +metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { cc = igraph::transitivity(network, type = cc.type, vids = NULL) return(cc) } @@ -94,18 +101,18 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c return(data.frame("name" = name, "modularity" = mod)) } -## This function determines whether a network can be considered a -## small-world network based on a quantitative categorical decision. -## -## The procedure used in this function is based on the work "Network -## 'Small-World-Ness': A Quantitative Method for Determining Canonical -## Network Equivalence" by Mark D. Humphries and Kevin Gurney [1]. -## [1] http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0002051 -## -## The algorithm relies on the Erdös-Renyi random network with the same number -## of nodes and edges as the given network. 
+#' This function determines whether a network can be considered a +#' small-world network based on a quantitative categorical decision. #' -#' @param network The network to be examined +#' The procedure used in this function is based on the work "Network +#' 'Small-World-Ness': A Quantitative Method for Determining Canonical +#' Network Equivalence" by Mark D. Humphries and Kevin Gurney [1]. +#' [1] http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0002051 +#' +#' The algorithm relies on the Erdös-Renyi random network with the same number +#' of nodes and edges as the given network. +#' +#' @param network The network to be examined. This network needs to be simplified for the calculation to work. #' #' @return The smallworldness value of the network. metrics.smallworldness = function(network) { @@ -113,14 +120,14 @@ metrics.smallworldness = function(network) { # construct Erdös-Renyi network with same number of nodes and edges as g h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) - ## compute clustering coefficients + # compute clustering coefficients g.cc = igraph::transitivity(network, type = 'global') h.cc = igraph::transitivity(h, type = 'global') - ## compute average shortest-path length + # compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) - ## binary decision + # binary decision # intermediate variables gamma = g.cc / h.cc lambda = g.l / h.l @@ -164,7 +171,8 @@ metrics.scale.freeness = function(network) { metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) - degrees.without.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) + 
cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) return(data.frame(log.deg = log(degrees.without.without.cluster.coeff), log.cc = log(cluster.coeff))) } From ea0a252f14cae4637475ce5df30698d4135ac39e Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Sat, 2 Dec 2017 21:18:14 +0100 Subject: [PATCH 21/40] Update issues.list test file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- .../results/testing/test_issues/issues.list | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tests/codeface-data/results/testing/test_issues/issues.list b/tests/codeface-data/results/testing/test_issues/issues.list index 15ce1e0e..898bec9a 100644 --- a/tests/codeface-data/results/testing/test_issues/issues.list +++ b/tests/codeface-data/results/testing/test_issues/issues.list @@ -1,36 +1,36 @@ -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-04-21 23:52:09";;"created" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";;"commented" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"Olaf";"referenced" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"merged" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";;"closed" -2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-06-01 22:37:03";;"head_ref_deleted" -2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";"Thomas";"thomas@example.org";"2016-07-19 10:47:25";;"referenced" -48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";;"mentioned" -48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 
02:07:37";;"subscribed" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-04-21 23:52:09";"";"created" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"";"commented" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-05-05 23:28:57";"";"referenced" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"";"merged" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Olaf";"olaf@example.org";"2013-05-25 20:02:08";"";"closed" +2;"CLOSED";"2013-04-21 23:52:09";"2013-05-25 20:02:08";"true";"Karl";"karl@example.org";"2013-06-01 22:37:03";"";"head_ref_deleted" +2;"CLOSED";"2013-04-21 23:52:09";"2014-05-25 20:02:08";"true";"Thomas";"thomas@example.org";"2016-07-19 10:47:25";"";"referenced" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";"Karl";"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"udo";"udo@example.org";"2016-04-17 02:07:37";"Karl";"subscribed" 48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-14 02:03:14";;"commented" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";;"commented" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"mentioned" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";;"subscribed" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-15 08:37:57";;"commented" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"mentioned" -48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";;"subscribed" -48;"OPEN";"2016-04-17 
02:06:38";;"false";"Olaf";"olaf@example.org";"2016-07-27 22:25:25";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"mentioned" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";;"subscribed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";;"created" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";;"renamed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-10-05 01:07:46";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"merged" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";;"closed" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:21";;"commented" -51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";;"created" -57;"CLOSED";"2016-12-07 
15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";;"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";;"commented" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"merged" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";;"closed" -57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:39";;"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-14 17:42:52";"";"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"Thomas";"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-15 08:37:57";"Thomas";"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Thomas";"thomas@example.org";"2016-07-15 08:37:57";"";"commented" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"udo";"mentioned" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-27 22:25:25";"udo";"subscribed" +48;"OPEN";"2016-04-17 02:06:38";;"false";"Olaf";"olaf@example.org";"2016-07-27 22:25:25";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"Claus Hunsen";"mentioned" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Thomas";"thomas@example.org";"2016-07-12 15:59:25";"Claus Hunsen";"subscribed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 15:59:25";"";"created" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus 
Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:03:23";"";"renamed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-07-12 16:05:47";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-08-31 18:21:48";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-10-05 01:07:46";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-10-13 15:33:56";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-06 14:03:42";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"";"merged" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:02";"";"closed" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Olaf";"olaf@example.org";"2016-12-07 15:37:21";"";"commented" +51;"CLOSED";"2016-07-12 15:59:25";"2016-12-07 15:37:02";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"";"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2016-12-07 15:53:02";"";"created" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-02-20 22:25:41";"";"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Claus Hunsen";"hunsen@fim.uni-passau.de";"2017-03-02 17:30:10";"";"commented" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";"";"merged" +57;"CLOSED";"2016-12-07 15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:21";"";"closed" +57;"CLOSED";"2016-12-07 
15:53:02";"2017-05-23 12:32:21";"true";"Max";"max@example.org";"2017-05-23 12:32:39";"";"commented" From c8de29596686593f6b5bda4c9f2fd801209a548c Mon Sep 17 00:00:00 2001 From: Roger1995 Date: Sat, 2 Dec 2017 21:33:39 +0100 Subject: [PATCH 22/40] Change issue reading test according to issues.list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- tests/test-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-read.R b/tests/test-read.R index 3419f54d..4decebd1 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -184,7 +184,7 @@ test_that("Read and parse the issue data.", { author.name=c("Karl","Karl","Karl","Olaf","Olaf","Karl","udo","udo","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Claus Hunsen","Claus Hunsen","Claus Hunsen","Thomas","Thomas","Claus Hunsen","Claus Hunsen","Olaf","Claus Hunsen","Olaf","Claus Hunsen","Claus Hunsen","Olaf","Olaf","Olaf","Claus Hunsen","Claus Hunsen","Claus Hunsen","Claus Hunsen","Max","Max","Max"), author.email=c("karl@example.org","karl@example.org","karl@example.org","olaf@example.org","olaf@example.org","karl@example.org","udo@example.org","udo@example.org","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","thomas@example.org","thomas@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","olaf@example.org","olaf@example.org","olaf@example.org","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","hunsen@fim.uni-passau.de","max@example.org","max@example.org","max@example.org"), date=as.POSIXct(c("2013-04-21 23:52:09","2013-05-05 23:28:57","2013-05-05 
23:28:57","2013-05-25 20:02:08","2013-05-25 20:02:08","2013-06-01 22:37:03","2016-04-17 02:07:37","2016-04-17 02:07:37","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 15:59:25","2016-07-12 16:03:23","2016-07-12 16:05:47","2016-07-14 02:03:14","2016-07-14 17:42:52","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-15 08:37:57","2016-07-19 10:47:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-07-27 22:25:25","2016-08-31 18:21:48","2016-10-05 01:07:46","2016-10-13 15:33:56","2016-12-06 14:03:42","2016-12-07 15:37:02","2016-12-07 15:37:02","2016-12-07 15:37:21","2016-12-07 15:53:02","2016-12-07 15:53:02","2017-02-20 22:25:41","2017-03-02 17:30:10","2017-05-23 12:32:21","2017-05-23 12:32:21","2017-05-23 12:32:39")), - ref.name=c(rep("", 2), "Olaf", rep("", 33)), + ref.name=c(rep("", 7), rep("Karl", 2), rep("", 2), rep("Thomas", 2), "", rep("udo", 2), "", rep("Claus Hunsen", 2), rep("", 17)), event.name=c("created","commented","referenced","merged","closed","head_ref_deleted","mentioned","subscribed","mentioned","subscribed","created","renamed","commented","commented","commented","mentioned","subscribed","commented","referenced","mentioned","subscribed","commented","commented","commented","commented","commented","merged","closed","commented","commented","created","commented","commented","merged","closed","commented")) ## calculate event IDs issue.data.expected[["event.id"]] = sapply( From 9672a708fba12df2b478495cc3758611a9c3a0a2 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 16:26:42 +0100 Subject: [PATCH 23/40] Rename metrics, add parameter check in metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-init.R | 1 + metrics.R => util-metrics.R | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) rename metrics.R => util-metrics.R (96%) diff --git a/util-init.R b/util-init.R index dcd270df..21aa535a 100644 --- a/util-init.R +++ b/util-init.R @@ -23,3 
+23,4 @@ source("util-motifs.R") source("util-bulk.R") source("util-plot.R") source("util-core-peripheral.R") +source("util-metrics.R") diff --git a/metrics.R b/util-metrics.R similarity index 96% rename from metrics.R rename to util-metrics.R index c50c20d3..00893a1f 100644 --- a/metrics.R +++ b/util-metrics.R @@ -19,7 +19,8 @@ requireNamespace("igraph") #' @param mode The mode to be used for determining the degrees. #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. -metrics.hub.degree = function(network, modec = c("total", "in", "out")){ +metrics.hub.degree = function(network, mode = c("total", "in", "out")){ + match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) @@ -33,6 +34,7 @@ metrics.hub.degree = function(network, modec = c("total", "in", "out")){ #' #' @return The average degree of the nodes in the network. metrics.avg.degree = function(network, mode = c("total", "in", "out")) { + match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) return(avg) @@ -84,6 +86,7 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' #' @return The clustering coefficient of the network. 
metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { + match.arg(cc.type) cc = igraph::transitivity(network, type = cc.type, vids = NULL) return(cc) } @@ -173,6 +176,6 @@ metrics.hierarchy = function(network) { cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) - return(data.frame(log.deg = log(degrees.without.without.cluster.coeff), log.cc = log(cluster.coeff))) + return(data.frame(log.deg = log(degrees.without.cluster.coeff), log.cc = log(cluster.coeff))) } From b433115f7d70ddb776eac33472dcdc09aaf49461 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 19:25:54 +0100 Subject: [PATCH 24/40] Rename util-metrics.R to util-network-metrics.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-metrics.R => util-network-metrics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename util-metrics.R => util-network-metrics.R (98%) diff --git a/util-metrics.R b/util-network-metrics.R similarity index 98% rename from util-metrics.R rename to util-network-metrics.R index 00893a1f..9e0affc8 100644 --- a/util-metrics.R +++ b/util-network-metrics.R @@ -150,7 +150,7 @@ metrics.smallworldness = function(network) { #' #' @return A dataframe containing the different values, connected to scale-freeness. 
metrics.scale.freeness = function(network) { - v.degree <- sort(igraph::degree(network, mode="all"), decreasing=TRUE) + v.degree <- sort(igraph::degree(network, mode="total"), decreasing=TRUE) ## Power-law fiting ## (from Mitchell Joblin , Siemens AG, 2012, 2013) From 187d220ebc28c71879d9181fa2077467fc6a3302 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 19:29:41 +0100 Subject: [PATCH 25/40] Adjust documentation of metrics to follow guidelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-network-metrics.R | 43 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/util-network-metrics.R b/util-network-metrics.R index 9e0affc8..55331fe0 100644 --- a/util-network-metrics.R +++ b/util-network-metrics.R @@ -15,8 +15,8 @@ requireNamespace("igraph") #' Determine the maximum degree for the given network. #' -#' @param network The network to be examined -#' @param mode The mode to be used for determining the degrees. +#' @param network the network to be examined +#' @param mode the mode to be used for determining the degrees #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. metrics.hub.degree = function(network, mode = c("total", "in", "out")){ @@ -29,8 +29,8 @@ metrics.hub.degree = function(network, mode = c("total", "in", "out")){ #' Calculate the average degree of a network. #' -#' @param network The network to be examined -#' @param mode The mode to be used for determining the degrees. +#' @param network the network to be examined +#' @param mode the mode to be used for determining the degrees #' #' @return The average degree of the nodes in the network. 
metrics.avg.degree = function(network, mode = c("total", "in", "out")) { @@ -42,10 +42,10 @@ metrics.avg.degree = function(network, mode = c("total", "in", "out")) { #' Calculate all node degrees for the given network #' -#' @param network The network to be examined -#' @param sort Whether the resulting dataframe is to be sorted by the node degree -#' @param sort.decreasing If sorting is active, this says whether the dataframe is to be sorted -#' in descending or ascending order. +#' @param network the network to be examined +#' @param sort whether the resulting dataframe is to be sorted by the node degree +#' @param sort.decreasing if sorting is active, this says whether the dataframe is to be sorted +#' in descending or ascending order #' #' @return A dataframe containing the nodes and their respective degrees. metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { @@ -59,7 +59,7 @@ metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { #' Calculate the density of the given network. #' -#' @param network The network to be examined. +#' @param network the network to be examined #' #' @return The density of the network. metrics.density = function(network) { @@ -69,9 +69,9 @@ metrics.density = function(network) { #' Calculate the average path length for the given network. #' -#' @param network The network to be examined. -#' @param directed Wehther the given network is directed or undirected. -#' @param unconnected Whether all nodes of the network are connected. +#' @param network the network to be examined +#' @param directed wehther the given network is directed or undirected +#' @param unconnected whether all nodes of the network are connected #' #' @return The average pathlength of the given network. 
metrics.avg.pathlength = function(network, directed, unconnected) { @@ -81,8 +81,8 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' Calculate the average local clustering coefficient for the given network. #' -#' @param network The network to be examined. -#' @param cc.type The type of cluserting coefficient to be calculated. +#' @param network the network to be examined +#' @param cc.type the type of cluserting coefficient to be calculated #' #' @return The clustering coefficient of the network. metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { @@ -93,9 +93,9 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar #' Calculate the modularity metric for the given network. #' -#' @param network The network to be examined -#' @param community.detection.algorithm The algorithm to be used for the detection of communities which -#' is required for the calculation of the clustering coefficient. +#' @param network the network to be examined +#' @param community.detection.algorithm the algorithm to be used for the detection of communities which +#' is required for the calculation of the clustering coefficient #' #' @return The modularity value for the given network. metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { @@ -115,7 +115,7 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c #' The algorithm relies on the Erdös-Renyi random network with the same number #' of nodes and edges as the given network. #' -#' @param network The network to be examined. This network needs to be simplified for the calculation to work. +#' @param network the network to be examined. This network needs to be simplified for the calculation to work #' #' @return The smallworldness value of the network. 
metrics.smallworldness = function(network) { @@ -146,7 +146,7 @@ metrics.smallworldness = function(network) { #' Determine scale freeness of a network using the power law fitting method. #' -#' @param network The network to be examined +#' @param network the network to be examined #' #' @return A dataframe containing the different values, connected to scale-freeness. metrics.scale.freeness = function(network) { @@ -168,9 +168,10 @@ metrics.scale.freeness = function(network) { #' Calculate the hierarchy for a network. #' -#' @param network The network to be examined +#' @param network the network to be examined #' -#' @return A dataframe containing the logarithm of the node degree and the logarithm of the local clustering coefficient for each node. +#' @return A dataframe containing the logarithm of the node degree and the logarithm +#' of the local clustering coefficient for each node. metrics.hierarchy = function(network) { degrees = igraph::degree(network, mode="total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) From ddb93be9717d8349b45b794e98f5b5f015f8ae2f Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 5 Dec 2017 23:33:39 +0100 Subject: [PATCH 26/40] Rename metrics file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-network-metrics.R => util-networks-metrics.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename util-network-metrics.R => util-networks-metrics.R (100%) diff --git a/util-network-metrics.R b/util-networks-metrics.R similarity index 100% rename from util-network-metrics.R rename to util-networks-metrics.R From d4b53df20e6d2d89a0004810716cd749e7d57967 Mon Sep 17 00:00:00 2001 From: Raphael Date: Wed, 6 Dec 2017 23:41:17 +0100 Subject: [PATCH 27/40] Update filename in util-init.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raphael Nömmer --- util-init.R | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/util-init.R b/util-init.R index 21aa535a..0ab03439 100644 --- a/util-init.R +++ b/util-init.R @@ -23,4 +23,4 @@ source("util-motifs.R") source("util-bulk.R") source("util-plot.R") source("util-core-peripheral.R") -source("util-metrics.R") +source("util-networks-metrics.R") From 52dd2e314728ca32ebad2ae856b7b3b3dc290837 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Mon, 11 Dec 2017 14:41:50 +0100 Subject: [PATCH 28/40] Fix indentation to avoid merge conflict To avoid a merge conflict in PR #78, we fix the indentation of a statement in the file 'util-read.R'. Signed-off-by: Claus Hunsen --- util-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-read.R b/util-read.R index 59032a41..0a1db07b 100644 --- a/util-read.R +++ b/util-read.R @@ -328,7 +328,7 @@ read.issues = function(data.path) { ## read issues from disk [can be empty] issue.data = try(read.table(filepath, header = FALSE, sep = ";", strip.white = TRUE, - encoding = "UTF-8"), silent = TRUE) + encoding = "UTF-8"), silent = TRUE) ## handle the case that the list of commits is empty if (inherits(issue.data, 'try-error')) { From 7bfbe8403d6fdfb76d8856f3c2885028958ff12d Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Mon, 11 Dec 2017 16:24:02 +0100 Subject: [PATCH 29/40] Always add e-mail addresses in author data We now always have e-mail-address data available for authors, independent of the real data source containing those. If there is no data available, we add NAs. This is a follow-up for issue #69 and PR #71. 
Signed-off-by: Claus Hunsen --- util-read.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/util-read.R b/util-read.R index b67099ec..7578eb44 100644 --- a/util-read.R +++ b/util-read.R @@ -229,15 +229,15 @@ read.authors = function(data.path) { stop("Stopped due to missing authors.") } + ## if there is no third column, we need to add e-mail-address dummy data (NAs) + if (ncol(authors.df) != 3) { + authors.df[3] = NA + } + ## set proper column names based on Codeface extraction: ## ## SELECT a.name AS authorName, a.email1, m.creationDate, m.subject, m.threadId - cols.names = c("author.id", "author.name") - ## if there is a third column, we have e-mail-address data available - if (ncol(authors.df) == 3) { - cols.names = c(cols.names, "author.email") - } - colnames(authors.df) = cols.names + colnames(authors.df) = c("author.id", "author.name", "author.email") ## store the ID--author mapping logging::logdebug("read.authors: finished.") From a803425e6bdb54c1654fb9de1f9375499e3aa829 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 09:57:51 +0100 Subject: [PATCH 30/40] Change 'commits.raw' to 'commits' For easier internal use of data-source names (i.e., "commits", "issues", and "mails"), the data item containing the commit data is now called "commits" -- and not "commits.raw" anymore. All corresponding methods and method calls are renamed accordingly. This change will make it easier to handle data sources by their specific name, e.g., when performing a parameterizable subset 'proj.data[[data.source.name]]'. Note: The methods 'ProjectData$get.commits.raw()' and 'ProjectData$set.commits.raw()' are still there for compatibility reasons. They are now mere delegates to the new methods. 
Signed-off-by: Claus Hunsen --- test.R | 4 +- tests/test-data-cut.R | 2 +- tests/test-networks-cut.R | 2 +- tests/test-read.R | 2 +- tests/test-split.R | 106 +++++++++++++++++++------------------- util-core-peripheral.R | 2 +- util-data.R | 61 ++++++++++++++++------ util-networks.R | 2 +- util-read.R | 16 ++++-- util-split.R | 8 +-- 10 files changed, 121 insertions(+), 84 deletions(-) diff --git a/test.R b/test.R index 19ff793c..ea455e38 100644 --- a/test.R +++ b/test.R @@ -63,7 +63,7 @@ x = NetworkBuilder$new(project.data = x.data, network.conf = net.conf) ## * Data retrieval -------------------------------------------------------- -# x.data$get.commits.raw() +# x.data$get.commits() # x.data$get.synchronicity() # x.data$get.author2artifact() # x.data$get.commits.filtered() @@ -107,7 +107,7 @@ y = NetworkBuilder$new(project.data = y.data, network.conf = net.conf) ## * Data retrieval -------------------------------------------------------- -# y.data$get.commits.raw() +# y.data$get.commits() # y.data$get.synchronicity() # y.data$get.author2artifact() # y.data$get.commits.filtered() diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index 0cf8420c..3b3f461d 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -52,7 +52,7 @@ test_that("Cut commit and mail data to same date range.", { subject=c("Re: Fw: busybox 2 tab"), thread=sprintf("", c(9))) - commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.raw() + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits() rownames(commit.data) = 1:nrow(commit.data) mail.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails() diff --git a/tests/test-networks-cut.R b/tests/test-networks-cut.R index c7c5e4e3..9d7985e1 100644 --- a/tests/test-networks-cut.R +++ b/tests/test-networks-cut.R @@ -54,7 +54,7 @@ test_that("Cut commit and mail data to same date range.", { subject=c("Re: Fw: busybox 2 tab"), thread=sprintf("", 
c(9))) - commit.data = x$get.project.data()$get.commits.raw() + commit.data = x$get.project.data()$get.commits() rownames(commit.data) = 1:nrow(commit.data) mail.data = x$get.project.data()$get.mails() diff --git a/tests/test-read.R b/tests/test-read.R index 0fe3001c..6363c571 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -22,7 +22,7 @@ test_that("Read the raw commit data.", { proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) ## read the actual data - commit.data.read = read.commits.raw(proj.conf$get.value("datapath"), proj.conf$get.value("artifact")) + commit.data.read = read.commits(proj.conf$get.value("datapath"), proj.conf$get.value("artifact")) ## build the expected data.frame commit.data.expected = data.frame(commit.id=sprintf("", c(32712,32712,32713,32713,32710,32710,32714,32711,32711)), diff --git a/tests/test-split.R b/tests/test-split.R index 9f34aa0e..2f2ac965 100644 --- a/tests/test-split.R +++ b/tests/test-split.R @@ -46,7 +46,7 @@ test_that("Split a data object time-based (split.basis == 'commits').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -68,10 +68,10 @@ test_that("Split a data object time-based (split.basis == 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits.raw[1:4, ], + commits = list( + "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$commits[1:4, ], "2016-07-12 16:01:59-2016-07-12 16:04:59" = data.frame(), - "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits.raw[5:9, ] + "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commits[5:9, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:01:59" = data.frame(), @@ -95,7 +95,7 @@ test_that("Split a data object 
time-based (split.basis == 'commits').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -119,7 +119,7 @@ test_that("Split a data object time-based (split.basis == 'mails').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -142,11 +142,11 @@ test_that("Split a data object time-based (split.basis == 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2004-10-09 18:38:13-2007-10-09 18:38:13" = data.frame(), "2007-10-09 18:38:13-2010-10-09 18:38:13" = data.frame(), "2010-10-09 18:38:13-2013-10-09 18:38:13" = data.frame(), - "2013-10-09 18:38:13-2016-07-12 16:05:38" = data$commits.raw[1:4, ] + "2013-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:4, ] ), mails = list( "2004-10-09 18:38:13-2007-10-09 18:38:13" = data$mails[rownames(data$mails) %in% 1:2, ], @@ -174,7 +174,7 @@ test_that("Split a data object time-based (split.basis == 'mails').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -198,7 +198,7 @@ test_that("Split a data object time-based (split.basis == 'issues').", { ## data object project.data = 
ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -220,9 +220,9 @@ test_that("Split a data object time-based (split.basis == 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2013-04-21 23:52:09-2015-04-21 23:52:09" = data.frame(), - "2015-04-21 23:52:09-2017-04-21 23:52:09" = data$commits.raw, + "2015-04-21 23:52:09-2017-04-21 23:52:09" = data$commits, "2017-04-21 23:52:09-2017-05-23 12:32:40" = data.frame() ), mails = list( @@ -247,7 +247,7 @@ test_that("Split a data object time-based (split.basis == 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -271,7 +271,7 @@ test_that("Split a data object time-based (bins == ... ).", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -291,8 +291,8 @@ test_that("Split a data object time-based (bins == ... 
).", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$commits.raw + commits = list( + "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$commits ), mails = list( "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$mails[rownames(data$mails) %in% 13:17, ] @@ -308,7 +308,7 @@ test_that("Split a data object time-based (bins == ... ).", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -332,7 +332,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -354,10 +354,10 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits.raw[1:4, ], - "2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits.raw[5:7, ], - "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits.raw[8:9, ] + commits = list( + "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$commits[1:4, ], + "2016-07-12 16:05:41-2016-07-12 16:06:32" = data$commits[5:7, ], + "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commits[8:9, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:05:41" = data$mails[rownames(data$mails) %in% 16:17, ], @@ -381,7 +381,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ) ) results.data = 
list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -394,7 +394,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## ## split data - results = split.data.activity.based(project.data, activity.amount = nrow(data$commits.raw) + 10, + results = split.data.activity.based(project.data, activity.amount = nrow(data$commits) + 10, activity.type = "commits", sliding.window = FALSE) ## check time ranges @@ -406,8 +406,8 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$commits.raw + commits = list( + "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$commits ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$mails[rownames(data$mails) %in% 16:17, ] @@ -423,7 +423,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -449,9 +449,9 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$commits.raw[1:6, ], - "2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits.raw[7:9, ] + commits = list( + "2016-07-12 
15:58:59-2016-07-12 16:06:10" = data$commits[1:6, ], + "2016-07-12 16:06:10-2016-07-12 16:06:33" = data$commits[7:9, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$mails[rownames(data$mails) %in% 16:17, ], @@ -471,7 +471,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -482,7 +482,7 @@ test_that("Split a data object activity-based (activity.type = 'commits').", { ## too large number of windows expect_error( - split.data.activity.based(project.data, activity.type = "commits", number.windows = nrow(project.data$get.commits.raw()) + 10), + split.data.activity.based(project.data, activity.type = "commits", number.windows = nrow(project.data$get.commits()) + 10), info = "Error expected (number.windows) (1)." 
) @@ -507,7 +507,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -532,12 +532,12 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2004-10-09 18:38:13-2010-07-12 11:05:35" = data.frame(), "2010-07-12 11:05:35-2010-07-12 12:05:41" = data.frame(), "2010-07-12 12:05:41-2010-07-12 12:05:44" = data.frame(), "2010-07-12 12:05:44-2016-07-12 15:58:40" = data.frame(), - "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits.raw[1:4, ], + "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$commits[1:4, ], "2016-07-12 16:05:37-2016-07-12 16:05:38" = data.frame() ), mails = list( @@ -574,7 +574,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -599,8 +599,8 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits.raw[1:4, ] + commits = list( + "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commits[1:4, ] ), mails = list( "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$mails @@ -616,7 +616,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ) ) 
results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -642,9 +642,9 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ## check data for all ranges expected.data = list( - commits.raw = list( + commits = list( "2004-10-09 18:38:13-2010-07-12 12:05:43" = data.frame(), - "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits.raw[1:4, ] + "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$commits[1:4, ] ), mails = list( "2004-10-09 18:38:13-2010-07-12 12:05:43" = data$mails[rownames(data$mails) %in% 1:8, ], @@ -664,7 +664,7 @@ test_that("Split a data object activity-based (activity.type = 'mails').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -699,7 +699,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## data object project.data = ProjectData$new(proj.conf) data = list( - commits.raw = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues(), synchronicity = project.data$get.synchronicity(), @@ -722,9 +722,9 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits.raw[1:6, ], - "2016-07-12 16:05:47-2016-08-31 
18:21:48" = data$commits.raw[7:9, ], + commits = list( + "2013-04-21 23:52:09-2016-07-12 16:05:47" = data$commits[1:6, ], + "2016-07-12 16:05:47-2016-08-31 18:21:48" = data$commits[7:9, ], "2016-08-31 18:21:48-2017-02-20 22:25:41" = data.frame(), "2017-02-20 22:25:41-2017-05-23 12:32:40" = data.frame() ), @@ -754,7 +754,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -779,8 +779,8 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2013-04-21 23:52:09-2017-05-23 12:32:40" = data$commits.raw + commits = list( + "2013-04-21 23:52:09-2017-05-23 12:32:40" = data$commits ), mails = list( "2013-04-21 23:52:09-2017-05-23 12:32:40" = data$mails[rownames(data$mails) %in% 14:17, ] @@ -796,7 +796,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), @@ -822,8 +822,8 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ## check data for all ranges expected.data = list( - commits.raw = list( - "2013-04-21 23:52:09-2016-07-27 22:25:25" = data$commits.raw, + commits = list( + "2013-04-21 23:52:09-2016-07-27 22:25:25" = data$commits, 
"2016-07-27 22:25:25-2017-05-23 12:32:40" = data.frame() ), mails = list( @@ -844,7 +844,7 @@ test_that("Split a data object activity-based (activity.type = 'issues').", { ) ) results.data = list( - commits.raw = lapply(results, function(cf.data) cf.data$get.commits.raw()), + commits = lapply(results, function(cf.data) cf.data$get.commits()), mails = lapply(results, function(cf.data) cf.data$get.mails()), issues = lapply(results, function(cf.data) cf.data$get.issues()), synchronicity = lapply(results, function(cf.data) cf.data$get.synchronicity()), diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 0e14435f..a66b0323 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -968,7 +968,7 @@ get.commit.data = function(range.data, columns = c("author.name", "author.email" logging::logdebug("get.commit.data: starting.") ## Get commit data - commits.df = range.data$get.commits.raw() + commits.df = range.data$get.commits() ## In case no commit data is available, return NA if(nrow(commits.df) == 0) { diff --git a/util-data.R b/util-data.R index f28f4c57..0d210fef 100644 --- a/util-data.R +++ b/util-data.R @@ -15,9 +15,9 @@ requireNamespace("R6") # for R6 classes requireNamespace("logging") # for logging requireNamespace("parallel") # for parallel computation -## / / / / / / / / / / / / / / -## Constant -## + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Constants --------------------------------------------------------------- ## mapping of relation to data source RELATION.TO.DATASOURCE = list( @@ -46,7 +46,7 @@ ProjectData = R6::R6Class("ProjectData", ## commits and commit data commits.filtered = NULL, # data.frame commits.filtered.empty = NULL, #data.frame - commits.raw = NULL, # data.frame + commits = NULL, # data.frame artifacts = NULL, # list synchronicity = NULL, # data.frame pasta = NULL, # data.frame @@ -107,7 +107,7 @@ ProjectData = R6::R6Class("ProjectData", } ## get raw commit data - commit.data = 
self$get.commits.raw() + commit.data = self$get.commits() ## break if the list of commits is empty if (nrow(commit.data) == 0) { @@ -182,7 +182,7 @@ ProjectData = R6::R6Class("ProjectData", self$get.mails() } if("commits" %in% data.sources) { - self$get.commits.raw() + self$get.commits() } if("issues" %in% data.sources) { self$get.issues() @@ -202,8 +202,8 @@ ProjectData = R6::R6Class("ProjectData", private$data.timestamps$mails = c(min(private$mails$date), max(private$mails$date)) } else if(source == "commits") { - private$data.timestamps$commits = c(min(private$commits.raw$date), - max(private$commits.raw$date)) + private$data.timestamps$commits = c(min(private$commits$date), + max(private$commits$date)) } else if(source == "issues") { private$data.timestamps$issues = c(min(private$issues$date), @@ -245,7 +245,7 @@ ProjectData = R6::R6Class("ProjectData", reset.environment = function() { private$commits.filtered = NULL private$commits.filtered.empty = NULL - private$commits.raw = NULL + private$commits = NULL private$artifacts = NULL private$synchronicity = NULL private$mails = NULL @@ -332,6 +332,9 @@ ProjectData = R6::R6Class("ProjectData", return(data.path) }, + #' Get the absolute path to the result folder for issue data. + #' + #' @return the path to the issue data get.data.path.issues = function() { data.path = private$project.conf$get.value("datapath.issues") return(data.path) @@ -373,28 +376,47 @@ ProjectData = R6::R6Class("ProjectData", #' If it doesn´t already exist call the read method first. 
#' #' @return the list of commits - get.commits.raw = function() { + get.commits = function() { logging::loginfo("Getting raw commit data.") ## if commits are not read already, do this - if (is.null(private$commits.raw)) { - private$commits.raw = read.commits.raw( + if (is.null(private$commits)) { + private$commits = read.commits( self$get.data.path(), private$project.conf$get.value("artifact") ) } private$extract.timestamps(source = "commits") - return(private$commits.raw) + return(private$commits) + }, + + #' Get the complete list of commits. + #' If it doesn´t already exist call the read method first. + #' + #' Note: This is just a delegate for \code{ProjectData$get.commits()}. + #' + #' @return the list of commits + get.commits.raw = function() { + return(self$get.commits()) }, #' Set the commit list of the project to a new one. #' #' @param data the new list of commits - set.commits.raw = function(data) { + set.commits = function(data) { logging::loginfo("Setting raw commit data.") if (is.null(data)) data = data.frame() - private$commits.raw = data + private$commits = data + }, + + #' Set the commit list of the project to a new one. + #' + #' Note: This is just a delegate for \code{ProjectData$set.commits(data)}. + #' + #' @param data the new list of commits + set.commits.raw = function(data) { + self$set.commits(data) }, #' Get the synchronicity data. @@ -694,7 +716,9 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, - + #' Map the corresponding authors to each issue and return the list. + #' + #' @return the list of authors for each issue get.issue2author = function() { logging::loginfo("Getting issue--author data") @@ -703,6 +727,9 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, + #' Map the corresponding issues to each author and return the list. 
+ #' + #' @return the list of issues for each author get.author2issue = function() { logging::loginfo("Getting author--issue data") @@ -718,7 +745,7 @@ ProjectData = R6::R6Class("ProjectData", logging::loginfo("Getting author--commit data.") ## store the authors per artifact - mylist = get.key.to.value.from.df(self$get.commits.raw(), "author.name", "hash") + mylist = get.key.to.value.from.df(self$get.commits(), "author.name", "hash") mylist = parallel::mclapply(mylist, unique) return(mylist) diff --git a/util-networks.R b/util-networks.R index c20b3656..2e9cfd60 100644 --- a/util-networks.R +++ b/util-networks.R @@ -582,7 +582,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## remove vertices that are not committers if wanted if (private$network.conf$get.value("author.only.committers")) { - committers = unique(private$proj.data$get.commits.raw()[["author.name"]]) + committers = unique(private$proj.data$get.commits()[["author.name"]]) authors = igraph::get.vertex.attribute(u, "name", igraph::V(u)[ type == TYPE.AUTHOR ]) authors.to.remove = setdiff(authors, committers) u = igraph::delete.vertices(u, authors.to.remove) diff --git a/util-read.R b/util-read.R index 7578eb44..50b646b5 100644 --- a/util-read.R +++ b/util-read.R @@ -24,8 +24,8 @@ requireNamespace("digest") # for sha1 hashing of IDs #' @param artifact the artifact whichs commits are read #' #' @return the read commits -read.commits.raw = function(data.path, artifact) { - logging::logdebug("read.commits.raw: starting.") +read.commits = function(data.path, artifact) { + logging::logdebug("read.commits: starting.") file = file.path(data.path, "commits.list") @@ -91,10 +91,20 @@ read.commits.raw = function(data.path, artifact) { commit.data[["commit.id"]] = sprintf("", commit.data[["commit.id"]]) ## store the commit data - logging::logdebug("read.commits.raw: finished.") + logging::logdebug("read.commits: finished.") return(commit.data) } +#' Read the commits from the 'commits.list' file. 
+#' +#' @param data.path the path to the commit list +#' @param artifact the artifact whose commits are read +#' +#' @return the read commits +read.commits.raw = function(data.path, artifact) { +    return(read.commits(data.path = data.path, artifact = artifact)) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Synchronicity data ------------------------------------------------------ diff --git a/util-split.R b/util-split.R index 9ca3b677..76b644dd 100644 --- a/util-split.R +++ b/util-split.R @@ -40,7 +40,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = split.basis = c("commits", "mails", "issues"), sliding.window = FALSE) { ## get actual raw data data = list( - commits = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues() ) @@ -110,7 +110,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = ## set data ## 1) commits - cf.range.data$set.commits.raw(df.list[["commits"]]) + cf.range.data$set.commits(df.list[["commits"]]) ## 2) mails cf.range.data$set.mails(df.list[["mails"]]) ## 3) issues @@ -203,7 +203,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits", ## get actual raw data data = list( - commits = project.data$get.commits.raw(), + commits = project.data$get.commits(), mails = project.data$get.mails(), issues = project.data$get.issues() ) @@ -280,7 +280,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits", ## clone the project data and update raw data to split it again project.data.clone = project.data$clone() - project.data.clone$set.commits.raw(data[["commits"]]) + project.data.clone$set.commits(data[["commits"]]) project.data.clone$set.mails(data[["mails"]]) ## split data for sliding windows From 886d318de5a68a5054fa4c4d9c48642d66bee29c Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 17:44:37 
+0100 Subject: [PATCH 31/40] Refactor cutting mechanism in data classes After the landing of commit a803425e6bdb54c1654fb9de1f9375499e3aa829, the general code for the data cutting mechanism is streamlined: 1) The 'ProjectData$data.timestamps' attribute is now transposed -- to map data sources per line to their respective timestamps in the columns. This way is more intuitive and better for later access. 2) All related methods are adapted accordingly. 3) The amount of inline documentation is increased significantly. Signed-off-by: Claus Hunsen Reviewed-by: Thomas Bock --- util-data.R | 112 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 40 deletions(-) diff --git a/util-data.R b/util-data.R index 0d210fef..9d0b6a6e 100644 --- a/util-data.R +++ b/util-data.R @@ -173,21 +173,16 @@ ProjectData = R6::R6Class("ProjectData", return(data) }, + ## * * timestamps ------------------------------------------- + #' Call the getters of the specified data sources in order to #' initialize the sources and extract the timestamps. 
#' #' @param data.sources the data sources to be prepated prepare.timestamps = function(data.sources) { - if("mails" %in% data.sources) { - self$get.mails() - } - if("commits" %in% data.sources) { - self$get.commits() - } - if("issues" %in% data.sources) { - self$get.issues() + for(source in data.sources) { + self[[ paste0("get.", source) ]]() } - }, #' Extract the earliest and the latest date from the specified data source @@ -195,21 +190,38 @@ ProjectData = R6::R6Class("ProjectData", #' #' @param source the specified data source extract.timestamps = function(source) { + ## initialize data structure for timestamp if(is.null(private$data.timestamps)) { - private$data.timestamps = data.frame(row.names = c("start", "end")) + private$data.timestamps = data.frame(start = numeric(0), end = numeric(0)) } - if(source == "mails") { - private$data.timestamps$mails = c(min(private$mails$date), - max(private$mails$date)) - } else if(source == "commits") { - private$data.timestamps$commits = c(min(private$commits$date), - max(private$commits$date)) - - } else if(source == "issues") { - private$data.timestamps$issues = c(min(private$issues$date), - max(private$issues$date)) + ## collect minimum and maximum date for data source + ## 1) if we have data available + if (nrow(private[[source]]) > 0) { + source.date.min = min(private[[source]][, "date"]) + source.date.max = max(private[[source]][, "date"]) + } + ## NAs otherwise + else { + source.date.min = NA + source.date.max = NA } + + ## remove old line if existing + private$data.timestamps = subset( + private$data.timestamps, + !(rownames(private$data.timestamps) == source) + ) + + ## store the data in the timestamp data set + private$data.timestamps = rbind( + private$data.timestamps, + data.frame( + start = source.date.min, + end = source.date.max, + row.names = source + ) + ) } ), @@ -569,30 +581,42 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, + ## * * data cutting 
----------------------------------------- + #' Get the timestamps (earliest and latest date) of the specified data sources. - #' If 'simple' is TRUE return the overall latest start and earliest end date + #' If 'simple' is TRUE, return the overall latest start and earliest end date #' in order to cut the specified data sources to the same date ranges. #' + #' If there are no actual data available for a data source, the result indicates NA + #' #' @param data.sources the specified data sources #' @param simple whether or not the timestamps get simplified #' - #' @return a data.frame with the timestamps + #' @return a data.frame with the timestamps of each data source as columns "start" and "end", + #' with the data source as corresponding row name get.data.timestamps = function(data.sources = c("mails", "commits", "issues"), simple = FALSE) { - data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) + ## check arguments + data.sources = match.arg(arg = data.sources, several.ok = TRUE) + + ## read all data sources and prepare list of timestamps private$prepare.timestamps(data.sources = data.sources) - if(simple == FALSE) { - timestamps = subset(private$data.timestamps, select = data.sources) - return(timestamps) - } else { - subset.timestamps = private$data.timestamps[data.sources] - timestamps.buffer = data.frame(max = apply(subset.timestamps, 1, max), - min = apply(subset.timestamps, 1, min)) - timestamps = data.frame(start = timestamps.buffer["start", "max"], - end = timestamps.buffer["end", "min"]) - return(timestamps) + ## get the needed subset of timestamp data + subset.timestamps = private$data.timestamps[data.sources, ] + + ## get the proper subset of timestamps for returning + if(simple) { + ## get minima and maxima across data sources (rows) + timestamps = data.frame( + start = max(subset.timestamps[, "start"], na.rm = TRUE), + end = min(subset.timestamps[, "end"], na.rm = TRUE) + ) + } else { + ## select 
the complete raw data + timestamps = subset.timestamps } + return(timestamps) }, #' Cut the specified data sources to the same date range depending on the extracted @@ -602,14 +626,22 @@ ProjectData = R6::R6Class("ProjectData", #' #' @return a list of the cut data.sources get.data.cut.to.same.date = function(data.sources = c("mails", "commits", "issues")) { - data.sources = match.arg(arg = data.sources, several.ok = TRUE, choices = c("mails", "commits", "issues")) - timestamps = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) - timestamps.vector = c(timestamps$start, timestamps$end) - if(timestamps$start > timestamps$end) { - logging::logwarn("The datasources don't overlap. The result will be empty.") + ## check arguments + data.sources = match.arg(arg = data.sources, several.ok = TRUE) + + ## get the timestamp data as vector + timestamps.df = self$get.data.timestamps(data.sources = data.sources , simple = TRUE) + timestamps = c(start = timestamps.df[, "start"], end = timestamps.df[, "end"]) + + ## check consistency + if(timestamps["start"] > timestamps["end"]) { + logging::logwarn("The datasources don't overlap. The result will be empty!") } - result = split.data.time.based(self, bins = timestamps.vector) - return(result[[1]]) + + ## split data based on the timestamps and get the single result + result = split.data.time.based(self, bins = timestamps)[[1]] + + return(result) }, #' Get single pasta items. From 45ec73ff2e78e0c7963c63a8231e8f90dc4377bc Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:03:09 +0100 Subject: [PATCH 32/40] Format networks-metrics module This patch only applies some more readable code formatting to the networks-metrics module. 
Signed-off-by: Claus Hunsen --- util-networks-metrics.R | 56 +++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 55331fe0..43638863 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -9,18 +9,18 @@ requireNamespace("igraph") + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Metric functions -------------------------------------------------------- - #' Determine the maximum degree for the given network. #' #' @param network the network to be examined #' @param mode the mode to be used for determining the degrees #' #' @return A dataframe containing the name of the vertex with with maximum degree its degree. -metrics.hub.degree = function(network, mode = c("total", "in", "out")){ - match.arg(mode) +metrics.hub.degree = function(network, mode = c("total", "in", "out")) { + mode = match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) vertex = which.max(degrees) df = data.frame("name" = names(vertex), "degree" = unname(degrees[vertex])) @@ -34,7 +34,7 @@ metrics.hub.degree = function(network, mode = c("total", "in", "out")){ #' #' @return The average degree of the nodes in the network. 
metrics.avg.degree = function(network, mode = c("total", "in", "out")) { - match.arg(mode) + mode = match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) return(avg) @@ -44,15 +44,15 @@ metrics.avg.degree = function(network, mode = c("total", "in", "out")) { #' #' @param network the network to be examined #' @param sort whether the resulting dataframe is to be sorted by the node degree -#' @param sort.decreasing if sorting is active, this says whether the dataframe is to be sorted -#' in descending or ascending order +#' @param sort.decreasing if sorting is active, this says whether the dataframe is to be +#' sorted in descending or ascending order #' #' @return A dataframe containing the nodes and their respective degrees. metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { if(sort) { - degrees = sort(igraph::degree(network, mode="total"), decreasing = sort.decreasing) + degrees = sort(igraph::degree(network, mode = "total"), decreasing = sort.decreasing) } else { - igraph::degree(network, mode="total") + igraph::degree(network, mode = "total") } return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -70,7 +70,7 @@ metrics.density = function(network) { #' Calculate the average path length for the given network. #' #' @param network the network to be examined -#' @param directed wehther the given network is directed or undirected +#' @param directed whether to consider directed paths in directed networks #' @param unconnected whether all nodes of the network are connected #' #' @return The average pathlength of the given network. @@ -86,7 +86,7 @@ metrics.avg.pathlength = function(network, directed, unconnected) { #' #' @return The clustering coefficient of the network. 
metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { - match.arg(cc.type) + cc.type = match.arg(cc.type) cc = igraph::transitivity(network, type = cc.type, vids = NULL) return(cc) } @@ -94,8 +94,8 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar #' Calculate the modularity metric for the given network. #' #' @param network the network to be examined -#' @param community.detection.algorithm the algorithm to be used for the detection of communities which -#' is required for the calculation of the clustering coefficient +#' @param community.detection.algorithm the algorithm to be used for the detection of communities +#' which is required for the calculation of the clustering coefficient #' #' @return The modularity value for the given network. metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { @@ -115,17 +115,24 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c #' The algorithm relies on the Erdös-Renyi random network with the same number #' of nodes and edges as the given network. #' -#' @param network the network to be examined. This network needs to be simplified for the calculation to work +#' To check the result value \code{s.delta} for a binary (true/false) decision on smallworldness, +#' do this: \code{is.smallworld = s.delta > 1}. +#' +#' Important: The given network needs to be simplified for the calculation to work! +#' +#' @param network the simplified network to be examined #' #' @return The smallworldness value of the network. 
metrics.smallworldness = function(network) { - # construct Erdös-Renyi network with same number of nodes and edges as g - h = igraph::erdos.renyi.game(n=igraph::vcount(network), p.or.m=igraph::gsize(network), type="gnm", directed=FALSE) + h = igraph::erdos.renyi.game(n = igraph::vcount(network), + p.or.m = igraph::ecount(network), + type = "gnm", + directed = FALSE) # compute clustering coefficients - g.cc = igraph::transitivity(network, type = 'global') - h.cc = igraph::transitivity(h, type = 'global') + g.cc = igraph::transitivity(network, type = "global") + h.cc = igraph::transitivity(h, type = "global") # compute average shortest-path length g.l = igraph::average.path.length(network, unconnected = TRUE) h.l = igraph::average.path.length(h, unconnected = TRUE) @@ -138,9 +145,8 @@ metrics.smallworldness = function(network) { # indicator s.delta s.delta = gamma / lambda - # if s.delta > 1, then the network is a small-world network - #is.smallworld = ifelse(s.delta > 1, TRUE, FALSE) - + ## if s.delta > 1, then the network is a small-world network + # is.smallworld = s.delta > 1 return ("smallworldness" = s.delta) } @@ -150,11 +156,11 @@ metrics.smallworldness = function(network) { #' #' @return A dataframe containing the different values, connected to scale-freeness. 
metrics.scale.freeness = function(network) { - v.degree <- sort(igraph::degree(network, mode="total"), decreasing=TRUE) + v.degree = sort(igraph::degree(network, mode = "total"), decreasing = TRUE) ## Power-law fiting - ## (from Mitchell Joblin , Siemens AG, 2012, 2013) - p.fit = igraph::power.law.fit(v.degree, implementation="plfit") + ## (by Mitchell Joblin , Siemens AG, 2012, 2013) + p.fit = igraph::power.law.fit(v.degree, implementation = "plfit") param.names = c("alpha", "xmin", "KS.p") res = list() res[param.names] = p.fit[param.names] @@ -162,7 +168,7 @@ metrics.scale.freeness = function(network) { ## Check percent of vertices under power-law res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) - df = data.frame(res$alpha,res$xmin,res$KS.p,res$num.power.law,res$percent.power.law) + df = data.frame(res$alpha, res$xmin, res$KS.p, res$num.power.law, res$percent.power.law) return(df) } @@ -173,7 +179,7 @@ metrics.scale.freeness = function(network) { #' @return A dataframe containing the logarithm of the node degree and the logarithm #' of the local clustering coefficient for each node. metrics.hierarchy = function(network) { - degrees = igraph::degree(network, mode="total") + degrees = igraph::degree(network, mode = "total") cluster.coeff = igraph::transitivity(network, type = "local", vids = NULL) degrees.without.cluster.coeff = subset(degrees, !(is.nan(cluster.coeff) | cluster.coeff == 0)) cluster.coeff = subset(cluster.coeff, !(is.nan(cluster.coeff) | cluster.coeff == 0)) From 21941bdfd62cf61dbdc61f296f4857aae48475a3 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:08:29 +0100 Subject: [PATCH 33/40] Fix small bugs in network metrics In this patch, we fix two minor bugs in the network metrics 'node.degree' and 'modularity'. In the first, the unsorted result was not assigned correctly to the return value. 
In the latter, the single modularity value does not need a name (which was also an undefined variable). Props to @ecklbarb for reporting these two mistakes. Signed-off-by: Claus Hunsen --- util-networks-metrics.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 43638863..2d62562a 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -52,7 +52,7 @@ metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { if(sort) { degrees = sort(igraph::degree(network, mode = "total"), decreasing = sort.decreasing) } else { - igraph::degree(network, mode = "total") + degrees = igraph::degree(network, mode = "total") } return(data.frame("name" = names(degrees), "degree" = unname(degrees))) } @@ -101,7 +101,7 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { comm = community.detection.algorithm(network) mod = igraph::modularity(network, igraph::membership(comm)) - return(data.frame("name" = name, "modularity" = mod)) + return("modularity" = mod) } #' This function determines whether a network can be considered a From af2b3b626a7a84d0204902e27c95e9611dbc1029 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:38:46 +0100 Subject: [PATCH 34/40] Update README file Fix mistakes and add 'unify.date.ranges' documentation. Signed-off-by: Claus Hunsen --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9968f022..fac1fdf0 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. 
- issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. - * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the network configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). + * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). - `simplify` * Perform edge contraction to retrieve a simplified network * [`TRUE`, *`FALSE`*] @@ -132,11 +132,14 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. * The upper bound for total amount of edges to build for a subset of the data, i.e., not building any edges for the subset exceeding the limit * any positive integer * **Example**: The amount of `mail`-based directed edges in an author network for one thread with 100 authors is 5049. - A value of 5000 for `skip.threshold` would lead to the omission of this thread from the network. + A value of 5000 for `skip.threshold` (as it is smaller than 5049) would lead to the omission of this thread from the network. +- `unify.date.ranges` + * Cut the data sources to the largest start date and the smallest end date across all data sources + * **Note**: This parameter does not affect the original data object, but rather creates a clone. + * [`TRUE`, *`FALSE`*] The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. -You can also update the object at any time, but as soon as you do so, all -cached data of the data object are reset and have to be rebuilt. 
+You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. For more examples, please look in the file `test.R`. From 1b86b1d5a14cdf2985d55690c123b135acbddd1a Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:40:57 +0100 Subject: [PATCH 35/40] Re-order NetworkConf and ProjectConf Now, in the README file and the configuration module, we have 'ProjectConf' documentation first and 'NetworkConf' documentation second. Signed-off-by: Claus Hunsen --- README.md | 122 +++++----- util-conf.R | 640 ++++++++++++++++++++++++++-------------------------- 2 files changed, 381 insertions(+), 381 deletions(-) diff --git a/README.md b/README.md index fac1fdf0..11d9eb82 100644 --- a/README.md +++ b/README.md @@ -82,67 +82,6 @@ For examples on how to use both classes and how to build networks with them, ple ## Configuration Classes -### NetworkConf - -In this section, we give an overview on the parameters of the `NetworkConf` class and their meaning. - -All parameters can be retrieved with the method `NetworkConf$get.variable(...)`, by passing one parameter name as method parameter. -Updates to the parameters can be done by calling `NetworkConf$update.variables(...)` and passing a list of parameter names and their respective values. - -**Note**: Default values are shown in *italics*. - -- `author.relation` - * The relation among authors, encoded as edges in an author network - * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! 
- * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] -- `author.directed` - * The (time-based) directedness of edges in an author network - * [`TRUE`, *`FALSE`*] -- `author.all.authors` - * Denotes whether all available authors (from all analyses and data sources) shall be added to the network as a basis - * **Note**: Depending on the chosen author relation, there may be isolates then - * [`TRUE`, *`FALSE`*] -- `author.only.committers` - * Remove all authors from an author network (including bipartite and multi networks) who are not present in an author network constructed with `artifact.relation` as relation, i.e., all authors that have no biparite relations in a bipartite/multi network are removed. - * [`TRUE`, *`FALSE`*] -- `artifact.relation` - * The relation among artifacts, encoded as edges in an artifact network - * **Note**: This relation configures also the author--artifact relation in bipartite and multi networks! - * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] -- `artifact.directed` - * The (time-based) directedness of edges in an artifact network - * **Note**: This parameter does not take effect for now, as the co-change relation is always undirected, while the call-graph relation is always directed. 
- * [`TRUE`, *`FALSE`*] -- `edge.attributes` - * The list of edge-attribute names and information - * a subset of the following as a single vector: - - timestamp information: *`"date"`* - - author information: `"author.name"`, `"author.email"` - - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` - - commit information: *`"hash"`*, *`"file"`*, *`"artifact.type"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` - - PaStA information: `"pasta"`, - - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` - * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. - * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. - * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). -- `simplify` - * Perform edge contraction to retrieve a simplified network - * [`TRUE`, *`FALSE`*] -- `skip.threshold` - * The upper bound for total amount of edges to build for a subset of the data, i.e., not building any edges for the subset exceeding the limit - * any positive integer - * **Example**: The amount of `mail`-based directed edges in an author network for one thread with 100 authors is 5049. - A value of 5000 for `skip.threshold` (as it is smaller than 5049) would lead to the omission of this thread from the network. -- `unify.date.ranges` - * Cut the data sources to the largest start date and the smallest end date across all data sources - * **Note**: This parameter does not affect the original data object, but rather creates a clone. 
- * [`TRUE`, *`FALSE`*] - -The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. -You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. - -For more examples, please look in the file `test.R`. - ## ProjectConf In this section, we give an overview on the parameters of the `ProjectConf` class and their meaning. @@ -247,6 +186,67 @@ There is no way to update the entries, except for the revision-based parameters. * [`TRUE`, *`FALSE`*] * **Note**: To include PaStA-based edge attributes, you need to give the `"pasta"` edge attribute for `edge.attributes`. +### NetworkConf + +In this section, we give an overview on the parameters of the `NetworkConf` class and their meaning. + +All parameters can be retrieved with the method `NetworkConf$get.variable(...)`, by passing one parameter name as method parameter. +Updates to the parameters can be done by calling `NetworkConf$update.variables(...)` and passing a list of parameter names and their respective values. + +**Note**: Default values are shown in *italics*. + +- `author.relation` + * The relation among authors, encoded as edges in an author network + * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! 
+ * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] +- `author.directed` + * The (time-based) directedness of edges in an author network + * [`TRUE`, *`FALSE`*] +- `author.all.authors` + * Denotes whether all available authors (from all analyses and data sources) shall be added to the network as a basis + * **Note**: Depending on the chosen author relation, there may be isolates then + * [`TRUE`, *`FALSE`*] +- `author.only.committers` + * Remove all authors from an author network (including bipartite and multi networks) who are not present in an author network constructed with `artifact.relation` as relation, i.e., all authors that have no biparite relations in a bipartite/multi network are removed. + * [`TRUE`, *`FALSE`*] +- `artifact.relation` + * The relation among artifacts, encoded as edges in an artifact network + * **Note**: This relation configures also the author--artifact relation in bipartite and multi networks! + * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] +- `artifact.directed` + * The (time-based) directedness of edges in an artifact network + * **Note**: This parameter does not take effect for now, as the co-change relation is always undirected, while the call-graph relation is always directed. 
+ * [`TRUE`, *`FALSE`*] +- `edge.attributes` + * The list of edge-attribute names and information + * a subset of the following as a single vector: + - timestamp information: *`"date"`* + - author information: `"author.name"`, `"author.email"` + - e-mail information: *`"message.id"`*, *`"thread"`*, `"subject"` + - commit information: *`"hash"`*, *`"file"`*, *`"artifact.type"`*, *`"artifact"`*, `"changed.files"`, `"added.lines"`, `"deleted.lines"`, `"diff.size"`, `"artifact.diff.size"`, `"synchronicity"` + - PaStA information: `"pasta"`, + - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` + * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. + * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. + * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). +- `simplify` + * Perform edge contraction to retrieve a simplified network + * [`TRUE`, *`FALSE`*] +- `skip.threshold` + * The upper bound for total amount of edges to build for a subset of the data, i.e., not building any edges for the subset exceeding the limit + * any positive integer + * **Example**: The amount of `mail`-based directed edges in an author network for one thread with 100 authors is 5049. + A value of 5000 for `skip.threshold` (as it is smaller than 5049) would lead to the omission of this thread from the network. +- `unify.date.ranges` + * Cut the data sources to the largest start date and the smallest end date across all data sources + * **Note**: This parameter does not affect the original data object, but rather creates a clone. 
+ * [`TRUE`, *`FALSE`*] + +The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. +You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. + +For more examples, please look in the file `test.R`. + ## File overview diff --git a/util-conf.R b/util-conf.R index 3d50f61f..9ad2283d 100644 --- a/util-conf.R +++ b/util-conf.R @@ -299,6 +299,326 @@ Conf = R6::R6Class("Conf", ) +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## ProjectConf ------------------------------------------------------------- + +ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, + + ## * private ----------------------------------------------------------- + + private = list( + + ## * * project info ------------------------------------------------ + + data = NULL, # character + selection.process = NULL, # character + casestudy = NULL, # character + artifact = NULL, # character + + ## * * attributes --------------------------------------------------- + + attributes = list( + artifact.filter.base = list( + default = TRUE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), + synchronicity = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), + synchronicity.time.window = list( + default = 5, + type = "numeric", + allowed = c(1, 5, 10, 15), + allowed.number = 1 + ), + pasta = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ) + ), + + ## * * revisions and ranges ---------------------------------------- + + #' Change the revision names to a equal name standard. + #' + #' @param ranges the list of ranges to be postprocessed + #' + #' @return the postprocessed ranges + postprocess.revision.list = function(ranges) { + # remove names ,e.g. 
"version", from release cycle names + casestudy = private$casestudy + to.remove = c( + "version-", "v-","version_", "v_","version", "v", + paste0(casestudy, "-"), paste0(casestudy,"-"), + paste0(casestudy, "_"), paste0(casestudy,"_"), + casestudy, casestudy + ) + + # run gsub for all pattern + ranges = tolower(ranges) + for (string in to.remove) { + ranges = gsub(string, "", ranges) + } + + # return simplified list of ranges + return(ranges) + }, + + #' Change the revision names of callgraph data to a equal name standard. + #' + #' @param r list of revisions to be postprocessed + #' + #' @return list of postprocessed revisions + postprocess.revision.list.for.callgraph.data = function(r) { + r = gsub("version-", "", r) # remove version prefix (SQLite) + r = gsub("OpenSSL_", "", r) # remove name prefix (OpenSSL) + r = gsub("\\.", "_", r) # replace dots by underscores + return(r) + }, + + ## * * path construction ------------------------------------------- + + subfolder.configurations = "configurations", + subfolder.results = "results", + + #' Construct and return the path to the configuration folder of Codeface. + #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' + #' @return the path to the configuration folder + get.configurations.folder = function(data, selection.process) { + return(file.path(data, private$subfolder.configurations, selection.process)) + + }, + + #' Construct and return the path to a Codeface configuration. 
+ #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' @param casestudy the current casestudy + #' @param tagging the current tagging ('feature', 'proximity') + #' + #' @return the path to the configuration + construct.conf.path = function(data, selection.process, casestudy, tagging) { + ## construct the base name of the configuration + conf.basename = paste(casestudy, "_", tagging, ".conf", sep = "") + ## construct complete path + conf.file = file.path(private$get.configurations.folder(data, selection.process), conf.basename) + ## return path to config file + return(conf.file) + }, + + #' Construct and return the path to the results folder of Codeface. + #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' @param casestudy the current casestudy + #' @param suffix the suffix of the casestudy's results folder + #' @param subfolder an optional subfolder + #' + #' @return the path to the results folder + #' (i.e., "{data}/{selection.process}/{casestudy}_{suffix}[/{subfolder}]") + get.results.folder = function(data, selection.process, casestudy, suffix, subfolder = NULL) { + path = file.path(data, private$subfolder.results, selection.process, paste(casestudy, suffix, sep = "_")) + if (!is.null(subfolder)) { + path = file.path(path, subfolder) + } + return(path) + } + + ), + + ## * public ------------------------------------------------------------ + + public = list( + + #' Constructor of the class. 
+ #' + #' @param data the path to the codeface-data folder + #' @param selection.process the selection process of the current study ('threemonth', 'releases') + #' @param casestudy the current casestudy + #' @param artifact the artifact to study ('feature','function','file') + initialize = function(data, selection.process, casestudy, artifact = "feature") { + super$initialize() + + if (!missing(data) && is.character(data)) { + private$data <- data + } + if (!missing(selection.process) && is.character(selection.process)) { + private$selection.process <- selection.process + } + if (!missing(casestudy) && is.character(casestudy)) { + private$casestudy <- casestudy + } + if (!missing(artifact) && is.character(artifact)) { + private$artifact <- artifact + } + + logging::loginfo("Construct configuration: starting.") + + ## convert artifact to tagging + tagging = ARTIFACT.TO.TAGGING[[ artifact ]] + if (is.null(tagging)) { + logging::logerror("Artifact '%s' cannot be converted to a proper Codeface tagging! 
Stopping...", artifact) + stop("Stopped due to wrong configuration parameters!") + } + ## construct file name for configuration + conf.file = private$construct.conf.path(data, selection.process, casestudy, tagging) + + ## load case-study confuration from given file + logging::loginfo("Attempting to load configuration file: %s", conf.file) + conf = yaml::yaml.load_file(conf.file) + + ## store basic information + conf$selection.process = selection.process + conf$casestudy = casestudy + + ## store artifact in configuration + conf$artifact = artifact + conf$artifact.short = ARTIFACT.TO.ABBREVIATION[[ conf$artifact ]] + conf$artifact.codeface = ARTIFACT.CODEFACE[[ conf$artifact ]] + ## store path to actual Codeface data + conf$datapath = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) + ## store path to call graphs + conf$datapath.callgraph = private$get.results.folder(data, selection.process, casestudy, "callgraphs") + ## store path to synchronicity data + conf$datapath.synchronicity = private$get.results.folder(data, selection.process, casestudy, "synchronicity") + ## store path to pasta data + conf$datapath.pasta = private$get.results.folder(data, selection.process, casestudy, "pasta") + ## store path to issue data + conf$datapath.issues = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) + + ## READ REVISIONS META-DATA + + ## read revisions file + revisions.file = file.path(conf$datapath, "revisions.list") + revisions.df <- try(read.table(revisions.file, header = FALSE, sep = ";", strip.white = TRUE, + encoding = "UTF-8"), silent = TRUE) + ## break if the list of revisions is empty or any other error occurs + if (inherits(revisions.df, 'try-error')) { + logging::logerror("There are no revisions available for the current casestudy.") + logging::logerror("Attempted to load following file: %s", revisions.file) + stop("Stopped due to missing revisions.") + } + ## convert columns 
accordingly + revisions.cols = c(revision = "as.character", date = "as.POSIXct") + for (i in 1:ncol(revisions.df)) { + revisions.df[i] = do.call(c, lapply(revisions.df[[i]], revisions.cols[i])) + colnames(revisions.df)[i] = names(revisions.cols)[i] + } + revisions = revisions.df[["revision"]] + revisions.dates = revisions.df[["date"]] + if (!is.null(revisions.dates)) names(revisions.dates) = revisions + conf[["revisions"]] = NULL + + ## change structure of values (i.e., insert 'default' sublists) + conf = lapply(conf, function(entry) { + return(list(value = entry, updatable = FALSE)) + }) + + ## SAVE FULL CONFIGURATION OBJECT + private$attributes = c(conf, private$attributes) + + ## construct and save revisions and ranges + ## (this has to be done after storing conf due to the needed access to the conf object) + self$set.revisions(revisions, revisions.dates) + + # ## logging + # self$print(allowed = TRUE) + + logging::loginfo("Construct configuration: finished.") + }, + + ## * * helper methods ---------------------------------------------- + + #' Get the corresponding callgraph revision for the given range. + #' + #' @param range the range for the callgraph revisions + #' + #' @return the callgraph revisions + get.callgraph.revision.from.range = function(range) { + idx = which(self$get.value("ranges") == range) + rev = self$get.value("revisions.callgraph")[idx + 1] + return(rev) + }, + + ## * * updating revisions and splitting information ---------------- + + #' Set the revisions and ranges for the study. 
+ #' + #' @param revisions the revisions of the study + #' @param revisions.dates the revision dates of the study + #' @param sliding.window whether sliding window splitting is enabled or not + #' default: 'FALSE' + set.revisions = function(revisions, revisions.dates, sliding.window = FALSE) { + ## construct revisions for call-graph data + revisions.callgraph = private$postprocess.revision.list.for.callgraph.data(revisions) + + ## assemble revision data + rev.data = list( + revisions = revisions, + revisions.dates = revisions.dates, + revisions.callgraph = revisions.callgraph, + ranges = construct.ranges(revisions, sliding.window = sliding.window), + ranges.callgraph = construct.ranges(revisions.callgraph, sliding.window = sliding.window) + ) + ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) + rev.data = lapply(rev.data, function(entry) { + return(list(value = entry, updatable = FALSE)) + }) + + ## insert new values (update if needed) + for (name in names(rev.data)) { + private[["attributes"]][[name]] = rev.data[[name]] + } + }, + + #' Update the information on revisions and ranges regarding splitting. 
+ #' + #' @param type either "time-based" or "activity-based", depending on splitting function + #' @param length the string given to time-based splitting (e.g., "3 months") or the activity + #' amount given to acitivity-based splitting + #' @param basis the data used as basis for splitting (either "commits", "mails", or "issues") + #' @param sliding.window whether sliding window splitting is enabled or not [default: FALSE] + #' @param revisions the revisions of the study + #' @param revisions.dates the revision dates of the study + set.splitting.info = function(type, length, basis, sliding.window, revisions, revisions.dates) { + ## assemble splitting information + split.info = list( + ## basic slpitting information + split.type = type, + split.length = length, + split.basis = basis, + split.sliding.window = sliding.window, + ## splitting information on ranges + split.revisions = revisions, + split.revisions.dates = revisions.dates, + split.ranges = construct.ranges(revisions, sliding.window = sliding.window) + + ) + ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) + split.info = lapply(split.info, function(entry) { + return(list(value = entry, updatable = FALSE)) + }) + + ## insert new values (update if needed) + for (name in names(split.info)) { + private[["attributes"]][[name]] = split.info[[name]] + } + } + + ) +) + + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## NetworkConf ------------------------------------------------------------- @@ -425,326 +745,6 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, ) -## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / -## ProjectConf ------------------------------------------------------------- - -ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, - - ## * private ----------------------------------------------------------- - - private = list( - - ## * * project info 
------------------------------------------------ - - data = NULL, # character - selection.process = NULL, # character - casestudy = NULL, # character - artifact = NULL, # character - - ## * * attributes --------------------------------------------------- - - attributes = list( - artifact.filter.base = list( - default = TRUE, - type = "logical", - allowed = c(TRUE, FALSE), - allowed.number = 1 - ), - synchronicity = list( - default = FALSE, - type = "logical", - allowed = c(TRUE, FALSE), - allowed.number = 1 - ), - synchronicity.time.window = list( - default = 5, - type = "numeric", - allowed = c(1, 5, 10, 15), - allowed.number = 1 - ), - pasta = list( - default = FALSE, - type = "logical", - allowed = c(TRUE, FALSE), - allowed.number = 1 - ) - ), - - ## * * revisions and ranges ---------------------------------------- - - #' Change the revision names to a equal name standard. - #' - #' @param ranges the list of ranges to be postprocessed - #' - #' @return the postprocessed ranges - postprocess.revision.list = function(ranges) { - # remove names ,e.g. "version", from release cycle names - casestudy = private$casestudy - to.remove = c( - "version-", "v-","version_", "v_","version", "v", - paste0(casestudy, "-"), paste0(casestudy,"-"), - paste0(casestudy, "_"), paste0(casestudy,"_"), - casestudy, casestudy - ) - - # run gsub for all pattern - ranges = tolower(ranges) - for (string in to.remove) { - ranges = gsub(string, "", ranges) - } - - # return simplified list of ranges - return(ranges) - }, - - #' Change the revision names of callgraph data to a equal name standard. 
- #' - #' @param r list of revisions to be postprocessed - #' - #' @return list of postprocessed revisions - postprocess.revision.list.for.callgraph.data = function(r) { - r = gsub("version-", "", r) # remove version prefix (SQLite) - r = gsub("OpenSSL_", "", r) # remove name prefix (OpenSSL) - r = gsub("\\.", "_", r) # replace dots by underscores - return(r) - }, - - ## * * path construction ------------------------------------------- - - subfolder.configurations = "configurations", - subfolder.results = "results", - - #' Construct and return the path to the configuration folder of Codeface. - #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' - #' @return the path to the configuration folder - get.configurations.folder = function(data, selection.process) { - return(file.path(data, private$subfolder.configurations, selection.process)) - - }, - - #' Construct and return the path to a Codeface configuration. - #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' @param casestudy the current casestudy - #' @param tagging the current tagging ('feature', 'proximity') - #' - #' @return the path to the configuration - construct.conf.path = function(data, selection.process, casestudy, tagging) { - ## construct the base name of the configuration - conf.basename = paste(casestudy, "_", tagging, ".conf", sep = "") - ## construct complete path - conf.file = file.path(private$get.configurations.folder(data, selection.process), conf.basename) - ## return path to config file - return(conf.file) - }, - - #' Construct and return the path to the results folder of Codeface. 
- #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' @param casestudy the current casestudy - #' @param suffix the suffix of the casestudy's results folder - #' @param subfolder an optional subfolder - #' - #' @return the path to the results folder - #' (i.e., "{data}/{selection.process}/{casestudy}_{suffix}[/{subfolder}]") - get.results.folder = function(data, selection.process, casestudy, suffix, subfolder = NULL) { - path = file.path(data, private$subfolder.results, selection.process, paste(casestudy, suffix, sep = "_")) - if (!is.null(subfolder)) { - path = file.path(path, subfolder) - } - return(path) - } - - ), - - ## * public ------------------------------------------------------------ - - public = list( - - #' Constructor of the class. - #' - #' @param data the path to the codeface-data folder - #' @param selection.process the selection process of the current study ('threemonth', 'releases') - #' @param casestudy the current casestudy - #' @param artifact the artifact to study ('feature','function','file') - initialize = function(data, selection.process, casestudy, artifact = "feature") { - super$initialize() - - if (!missing(data) && is.character(data)) { - private$data <- data - } - if (!missing(selection.process) && is.character(selection.process)) { - private$selection.process <- selection.process - } - if (!missing(casestudy) && is.character(casestudy)) { - private$casestudy <- casestudy - } - if (!missing(artifact) && is.character(artifact)) { - private$artifact <- artifact - } - - logging::loginfo("Construct configuration: starting.") - - ## convert artifact to tagging - tagging = ARTIFACT.TO.TAGGING[[ artifact ]] - if (is.null(tagging)) { - logging::logerror("Artifact '%s' cannot be converted to a proper Codeface tagging! 
Stopping...", artifact) - stop("Stopped due to wrong configuration parameters!") - } - ## construct file name for configuration - conf.file = private$construct.conf.path(data, selection.process, casestudy, tagging) - - ## load case-study confuration from given file - logging::loginfo("Attempting to load configuration file: %s", conf.file) - conf = yaml::yaml.load_file(conf.file) - - ## store basic information - conf$selection.process = selection.process - conf$casestudy = casestudy - - ## store artifact in configuration - conf$artifact = artifact - conf$artifact.short = ARTIFACT.TO.ABBREVIATION[[ conf$artifact ]] - conf$artifact.codeface = ARTIFACT.CODEFACE[[ conf$artifact ]] - ## store path to actual Codeface data - conf$datapath = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) - ## store path to call graphs - conf$datapath.callgraph = private$get.results.folder(data, selection.process, casestudy, "callgraphs") - ## store path to synchronicity data - conf$datapath.synchronicity = private$get.results.folder(data, selection.process, casestudy, "synchronicity") - ## store path to pasta data - conf$datapath.pasta = private$get.results.folder(data, selection.process, casestudy, "pasta") - ## store path to issue data - conf$datapath.issues = private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) - - ## READ REVISIONS META-DATA - - ## read revisions file - revisions.file = file.path(conf$datapath, "revisions.list") - revisions.df <- try(read.table(revisions.file, header = FALSE, sep = ";", strip.white = TRUE, - encoding = "UTF-8"), silent = TRUE) - ## break if the list of revisions is empty or any other error occurs - if (inherits(revisions.df, 'try-error')) { - logging::logerror("There are no revisions available for the current casestudy.") - logging::logerror("Attempted to load following file: %s", revisions.file) - stop("Stopped due to missing revisions.") - } - ## convert columns 
accordingly - revisions.cols = c(revision = "as.character", date = "as.POSIXct") - for (i in 1:ncol(revisions.df)) { - revisions.df[i] = do.call(c, lapply(revisions.df[[i]], revisions.cols[i])) - colnames(revisions.df)[i] = names(revisions.cols)[i] - } - revisions = revisions.df[["revision"]] - revisions.dates = revisions.df[["date"]] - if (!is.null(revisions.dates)) names(revisions.dates) = revisions - conf[["revisions"]] = NULL - - ## change structure of values (i.e., insert 'default' sublists) - conf = lapply(conf, function(entry) { - return(list(value = entry, updatable = FALSE)) - }) - - ## SAVE FULL CONFIGURATION OBJECT - private$attributes = c(conf, private$attributes) - - ## construct and save revisions and ranges - ## (this has to be done after storing conf due to the needed access to the conf object) - self$set.revisions(revisions, revisions.dates) - - # ## logging - # self$print(allowed = TRUE) - - logging::loginfo("Construct configuration: finished.") - }, - - ## * * helper methods ---------------------------------------------- - - #' Get the corresponding callgraph revision for the given range. - #' - #' @param range the range for the callgraph revisions - #' - #' @return the callgraph revisions - get.callgraph.revision.from.range = function(range) { - idx = which(self$get.value("ranges") == range) - rev = self$get.value("revisions.callgraph")[idx + 1] - return(rev) - }, - - ## * * updating revisions and splitting information ---------------- - - #' Set the revisions and ranges for the study. 
- #' - #' @param revisions the revisions of the study - #' @param revisions.dates the revision dates of the study - #' @param sliding.window whether sliding window splitting is enabled or not - #' default: 'FALSE' - set.revisions = function(revisions, revisions.dates, sliding.window = FALSE) { - ## construct revisions for call-graph data - revisions.callgraph = private$postprocess.revision.list.for.callgraph.data(revisions) - - ## assemble revision data - rev.data = list( - revisions = revisions, - revisions.dates = revisions.dates, - revisions.callgraph = revisions.callgraph, - ranges = construct.ranges(revisions, sliding.window = sliding.window), - ranges.callgraph = construct.ranges(revisions.callgraph, sliding.window = sliding.window) - ) - ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) - rev.data = lapply(rev.data, function(entry) { - return(list(value = entry, updatable = FALSE)) - }) - - ## insert new values (update if needed) - for (name in names(rev.data)) { - private[["attributes"]][[name]] = rev.data[[name]] - } - }, - - #' Update the information on revisions and ranges regarding splitting. 
- #' - #' @param type either "time-based" or "activity-based", depending on splitting function - #' @param length the string given to time-based splitting (e.g., "3 months") or the activity - #' amount given to acitivity-based splitting - #' @param basis the data used as basis for splitting (either "commits", "mails", or "issues") - #' @param sliding.window whether sliding window splitting is enabled or not [default: FALSE] - #' @param revisions the revisions of the study - #' @param revisions.dates the revision dates of the study - set.splitting.info = function(type, length, basis, sliding.window, revisions, revisions.dates) { - ## assemble splitting information - split.info = list( - ## basic slpitting information - split.type = type, - split.length = length, - split.basis = basis, - split.sliding.window = sliding.window, - ## splitting information on ranges - split.revisions = revisions, - split.revisions.dates = revisions.dates, - split.ranges = construct.ranges(revisions, sliding.window = sliding.window) - - ) - ## change structure of values (i.e., insert 'default' sublists and set 'updatable' value) - split.info = lapply(split.info, function(entry) { - return(list(value = entry, updatable = FALSE)) - }) - - ## insert new values (update if needed) - for (name in names(split.info)) { - private[["attributes"]][[name]] = split.info[[name]] - } - } - - ) -) - - ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper functions -------------------------------------------------------- From 61fc7717334422e44ce6f1b5d15e9d17cc587bc3 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Tue, 12 Dec 2017 18:44:28 +0100 Subject: [PATCH 36/40] Move PaStA method in data class This is just a code movement of the method 'get.pasta.items' for better structure in the 'ProjectData' class. 
Signed-off-by: Claus Hunsen --- util-data.R | 60 ++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/util-data.R b/util-data.R index 9d0b6a6e..2c451900 100644 --- a/util-data.R +++ b/util-data.R @@ -581,6 +581,36 @@ ProjectData = R6::R6Class("ProjectData", return(private$artifacts) }, + #' Get single pasta items. + #' For a given 'message.id', the associated 'commit.hash' is returned. + #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. + #' + #' @param message.id the message ID to get the corresponding commit hash + #' @param commit.hash the commit hash to get the corresponding message ID + #' + #' @return the selected pasta data + get.pasta.items = function(message.id = NULL, commit.hash = NULL) { + logging::loginfo("Getting pasta items started.") + #if neither message.id nor commit.hash are specified break the code + if(is.null(message.id) && is.null(commit.hash)) { + logging::logwarn("Neither message.id nor commit.hash specified.") + return() + } + + ## get pasta data + self$get.pasta() + + ## if a message.id is given just return the attached list of commit hashes + ## else gather all message.ids which contain the given commit.hash and return them + if(!is.null(message.id)) { + result = private$pasta[private$pasta[["message.id"]] == message.id, "commit.hash"] + return(result) + } else { + result = private$pasta[private$pasta[["commit.hash"]] == commit.hash, "message.id"] + return(result) + } + }, + ## * * data cutting ----------------------------------------- #' Get the timestamps (earliest and latest date) of the specified data sources. @@ -644,36 +674,6 @@ ProjectData = R6::R6Class("ProjectData", return(result) }, - #' Get single pasta items. - #' For a given 'message.id', the associated 'commit.hash' is returned. - #' For a given 'commit.hash', the associated 'message.id' or IDs are returned. 
- #' - #' @param message.id the message ID to get the corresponding commit hash - #' @param commit.hash the commit hash to get the corresponding message ID - #' - #' @return the selected pasta data - get.pasta.items = function(message.id = NULL, commit.hash = NULL) { - logging::loginfo("Getting pasta items started.") - #if neither message.id nor commit.hash are specified break the code - if(is.null(message.id) && is.null(commit.hash)) { - logging::logwarn("Neither message.id nor commit.hash specified.") - return() - } - - ## get pasta data - self$get.pasta() - - ## if a message.id is given just return the attached list of commit hashes - ## else gather all message.ids which contain the given commit.hash and return them - if(!is.null(message.id)) { - result = private$pasta[private$pasta[["message.id"]] == message.id, "commit.hash"] - return(result) - } else { - result = private$pasta[private$pasta[["commit.hash"]] == commit.hash, "message.id"] - return(result) - } - }, - ## * * processed data ---------------------------------------------- #' Map the corresponding authors to each artifact and return the list. From a67ea247855bfccd7e8338085a53b481be347cfe Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 11:14:17 +0100 Subject: [PATCH 37/40] Update return values for metrics In the metrics module, single-value returns are now named vectors. The name is the metrics name as used in the respective function definition. Additionally, the column names for the scale-freeness metric is changed to not include 'res.' at each column name's beginning. 
Signed-off-by: Claus Hunsen --- util-networks-metrics.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index 2d62562a..b6b4e248 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -37,7 +37,7 @@ metrics.avg.degree = function(network, mode = c("total", "in", "out")) { mode = match.arg(mode) degrees = igraph::degree(network, mode = c(mode)) avg = mean(degrees) - return(avg) + return(c(avg.degree = avg)) } #' Calculate all node degrees for the given network @@ -64,7 +64,7 @@ metrics.node.degrees = function(network, sort = TRUE, sort.decreasing = TRUE) { #' @return The density of the network. metrics.density = function(network) { density = igraph::graph.density(network) - return(density) + return(c(density = density)) } #' Calculate the average path length for the given network. @@ -76,7 +76,7 @@ metrics.density = function(network) { #' @return The average pathlength of the given network. metrics.avg.pathlength = function(network, directed, unconnected) { avg.pathlength = igraph::average.path.length(network, directed = directed, unconnected = unconnected) - return(avg.pathlength) + return(c(avg.pathlength = avg.pathlength)) } #' Calculate the average local clustering coefficient for the given network. @@ -88,7 +88,7 @@ metrics.avg.pathlength = function(network, directed, unconnected) { metrics.clustering.coeff = function(network, cc.type = c("global", "local", "barrat", "localaverage")) { cc.type = match.arg(cc.type) cc = igraph::transitivity(network, type = cc.type, vids = NULL) - return(cc) + return(c(clustering = cc)) } #' Calculate the modularity metric for the given network. 
@@ -101,7 +101,7 @@ metrics.clustering.coeff = function(network, cc.type = c("global", "local", "bar metrics.modularity = function(network, community.detection.algorithm = igraph::cluster_walktrap) { comm = community.detection.algorithm(network) mod = igraph::modularity(network, igraph::membership(comm)) - return("modularity" = mod) + return(c(modularity = mod)) } #' This function determines whether a network can be considered a @@ -147,7 +147,7 @@ metrics.smallworldness = function(network) { ## if s.delta > 1, then the network is a small-world network # is.smallworld = s.delta > 1 - return ("smallworldness" = s.delta) + return (c(smallworldness = s.delta)) } #' Determine scale freeness of a network using the power law fitting method. @@ -168,7 +168,7 @@ metrics.scale.freeness = function(network) { ## Check percent of vertices under power-law res$num.power.law = length(which(v.degree >= res$xmin)) res$percent.power.law = 100 * (res$num.power.law / length(v.degree)) - df = data.frame(res$alpha, res$xmin, res$KS.p, res$num.power.law, res$percent.power.law) + df = as.data.frame(res, row.names = "scale.freeness") return(df) } From 830d047471cf9fc008ae0431fdd36985739d9f17 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 15:12:38 +0100 Subject: [PATCH 38/40] Update README file - Add missing files and their respective descriptions. - Update how-to section's code snippet. - Fix some typos. - Fix the indentation of the 'ProjectConf' sections. - Add proper links to intra-document sections. - Add syntax highlighting for multi-line code snippets. Props to @bockthom for mentioning some of the points in PR #78. 
Signed-off-by: Claus Hunsen --- README.md | 67 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 11d9eb82..2081dca0 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,13 @@ The network library `codeface-extraction-r` can be used to construct analyzable ### Submodule Please insert the project into yours by use of [git submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules). -Furthermore, the file `install.R` installs all needed R packages (see below) into your R library. +Furthermore, the file `install.R` installs all needed R packages (see [below](#needed-r-packages)) into your R library. Although, the use of of [packrat](https://rstudio.github.io/packrat/) with your project is recommended. This library is written in a way to not interfere with the loading order of your project's `R` packages (i.e., `library()` calls), so that the library does not lead to masked definitions. To initialize the library in your project, you need to source all files of the library in your project using the following command: -``` +```R source("path/to/util-init.R", chdir = TRUE) ``` It may lead to unpredictable behavior, when you do not do this, as we need to set some system and environment variables to ensure correct behavior of all functionality (e.g., parsing timestamps in the correct timezone and reading files from disk using the correct encoding). @@ -40,7 +40,7 @@ It may lead to unpredictable behavior, when you do not do this, as we need to se In this section, we give a short example on how to initialize all needed objects and build a bipartite network. For more examples, please see the file `test.R`. 
-``` +```R CF.DATA = "/path/to/codeface-data" # path to codeface data CF.SELECTION.PROCESS = "threemonth" # releases, threemonth(, testing) @@ -57,39 +57,44 @@ net.conf = NetworkConf$new() ## update the values of the NetworkConf object to the specific needs net.conf$update.values(list(author.relation = AUTHOR.RELATION, - artifact.relation = ARTIFACT.RELATION)) + artifact.relation = ARTIFACT.RELATION, + simplify = TRUE)) ## get ranges information from project configuration -ranges = proj.conf$get.entry(entry.name = "ranges") +ranges = proj.conf$get.entry("ranges") ## create data object which actually holds and handles data -cf.data = ProjectData$new(proj.conf, net.conf) +data = ProjectData$new(proj.conf) + +## create network builder to construct networks from the given data object +netbuilder = NetworkBuilder$new(data, net.conf) ## create and get the bipartite network ## (construction configured by net.conf's "artifact.relation") -bpn = cf.data$get.bipartite.network() +bpn = netbuilder$get.bipartite.network() ## plot the retrieved network -plot.bipartite.network(bpn) +plot.network(bpn) + ``` There are two different classes of configuration objects in this library: -- the `ProjectConf` class, which determines all configuration parameters needed for the configured project (mainly data paths) and -- the `NetworkConf` class, which is used for all configuration parameters concerning data retrieval and network construction. +- the `ProjectConf` class which determines all configuration parameters needed for the configured project (mainly data paths) and +- the `NetworkConf` class which is used for all configuration parameters concerning data retrieval and network construction. You can find an overview on all the parameters in these classes below in this file. For examples on how to use both classes and how to build networks with them, please look in the file `test.R`. 
## Configuration Classes -## ProjectConf +### ProjectConf In this section, we give an overview on the parameters of the `ProjectConf` class and their meaning. All parameters can be retrieved with the method `ProjectConf$get.entry(...)`, by passing one parameter name as method parameter. There is no way to update the entries, except for the revision-based parameters. -### Basic Information +#### Basic Information - `project` * The project name from the Codeface analysis @@ -103,7 +108,7 @@ There is no way to update the entries, except for the revision-based parameters. - `mailinglists` * A list of the mailinglists of the project containing their name, type and source -### Artifact-Related Information +#### Artifact-Related Information - `artifact` * The artifact of the project used for all data retrievals @@ -117,9 +122,9 @@ There is no way to update the entries, except for the revision-based parameters. * The Codeface tagging parameter for the project, based on the `artifact` parameter * Either `"proximity"` or `"feature"` -### Revision-Related Information +#### Revision-Related Information -**Note**: This data is updated after performing a data-based splitting (i.e., by calling the functions `split.data.*`). +**Note**: This data is updated after performing a data-based splitting (i.e., by calling the functions `split.data.*(...)`). **Note**: These parameters can be updated using the method `ProjectConf$set.splitting.info()`, but you should *not* do that manually! - `revisions` @@ -134,7 +139,7 @@ There is no way to update the entries, except for the revision-based parameters. - `ranges.callgraph` * The revision ranges based on the list `revisions.callgraph` -### Data Paths +#### Data Paths - `datapath` * The data path to the Codeface results folder of this project @@ -145,9 +150,9 @@ There is no way to update the entries, except for the revision-based parameters. 
- `datapath.pasta` * The data path to the pasta data -### Splitting Information +#### Splitting Information -**Note**: This data is added to the `ProjectConf` object only after performing a data-based splitting (by calling the functions `split.data.*`). +**Note**: This data is added to the `ProjectConf` object only after performing a data-based splitting (by calling the functions `split.data.*(...)`). **Note**: These parameters can be updated using the method `ProjectConf$set.splitting.info()`, but you should *not* do that manually! - `split.type` @@ -165,13 +170,13 @@ There is no way to update the entries, except for the revision-based parameters. - `split.ranges` * The ranges constructed from `split.revisions` (either in sliding-window manner or not, depending on `split.sliding.window`) -### Data-Retrieval-Related Parameters (Configurable!) +#### (Configurable) Data-Retrieval-Related Parameters **Note**: These parameters can be configured using the method `ProjectConf$update.values()`. - `artifact.filter.base` - Remove all artifact information regarding the base artifact - (`Base_Feature` or `File_Level` for features and functions, respectively, as artifacts) + (`"Base_Feature"` or `"File_Level"` for features and functions, respectively, as artifacts) - [*`TRUE`*, `FALSE`] - `synchronicity` * Read and add synchronicity data to commits and co-change-based networks @@ -228,7 +233,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - issue information: *`"issue.id"`*, *`"event.name"`*, `"issue.state"`, `"creation.date"`, `"closing.date"`, `"is.pull.request"` * **Note**: `"date"` is always included as this information is needed for several parts of the library, e.g., time-based splitting. * **Note**: For each type of network that can be built, only the applicable part of the given vector of names is respected. 
- * **Note**: For the edge attributes `"pasta"` and `"synchronicty"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). + * **Note**: For the edge attributes `"pasta"` and `"synchronicity"`, the project configuration's parameters `pasta` and `synchronicity` need to be set to `TRUE`, respectively (see below). - `simplify` * Perform edge contraction to retrieve a simplified network * [`TRUE`, *`FALSE`*] @@ -250,26 +255,32 @@ For more examples, please look in the file `test.R`. ## File overview +- `util-init.R` + * Initialization file that can be used by other analysis projects (see Section [*Submodule*](#submodule)) - `util-conf.R` * The configuration classes of the project +- `util-read.R` + * Functionality to read data file from disk - `util-data.R` * All representations of the data classes -- `util-plot.R` - * Everything needed for plotting networks -- `util-misc.R` - * Helper functions and also legacy functions, both needed in the other files +- `util-networks.R` + * The `NetworkBuilder` class and all corresponding helper functions to construct networks - `util-split.R` * Splitting functionality for data objects and networks (time-based and activity-based, using arbitrary ranges) - `util-motifs.R` * Functionality for the identifaction of network motifs (subgraph patterns) - `util-bulk.R` * Collection functionality for the different network types (using Codeface revision ranges) +- `util-plot.R` + * Everything needed for plotting networks - `util-core-peripheral.R` * Author classification (core and peripheral) and related functions -- `util-init.R` - * Initialization file that can be used by other analysis projects (see Section *Submodule*) +- `util-networks-metrics.R` + * A set of network-metric functions +- `util-misc.R` + * Helper functions and also legacy functions, both needed in the other files - `test.R` - * Showcase file (see Section *How-To*) + * Showcase file (see Section also 
[*How-To*](#how-to)) - `tests.R` * Test suite (running all tests in `tests/` subfolder) From 389a63ba4f43d74020f01478c04a88eda1b6b344 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 15:14:38 +0100 Subject: [PATCH 39/40] Fix and update function documentation in reading module Fix typos in the roxygen documentation of some of the reading functions. Remove wrong documentation for the issue-reading function. Props to @bockthom for pointing this out. Signed-off-by: Claus Hunsen --- util-read.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/util-read.R b/util-read.R index 50b646b5..e1b392a6 100644 --- a/util-read.R +++ b/util-read.R @@ -21,7 +21,7 @@ requireNamespace("digest") # for sha1 hashing of IDs #' Read the commits from the 'commits.list' file. #' #' @param data.path the path to the commit list -#' @param artifact the artifact whichs commits are read +#' @param artifact the artifact whose commits are read #' #' @return the read commits read.commits = function(data.path, artifact) { @@ -98,7 +98,9 @@ read.commits = function(data.path, artifact) { #' Read the commits from the 'commits.list' file. #' #' @param data.path the path to the commit list -#' @param artifact the artifact whichs commits are read +#' @param artifact the artifact whose commits are read +#' +#' Note: This is just a delegate for \code{read.commits(data.path, artifact)}. #' #' @return the read commits read.commits.raw = function(data.path, artifact) { @@ -114,7 +116,7 @@ read.commits.raw = function(data.path, artifact) { #' where artifact and time.window are the given variables. 
#' #' @param data.path the path to the synchronicity data -#' @param artifact the artifact whichs synchronicity data get read +#' @param artifact the artifact whose synchronicity data get read #' @param time.window the time window of the data to be read #' #' @return the read synchronicity data @@ -320,11 +322,10 @@ read.pasta = function(data.path) { ## Issue data -------------------------------------------------------------- #' Read and parse the issue data from the 'issues.list' file. -#' The parsed format is a data frame with message IDs as keys and commit hashes as values. #' -#' @param data.path the path to the pasta data +#' @param data.path the path to the issue data #' -#' @return the read and parsed pasta data +#' @return the read and parsed issue data read.issues = function(data.path) { logging::logdebug("read.issues: starting.") From caa38c1a28a4f12b7e59792770e8413ef17068c2 Mon Sep 17 00:00:00 2001 From: Claus Hunsen Date: Wed, 13 Dec 2017 15:17:53 +0100 Subject: [PATCH 40/40] Rename showcase file to 'showcase.R' For better comprehensibility, the showcase file is renamed to 'showcase.R', as the name 'test.R' was misleading regarding the tests. Signed-off-by: Claus Hunsen --- README.md | 8 ++++---- test.R => showcase.R | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename test.R => showcase.R (100%) diff --git a/README.md b/README.md index 2081dca0..25a8f64e 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ It may lead to unpredictable behavior, when you do not do this, as we need to se ## How-To In this section, we give a short example on how to initialize all needed objects and build a bipartite network. -For more examples, please see the file `test.R`. +For more examples, please see the file `showcase.R`. 
```R CF.DATA = "/path/to/codeface-data" # path to codeface data @@ -83,7 +83,7 @@ There are two different classes of configuration objects in this library: - the `NetworkConf` class which is used for all configuration parameters concerning data retrieval and network construction. You can find an overview on all the parameters in these classes below in this file. -For examples on how to use both classes and how to build networks with them, please look in the file `test.R`. +For examples on how to use both classes and how to build networks with them, please look in the file `showcase.R`. ## Configuration Classes @@ -250,7 +250,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. The classes `ProjectData` and `RangeData` hold instances of the `NetworkConf` class, just pass the object as parameter to the constructor. You can also update the object at any time, but as soon as you do so, all cached data of the data object are reset and have to be rebuilt. -For more examples, please look in the file `test.R`. +For more examples, please look in the file `showcase.R`. ## File overview @@ -279,7 +279,7 @@ For more examples, please look in the file `test.R`. * A set of network-metric functions - `util-misc.R` * Helper functions and also legacy functions, both needed in the other files -- `test.R` +- `showcase.R` * Showcase file (see Section also [*How-To*](#how-to)) - `tests.R` * Test suite (running all tests in `tests/` subfolder) diff --git a/test.R b/showcase.R similarity index 100% rename from test.R rename to showcase.R