Skip to content

Commit

Permalink
Apply Review from @bockthom and @clhunsen
Browse files Browse the repository at this point in the history
Apply the Review from @bockthom and @clhunsen on the previous changes.
This includes compliance of coding conventions, update of copyright
headers and improvement of documentation. Move the functions for
'get.author.names.from.networks' and 'get.expanded.adjacency' to new
file 'util-networks-misc.R'. Also add two functions
'get.author.names.from.data' and
'convert.adjacency.matrix.list.to.array' from the 'dev-network-growth'
project to the new file.

Signed-off-by: fehnkera <[email protected]>
  • Loading branch information
fehnkera committed Sep 19, 2020
1 parent e178cf2 commit b6b8398
Show file tree
Hide file tree
Showing 5 changed files with 339 additions and 206 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ Alternatively, you can run `Rscript install.R` to install the packages.
- `viridis`: For plotting of networks with nice colors
- `jsonlite`: For parsing the issue data
- `rTensor`: For calculating EDCPTD centrality
- `Matrix`: For sparse matrix representation of large adjacency matrices

### Submodule

Expand Down Expand Up @@ -418,6 +419,10 @@ Additionally, for more examples, the file `showcase.R` is worth a look.
* Everything needed for plotting networks
- `util-misc.R`
* Helper functions and also legacy functions, both needed in the other files
- `util-networks-misc.R`
* Helper functions for network creation (e.g., create adjacency matrices)
- `util-tensor.R`
* Functionality to build fourth-order tensors
- `showcase.R`
* Showcase file (see also Section [*How-To*](#how-to))
- `tests.R`
Expand Down
8 changes: 4 additions & 4 deletions showcase.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,13 @@ x = NetworkBuilder$new(project.data = x.data, network.conf = net.conf)
## Calculate EDCPTD centrality ---------------------------------------------

## get author networks for each relation
author.networks = get.author.networks(x, c("cochange", "mail", "issue"))
author.networks = get.author.networks.for.multiple.relations(x, c("cochange", "mail", "issue"))

## create forth-order tensor
forth.order.tensor = ForthOrderTensor$new(author.networks)
## create fourth-order tensor
fourth.order.tensor = FourthOrderTensor$new(author.networks)

## calculate EDCPTD scores
edcptd.scores = calculate.EDCPTD.centrality(forth.order.tensor)
edcptd.scores = calculate.EDCPTD.centrality(fourth.order.tensor)

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Range-level data --------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions util-init.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,5 @@ source("util-plot.R")
source("util-core-peripheral.R")
source("util-networks-metrics.R")
source("util-networks-covariates.R")
source("util-networks-misc.R")
source("util-tensor.R")
203 changes: 203 additions & 0 deletions util-networks-misc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
## This file is part of coronet, which is free software: you
## can redistribute it and/or modify it under the terms of the GNU General
## Public License as published by the Free Software Foundation, version 2.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
##
## Copyright 2016 by Sofie Kemper <[email protected]>
## Copyright 2016 by Claus Hunsen <[email protected]>
## Copyright 2016-2018 by Thomas Bock <[email protected]>
## Copyright 2017 by Angelika Schmid <[email protected]>
## Copyright 2019 by Jakob Kronawitter <[email protected]>
## Copyright 2019-2020 by Anselm Fehnker <[email protected]>
## All Rights Reserved.

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Get active authors -----------------------------------------------------

#' Collect the names of all authors that occur as vertices in the given networks.
#'
#' @param networks the list of networks (the vertex attribute 'name' of each network is read)
#' @param globally if TRUE, return one vector containing every author name exactly once, sorted ascending;
#'                 if FALSE, return a list holding the vertex names of each network separately
#'                 [default: TRUE]
#'
#' @return the author names, either as one vector (globally) or as a list with one entry per network
get.author.names.from.networks = function(networks, globally = TRUE) {

    ## collect the vertex names of every single network
    authors.per.network = lapply(networks, function(network) {
        return(igraph::V(network)$name)
    })

    ## the per-network view needs no further processing
    if (!globally) {
        return(authors.per.network)
    }

    ## merge the per-network vectors into one vector and drop the names inherited from the list
    all.authors = unlist(authors.per.network, recursive = FALSE)
    all.authors = unname(all.authors)

    ## keep each author only once and sort alphabetically ascending
    all.authors = unique(all.authors)
    all.authors = all.authors[order(all.authors)]
    return(all.authors)
}

#' Collect the names of all authors that are active in at least one of the data ranges.
#'
#' An author counts as active in a range if the range's data contains at least one mail
#' (mail analysis) or at least one commit (otherwise) grouped under the author's name.
#'
#' @param dataRanges the list of the data ranges; each entry has to provide the method
#'                   'group.artifacts.by.data.column'
#' @param isMailAnalysis if TRUE, the "mails" data is used, otherwise the "commits" data
#' @param globally if TRUE, return one vector containing every author name exactly once, sorted ascending;
#'                 if FALSE, return a list holding the author names of each range separately
#'                 [default: TRUE]
#'
#' @return the author names, either as one vector (globally) or as a list with one entry per range
get.author.names.from.data = function(dataRanges, isMailAnalysis, globally = TRUE) {

    ## for each range, the active authors are the group names of the data grouped by author name
    authors.per.range = lapply(dataRanges, function(range.data) {
        data.source = if (isMailAnalysis) "mails" else "commits"
        return(names(range.data$group.artifacts.by.data.column(data.source, "author.name")))
    })

    ## the per-range view needs no further processing
    if (!globally) {
        return(authors.per.range)
    }

    ## merge the per-range vectors into one vector and drop the names inherited from the list
    all.authors = unlist(authors.per.range, recursive = FALSE)
    all.authors = unname(all.authors)

    ## keep each author only once and sort alphabetically ascending
    all.authors = unique(all.authors)
    all.authors = all.authors[order(all.authors)]
    return(all.authors)
}

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Adjacency matrices ----------------------------------------------------

#' Build the sparse adjacency matrix of a network, expanded to a given set of authors.
#'
#' The resulting matrix has one row and one column per entry of 'authors'. The adjacency
#' values of the network are copied into the cells addressed by the network's vertex names.
#'
#' @param network the given network
#' @param authors all authors that are wanted in the adjacency matrix
#' @param weighted decides if the adjacency matrix shall be weighted [default: FALSE]
#'
#' @return the sparse adjacency matrix of the network
get.expanded.adjacency = function(network, authors, weighted = FALSE) {

    ## start from an empty sparse matrix of class 'dgTMatrix', sized and labelled by the wanted authors
    author.count = length(authors)
    expanded = Matrix::sparseMatrix(i = c(), j = c(), dims = c(author.count, author.count), giveCsparse = FALSE)
    expanded = as(expanded, "dgTMatrix")
    rownames(expanded) = authors
    colnames(expanded) = authors

    ## a network without vertices contributes nothing, so the empty matrix is the result
    if (igraph::vcount(network) == 0) {
        return(expanded)
    }

    ## adjacency matrix of the network itself, filled from the "weight" attribute if requested
    network.adjacency = if (weighted) {
        igraph::get.adjacency(network, attr = "weight")
    } else {
        igraph::get.adjacency(network)
    }

    ## sort rows and columns by name (skipped for a 1x1 matrix, for which the ordering does not work)
    if (nrow(network.adjacency) > 1) {
        row.order = order(rownames(network.adjacency))
        col.order = order(colnames(network.adjacency))
        network.adjacency = network.adjacency[row.order, col.order]
    }

    ## copy the values of the network into the cells addressed by the vertex names
    if (nrow(network.adjacency) > 0) {
        expanded[rownames(network.adjacency), colnames(network.adjacency)] = network.adjacency
    }

    ## in the unweighted case, every positive entry becomes 1
    if (!weighted) {
        expanded[expanded > 0] = 1
    }

    return(expanded)
}

#' Converts a list of networks to sparse adjacency matrices and sums up these matrices cumulatively.
#' This means that the first entry of the returned list is just the adjacency matrix of the first network,
#' the second entry is the sum of the adjacency matrices of the first and the second network, and so on.
#' All matrices are expanded to the authors of all given networks, so that their dimensions are identical.
#'
#' @param networks list of networks
#' @param weighted decides if the adjacency matrix shall be weighted [default: FALSE]
#'
#' @return the list of cumulated adjacency matrices (an empty list if no networks are given)
get.expanded.adjacency.cumulated = function(networks, weighted = FALSE) {

    ## nothing to cumulate without any network
    if (length(networks) == 0) {
        return(list())
    }

    ## get the expanded adjacency matrices first: 'get.expanded.adjacency' handles one single network
    ## and needs the wanted authors, so it is called per network with the authors of all networks
    authors = get.author.names.from.networks(networks)
    matrices = lapply(networks, function(network) {
        return(get.expanded.adjacency(network, authors, weighted))
    })

    ## pair-wise sum of matrices: m.cumul(m) = m.cumul(m - 1) + matrices(m)
    ## (intermediate results consecutively stored in matrices.cumulated)
    matrices.cumulated = vector("list", length(matrices))
    matrices.cumulated[[1]] = matrices[[1]] # first one is complete already

    for (m in seq_along(matrices)[-1]) {

        previous = matrices.cumulated[[m - 1]]
        current = previous + matrices[[m]]

        ## carry over the row and column names from the previous cumulated matrix
        rownames(current) = rownames(previous)
        colnames(current) = colnames(previous)

        if (!weighted) {
            ## search for a non-zero entry and set it to an arbitrary number (e.g., 42)
            ## to force that all non-zero entries are correctly set to 1 afterwards
            ## NOTE(review): presumably the assignment forces a clean internal representation
            ## before the 'x' slot is overwritten -- confirm against the Matrix package
            not.zero.idxs = which(current >= 1, arr.ind = TRUE)
            if (nrow(not.zero.idxs) > 0) {
                first.not.zero.idx = not.zero.idxs[1, ]
                names(first.not.zero.idx) = c("row", "col")
                current[first.not.zero.idx[["row"]], first.not.zero.idx[["col"]]] = 42
                current@x = rep(1, length(current@i))
            }
        }

        matrices.cumulated[[m]] = current
    }

    return(matrices.cumulated)
}

#' Converts a list of adjacency matrices to a 3-dimensional array.
#'
#' The array has one slice per matrix of the list (third dimension). Its first two dimensions
#' and its row and column names are taken from the first matrix of the list, so all matrices
#' are expected to share the dimensions and the ordering of the first one.
#'
#' @param adjacency.list the list of adjacency matrices
#'
#' @return the converted array, holding the non-zero entries of the i-th matrix in its i-th slice
#'         and 0 everywhere else
convert.adjacency.matrix.list.to.array = function(adjacency.list){

    ## create a 3-dimensional array representing the adjacency matrices (SIENA data format) as result
    author.count = nrow(adjacency.list[[1]])
    result.array = array(data = 0, dim = c(author.count, author.count, length(adjacency.list)))
    rownames(result.array) = rownames(adjacency.list[[1]])
    colnames(result.array) = colnames(adjacency.list[[1]])

    ## copy the activity values from the adjacency matrices in the list to the corresponding array slices
    for (i in seq_along(adjacency.list)) {
        adjacency = adjacency.list[[i]]
        activity.indices = which(adjacency != 0, arr.ind = TRUE)

        ## 'seq_len' yields an empty sequence for a matrix without any non-zero entry
        ## (whereas '1:0' would iterate over the non-existing rows 1 and 0)
        for (j in seq_len(nrow(activity.indices))) {
            ## strip the names from the indices before using them for subsetting
            row = as.vector(activity.indices[j, 1])
            col = as.vector(activity.indices[j, 2])
            result.array[row, col, i] = adjacency[row, col]
        }
    }

    return(result.array)
}
Loading

0 comments on commit b6b8398

Please sign in to comment.