From d82857fbebd1111bb16588a4223bb24a8dcd07de Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 13:50:17 +0100 Subject: [PATCH 001/130] Add commit-interaction data functionality This includes reading and storing the data as well as building author and artifact networks. Signed-off-by: Christian Hechtl Applied-by: Leo Sendelbach --- util-conf.R | 15 ++++++-- util-data.R | 66 ++++++++++++++++++++++++++++++++-- util-networks.R | 58 ++++++++++++++++++++++++++++++ util-read.R | 94 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 226 insertions(+), 7 deletions(-) diff --git a/util-conf.R b/util-conf.R index 0031771a..434fbf96 100644 --- a/util-conf.R +++ b/util-conf.R @@ -15,7 +15,7 @@ ## Copyright 2016 by Wolfgang Mauerer ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl -## Copyright 2020-2021 by Christian Hechtl +## Copyright 2020-2021, 2024 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017-2019 by Thomas Bock ## Copyright 2021, 2023-2024 by Thomas Bock @@ -468,6 +468,12 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 1 ), + commit.interactions = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), custom.event.timestamps.file = list( default = NA, type = "character", @@ -629,6 +635,9 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, conf$datapath.synchronicity = private$get.results.folder(data, selection.process, casestudy, "synchronicity") ## store path to PaStA data conf$datapath.pasta = private$get.results.folder(data, selection.process, casestudy, "pasta") + ## store path to commit interaction data + conf$datapath.commit.interaction = + private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) ## store path to gender data conf$datapath.gender = private$get.results.folder(data, selection.process, casestudy, "gender") ## store path to 
issue data @@ -781,7 +790,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, author.relation = list( default = "mail", type = "character", - allowed = c("mail", "cochange", "issue"), + allowed = c("mail", "cochange", "issue", "interaction"), allowed.number = Inf ), author.directed = list( @@ -812,7 +821,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, artifact.relation = list( default = "cochange", type = "character", - allowed = c("cochange", "callgraph", "mail", "issue"), + allowed = c("cochange", "callgraph", "mail", "issue", "interaction"), allowed.number = Inf ), artifact.directed = list( diff --git a/util-data.R b/util-data.R index e8c9ee4d..be3ae945 100644 --- a/util-data.R +++ b/util-data.R @@ -16,7 +16,7 @@ ## Copyright 2020-2021, 2023-2024 by Thomas Bock ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl -## Copyright 2020 by Christian Hechtl +## Copyright 2020, 2024 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017 by Ferdinand Frank ## Copyright 2018-2019 by Jakob Kronawitter @@ -162,6 +162,7 @@ ProjectData = R6::R6Class("ProjectData", commits = create.empty.commits.list(), # data.frame commits.unfiltered = create.empty.commits.list(), # data.frame commit.messages = create.empty.commit.message.list(), # data.frame + commit.interactions = create.empty.commit.interaction.list(), ## mails mails.unfiltered = create.empty.mails.list(), # data.frame mails = create.empty.mails.list(), # data.frame @@ -404,6 +405,24 @@ ProjectData = R6::R6Class("ProjectData", To clean this up you can call the function 'cleanup.commit.message.data()'.") } }, + + update.commit.interactions = function() { + if (!self$is.data.source.cached("commits.unfiltered")) { + self$get.commits() + } + + print(colnames(private$commit.interactions)) + commit.data.subset = data.frame(hash = private$commits.unfiltered$hash, author.name = private$commits.unfiltered$author.name) + commit.data.subset = 
commit.data.subset[!duplicated(commit.data.subset$hash),] + + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") + colnames(commit.interaction.data)[7] = "base.author" + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") + colnames(commit.interaction.data)[8] = "interacting.author" + + private$commit.interactions = commit.interaction.data + + }, ## * * Gender data -------------------------------------------------- #' Update the gender related fields of: \code{authors} @@ -1186,6 +1205,46 @@ ProjectData = R6::R6Class("ProjectData", } }, + #' Get the commit interaction data. If no data.path is given, the standard data.path + #' will be used. + #' + #' @param data.path an optional different data path to the commit-interaction data + #' + #' @return the commit-interaction data + get.commit.interactions = function(data.path = NULL) { + logging::loginfo("Getting commit interactions.") + + ## if the commit-interaction data have not yet been read do this + if (!self$is.data.source.cached("commit.interactions")) { + if(is.null(data.path)) { + commit.interaction.data = read.commit.interactions(self$get.data.path()) + } else { + commit.interaction.data = read.commit.interactions(data.path) + } + + ## cache the result + private$commit.interactions = commit.interaction.data + private$update.commit.interactions() + } + + return(private$commit.interactions) + }, + + #' Set the commit-interaction data to the new given data. + #' + #' @param data the new commit-interaction data + set.commit.interactions = function(data) { + logging::loginfo("Setting commit messages data.") + + if (is.null(data)) { + data = create.empty.commit.interaction.list() + } + + ## set the actual data + private$commit.interactions = data + # browser() + }, + #' Get the synchronicity data. If it is not already stored in the ProjectData, this function triggers a read in #' from disk. 
#' @@ -1756,7 +1815,8 @@ ProjectData = R6::R6Class("ProjectData", "commit.messages" = "commit.messages", "synchronicity" = "synchronicity", "pasta" = "pasta", - "custom.event.timestamps" = "custom.event.timestamps" + "custom.event.timestamps" = "custom.event.timestamps", + "commit.interactions" = "commit.interactions" ) ) sources = self$get.cached.data.sources.internal(source.type) @@ -1788,7 +1848,7 @@ ProjectData = R6::R6Class("ProjectData", ## define the data sources unfiltered.data.sources = c("commits.unfiltered", "mails.unfiltered", "issues.unfiltered") additional.data.sources = c("authors", "commit.messages", "synchronicity", "pasta", - "gender", "custom.event.timestamps") + "gender", "custom.event.timestamps", "commit.interactions") main.data.sources = c("issues", "commits", "mails") ## set the right data sources to look for according to the argument diff --git a/util-networks.R b/util-networks.R index b02eab69..d949273e 100644 --- a/util-networks.R +++ b/util-networks.R @@ -14,6 +14,7 @@ ## Copyright 2016-2019 by Claus Hunsen ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl +## Copyright 2024 by Christian Hechtl ## Copyright 2017-2019 by Thomas Bock ## Copyright 2021, 2023-2024 by Thomas Bock ## Copyright 2018 by Barbara Eckl @@ -225,6 +226,34 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(author.net) }, + #' Build and get the author network with commit-interactions as the relation. 
+ #' + #' @return the commit-interaction author network + get.author.network.commit.interaction = function() { + ## get the authors that appear in the commit-interaction data as the vertices of the network + vertices = unique(c(private$proj.data$get.commit.interactions()$base.author, + private$proj.data$get.commit.interactions()$interacting.author)) + vertices = data.frame(name = vertices) + + ## get the commit-interaction data as the edge data of the network + edges = private$proj.data$get.commit.interactions() + ## set the authors as the 'to' and 'from' of the network + colnames(edges)[7] = "to" + colnames(edges)[8] = "from" + edges = edges[,c(7,8,1,2,3,4,5,6)] + colnames(edges)[3] = "hash" + author.net.data = list(vertices = vertices, edges = edges) + ## construct the network + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed"), + available.edge.attributes = list(hash = "character") + ) + return(author.net) + }, + #' Get the thread-based author relation as network. #' If it does not already exist build it first. #' @@ -345,6 +374,33 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, + #' Build and get the commit-interaction based artifact network.
+ #' + #' @return the commit-interaction based artifact network + get.artifact.network.commit.interaction = function() { + ## get the commits that appear in the commit-interaction data as the vertices of the network + vertices = unique(c(private$proj.data$get.commit.interactions()$base.file, private$proj.data$get.commit.interactions()$file)) + vertices = data.frame(name = vertices) + ## get the commit-interaction data as the edge data of the network + edges = private$proj.data$get.commit.interactions() + ## set the commits as the 'to' and 'from' of the network + colnames(edges)[6] = "to" + colnames(edges)[4] = "from" + edges = edges[,c(6,4,1,2,3,5)] + colnames(edges)[3] = "hash" + author.net.data = list(vertices = vertices, edges = edges) + ## construct the network + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed"), + available.edge.attributes = list(hash = "character") + ) + + return(author.net) + }, + #' Get the call-graph-based artifact network. #' If it does not already exist build it first. #' IMPORTANT: This only works for range-level analyses! 
@@ -743,6 +799,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network = switch( relation, cochange = private$get.author.network.cochange(), + interaction = private$get.author.network.commit.interaction(), mail = private$get.author.network.mail(), issue = private$get.author.network.issue(), stop(sprintf("The author relation '%s' does not exist.", rel)) @@ -810,6 +867,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$get.artifact.network.callgraph(), mail = private$get.artifact.network.mail(), issue = private$get.artifact.network.issue(), + interaction = private$get.artifact.network.commit.interaction(), stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) diff --git a/util-read.R b/util-read.R index 8cfe1a80..4793f876 100644 --- a/util-read.R +++ b/util-read.R @@ -14,7 +14,7 @@ ## Copyright 2016-2019 by Claus Hunsen ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl -## Copyright 2020-2022 by Christian Hechtl +## Copyright 2020-2022, 2024 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017-2018 by Thomas Bock ## Copyright 2023-2024 by Thomas Bock @@ -42,6 +42,7 @@ requireNamespace("plyr") requireNamespace("digest") # for sha1 hashing of IDs requireNamespace("sqldf") # for SQL-selections on data.frames requireNamespace("data.table") # for faster data.frame processing +requireNamespace("yaml") # for reading commit interaction data ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper functions -------------------------------------------------------- @@ -843,6 +844,97 @@ create.empty.pasta.list = function() { return(create.empty.data.frame(PASTA.LIST.COLUMNS, PASTA.LIST.DATA.TYPES)) } +## * Commit interaction data ----------------------------------------------- + +## column names of a dataframe containing commit interaction data (see function \code{read.commit.interactions}) +COMMIT.INTERACTION.LIST.COLUMNS = c( + "base.hash", "function", 
"file", + "interacting.hash" +) + +## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' +COMMIT.INTERACTION.LIST.DATA.TYPES = c( + "character", "character", "character", + "character" +) + +#' Read and parse the commit-interaction data. This data is present in a `.yaml` file which +#' needs to be broken down. Within the yaml file, there are different lists in which each +#' commit (hash) gets mapped to all commits it interacts with and the file/function because of +#' which they interact. +#' +#' +#' @param data.path the path to the commit-interaction data +#' +#' @return the read and parsed commit-interaction data +read.commit.interactions = function(data.path = NULL) { + + file = file.path(data.path, "commit-interactions.yaml") + # file = file.path("/scratch/hechtl/htop-new", "c5b0ccb9f9.yaml") + + commit.interaction.base = try(yaml::read_yaml(file = file), silent = TRUE) + + ## handle the case that the list of commit-interactions is empty + if (inherits(commit.interaction.base, "try-error")) { + logging::logwarn("There are no commit-interactions available for the current environment.") + logging::logwarn("Datapath: %s", data.path) + + # return a dataframe with the correct columns but zero rows + return(create.empty.commit.interaction.list()) + } + + ## extract the top level list of the yaml file which is called 'result-map' + result.map = commit.interaction.base$`result-map` + + ## extract a mapping of functions to files to be able to determine what file the current interaction is + ## based on + file.name.map = fastmap::fastmap() + function.file.list = purrr::map(result.map, 2) + file.name.map$mset(.list = function.file.list) + list.names = names(result.map) + + ## build the result dataframe by iterating over the 'result-map' list + commit.interaction.data = data.table::setDF(data.table::rbindlist(parallel::mcmapply(result.map, list.names, + SIMPLIFY = FALSE, + FUN = function(current.interaction, + function.name) { + ## get 
all commits that interact with the current one + insts = current.interaction[[4]] + interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { + base.hash = current.inst[[1]][[3]] + interacting.hashes = current.inst[[2]] + interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { + ## if there is no function name in the current interaction we set the function name to 'GLOBAL' + ## as this is most likely code outside of functions, else we set the function name + if (!"function" %in% names(hash)) { + return(data.frame(func = "GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) + } else { + return(data.frame(func = hash[["function"]], commit.hash = hash[["commit"]], + file = file.name.map$get(hash[["function"]]))) + } + }))) + interacting.hashes.df$base.hash = base.hash + interacting.hashes.df$base.func = function.name + interacting.hashes.df$base.file = file.name.map$get(function.name) + return(interacting.hashes.df) + }))) + return(interactions) + }))) + + ## remove all duplicate entries from the resulting dataframe + commit.interaction.data = commit.interaction.data[!duplicated(commit.interaction.data), ] + return(commit.interaction.data) +} + +#' Create an empty dataframe which has the same shape as a dataframe containing commit interaction data. +#' The dataframe has the column names and column datatypes defined in \code{COMMIT.INTERACTION.LIST.COLUMNS} +#' and \code{COMMIT.INTERACTION.LIST.DATA.TYPES}, respectively. 
+#' +#' @return the empty dataframe +create.empty.commit.interaction.list = function() { + return (create.empty.data.frame(COMMIT.INTERACTION.LIST.COLUMNS, COMMIT.INTERACTION.LIST.DATA.TYPES)) +} + ## * Synchronicity data ---------------------------------------------------- ## column names of a dataframe containing synchronicity data (see function \code{read.synchronicity}) From b4fd2a29c9b5fd561b1106c6febb54a32b0085ab Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 13:56:48 +0100 Subject: [PATCH 002/130] Add functionality for equals function Also removed comments and browser statements, as well as added updating of commit-interaction data when commit data is changed if commit-interactions are configured Signed-off-by: Leo Sendelbach --- util-data.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/util-data.R b/util-data.R index be3ae945..a904096c 100644 --- a/util-data.R +++ b/util-data.R @@ -26,6 +26,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2022-2023 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -77,7 +78,8 @@ DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION = list( "synchronicity" = "get.synchronicity", "pasta" = "get.pasta", "gender" = "get.gender", - "custom.event.timestamps" = "get.custom.event.timestamps" + "custom.event.timestamps" = "get.custom.event.timestamps", + "commit.interactions" = "get.commit.interactions" ) #' Applies a function to list keys @@ -410,16 +412,15 @@ ProjectData = R6::R6Class("ProjectData", if (!self$is.data.source.cached("commits.unfiltered")) { self$get.commits() } - - print(colnames(private$commit.interactions)) - commit.data.subset = data.frame(hash = private$commits.unfiltered$hash, author.name = private$commits.unfiltered$author.name) - commit.data.subset = commit.data.subset[!duplicated(commit.data.subset$hash),] - + commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], author.name = private$commits.unfiltered[["author.name"]]) + commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") - colnames(commit.interaction.data)[7] = "base.author" - commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") - colnames(commit.interaction.data)[8] = "interacting.author" + colnames(commit.interaction.data)[[7]] = "base.author" + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") + colnames(commit.interaction.data)[[8]] = "interacting.author" + private$commit.interactions = commit.interaction.data }, @@ -1130,6 +1131,17 @@ ProjectData = R6::R6Class("ProjectData", } } + ## add commit interaction data if wanted + if (private$project.conf$get.value("commit.interactions")) { + if (!self$is.data.source.cached("commit.interactions")) { + ## get data (no assignment because we just want to trigger anything commit.interaction related) + 
self$get.commit.interactions() + } else { + ## update all commit.interaction-related data + private$update.commit.interactions() + } + } + ## sort by date private$commits.unfiltered = private$commits.unfiltered[order(private$commits.unfiltered[["date"]], decreasing = FALSE), ] @@ -1242,7 +1254,6 @@ ProjectData = R6::R6Class("ProjectData", ## set the actual data private$commit.interactions = data - # browser() }, #' Get the synchronicity data. If it is not already stored in the ProjectData, this function triggers a read in From b3394eec4b0f2556f184eecd4de332a422afcbb7 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:00:47 +0100 Subject: [PATCH 003/130] Remove outdated comment in 'util-read.R' outdated comment with local data path removed Signed-off-by: Leo Sendelbach --- util-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-read.R b/util-read.R index 4793f876..929af7b3 100644 --- a/util-read.R +++ b/util-read.R @@ -25,6 +25,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2022-2023 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved.
## Note: @@ -870,7 +871,6 @@ COMMIT.INTERACTION.LIST.DATA.TYPES = c( read.commit.interactions = function(data.path = NULL) { file = file.path(data.path, "commit-interactions.yaml") - # file = file.path("/scratch/hechtl/htop-new", "c5b0ccb9f9.yaml") commit.interaction.base = try(yaml::read_yaml(file = file), silent = TRUE) From eeba7e29932bc973513c963fb9e716e9230d570f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:03:06 +0100 Subject: [PATCH 004/130] Add test for new functionality of 'equals' Uses 'equals' function on Project Data with new commit-interactions Signed-off-by: Leo Sendelbach --- tests/test-data.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test-data.R b/tests/test-data.R index 9c6f4f8c..c7730692 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -20,6 +20,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2023 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -511,3 +512,22 @@ test_that("Create RangeData objects from Codeface ranges and check data path", { expect_identical(range.paths, expected.paths, "RangeData data paths") }) + +test_that("Compare two ProjectData Objects with commit.interactions", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + + proj.data.one = ProjectData$new(project.conf = proj.conf) + proj.data.two = proj.data.one$clone(deep = TRUE) + + ## test if the project data is equal and the commit interactions are as well + expect_equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()) + expect_true(proj.data.one$equals(proj.data.two)) + + ## change commit interactions of one project data and assert that equality check fails + proj.data.two$set.commit.interactions(create.empty.commit.interaction.list()) + expect_false(proj.data.one$equals(proj.data.two)) +}) From 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:08:36 +0100 Subject: [PATCH 005/130] Add test for new read functionality new test reads commit-interactions data and asserts its correctness Signed-off-by: Leo Sendelbach --- tests/test-networks-author.R | 1 + tests/test-read.R | 26 ++++++++++++++++++++++++++ util-networks.R | 1 + 3 files changed, 28 insertions(+) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index d4d0e9fa..d29d74e0 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -22,6 +22,7 @@ ## Copyright 2018-2019 by Anselm Fehnker ## Copyright 2021 by Johannes Hostert ## Copyright 2023-2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
diff --git a/tests/test-read.R b/tests/test-read.R index db3645d4..3cc0faff 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -22,6 +22,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2022-2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. @@ -497,3 +498,28 @@ test_that("Read and parse the issue data.", { expect_identical(issue.data.read.github, issue.data.expected.github, info = "Issue data github.") }) +test_that("Read the commit-interactions data.", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + + ## read the actual data + commit.interactions.data.read = read.commit.interactions(proj.conf$get.value("datapath")) + ## build the expected data.frame + commit.interactions.data.expected = data.frame(func = c("test.c", "test2.c", "test2.c", "test2.c"), + commit.hash = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", + "0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("test.c", "test2.c", "test2.c", "test2.c"), + base.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test.c", "test2.c", "test2.c", "test2.c"), + base.file = c("test.c", "test2.c", "test2.c", "test2.c")) + + ## check the results + expect_identical(commit.interactions.data.read, commit.interactions.data.expected, + info = "commit interaction data.") +}) \ No newline at end of file diff --git a/util-networks.R b/util-networks.R index d949273e..f7f59b88 100644 --- a/util-networks.R +++ b/util-networks.R @@ -23,6 +23,7 @@ ## Copyright 2021 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann ## Copyright 2023-2024 by Maximilian 
Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. From 54b6f655248720436af116fe72521f9cb0348429 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:10:06 +0100 Subject: [PATCH 006/130] Add test data files with commit interactions An empty file and an example file with four interactions Signed-off-by: Leo Sendelbach --- .../proximity/commit-interactions.yaml | 0 .../proximity/commit-interactions.yaml | 59 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/codeface-data/results/testing/test_empty_proximity/proximity/commit-interactions.yaml create mode 100644 tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml diff --git a/tests/codeface-data/results/testing/test_empty_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_empty_proximity/proximity/commit-interactions.yaml new file mode 100644 index 00000000..e69de29b diff --git a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml new file mode 100644 index 00000000..e424236d --- /dev/null +++ b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml @@ -0,0 +1,59 @@ +scope: REGION +result-map: + test.c: + demangled-name: test.c + file: test.c + num-instructions: 30 + insts: + - base-hash: + region: 45620620587549 + function: test.c + commit: 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 + repository: test-repo + interacting-hashes: + - region: 87546092348456 + function: test.c + commit: 5a5ec9675e98187e1e92561e1888aa6f04faa338 + repository: test-repo + amount: 2 + callees: + - test_callee + commits: + - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af + repository: test-repo + test2.c: + demangled-name: test2.c + file: test2.c + num-instructions: 26 + insts: + - base-hash: + region: 50956672345141 + function: test2.c + commit: 
3a0ed78458b3976243db6829f63eba3eead26774 + repository: test-repo + interacting-hashes: + - region: 98750276234511 + function: test2.c + commit: 0a1a5c523d835459c42f33e863623138555e2526 + repository: test-repo + amount: 1 + - base-hash: + region: 67230588834344 + function: test2.c + commit: 0a1a5c523d835459c42f33e863623138555e2526 + repository: test-repo + interacting-hashes: + - region: 33295067820043 + function: test2.c + commit: 418d1dc4929ad1df251d2aeb833dd45757b04a6f + repository: test-repo + - region: 20194653678423 + function: test2.c + commit: d01921773fae4bed8186b0aa411d6a2f7a6626e6 + repository: test-repo + amount: 3 + callees: + - test_callee + commits: + - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af + repository: test-repo \ No newline at end of file From 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 14 Feb 2024 17:02:21 +0100 Subject: [PATCH 007/130] Add test for reading empty commit-interactions data Checks that the empty dataframe has correct col and rownames Signed-off-by: Leo Sendelbach --- tests/test-read.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test-read.R b/tests/test-read.R index 3cc0faff..f6d77199 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -522,4 +522,23 @@ test_that("Read the commit-interactions data.", { ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") +}) + +test_that("Read the empty commit-interactions data.", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + + ## read the actual data + commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ + test_empty_proximity/proximity") + ## build the expected data.frame + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 
4)) + colnames(commit.interactions.data.expected) = c("base.hash", "function", "file", "interacting.hash") + for(i in seq_len(4)) { + commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) + } + ## check the results + expect_identical(commit.interactions.data.read, commit.interactions.data.expected, + info = "commit interaction data.") }) \ No newline at end of file From 7b8585f87675795822c07230192d6454de31dcc7 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 14 Feb 2024 17:24:37 +0100 Subject: [PATCH 008/130] Add test for change in set.commits Test that commit-interactions are updated when they are configured and commit data is changed Signed-off-by: Leo Sendelbach --- tests/test-data.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test-data.R b/tests/test-data.R index c7730692..69e56f18 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -530,4 +530,15 @@ test_that("Compare two ProjectData Objects with commit.interactions", { ## change commit interactions of one project data and assert that equality check fails proj.data.two$set.commit.interactions(create.empty.commit.interaction.list()) expect_false(proj.data.one$equals(proj.data.two)) + + ## change commit data in one to test if commit-interactions are correctly updated + ## call get.commit.interactions() once to restore read interactions + proj.data.two$get.commit.interactions() + + ## change commits in one project data + commit.data = proj.data.one$get.commits() + commit.data[["hash"]][[5]] = 1 + proj.data.one$set.commits(commit.data) + ## inequality? 
+ expect_equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()) }) From d7dc713ee1cc1b9bd0b8e74967c1028805a1b1e4 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 13:08:24 +0100 Subject: [PATCH 009/130] Add comments for update.commit.interactions Also added some linebreaks Signed-off-by: Leo Sendelbach --- util-data.R | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/util-data.R b/util-data.R index a904096c..f566fb94 100644 --- a/util-data.R +++ b/util-data.R @@ -408,19 +408,30 @@ ProjectData = R6::R6Class("ProjectData", } }, + ## * * Commit Interaction data -------------------------------------------------- + + #' Update the commit-interactions + #' + #' This method should be called whenever the field \code{commit.interactions} is changed. update.commit.interactions = function() { if (!self$is.data.source.cached("commits.unfiltered")) { self$get.commits() } - commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], author.name = private$commits.unfiltered[["author.name"]]) + + ## get relevant data from commits + commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], + author.name = private$commits.unfiltered[["author.name"]]) commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] - - commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") + + ## merge commit interactions with commits and change colnames to avoid duplicates + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, + by.x = "base.hash", by.y = "hash") colnames(commit.interaction.data)[[7]] = "base.author" - commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, + by.x = "commit.hash", by.y = "hash") 
colnames(commit.interaction.data)[[8]] = "interacting.author" - + private$commit.interactions = commit.interaction.data }, From f25632c6c331f6d2c5c6ff1d9fadf369fd2d8acd Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 13:28:16 +0100 Subject: [PATCH 010/130] Change indexes for 'match' calls in 'update.commit.interactions' Signed-off-by: Leo Sendelbach --- util-data.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/util-data.R b/util-data.R index f566fb94..c1aeac6f 100644 --- a/util-data.R +++ b/util-data.R @@ -426,11 +426,15 @@ ProjectData = R6::R6Class("ProjectData", ## merge commit interactions with commits and change colnames to avoid duplicates commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") - colnames(commit.interaction.data)[[7]] = "base.author" + + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "base.author" commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") - colnames(commit.interaction.data)[[8]] = "interacting.author" + + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "interacting.author" private$commit.interactions = commit.interaction.data From 8fcc6d5b3fcaf9c0a3ff45faf6375e437810146f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 14:46:54 +0100 Subject: [PATCH 011/130] Fix test to correctly check for inequality using 'expect_false(isTRUE(all.equal(x, y)))' Signed-off-by: Leo Sendelbach --- tests/test-data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 69e56f18..3d87a918 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -539,6 +539,6 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.data = 
proj.data.one$get.commits() commit.data[["hash"]][[5]] = 1 proj.data.one$set.commits(commit.data) - ## inequality? - expect_equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()) + + expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) }) From 9117be811e20c9900136d21733e4552fdff05b48 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 14:57:43 +0100 Subject: [PATCH 012/130] Change colnames used for empty commit-interactions Also change the test to reflect this change Signed-off-by: Leo Sendelbach --- tests/test-read.R | 7 ++++--- util-read.R | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test-read.R b/tests/test-read.R index f6d77199..2cd0df6b 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -533,9 +533,10 @@ test_that("Read the empty commit-interactions data.", { commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ test_empty_proximity/proximity") ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 4)) - colnames(commit.interactions.data.expected) = c("base.hash", "function", "file", "interacting.hash") - for(i in seq_len(4)) { + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 6)) + colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", + "base.hash", "base.func", "base.file") + for(i in seq_len(6)) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## check the results diff --git a/util-read.R b/util-read.R index 929af7b3..22206f35 100644 --- a/util-read.R +++ b/util-read.R @@ -849,14 +849,14 @@ create.empty.pasta.list = function() { ## column names of a dataframe containing commit interaction data (see function \code{read.commit.interactions}) COMMIT.INTERACTION.LIST.COLUMNS = c( - "base.hash", "function", "file", - 
"interacting.hash" + "func", "commit.hash", "file", + "base.hash", "base.func", "base.file" ) ## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character", "character", - "character" + "character", "character", "character" ) #' Read and parse the commit-interaction data. This data is present in a `.yaml` file which From 49acd59d74b05d85ae63270b339ff5fd6cc90fc5 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 16:25:04 +0100 Subject: [PATCH 013/130] Remove previously added columns to avoid duplication 'update.commit.interactions' no longer duplicates 'base.author' and 'interacting.author' columns Signed-off-by: Leo Sendelbach --- util-data.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/util-data.R b/util-data.R index c1aeac6f..8efee888 100644 --- a/util-data.R +++ b/util-data.R @@ -418,6 +418,16 @@ ProjectData = R6::R6Class("ProjectData", self$get.commits() } + ## remove existing columns named 'base.author' and 'interaction.author' + indices.to.remove = which("base.author" == colnames(private$commit.interactions)) + if (length(indices.to.remove)>0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } + indices.to.remove = which("interacting.author" == colnames(private$commit.interactions)) + if (length(indices.to.remove)>0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } + ## get relevant data from commits commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], author.name = private$commits.unfiltered[["author.name"]]) From 3efb38b398157fa2bdfd01f7356170609e6ab760 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 13:38:19 +0100 Subject: [PATCH 014/130] Change merge in 'update.commit.interactions' The merge now always keeps all commit interactions, even if there is no commit data for them (for example if the commits were made 
by deleted users). Also introduced a warning if that happens. Case is tested with new part in 'test-data.R' Signed-off-by: Leo Sendelbach --- tests/test-data.R | 33 ++++++++++++++++++++++++++++++++- util-data.R | 15 +++++++++++---- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 3d87a918..98116323 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -540,5 +540,36 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.data[["hash"]][[5]] = 1 proj.data.one$set.commits(commit.data) - expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) + expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), + proj.data.two$get.commit.interactions()))) + + ## set commit list of one project data to empty and test that last + ## two rows of result data frame are empty + proj.data.two$set.commits(create.empty.commits.list()) + + ## create empty data frame of correct size + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + ## assure that the correct type is used + for(i in seq_len(8)) { + commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) + } + ## set everything except for authors as expected + colnames(commit.interactions.data.expected) = c("commit.hash", "base.hash", "func", "file", + "base.func", "base.file", "base.author", + "interacting.author") + commit.interactions.data.expected[["commit.hash"]] = + c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6") + commit.interactions.data.expected[["base.hash"]] = + c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526") + 
commit.interactions.data.expected[["func"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["file"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test.c", "test2.c") + expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) }) diff --git a/util-data.R b/util-data.R index 8efee888..5903575c 100644 --- a/util-data.R +++ b/util-data.R @@ -420,11 +420,11 @@ ProjectData = R6::R6Class("ProjectData", ## remove existing columns named 'base.author' and 'interaction.author' indices.to.remove = which("base.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove)>0) { + if (length(indices.to.remove) > 0) { private$commit.interactions = private$commit.interactions[, -indices.to.remove] } indices.to.remove = which("interacting.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove)>0) { + if (length(indices.to.remove) > 0) { private$commit.interactions = private$commit.interactions[, -indices.to.remove] } @@ -435,17 +435,24 @@ ProjectData = R6::R6Class("ProjectData", ## merge commit interactions with commits and change colnames to avoid duplicates commit.interaction.data = merge(private$commit.interactions, commit.data.subset, - by.x = "base.hash", by.y = "hash") + by.x = "base.hash", by.y = "hash", all.x = TRUE) author.index = match("author.name", colnames(commit.interaction.data)) colnames(commit.interaction.data)[[author.index]] = "base.author" commit.interaction.data = merge(commit.interaction.data, commit.data.subset, - by.x = "commit.hash", by.y = "hash") + by.x = "commit.hash", by.y = "hash", all.x = TRUE) author.index = match("author.name", colnames(commit.interaction.data)) colnames(commit.interaction.data)[[author.index]] = "interacting.author" + ## warning if we have 
interactions without authors + if (anyNA(commit.interaction.data[["base.author"]]) || + anyNA(commit.interaction.data[["interacting.author"]])) { + logging::logwarn("There are authors in the commit-interactions that are not in the commit data! + This results in the commit-interactions having empty entries. + To clean up these entries, call cleanup.commit.interactions.") + } private$commit.interactions = commit.interaction.data }, From 099a096065fc20ebdc745e6d19c219a8fb9e5999 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 13:54:24 +0100 Subject: [PATCH 015/130] Add additional columns to commit-interactions columns 'base.author' and 'interacting.author' are initialized as NA and will be overwritten in 'update.commit.interactions'. Also, introduce check for correctness of returned data frame at the end of 'read.commit.interactions'. Signed-off-by: Leo Sendelbach --- util-read.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/util-read.R b/util-read.R index 22206f35..720a14ce 100644 --- a/util-read.R +++ b/util-read.R @@ -850,13 +850,15 @@ create.empty.pasta.list = function() { ## column names of a dataframe containing commit interaction data (see function \code{read.commit.interactions}) COMMIT.INTERACTION.LIST.COLUMNS = c( "func", "commit.hash", "file", - "base.hash", "base.func", "base.file" + "base.hash", "base.func", "base.file", + "base.author", "interacting.author" ) ## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character", "character", - "character", "character", "character" + "character", "character", "character", + "character", "character" ) #' Read and parse the commit-interaction data. 
This data is present in a `.yaml` file which @@ -918,11 +920,14 @@ read.commit.interactions = function(data.path = NULL) { interacting.hashes.df$base.file = file.name.map$get(function.name) return(interacting.hashes.df) }))) + interactions["base.author"] = NA_character_ + interactions["interacting.author"] = NA_character_ return(interactions) }))) ## remove all duplicate entries from the resulting dataframe commit.interaction.data = commit.interaction.data[!duplicated(commit.interaction.data), ] + verify.data.frame.columns(commit.interaction.data, COMMIT.INTERACTION.LIST.COLUMNS, COMMIT.INTERACTION.LIST.DATA.TYPES) return(commit.interaction.data) } From 6f73cff75c142239afa43ecc5294bd90067fbf7d Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 13:57:34 +0100 Subject: [PATCH 016/130] Change test to reflect change to dataframe columns Test now checks for 'base.author' and 'interacting.author' as intended Signed-off-by: Leo Sendelbach --- tests/test-read.R | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/test-read.R b/tests/test-read.R index 2cd0df6b..bb242e0b 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -506,19 +506,30 @@ test_that("Read the commit-interactions data.", { ## read the actual data commit.interactions.data.read = read.commit.interactions(proj.conf$get.value("datapath")) ## build the expected data.frame - commit.interactions.data.expected = data.frame(func = c("test.c", "test2.c", "test2.c", "test2.c"), - commit.hash = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", - "0a1a5c523d835459c42f33e863623138555e2526", - "418d1dc4929ad1df251d2aeb833dd45757b04a6f", - "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), - file = c("test.c", "test2.c", "test2.c", "test2.c"), - base.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", - "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", - "0a1a5c523d835459c42f33e863623138555e2526"), - 
base.func = c("test.c", "test2.c", "test2.c", "test2.c"), - base.file = c("test.c", "test2.c", "test2.c", "test2.c")) + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + ## assure that the correct type is used + for(i in seq_len(8)) { + commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) + } + ## set everything except for authors as expected + colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", "base.hash", + "base.func", "base.file", "base.author", + "interacting.author") + commit.interactions.data.expected[["commit.hash"]] = + c("5a5ec9675e98187e1e92561e1888aa6f04faa338", + "0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6") + commit.interactions.data.expected[["base.hash"]] = + c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526") + commit.interactions.data.expected[["func"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["file"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["base.file"]] = c("test.c", "test2.c", "test2.c", "test2.c") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") @@ -533,10 +544,11 @@ test_that("Read the empty commit-interactions data.", { commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ test_empty_proximity/proximity") ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 6)) + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 8)) 
colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", - "base.hash", "base.func", "base.file") - for(i in seq_len(6)) { + "base.hash", "base.func", "base.file", + "base.author", "interacting.author") + for(i in seq_len(8)) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## check the results From fd0aa05f824b93545ae8e05833b95b3bd9809286 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 14:32:33 +0100 Subject: [PATCH 017/130] Add 'cleanup.commit.interactions' function Function removes lines from commit-interactions that do not contain an author in either 'base.author' or 'interacting.author' Signed-off-by: Leo Sendelbach --- util-data.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/util-data.R b/util-data.R index 5903575c..71458d4e 100644 --- a/util-data.R +++ b/util-data.R @@ -1288,6 +1288,24 @@ ProjectData = R6::R6Class("ProjectData", private$commit.interactions = data }, + #' Remove lines in the commit-interactions data that do not contain authors. + #' This should only be called AFTER 'update.commit.interactions' has already been called, as otherwise + #' all commit-interactions data will be removed + cleanup.commit.interactions = function() { + logging::loginfo("Cleaning up commit-interactions") + + ## remove commit-interactions that do not contain author in 'base.author' + indices.to.remove = which(is.na(private$commit.interactions[["base.author"]])) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[-indices.to.remove, ] + } + ## remove commit-interactions that do not contain author in 'interacting.author' + indices.to.remove = which(is.na(private$commit.interactions[["interacting.author"]])) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[-indices.to.remove, ] + } + }, + #' Get the synchronicity data. 
If it is not already stored in the ProjectData, this function triggers a read in #' from disk. #' From ef725407bf8818c8fff96ea6f343338b7162cbe0 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 14:35:03 +0100 Subject: [PATCH 018/130] Add test for cleanup function Test that a line is removed from a data frame with a missing author Signed-off-by: Leo Sendelbach --- tests/test-data.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test-data.R b/tests/test-data.R index 98116323..7ee1d0c9 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -543,6 +543,12 @@ test_that("Compare two ProjectData Objects with commit.interactions", { expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) + ## The data frame should still have 4 entries: + expect_true(nrow(proj.data.one$get.commit.interactions()) == 4) + ## after cleanup is called, the data frame should only have 3 entries: + proj.data.one$cleanup.commit.interactions() + expect_true(nrow(proj.data.one$get.commit.interactions()) == 3) + ## set commit list of one project data to empty and test that last ## two rows of result data frame are empty proj.data.two$set.commits(create.empty.commits.list()) From 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 13:30:03 +0100 Subject: [PATCH 019/130] Add test for author network Test easy construction of an author network with interaction as relation Signed-off-by: Leo Sendelbach --- tests/test-networks-author.R | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index d29d74e0..32989490 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -23,6 +23,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach + ## All Rights Reserved. 
@@ -677,3 +678,47 @@ test_that("Network construction with only untracked files (no edges expected)", ## test expect_true(igraph::identical_graphs(network.built, network.expected)) }) + +test_that("Network construction with commit-interactions as relation", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.value("author.relation", "interaction") + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.author.network() + + ## build the expected nbetwork + vertices = data.frame( + name = c("Olaf", "Thomas", "Björn", "Karl"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR + ) + edges = data.frame( + from = c("Olaf", "Thomas", "Björn", "Thomas"), + to = c("Thomas", "Karl", "Olaf", "Thomas"), + func = c("test2.c", "test2.c", "test.c", "test2.c"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("test2.c", "test2.c", "test.c", "test2.c"), + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test2.c", "test2.c", "test.c", "test2.c"), + base.file = c("test2.c", "test2.c", "test.c", "test2.c"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("interaction", "interaction", "interaction", "interaction") + ) + network = igraph::graph.data.frame(edges, directed = 
FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}) \ No newline at end of file From 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 13:57:00 +0100 Subject: [PATCH 020/130] Change 'util-networks.R' to use colnames Also in 'get.artifact.network.commit.interaction' distinguish between 'file' and 'function' artifact networks Signed-off-by: Leo Sendelbach --- util-networks.R | 67 +++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/util-networks.R b/util-networks.R index f7f59b88..77d9dce5 100644 --- a/util-networks.R +++ b/util-networks.R @@ -134,10 +134,11 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.vertex.kind.for.relation = function(relation) { vertex.kind = switch(relation, - cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), - callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), - mail = "MailThread", - issue = "Issue" + cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), + callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), + mail = "MailThread", + issue = "Issue", + interaction = "Interaction" ) return(vertex.kind) @@ -232,17 +233,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @return the commit-interaction author network get.author.network.commit.interaction = function() { ## get the authors that appear in the commit-interaction data as the vertices of the network - vertices = unique(c(private$proj.data$get.commit.interactions()$base.author, - private$proj.data$get.commit.interactions()$interacting.author)) + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.author"]], + private$proj.data$get.commit.interactions()[["interacting.author"]])) vertices = data.frame(name = vertices) ## get the commit-interaction data as the edge data of the network edges = 
private$proj.data$get.commit.interactions() - ## set the authors as the 'to' and 'from' of the network - colnames(edges)[7] = "to" - colnames(edges)[8] = "from" - edges = edges[,c(7,8,1,2,3,4,5,6)] - colnames(edges)[3] = "hash" + ## set the authors as the 'to' and 'from' of the network and order the dataframe + edges = edges[, c("base.author", "interacting.author", "func", "commit.hash", + "file", "base.hash", "base.func", "base.file")] + colnames(edges)[1] = "to" + colnames(edges)[2] = "from" + colnames(edges)[4] = "hash" author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -380,26 +382,43 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @return the commit-interaction based artifact network get.artifact.network.commit.interaction = function() { ## get the commits that appear in the commit-interaction data as the vertices of the network - vertices = unique(c(private$proj.data$get.commit.interactions()$base.file, private$proj.data$get.commit.interactions()$file)) + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], + private$proj.data$get.commit.interactions()[["file"]])) vertices = data.frame(name = vertices) ## get the commit-interaction data as the edge data of the network edges = private$proj.data$get.commit.interactions() - ## set the commits as the 'to' and 'from' of the network - colnames(edges)[6] = "to" - colnames(edges)[4] = "from" - edges = edges[,c(6,4,1,2,3,5)] - colnames(edges)[3] = "hash" - author.net.data = list(vertices = vertices, edges = edges) + ## set 'to' and 'from' of the network according to the config + ## and order the dataframe accordingly + proj.conf = private$proj.data$get.project.conf() + if (proj.conf$get.value("artifact") == "file") { + edges = edges[, c("file", "base.file", "func", "commit.hash", + "base.hash", "base.func", "base.author", "interacting.author")] + colnames(edges)[4] = "hash" + } else { + if 
(proj.conf$get.value("artifact") == "function") { + edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", + "base.file", "base.author", "interacting.author")] + colnames(edges)[3] = "hash" + } else { + ## if neither 'function' nor 'file' was configured, send a warning + ## and return an empty network + logging::logwarn("when creating a commit-interaction artifact network, + the artifact relation should be either 'file' or 'function'!") + return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) + } + } + colnames(edges)[1] = "to" + colnames(edges)[2] = "from" + artifact.net.data = list(vertices = vertices, edges = edges) ## construct the network - author.net = construct.network.from.edge.list( - author.net.data[["vertices"]], - author.net.data[["edges"]], + artifact.net = construct.network.from.edge.list( + artifact.net.data[["vertices"]], + artifact.net.data[["edges"]], network.conf = private$network.conf, - directed = private$network.conf$get.value("author.directed"), + directed = private$network.conf$get.value("artifact.directed"), available.edge.attributes = list(hash = "character") ) - - return(author.net) + return(artifact.net) }, #' Get the call-graph-based artifact network. 
From 07e7ed744209b0251217fa8f7f35d9b9875face2 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 15:33:45 +0100 Subject: [PATCH 021/130] Add tests for artifact networks One simple test for each artifact network configuration (either 'file' or 'function') Signed-off-by: Leo Sendelbach --- tests/test-networks-artifact.R | 86 ++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 253e08ba..2ad9a643 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -212,3 +212,89 @@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) )) + +test_that("Network construction with commit-interactions as relation, artifact type 'file'", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.value("artifact.relation", "interaction") + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.artifact.network() + ## build the expected nbetwork + vertices = data.frame( + name = c("test2.c", "test.c"), + kind = "Interaction", + type = TYPE.ARTIFACT + ) + edges = data.frame( + from = c("test2.c", "test2.c", "test.c", "test2.c"), + to = c("test2.c", "test2.c", "test.c", "test2.c"), + func = c("test2.c", "test2.c", "test.c", "test2.c"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + 
"d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test2.c", "test2.c", "test.c", "test2.c"), + base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("interaction", "interaction", "interaction", "interaction") + ) + network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}) + +test_that("Network construction with commit-interactions as relation, artifact type 'function'", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "function") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.value("artifact.relation", "interaction") + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.artifact.network() + ## build the expected nbetwork + vertices = data.frame( + name = c("test2.c", "test.c"), + kind = "Interaction", + type = TYPE.ARTIFACT + ) + edges = data.frame( + from = c("test2.c", "test2.c", "test.c", "test2.c"), + to = c("test2.c", "test2.c", "test.c", "test2.c"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("test2.c", "test2.c", "test.c", "test2.c"), + 
base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.file = c("test2.c", "test2.c", "test.c", "test2.c"), + base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("interaction", "interaction", "interaction", "interaction") + ) + network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}) From dbd07e931f96c2b4234928d8ef94db042a7ac5cf Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 15:43:36 +0100 Subject: [PATCH 022/130] Fix artifact network construction It now uses the correct vertices depending on the configuration of either 'file' or 'function'. Signed-off-by: Leo Sendelbach --- util-networks.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/util-networks.R b/util-networks.R index 77d9dce5..734573fd 100644 --- a/util-networks.R +++ b/util-networks.R @@ -381,21 +381,30 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @return the commit-interaction based artifact network get.artifact.network.commit.interaction = function() { - ## get the commits that appear in the commit-interaction data as the vertices of the network - vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], - private$proj.data$get.commit.interactions()[["file"]])) - vertices = data.frame(name = vertices) + ## initialize the vertices. They will be set correctly depending on the used config. 
+ vertices = c() ## get the commit-interaction data as the edge data of the network edges = private$proj.data$get.commit.interactions() + ## set 'to' and 'from' of the network according to the config ## and order the dataframe accordingly proj.conf = private$proj.data$get.project.conf() if (proj.conf$get.value("artifact") == "file") { + ## change the vertices to the functions from the commit-interaction data + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], + private$proj.data$get.commit.interactions()[["file"]])) + vertices = data.frame(name = vertices) + edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] colnames(edges)[4] = "hash" } else { if (proj.conf$get.value("artifact") == "function") { + ## change the vertices to the functions from the commit-interaction data + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], + private$proj.data$get.commit.interactions()[["func"]])) + vertices = data.frame(name = vertices) + edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] colnames(edges)[3] = "hash" From 169dbfe516933244af28b20fdfd33a77d42f522b Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 8 Mar 2024 12:06:21 +0100 Subject: [PATCH 023/130] Change tests for artifact networks Tests now expect the correct vertex kind Signed-off-by: Leo Sendelbach --- tests/test-networks-artifact.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 2ad9a643..1e07476c 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -229,7 +229,7 @@ test_that("Network construction with commit-interactions as relation, artifact t ## build the expected nbetwork vertices = data.frame( name = c("test2.c", "test.c"), - kind = "Interaction", + kind = "File", type = 
TYPE.ARTIFACT ) edges = data.frame( @@ -272,7 +272,7 @@ test_that("Network construction with commit-interactions as relation, artifact t ## build the expected nbetwork vertices = data.frame( name = c("test2.c", "test.c"), - kind = "Interaction", + kind = "Function", type = TYPE.ARTIFACT ) edges = data.frame( From 8736025b29058a33c94c968086ab55a2270e07d9 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 8 Mar 2024 12:10:47 +0100 Subject: [PATCH 024/130] Change vertex kind for artifact networks vertices now have the correct 'kind' attribute also restructured if statements in artifact network construction Signed-off-by: Leo Sendelbach --- util-networks.R | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/util-networks.R b/util-networks.R index 734573fd..852fc27e 100644 --- a/util-networks.R +++ b/util-networks.R @@ -138,7 +138,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), mail = "MailThread", issue = "Issue", - interaction = "Interaction" + interaction = private$proj.data$get.project.conf.entry("artifact.codeface") ) return(vertex.kind) @@ -388,8 +388,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set 'to' and 'from' of the network according to the config ## and order the dataframe accordingly - proj.conf = private$proj.data$get.project.conf() - if (proj.conf$get.value("artifact") == "file") { + proj.conf.artifact = private$proj.data$get.project.conf.entry("artifact") + if (proj.conf.artifact == "file") { ## change the vertices to the functions from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], private$proj.data$get.commit.interactions()[["file"]])) @@ -398,23 +398,21 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] colnames(edges)[4] 
= "hash" + } else if (proj.conf.artifact == "function") { + ## change the vertices to the functions from the commit-interaction data + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], + private$proj.data$get.commit.interactions()[["func"]])) + vertices = data.frame(name = vertices) + + edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", + "base.file", "base.author", "interacting.author")] + colnames(edges)[3] = "hash" } else { - if (proj.conf$get.value("artifact") == "function") { - ## change the vertices to the functions from the commit-interaction data - vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], - private$proj.data$get.commit.interactions()[["func"]])) - vertices = data.frame(name = vertices) - - edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", - "base.file", "base.author", "interacting.author")] - colnames(edges)[3] = "hash" - } else { - ## if neither 'function' nor 'file' was configured, send a warning - ## and return an empty network - logging::logwarn("when creating a commit-interaction artifact network, - the artifact relation should be either 'file' or 'function'!") - return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) - } + ## if neither 'function' nor 'file' was configured, send a warning + ## and return an empty network + logging::logwarn("when creating a commit-interaction artifact network, + the artifact relation should be either 'file' or 'function'!") + return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) } colnames(edges)[1] = "to" colnames(edges)[2] = "from" From a924e86268f7109b048fcfc032a7fa9ba58f3e5f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 12 Mar 2024 15:49:56 +0100 Subject: [PATCH 025/130] Add commits to 'NEWS.md' Same points as before with more references to commits Signed-off-by: Leo Sendelbach --- NEWS.md | 12 ++++++++++++ 1 file 
changed, 12 insertions(+) diff --git a/NEWS.md b/NEWS.md index e58c8611..446761a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,18 @@ # coronet – Changelog +## unversioned + +### Added + +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, 7792a4e9a087c042a3ef9b7f79a67490305ce85e, 178265dcc69abc0d6e430dfcbc4b87e7565ce615, 80e6ac5f24e6d0248e77be391f93a59b6b17862d, 1ffa607bbe400bd212388dc543263ba5bec4e34c) +- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `interaction` (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, deddd4ce9d2a570ea57088ea73d4312f81e73049, 0e269af77bc098f2d3157fac349d2032efd6cf49, d96b10b45ec55cdf2dd02c60833d4116358d6d31) +- Add tests for new commit-interaction functionality (PR #252, 3e5b8962e18c3dde45085fa764c9d084327e2773, 7685ec4745bd43fba7a373bf5544f41bff346ed9, b291cb338e1b3896c8fd9769f45c515bddb8cf48, eea1b053350094084bab957975e1b306e6c9dc23, 3d4a521e47dc81aaae8ae01ff78ca8d514bb7d85, 05ea1ce1c3330f3fb8fb28ccbc08b85fbd4ec2c8, 99103f27ad0c8ee1bd62cdcee10778a98020db70, fd6064a83a7735020ad5250d092e266af5bbada0) + +### Changed/Improved + +### Fixed + ## 4.4 ### Announcement From 48d9de1ee5c28dc6360ed04615decb8179ded49e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 13 Mar 2024 14:51:14 +0100 Subject: [PATCH 026/130] Change warning to use 'logging::logwarn' Warning is now uniform with other warnings in project Signed-off-by: Leo Sendelbach --- util-networks-misc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-networks-misc.R b/util-networks-misc.R index a183f603..c9abd08a 100644 --- a/util-networks-misc.R +++ b/util-networks-misc.R @@ -151,7 +151,7 @@ 
get.expanded.adjacency = function(network, authors, weighted = FALSE) { # write a warning with the number of authors from the network that we ignore warning.string = sprintf("The network had %d authors that will not be displayed in the matrix!", network.authors.num - nrow(matrix.data)) - warning(warning.string) + logging::logwarn(warning.string) } ## save the activity data per author From 91b9c3bf65b33d82f1be2a28d2c078295993aea6 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 19 Mar 2024 14:17:54 +0100 Subject: [PATCH 027/130] Fix issues pointed out on PR comments Also made small changes to testing data to include a commit with specified function and added a warning that should only occur when the commit-interaction data was generated incorrectly Signed-off-by: Leo Sendelbach --- .../proximity/commit-interactions.yaml | 22 ++++++-------- tests/test-data.R | 11 +++---- tests/test-networks-artifact.R | 30 +++++++++---------- tests/test-networks-author.R | 16 +++++----- tests/test-read.R | 10 +++---- util-data.R | 5 +++- util-networks.R | 4 +-- util-read.R | 6 +++- 8 files changed, 54 insertions(+), 50 deletions(-) diff --git a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml index e424236d..83445eeb 100644 --- a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml +++ b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml @@ -1,18 +1,17 @@ scope: REGION result-map: - test.c: - demangled-name: test.c - file: test.c + test_function: + demangled-name: test_function + file: test3.c num-instructions: 30 insts: - base-hash: region: 45620620587549 - function: test.c - commit: 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 + function: test_function + commit: 1143db502761379c2bfcecc2007fc34282e7ee61 repository: test-repo interacting-hashes: - region: 87546092348456 - function: test.c 
commit: 5a5ec9675e98187e1e92561e1888aa6f04faa338 repository: test-repo amount: 2 @@ -21,34 +20,31 @@ result-map: commits: - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af repository: test-repo - test2.c: - demangled-name: test2.c + test2: + demangled-name: test2 file: test2.c num-instructions: 26 insts: - base-hash: region: 50956672345141 - function: test2.c commit: 3a0ed78458b3976243db6829f63eba3eead26774 repository: test-repo interacting-hashes: - region: 98750276234511 - function: test2.c commit: 0a1a5c523d835459c42f33e863623138555e2526 repository: test-repo amount: 1 - base-hash: region: 67230588834344 - function: test2.c commit: 0a1a5c523d835459c42f33e863623138555e2526 repository: test-repo interacting-hashes: - region: 33295067820043 - function: test2.c + function: test2 commit: 418d1dc4929ad1df251d2aeb833dd45757b04a6f repository: test-repo - region: 20194653678423 - function: test2.c + function: test2 commit: d01921773fae4bed8186b0aa411d6a2f7a6626e6 repository: test-repo amount: 3 diff --git a/tests/test-data.R b/tests/test-data.R index 7ee1d0c9..9049e3ce 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -571,11 +571,12 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.interactions.data.expected[["base.hash"]] = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("test2.c", "test2.c", "test.c", "test2.c") - commit.interactions.data.expected[["file"]] = c("test2.c", "test2.c", "test.c", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test2.c", "test2.c", "test.c", "test2.c") - commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["func"]] = c("GLOBAL", "test2", "GLOBAL", "test2") + 
commit.interactions.data.expected[["file"]] = c("GLOBAL", "test2.c", "GLOBAL", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test2", "test2", "test_function", "test2") + commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") + expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) }) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 1e07476c..13fad5f1 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -228,24 +228,24 @@ test_that("Network construction with commit-interactions as relation, artifact t network.built = network.builder$get.artifact.network() ## build the expected nbetwork vertices = data.frame( - name = c("test2.c", "test.c"), + name = c("test2.c", "test3.c", "GLOBAL"), kind = "File", type = TYPE.ARTIFACT ) edges = data.frame( - from = c("test2.c", "test2.c", "test.c", "test2.c"), - to = c("test2.c", "test2.c", "test.c", "test2.c"), - func = c("test2.c", "test2.c", "test.c", "test2.c"), + from = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), + to = c("test2.c", "test2.c", "test3.c", "test2.c"), + func = c("GLOBAL", "test2", "GLOBAL", "test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2.c", "test2.c", "test.c", "test2.c"), - base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + base.func = c("test2", "test2", "test_function", "test2"), + base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = 
c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), @@ -269,26 +269,26 @@ test_that("Network construction with commit-interactions as relation, artifact t network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() - ## build the expected nbetwork + ## build the expected network vertices = data.frame( - name = c("test2.c", "test.c"), + name = c("test2", "test_function", "GLOBAL"), kind = "Function", type = TYPE.ARTIFACT ) edges = data.frame( - from = c("test2.c", "test2.c", "test.c", "test2.c"), - to = c("test2.c", "test2.c", "test.c", "test2.c"), + from = c("GLOBAL", "test2", "GLOBAL", "test2"), + to = c("test2", "test2", "test_function", "test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), - file = c("test2.c", "test2.c", "test.c", "test2.c"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.file = c("test2.c", "test2.c", "test.c", "test2.c"), - base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 32989490..cabb598e 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -693,27 +693,27 @@ test_that("Network construction with commit-interactions as relation", { network.builder = 
NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.author.network() - ## build the expected nbetwork + ## build the expected network vertices = data.frame( - name = c("Olaf", "Thomas", "Björn", "Karl"), + name = c("Olaf", "Thomas", "Karl"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR ) edges = data.frame( - from = c("Olaf", "Thomas", "Björn", "Thomas"), + from = c("Olaf", "Thomas", "Karl", "Thomas"), to = c("Thomas", "Karl", "Olaf", "Thomas"), - func = c("test2.c", "test2.c", "test.c", "test2.c"), + func = c("GLOBAL", "test2", "GLOBAL", "test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), - file = c("test2.c", "test2.c", "test.c", "test2.c"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2.c", "test2.c", "test.c", "test2.c"), - base.file = c("test2.c", "test2.c", "test.c", "test2.c"), + base.func = c("test2", "test2", "test_function", "test2"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("interaction", "interaction", "interaction", "interaction") diff --git a/tests/test-read.R b/tests/test-read.R index bb242e0b..bafafe12 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -522,14 +522,14 @@ test_that("Read the commit-interactions data.", { "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "d01921773fae4bed8186b0aa411d6a2f7a6626e6") commit.interactions.data.expected[["base.hash"]] = - c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + c("1143db502761379c2bfcecc2007fc34282e7ee61", 
"3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("test.c", "test2.c", "test2.c", "test2.c") - commit.interactions.data.expected[["file"]] = c("test.c", "test2.c", "test2.c", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test.c", "test2.c", "test2.c", "test2.c") - commit.interactions.data.expected[["base.file"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["func"]] = c("GLOBAL", "GLOBAL", "test2", "test2") + commit.interactions.data.expected[["file"]] = c("GLOBAL", "GLOBAL", "test2.c", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test_function", "test2", "test2", "test2") + commit.interactions.data.expected[["base.file"]] = c("test3.c", "test2.c", "test2.c", "test2.c") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") diff --git a/util-data.R b/util-data.R index 71458d4e..b13a3373 100644 --- a/util-data.R +++ b/util-data.R @@ -1260,7 +1260,7 @@ ProjectData = R6::R6Class("ProjectData", ## if the commit-interaction data have not yet been read do this if (!self$is.data.source.cached("commit.interactions")) { - if(is.null(data.path)) { + if (is.null(data.path)) { commit.interaction.data = read.commit.interactions(self$get.data.path()) } else { commit.interaction.data = read.commit.interactions(data.path) @@ -1282,6 +1282,9 @@ ProjectData = R6::R6Class("ProjectData", if (is.null(data)) { data = create.empty.commit.interaction.list() + } else { + ## verify the format of the given dataframe + verify.data.frame.columns(data, COMMIT.INTERACTION.LIST.COLUMNS, COMMIT.INTERACTION.LIST.DATA.TYPES) } ## set the actual data diff --git a/util-networks.R b/util-networks.R index 852fc27e..d1a5eb62 100644 --- a/util-networks.R +++ b/util-networks.R @@ -390,7 +390,7 @@ NetworkBuilder = 
R6::R6Class("NetworkBuilder", ## and order the dataframe accordingly proj.conf.artifact = private$proj.data$get.project.conf.entry("artifact") if (proj.conf.artifact == "file") { - ## change the vertices to the functions from the commit-interaction data + ## change the vertices to the files from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], private$proj.data$get.commit.interactions()[["file"]])) vertices = data.frame(name = vertices) @@ -408,7 +408,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", "base.file", "base.author", "interacting.author")] colnames(edges)[3] = "hash" } else { - ## if neither 'function' nor 'file' was configured, send a warning + ## If neither 'function' nor 'file' was configured, send a warning ## and return an empty network logging::logwarn("when creating a commit-interaction artifact network, the artifact relation should be either 'file' or 'function'!") diff --git a/util-read.R b/util-read.R index 720a14ce..dc469bd2 100644 --- a/util-read.R +++ b/util-read.R @@ -903,13 +903,17 @@ read.commit.interactions = function(data.path = NULL) { ## get all commits that interact with the current one insts = current.interaction[[4]] interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { - base.hash = current.inst[[1]][[3]] + base.hash = current.inst[[1]]$`commit` interacting.hashes = current.inst[[2]] interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { ## if there is no function name in the current interaction we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name if (!"function" %in% names(hash)) { return(data.frame(func = "GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) + } else if (is.null(file.name.map$get(hash[["function"]]))) { + ## This case should never occur if the data was generated correctly! 
+ warning("An interacting hash specifies a function that does not exist in the data!") + return(data.frame(matrix(nrow = 3, ncol = 0))) } else { return(data.frame(func = hash[["function"]], commit.hash = hash[["commit"]], file = file.name.map$get(hash[["function"]]))) From 8d4965afcea4da8e820d52a095571d05b9c7c704 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 20 Mar 2024 14:41:10 +0100 Subject: [PATCH 028/130] Change call to 'read_yaml' Now uses a custom handler for type 'int' that converts the int to a string, which lets us read the 'region' value for the commits Signed-off-by: Leo Sendelbach --- .../testing/test_proximity/proximity/commit-interactions.yaml | 2 +- util-read.R | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml index 83445eeb..8e8b0186 100644 --- a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml +++ b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml @@ -52,4 +52,4 @@ result-map: - test_callee commits: - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af - repository: test-repo \ No newline at end of file + repository: test-repo diff --git a/util-read.R b/util-read.R index dc469bd2..016da771 100644 --- a/util-read.R +++ b/util-read.R @@ -874,7 +874,9 @@ read.commit.interactions = function(data.path = NULL) { file = file.path(data.path, "commit-interactions.yaml") - commit.interaction.base = try(yaml::read_yaml(file = file), silent = TRUE) + commit.interaction.base = try(yaml::read_yaml(file = file, + handlers = list(int = function(x) {as.character(x)})), + silent = TRUE) ## handle the case that the list of commit-interactions is empty if (inherits(commit.interaction.base, "try-error")) { From 1addce944a637084b7983f34bf1fadc523174b33 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach 
Date: Thu, 4 Apr 2024 13:16:28 +0200 Subject: [PATCH 029/130] Change to adress comments by @bockthom Fix style issues, modify README.md, add small test and add some comments for clarity Signed-off-by: Leo Sendelbach --- README.md | 10 +++ install.R | 4 +- tests/README.md | 1 + tests/test-data.R | 8 +++ tests/test-networks-artifact.R | 8 +-- tests/test-networks-author.R | 4 +- util-conf.R | 4 +- util-data.R | 119 ++++++++++++++++++--------------- util-networks.R | 14 ++-- util-read.R | 29 +++++--- 10 files changed, 121 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index 62c029b3..0595555d 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,8 @@ Alternatively, you can run `Rscript install.R` to install the packages. - `jsonlite`: For parsing the issue data - `rTensor`: For calculating EDCPTD centrality - `Matrix`: For sparse matrix representation of large adjacency matrices +- `fastmap`: For fast implementation of a map +- `purrr`: For fast implementtion of a mapping function ### Submodule @@ -264,6 +266,11 @@ Relations determine which information is used to construct edges among the verti * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), source-code artifacts are connected when they reference each other (i.e., one artifact calls a function contained in the other artifact). * For bipartite networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), authors get linked to all source-code artifacts they have changed in their respective commits (same as for the relation `cochange`). +- `commit.interaction` + * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. + * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is a interacting between two commits that occur in the artifacts. 
+ * This relation does not apply for bipartite networks. + #### Edge-construction algorithms for author networks When constructing author networks, we use events in time (i.e., commits, e-mails, issue events) to model interactions among authors on the same artifact as edges. Therefore, we group the events on artifacts, based on the configured relation (see the [previous section](#relations)). @@ -597,6 +604,9 @@ There is no way to update the entries, except for the revision-based parameters. - `custom.event.timestamps.locked`: * Lock custom event timestamps to prevent them from being read if empty or not yet present when calling the getter. * [`TRUE`, *`FALSE`*] +- `commit.interactions`: + * Alloow construction of author and artifact networks using commit interaction data + * [`TRUE`, *`FALSE`*] ### NetworkConf diff --git a/install.R b/install.R index 99f047cc..94d403d9 100644 --- a/install.R +++ b/install.R @@ -44,7 +44,9 @@ packages = c( "viridis", "jsonlite", "rTensor", - "Matrix" + "Matrix", + "fastmap", + "purrr" ) diff --git a/tests/README.md b/tests/README.md index 6eb55791..cfe453fb 100644 --- a/tests/README.md +++ b/tests/README.md @@ -16,6 +16,7 @@ We have two test projects you can use when writing your tests: * Commit messages * Pasta * Synchronicity + * Commit Interactions * Custom event timestamps in `custom-events.list` * Revisions 2. 
- Casestudy: `test_empty` diff --git a/tests/test-data.R b/tests/test-data.R index 9049e3ce..98456846 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -99,6 +99,13 @@ test_that("Compare two ProjectData objects on empty data", { proj.data.two$set.project.conf.entry("commit.messages", "message") proj.data.two$get.commit.messages() expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects (commit.messages).") + + proj.data.one$set.project.conf.entry("commit.interactions", TRUE) + proj.data.one$get.commit.interactions() + expect_false(proj.data.one$equals(proj.data.two), "Two non-identical ProjectData objects (commit.interactions).") + proj.data.two$set.project.conf.entry("commit.interactions", TRUE) + proj.data.two$get.commit.interactions() + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects (commit.interactions).") }) test_that("Compare two ProjectData objects on non-empty data", { @@ -540,6 +547,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.data[["hash"]][[5]] = 1 proj.data.one$set.commits(commit.data) + ## use isTRUE to compress result of all.equal into a single boolean expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 13fad5f1..e52dd973 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -222,7 +222,7 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("artifact.relation", "interaction") + net.conf$update.value("artifact.relation", "commit.interaction") network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() @@ -249,7 +249,7 @@ test_that("Network construction 
with commit-interactions as relation, artifact t interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), - relation = c("interaction", "interaction", "interaction", "interaction") + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -265,7 +265,7 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("artifact.relation", "interaction") + net.conf$update.value("artifact.relation", "commit.interaction") network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() @@ -292,7 +292,7 @@ test_that("Network construction with commit-interactions as relation, artifact t interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), - relation = c("interaction", "interaction", "interaction", "interaction") + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index cabb598e..4f580ef2 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -688,7 +688,7 @@ test_that("Network construction with commit-interactions as relation", { proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("author.relation", "interaction") + net.conf$update.value("author.relation", "commit.interaction") network.builder = NetworkBuilder$new(project.data = proj.data, 
network.conf = net.conf) network.built = network.builder$get.author.network() @@ -716,7 +716,7 @@ test_that("Network construction with commit-interactions as relation", { base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), - relation = c("interaction", "interaction", "interaction", "interaction") + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/util-conf.R b/util-conf.R index 434fbf96..64a9ed79 100644 --- a/util-conf.R +++ b/util-conf.R @@ -790,7 +790,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, author.relation = list( default = "mail", type = "character", - allowed = c("mail", "cochange", "issue", "interaction"), + allowed = c("mail", "cochange", "issue", "commit.interaction"), allowed.number = Inf ), author.directed = list( @@ -821,7 +821,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, artifact.relation = list( default = "cochange", type = "character", - allowed = c("cochange", "callgraph", "mail", "issue", "interaction"), + allowed = c("cochange", "callgraph", "mail", "issue", "commit.interaction"), allowed.number = Inf ), artifact.directed = list( diff --git a/util-data.R b/util-data.R index b13a3373..ab4a4988 100644 --- a/util-data.R +++ b/util-data.R @@ -78,8 +78,8 @@ DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION = list( "synchronicity" = "get.synchronicity", "pasta" = "get.pasta", "gender" = "get.gender", - "custom.event.timestamps" = "get.custom.event.timestamps", - "commit.interactions" = "get.commit.interactions" + "commit.interactions" = "get.commit.interactions", + "custom.event.timestamps" = "get.custom.event.timestamps" ) #' Applies a function to list keys @@ -125,7 +125,8 @@ CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages", "issues.locked",
"mails.locked", "custom.event.timestamps", - "custom.event.timestamps.locked") + "custom.event.timestamps.locked", + "commit.interactions") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -164,7 +165,7 @@ ProjectData = R6::R6Class("ProjectData", commits = create.empty.commits.list(), # data.frame commits.unfiltered = create.empty.commits.list(), # data.frame commit.messages = create.empty.commit.message.list(), # data.frame - commit.interactions = create.empty.commit.interaction.list(), + commit.interactions = create.empty.commit.interaction.list(), # data.frame ## mails mails.unfiltered = create.empty.mails.list(), # data.frame mails = create.empty.mails.list(), # data.frame @@ -414,46 +415,49 @@ ProjectData = R6::R6Class("ProjectData", #' #' This method should be called whenever the field \code{commit.interactions} is changed. update.commit.interactions = function() { - if (!self$is.data.source.cached("commits.unfiltered")) { - self$get.commits() - } + if (self$is.data.source.cached("commit.interactions")) { + if (!self$is.data.source.cached("commits.unfiltered")) { + self$get.commits() + } - ## remove existing columns named 'base.author' and 'interaction.author' - indices.to.remove = which("base.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove) > 0) { - private$commit.interactions = private$commit.interactions[, -indices.to.remove] - } - indices.to.remove = which("interacting.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove) > 0) { - private$commit.interactions = private$commit.interactions[, -indices.to.remove] - } + ## remove existing columns named 'base.author' and 'interacting.author' + indices.to.remove = which("base.author" == colnames(private$commit.interactions)) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } + indices.to.remove = which("interacting.author" ==
colnames(private$commit.interactions)) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } - ## get relevant data from commits - commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], - author.name = private$commits.unfiltered[["author.name"]]) - commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] + ## get relevant data from commits + commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], + author.name = private$commits.unfiltered[["author.name"]]) + commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] - ## merge commit interactions with commits and change colnames to avoid duplicates - commit.interaction.data = merge(private$commit.interactions, commit.data.subset, - by.x = "base.hash", by.y = "hash", all.x = TRUE) + ## merge commit interactions with commits and change colnames to avoid duplicates + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, + by.x = "base.hash", by.y = "hash", all.x = TRUE) - author.index = match("author.name", colnames(commit.interaction.data)) - colnames(commit.interaction.data)[[author.index]] = "base.author" + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "base.author" - commit.interaction.data = merge(commit.interaction.data, commit.data.subset, - by.x = "commit.hash", by.y = "hash", all.x = TRUE) + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, + by.x = "commit.hash", by.y = "hash", all.x = TRUE) - author.index = match("author.name", colnames(commit.interaction.data)) - colnames(commit.interaction.data)[[author.index]] = "interacting.author" + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "interacting.author" - ## warning if we have 
interactions without authors - if (anyNA(commit.interaction.data[["base.author"]]) || - anyNA(commit.interaction.data[["interacting.author"]])) { - logging::logwarn("There are authors in the commit-interactions that are not in the commit data! - This results in the commit-interactions having empty entries. - To clean up these entries, call cleanup.commit.interactions.") + ## warning if we have interactions without authors + if (anyNA(commit.interaction.data[["base.author"]]) || + anyNA(commit.interaction.data[["interacting.author"]])) { + logging::logwarn("There are commits in the commit-interactions that are not in + the commit data, possibly due to incomplete commit data or deleted users. + This results in the commit-interactions having empty entries. + To clean up these entries, call cleanup.commit.interactions.") + } + private$commit.interactions = commit.interaction.data } - private$commit.interactions = commit.interaction.data }, ## * * Gender data -------------------------------------------------- @@ -858,6 +862,7 @@ ProjectData = R6::R6Class("ProjectData", private$pasta.commits = create.empty.pasta.list() private$gender = create.empty.gender.list() private$synchronicity = create.empty.synchronicity.list() + private$commit.interactions = create.empty.commit.interaction.list() }, ## * * configuration ----------------------------------------------- @@ -1258,19 +1263,26 @@ ProjectData = R6::R6Class("ProjectData", get.commit.interactions = function(data.path = NULL) { logging::loginfo("Getting commit interactions.") - ## if the commit-interaction data have not yet been read do this - if (!self$is.data.source.cached("commit.interactions")) { - if (is.null(data.path)) { - commit.interaction.data = read.commit.interactions(self$get.data.path()) - } else { - commit.interaction.data = read.commit.interactions(data.path) - } + ## if commit-interaction data are to be read, do this + if (private$project.conf$get.value("commit.interactions")) { + ## if the 
commit-interaction data have not yet been read do this + if (!self$is.data.source.cached("commit.interactions")) { + if (is.null(data.path)) { + commit.interaction.data = read.commit.interactions(self$get.data.path()) + } else { + commit.interaction.data = read.commit.interactions(data.path) + } - ## cache the result - private$commit.interactions = commit.interaction.data - private$update.commit.interactions() + ## cache the result + private$commit.interactions = commit.interaction.data + private$update.commit.interactions() + } + } else { + logging::logwarn("You have not set the ProjectConf parameter + 'commit.interactions' to 'TRUE'! Ignoring...") + ## mark commit-interaction data as empty + private$commit.interactions = NULL } - return(private$commit.interactions) }, @@ -1291,9 +1303,10 @@ ProjectData = R6::R6Class("ProjectData", private$commit.interactions = data }, - #' Remove lines in the commit-interactions data that do not contain authors. - #' This should only be called AFTER 'update.commit.interactions' has already been called, as otherwise - #' all commit-interactions data will be removed + #' Remove lines in the commit-interaction data for which the corresponding commit is missing in the + #' commit data, indicated by a missing author in the commit-interaction data. + #' This should only be called AFTER \code{update.commit.interactions} has already been called, as otherwise + #' all commit-interactions data will be removed. 
cleanup.commit.interactions = function() { logging::loginfo("Cleaning up commit-interactions") @@ -1879,8 +1892,8 @@ ProjectData = R6::R6Class("ProjectData", "commit.messages" = "commit.messages", "synchronicity" = "synchronicity", "pasta" = "pasta", - "custom.event.timestamps" = "custom.event.timestamps", - "commit.interactions" = "commit.interactions" + "commit.interactions" = "commit.interactions", + "custom.event.timestamps" = "custom.event.timestamps" ) ) sources = self$get.cached.data.sources.internal(source.type) @@ -1912,7 +1925,7 @@ ProjectData = R6::R6Class("ProjectData", ## define the data sources unfiltered.data.sources = c("commits.unfiltered", "mails.unfiltered", "issues.unfiltered") additional.data.sources = c("authors", "commit.messages", "synchronicity", "pasta", - "gender", "custom.event.timestamps", "commit.interactions") + "gender", "commit.interactions", "custom.event.timestamps") main.data.sources = c("issues", "commits", "mails") ## set the right data sources to look for according to the argument diff --git a/util-networks.R b/util-networks.R index d1a5eb62..d957257c 100644 --- a/util-networks.R +++ b/util-networks.R @@ -138,7 +138,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), mail = "MailThread", issue = "Issue", - interaction = private$proj.data$get.project.conf.entry("artifact.codeface") + commit.interaction = private$proj.data$get.project.conf.entry("artifact.codeface") ) return(vertex.kind) @@ -377,7 +377,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, - #' Build and get the the commit-interaction based artifact network. + #' Build and get the commit-interaction based artifact network. 
#' #' @return the commit-interaction based artifact network get.artifact.network.commit.interaction = function() { @@ -397,7 +397,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - colnames(edges)[4] = "hash" + colnames(edges)[colnames(edges)=="commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], @@ -406,12 +406,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - colnames(edges)[3] = "hash" + colnames(edges)[colnames(edges)=="commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning ## and return an empty network logging::logwarn("when creating a commit-interaction artifact network, - the artifact relation should be either 'file' or 'function'!") + the artifact should be either 'file' or 'function'!") return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) } colnames(edges)[1] = "to" @@ -826,7 +826,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network = switch( relation, cochange = private$get.author.network.cochange(), - interaction = private$get.author.network.commit.interaction(), + commit.interaction = private$get.author.network.commit.interaction(), mail = private$get.author.network.mail(), issue = private$get.author.network.issue(), stop(sprintf("The author relation '%s' does not exist.", rel)) @@ -894,7 +894,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$get.artifact.network.callgraph(), mail = private$get.artifact.network.mail(), issue = private$get.artifact.network.issue(), - interaction = 
private$get.artifact.network.commit.interaction(), + commit.interaction = private$get.artifact.network.commit.interaction(), stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) diff --git a/util-read.R b/util-read.R index 016da771..5443d36f 100644 --- a/util-read.R +++ b/util-read.R @@ -44,6 +44,8 @@ requireNamespace("digest") # for sha1 hashing of IDs requireNamespace("sqldf") # for SQL-selections on data.frames requireNamespace("data.table") # for faster data.frame processing requireNamespace("yaml") # for reading commit interaction data +requireNamespace("fastmap") # for fast implementation of a map +requireNamespace("purrr") # for fast mapping function ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper functions -------------------------------------------------------- @@ -866,7 +868,6 @@ COMMIT.INTERACTION.LIST.DATA.TYPES = c( #' commit (hash) gets mapped to all commits it interacts with and the file/function because of #' which they interact. 
#' -#' #' @param data.path the path to the commit-interaction data #' #' @return the read and parsed commit-interaction data @@ -888,27 +889,31 @@ read.commit.interactions = function(data.path = NULL) { } ## extract the top level list of the yaml file which is called 'result-map' - result.map = commit.interaction.base$`result-map` + result.map = commit.interaction.base[["result-map"]] ## extract a mapping of functions to files to be able to determine what file the current interaction is ## based on + ## 1) create an empty map file.name.map = fastmap::fastmap() + ## 2) create a mapping between functions and files as a list function.file.list = purrr::map(result.map, 2) + ## 3) set the map using the list file.name.map$mset(.list = function.file.list) list.names = names(result.map) ## build the result dataframe by iterating over the 'result-map' list - commit.interaction.data = data.table::setDF(data.table::rbindlist(parallel::mcmapply(result.map, list.names, - SIMPLIFY = FALSE, - FUN = function(current.interaction, - function.name) { + commit.interaction.data = data.table::setDF(data.table::rbindlist( + parallel::mcmapply(result.map, + list.names, + SIMPLIFY = FALSE, + FUN = function(current.interaction, function.name) { ## get all commits that interact with the current one insts = current.interaction[[4]] interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { - base.hash = current.inst[[1]]$`commit` + base.hash = current.inst[[1]][["commit"]] interacting.hashes = current.inst[[2]] interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { - ## if there is no function name in the current interaction we set the function name to 'GLOBAL' + ## if there is no function name in the current interaction, we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name if (!"function" %in% names(hash)) { return(data.frame(func = 
"GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) @@ -921,11 +926,13 @@ read.commit.interactions = function(data.path = NULL) { file = file.name.map$get(hash[["function"]]))) } }))) - interacting.hashes.df$base.hash = base.hash - interacting.hashes.df$base.func = function.name - interacting.hashes.df$base.file = file.name.map$get(function.name) + interacting.hashes.df[["base.hash"]] = base.hash + interacting.hashes.df[["base.func"]] = function.name + interacting.hashes.df[["base.file"]] = file.name.map$get(function.name) return(interacting.hashes.df) }))) + ## Initialize author data as 'NA', since it is not available from the commit-interaction data. + ## Author data will be merged from commit data in \code{update.commit.interactions}. interactions["base.author"] = NA_character_ interactions["interacting.author"] = NA_character_ return(interactions) From 13359651629fbe4ab8c5eb20583fde4e420e12eb Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 5 Apr 2024 14:03:45 +0200 Subject: [PATCH 030/130] Add global variable and change function names Also add more available edge attributes to network construction Signed-off-by: Leo Sendelbach --- util-data.R | 8 +++++++- util-networks.R | 6 ++++-- util-read.R | 19 +++++++++++++------ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/util-data.R b/util-data.R index ab4a4988..0443b183 100644 --- a/util-data.R +++ b/util-data.R @@ -1962,7 +1962,8 @@ ProjectData = R6::R6Class("ProjectData", #' \code{"commits"}, and \code{"issues"}. 
[default: "commits"] #' #' @return a named list of data classes, with the corresponding data columns as names - get.data.columns.for.data.source = function(data.source = c("commits", "mails", "issues")) { + get.data.columns.for.data.source = function(data.source = c("commits", "mails", + "issues", "commit.interactions")) { ## check arguments data.source = match.arg(arg = data.source, several.ok = FALSE) @@ -1970,6 +1971,11 @@ ProjectData = R6::R6Class("ProjectData", ## get the needed data method first data.fun = DATASOURCE.TO.ARTIFACT.FUNCTION[[data.source]] + ## if 'data.fun' is NULL, check 'DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION' + if (is.null(data.fun)) { + data.fun = DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION[[data.source]] + } + ## get the column classes with corresponding names columns = lapply(self[[data.fun]](), class) diff --git a/util-networks.R b/util-networks.R index d957257c..bfe7a998 100644 --- a/util-networks.R +++ b/util-networks.R @@ -252,7 +252,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", author.net.data[["edges"]], network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed"), - available.edge.attributes = list(hash = "character") + available.edge.attributes = private$proj.data$ + get.data.columns.for.data.source("commit.interactions") ) return(author.net) }, @@ -423,7 +424,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifact.net.data[["edges"]], network.conf = private$network.conf, directed = private$network.conf$get.value("artifact.directed"), - available.edge.attributes = list(hash = "character") + available.edge.attributes = private$proj.data$ + get.data.columns.for.data.source("commit.interactions") ) return(artifact.net) }, diff --git a/util-read.R b/util-read.R index 5443d36f..fa0bbff1 100644 --- a/util-read.R +++ b/util-read.R @@ -863,6 +863,8 @@ COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character" ) +COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME = "GLOBAL" + #' Read and 
parse the commit-interaction data. This data is present in a `.yaml` file which #' needs to be broken down. Within the yaml file, there are different lists in which each #' commit (hash) gets mapped to all commits it interacts with and the file/function because of @@ -895,7 +897,8 @@ read.commit.interactions = function(data.path = NULL) { ## based on ## 1) create an empty map file.name.map = fastmap::fastmap() - ## 2) create a mapping between functions and files as a list + ## 2) create a mapping between functions and files as a named list + ## which can be directly converted to a map function.file.list = purrr::map(result.map, 2) ## 3) set the map using the list file.name.map$mset(.list = function.file.list) @@ -916,19 +919,23 @@ read.commit.interactions = function(data.path = NULL) { ## if there is no function name in the current interaction, we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name if (!"function" %in% names(hash)) { - return(data.frame(func = "GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) + return(data.frame(func = COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME, + commit.hash = hash[["commit"]], + file = COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME)) } else if (is.null(file.name.map$get(hash[["function"]]))) { ## This case should never occur if the data was generated correctly! 
warning("An interacting hash specifies a function that does not exist in the data!") return(data.frame(matrix(nrow = 3, ncol = 0))) } else { - return(data.frame(func = hash[["function"]], commit.hash = hash[["commit"]], - file = file.name.map$get(hash[["function"]]))) + file.name = file.name.map$get(hash[["function"]]) + func.name = paste(file.name, hash[("function")], sep = "::") + return(data.frame(func = func.name, commit.hash = hash[["commit"]], file = file.name)) } }))) + base.file.name = file.name.map$get(function.name) interacting.hashes.df[["base.hash"]] = base.hash - interacting.hashes.df[["base.func"]] = function.name - interacting.hashes.df[["base.file"]] = file.name.map$get(function.name) + interacting.hashes.df[["base.func"]] = paste(base.file.name, function.name, sep = "::") + interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) ## Initialize author data as 'NA', since it is not available from the commit-interaction data. From 8ce1f0705ee2d04ed72a8e564e1896db21163b52 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 5 Apr 2024 14:07:32 +0200 Subject: [PATCH 031/130] Change tests to match new function names Also use patrick to test for directedness Signed-off-by: Leo Sendelbach --- tests/test-data.R | 5 +++-- tests/test-networks-artifact.R | 36 ++++++++++++++++++++++------------ tests/test-networks-author.R | 17 ++++++++++------ tests/test-read.R | 5 +++-- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 98456846..e4bea8c4 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -581,9 +581,10 @@ test_that("Compare two ProjectData Objects with commit.interactions", { "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("GLOBAL", "test2", "GLOBAL", "test2") + commit.interactions.data.expected[["func"]] = c("GLOBAL", 
"test2.c::test2", "GLOBAL", "test2.c::test2") commit.interactions.data.expected[["file"]] = c("GLOBAL", "test2.c", "GLOBAL", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test2", "test2", "test_function", "test2") + commit.interactions.data.expected[["base.func"]] = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2") commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index e52dd973..122c96ee 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -213,7 +213,7 @@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o "directed: TRUE" = list(test.directed = TRUE) )) -test_that("Network construction with commit-interactions as relation, artifact type 'file'", { +patrick::with_parameters_test_that("Network construction with commit-interactions as relation, artifact type 'file'", { ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") proj.conf$update.value("commit.interactions", TRUE) @@ -222,7 +222,8 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("artifact.relation", "commit.interaction") + net.conf$update.values(updated.values = list(artifact.relation = "commit.interaction", + artifact.directed = test.directed)) network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() @@ -235,7 +236,7 @@ test_that("Network construction with commit-interactions as relation, artifact t edges = data.frame( from = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), to = c("test2.c", 
"test2.c", "test3.c", "test2.c"), - func = c("GLOBAL", "test2", "GLOBAL", "test2"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", @@ -244,19 +245,23 @@ test_that("Network construction with commit-interactions as relation, artifact t "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2", "test2", "test_function", "test2"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) -}) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) -test_that("Network construction with commit-interactions as relation, artifact type 'function'", { +patrick::with_parameters_test_that("Network construction with commit-interactions as relation, artifact type 'function'", { ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "function") proj.conf$update.value("commit.interactions", TRUE) @@ -265,19 +270,21 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - 
net.conf$update.value("artifact.relation", "commit.interaction") + net.conf$update.values(updated.values = list(artifact.relation = "commit.interaction", + artifact.directed = test.directed)) network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() ## build the expected network vertices = data.frame( - name = c("test2", "test_function", "GLOBAL"), + name = c("test2.c::test2", "test3.c::test_function", "GLOBAL"), kind = "Function", type = TYPE.ARTIFACT ) edges = data.frame( - from = c("GLOBAL", "test2", "GLOBAL", "test2"), - to = c("test2", "test2", "test_function", "test2"), + from = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), + to = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", @@ -294,7 +301,10 @@ test_that("Network construction with commit-interactions as relation, artifact t type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) -}) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 4f580ef2..9ffa3472 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -679,7 +679,7 @@ test_that("Network construction with only untracked files (no edges expected)", expect_true(igraph::identical_graphs(network.built, network.expected)) }) -test_that("Network 
construction with commit-interactions as relation", { +patrick::with_parameters_test_that("Network construction with commit-interactions as relation", { ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") proj.conf$update.value("commit.interactions", TRUE) @@ -688,7 +688,8 @@ test_that("Network construction with commit-interactions as relation", { proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("author.relation", "commit.interaction") + net.conf$update.values(updated.values = list(author.relation = "commit.interaction", + author.directed = test.directed)) network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.author.network() @@ -702,7 +703,7 @@ test_that("Network construction with commit-interactions as relation", { edges = data.frame( from = c("Olaf", "Thomas", "Karl", "Thomas"), to = c("Thomas", "Karl", "Olaf", "Thomas"), - func = c("GLOBAL", "test2", "GLOBAL", "test2"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", @@ -712,13 +713,17 @@ test_that("Network construction with commit-interactions as relation", { "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2", "test2", "test_function", "test2"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed 
= FALSE, vertices = vertices) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) -}) \ No newline at end of file +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) \ No newline at end of file diff --git a/tests/test-read.R b/tests/test-read.R index bafafe12..58c9bd3c 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -526,9 +526,10 @@ test_that("Read the commit-interactions data.", { "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("GLOBAL", "GLOBAL", "test2", "test2") + commit.interactions.data.expected[["func"]] = c("GLOBAL", "GLOBAL", "test2.c::test2", "test2.c::test2") commit.interactions.data.expected[["file"]] = c("GLOBAL", "GLOBAL", "test2.c", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test_function", "test2", "test2", "test2") + commit.interactions.data.expected[["base.func"]] = c("test3.c::test_function", "test2.c::test2", + "test2.c::test2", "test2.c::test2") commit.interactions.data.expected[["base.file"]] = c("test3.c", "test2.c", "test2.c", "test2.c") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, From 7c92b723056e1851d88791bf8c10ece44a474f27 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 10 Apr 2024 12:38:16 +0200 Subject: [PATCH 032/130] Fix typos and change data frame access Read method now exclusively uses names to access data frame Signed-off-by: Leo Sendelbach --- README.md | 4 ++-- tests/README.md | 2 +- util-networks.R | 12 ++++++------ util-read.R | 10 +++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0595555d..3fb9d44f 100644 --- a/README.md +++ b/README.md @@ -268,7 +268,7 @@ Relations 
determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. - * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is a interacting between two commits that occur in the artifacts. + * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. * This relation does not apply for bipartite networks. #### Edge-construction algorithms for author networks @@ -605,7 +605,7 @@ There is no way to update the entries, except for the revision-based parameters. * Lock custom event timestamps to prevent them from being read if empty or not yet present when calling the getter. * [`TRUE`, *`FALSE`*] - `commit.interactions`: - * Alloow construction of author and artifact networks using commit interaction data + * Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] ### NetworkConf diff --git a/tests/README.md b/tests/README.md index cfe453fb..b6558dc1 100644 --- a/tests/README.md +++ b/tests/README.md @@ -16,7 +16,7 @@ We have two test projects you can use when writing your tests: * Commit messages * Pasta * Synchronicity - * Commit Interactions + * Commit interactions * Custom event timestamps in `custom-events.list` * Revisions 2. 
- Casestudy: `test_empty` diff --git a/util-networks.R b/util-networks.R index bfe7a998..aa9511b2 100644 --- a/util-networks.R +++ b/util-networks.R @@ -134,10 +134,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.vertex.kind.for.relation = function(relation) { vertex.kind = switch(relation, - cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), - callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), - mail = "MailThread", - issue = "Issue", + cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), + callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), + mail = "MailThread", + issue = "Issue", commit.interaction = private$proj.data$get.project.conf.entry("artifact.codeface") ) @@ -398,7 +398,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - colnames(edges)[colnames(edges)=="commit.hash"] = "hash" + colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], @@ -407,7 +407,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - colnames(edges)[colnames(edges)=="commit.hash"] = "hash" + colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning ## and return an empty network diff --git a/util-read.R b/util-read.R index fa0bbff1..892c59cd 100644 --- a/util-read.R +++ b/util-read.R @@ -898,8 +898,8 @@ read.commit.interactions = function(data.path = NULL) { ## 1) create an empty map file.name.map = fastmap::fastmap() ## 2) create a mapping between functions and 
files as a named list - ## which can be directly converted to a map - function.file.list = purrr::map(result.map, 2) + ## which can be directly converted to a map + function.file.list = purrr::map(result.map, "file") ## 3) set the map using the list file.name.map$mset(.list = function.file.list) list.names = names(result.map) @@ -911,10 +911,10 @@ read.commit.interactions = function(data.path = NULL) { SIMPLIFY = FALSE, FUN = function(current.interaction, function.name) { ## get all commits that interact with the current one - insts = current.interaction[[4]] + insts = current.interaction[["insts"]] interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { - base.hash = current.inst[[1]][["commit"]] - interacting.hashes = current.inst[[2]] + base.hash = current.inst[["base-hash"]][["commit"]] + interacting.hashes = current.inst[["interacting-hashes"]] interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { ## if there is no function name in the current interaction, we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name From bc4938675a4b66f17de332eb2b04b66591d54003 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 10 Apr 2024 13:10:19 +0200 Subject: [PATCH 033/130] Change NEWS.md with new commit hashes after rebase Also remove two points from it as per @bockthom's suggestions Signed-off-by: Leo Sendelbach --- NEWS.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 446761a5..2b28ff65 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,9 +6,8 @@ ### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, 7792a4e9a087c042a3ef9b7f79a67490305ce85e, 
178265dcc69abc0d6e430dfcbc4b87e7565ce615, 80e6ac5f24e6d0248e77be391f93a59b6b17862d, 1ffa607bbe400bd212388dc543263ba5bec4e34c) -- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `interaction` (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, deddd4ce9d2a570ea57088ea73d4312f81e73049, 0e269af77bc098f2d3157fac349d2032efd6cf49, d96b10b45ec55cdf2dd02c60833d4116358d6d31) -- Add tests for new commit-interaction functionality (PR #252, 3e5b8962e18c3dde45085fa764c9d084327e2773, 7685ec4745bd43fba7a373bf5544f41bff346ed9, b291cb338e1b3896c8fd9769f45c515bddb8cf48, eea1b053350094084bab957975e1b306e6c9dc23, 3d4a521e47dc81aaae8ae01ff78ca8d514bb7d85, 05ea1ce1c3330f3fb8fb28ccbc08b85fbd4ec2c8, 99103f27ad0c8ee1bd62cdcee10778a98020db70, fd6064a83a7735020ad5250d092e266af5bbada0) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) ### Changed/Improved 
From bca35760eb0aac86c04923f2d534b2d8cece204e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 18 Apr 2024 13:29:50 +0200 Subject: [PATCH 034/130] Add Configuration for filtering commit interactions Filtering happens in 'get.commit.interactions' if 'filter.commit.interactions' is TRUE, as it is per default. Signed-off-by: Leo Sendelbach --- README.md | 3 +++ tests/test-data.R | 5 +++++ tests/test-networks-artifact.R | 2 ++ tests/test-networks-author.R | 1 + util-conf.R | 6 ++++++ util-data.R | 5 +++++ 6 files changed, 22 insertions(+) diff --git a/README.md b/README.md index 3fb9d44f..1c6e78fc 100644 --- a/README.md +++ b/README.md @@ -607,6 +607,9 @@ There is no way to update the entries, except for the revision-based parameters. - `commit.interactions`: * Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] +- `filter.commit.interactions`: + * Filter out entries from commit interaction data that are not matched to a specific function or file + * [*`TRUE`*, `FALSE`] ### NetworkConf diff --git a/tests/test-data.R b/tests/test-data.R index e4bea8c4..893661fb 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -526,6 +526,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", { proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data.one = ProjectData$new(project.conf = proj.conf) proj.data.two = proj.data.one$clone(deep = TRUE) @@ -588,4 +589,8 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) + + ## reactivate filtering of commit interactions + 
proj.data.two$set.project.conf.entry("filter.commit.interactions", TRUE) + expect_true(nrow(proj.data.two$get.commit.interactions()) == 2) }) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 122c96ee..67d1ec6c 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -219,6 +219,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() @@ -267,6 +268,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 9ffa3472..04d2c392 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -685,6 +685,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/util-conf.R b/util-conf.R index 64a9ed79..12bdca19 100644 --- a/util-conf.R +++ b/util-conf.R @@ -474,6 +474,12 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 
1 ), + filter.commit.interactions = list( + default = TRUE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), custom.event.timestamps.file = list( default = NA, type = "character", diff --git a/util-data.R b/util-data.R index 0443b183..673a19d8 100644 --- a/util-data.R +++ b/util-data.R @@ -1273,6 +1273,11 @@ ProjectData = R6::R6Class("ProjectData", commit.interaction.data = read.commit.interactions(data.path) } + ## filter commit interactions if configured + if (private$project.conf$get.value("filter.commit.interactions")) { + commit.interaction.data = subset(commit.interaction.data, + file != COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME) + } ## cache the result private$commit.interactions = commit.interaction.data private$update.commit.interactions() From f8ea987b138173cf0509c7910e0572d8ee1b3f1f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 18 Apr 2024 14:46:45 +0200 Subject: [PATCH 035/130] Add helper function for prefixing function names Helper function 'prefix.function.with.file.names' in 'util-read.R' Signed-off-by: Leo Sendelbach --- util-read.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/util-read.R b/util-read.R index 892c59cd..f5261a3f 100644 --- a/util-read.R +++ b/util-read.R @@ -70,6 +70,16 @@ remove.deleted.and.empty.user = function(data, columns = c("author.name")) { return(data) } +#' Concatenation of function and file names, e.g. 
'file::function' +#' +#' @param file.name the name of the file +#' @param function.name the name of the function +#' +#' @return the concatenated function name +prefix.function.with.file.names = function(file.name, function.name) { + return(paste(file.name, function.name, sep = "::")) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Main data sources ------------------------------------------------------- @@ -168,7 +178,7 @@ read.commits = function(data.path, artifact) { ## (we have proximity-based data as foundation) if (artifact == "function") { ## artifact = file name + "::" . function name - artifacts.new = paste(commit.data[["file"]], commit.data[["artifact"]], sep = "::") + artifacts.new = prefix.function.with.file.names(commit.data[["file"]], commit.data[["artifact"]]) ## clean up empty artifacts and File_Level artifact artifacts.new = gsub("^::$", "", artifacts.new) @@ -928,13 +938,13 @@ read.commit.interactions = function(data.path = NULL) { return(data.frame(matrix(nrow = 3, ncol = 0))) } else { file.name = file.name.map$get(hash[["function"]]) - func.name = paste(file.name, hash[("function")], sep = "::") + func.name = prefix.function.with.file.names(file.name, hash[("function")]) return(data.frame(func = func.name, commit.hash = hash[["commit"]], file = file.name)) } }))) base.file.name = file.name.map$get(function.name) interacting.hashes.df[["base.hash"]] = base.hash - interacting.hashes.df[["base.func"]] = paste(base.file.name, function.name, sep = "::") + interacting.hashes.df[["base.func"]] = prefix.function.with.file.names(base.file.name, function.name) interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) From 7d8be96d6a55eebd96c8e8ee609dd52c637cda9e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 18 Apr 2024 14:51:18 +0200 Subject: [PATCH 036/130] Change 'NEWS.md' to include new commits Now also contains an entry for new helper method Signed-off-by: Leo Sendelbach 
--- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2b28ff65..7df8b15f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,8 +6,9 @@ ### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) - Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) +- Add helper function for prefixing function 
names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) ### Changed/Improved From b8857cf64006fffce262f3d109471ae2c2003e7b Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 23 Apr 2024 12:01:04 +0200 Subject: [PATCH 037/130] Change some comments and variable names Helper function is now called `prefix.function.with.file.name` and config parameter is called `commit.interactions.filter.global`. Signed-off-by: Leo Sendelbach --- README.md | 2 +- tests/test-data.R | 4 ++-- tests/test-networks-artifact.R | 4 ++-- tests/test-networks-author.R | 2 +- util-conf.R | 2 +- util-data.R | 2 +- util-read.R | 10 +++++----- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 1c6e78fc..e8bc0877 100644 --- a/README.md +++ b/README.md @@ -607,7 +607,7 @@ There is no way to update the entries, except for the revision-based parameters. - `commit.interactions`: * Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] -- `filter.commit.interactions`: +- `commit.interactions.filter.global`: * Filter out entries from commit interaction data that are not matched to a specific function or file * [*`TRUE`*, `FALSE`] diff --git a/tests/test-data.R b/tests/test-data.R index 893661fb..aa665ac4 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -526,7 +526,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", { proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data.one = ProjectData$new(project.conf = proj.conf) proj.data.two = proj.data.one$clone(deep = TRUE) @@ -591,6 +591,6 @@ test_that("Compare two ProjectData Objects with commit.interactions", { 
expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) ## reactivate filtering of commit interactions - proj.data.two$set.project.conf.entry("filter.commit.interactions", TRUE) + proj.data.two$set.project.conf.entry("commit.interactions.filter.global", TRUE) expect_true(nrow(proj.data.two$get.commit.interactions()) == 2) }) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 67d1ec6c..79251c60 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -219,7 +219,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() @@ -268,7 +268,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 04d2c392..8f9dd11b 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -685,7 +685,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - 
proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/util-conf.R b/util-conf.R index 12bdca19..ff345c00 100644 --- a/util-conf.R +++ b/util-conf.R @@ -474,7 +474,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 1 ), - filter.commit.interactions = list( + commit.interactions.filter.global = list( default = TRUE, type = "logical", allowed = c(TRUE, FALSE), diff --git a/util-data.R b/util-data.R index 673a19d8..988146a5 100644 --- a/util-data.R +++ b/util-data.R @@ -1274,7 +1274,7 @@ ProjectData = R6::R6Class("ProjectData", } ## filter commit interactions if configured - if (private$project.conf$get.value("filter.commit.interactions")) { + if (private$project.conf$get.value("commit.interactions.filter.global")) { commit.interaction.data = subset(commit.interaction.data, file != COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME) } diff --git a/util-read.R b/util-read.R index f5261a3f..f4fe7025 100644 --- a/util-read.R +++ b/util-read.R @@ -70,13 +70,13 @@ remove.deleted.and.empty.user = function(data, columns = c("author.name")) { return(data) } -#' Concatenation of function and file names, e.g. 'file::function' +#' Concatenate function and file name, i.e. 'file::function' #' #' @param file.name the name of the file #' @param function.name the name of the function #' #' @return the concatenated function name -prefix.function.with.file.names = function(file.name, function.name) { +prefix.function.with.file.name = function(file.name, function.name) { return(paste(file.name, function.name, sep = "::")) } @@ -178,7 +178,7 @@ read.commits = function(data.path, artifact) { ## (we have proximity-based data as foundation) if (artifact == "function") { ## artifact = file name + "::" . 
function name - artifacts.new = prefix.function.with.file.names(commit.data[["file"]], commit.data[["artifact"]]) + artifacts.new = prefix.function.with.file.name(commit.data[["file"]], commit.data[["artifact"]]) ## clean up empty artifacts and File_Level artifact artifacts.new = gsub("^::$", "", artifacts.new) @@ -938,13 +938,13 @@ read.commit.interactions = function(data.path = NULL) { return(data.frame(matrix(nrow = 3, ncol = 0))) } else { file.name = file.name.map$get(hash[["function"]]) - func.name = prefix.function.with.file.names(file.name, hash[("function")]) + func.name = prefix.function.with.file.name(file.name, hash[("function")]) return(data.frame(func = func.name, commit.hash = hash[["commit"]], file = file.name)) } }))) base.file.name = file.name.map$get(function.name) interacting.hashes.df[["base.hash"]] = base.hash - interacting.hashes.df[["base.func"]] = prefix.function.with.file.names(base.file.name, function.name) + interacting.hashes.df[["base.func"]] = prefix.function.with.file.name(base.file.name, function.name) interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) From ee54b1afb5ef25743d675ef7650037a1f02efd29 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 23 Apr 2024 12:44:52 +0200 Subject: [PATCH 038/130] Add missing copyright headers Copyright headers in `install.R` and `util-conf` Signed-off-by: Leo Sendelbach --- install.R | 1 + util-conf.R | 1 + 2 files changed, 2 insertions(+) diff --git a/install.R b/install.R index 94d403d9..5a8d5743 100644 --- a/install.R +++ b/install.R @@ -19,6 +19,7 @@ ## Copyright 2020-2023 by Thomas Bock ## Copyright 2019 by Anselm Fehnker ## Copyright 2021 by Christian Hechtl +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
## ## Adapted from https://github.com/siemens/codeface/blob/be382e9171fb91b4aa99b99b09b2ef64a6dba0d5/packages.r diff --git a/util-conf.R b/util-conf.R index ff345c00..9ae2fd73 100644 --- a/util-conf.R +++ b/util-conf.R @@ -26,6 +26,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. From 10cac49d005e87c3964cc61711e7f5acef749626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 30 Apr 2024 13:24:32 +0200 Subject: [PATCH 039/130] Generate automatic CodeCov coverage reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- .github/workflows/pull_request.yml | 10 +++++ CONTRIBUTING.md | 1 + coverage.R | 67 ++++++++++++++++++++++++++++++ install.R | 4 +- tests/test-data.R | 10 ++--- 5 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 coverage.R diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4e1b7a6c..aafc36d9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -50,6 +50,8 @@ jobs: sudo apt-get install --assume-yes libxml2-dev sudo apt-get install --assume-yes libglpk-dev sudo apt-get install --assume-yes libfontconfig1-dev + sudo apt-get install --assume-yes libssl-dev + sudo apt-get install --assume-yes libcurl4-openssl-dev sudo su -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' >> /etc/apt/sources.list" wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sudo tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc @@ -62,8 +64,16 @@ jobs: run: Rscript install.R - name: Run Tests + id: tests run: Rscript tests.R - name: Run Showcase run: Rscript showcase.R if: always() + + - name: Generate Coverage + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + run: Rscript coverage.R ${CODECOV_TOKEN} 
+ if: matrix.r-version == 'latest' && steps.tests.outcome == 'success' + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0f118cd5..f5622658 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -116,6 +116,7 @@ In our development process, we pursue the following idea: The current build status is as follows: - `master`: ![Build Status](https://github.com/se-sic/coronet/actions/workflows/pull_request.yml/badge.svg?branch=master) - `dev`: ![Build Status](https://github.com/se-sic/coronet/actions/workflows/pull_request.yml/badge.svg?branch=dev) +- `test coverage`: ![Coverage Status](https://codecov.io/gh/MaLoefUDS/coronet/graph/badge.svg?token=OU3SR362X9) ### Pull Requests diff --git a/coverage.R b/coverage.R new file mode 100644 index 00000000..56419b51 --- /dev/null +++ b/coverage.R @@ -0,0 +1,67 @@ +## This file is part of coronet, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2024 by Maximilian Löffler +## All Rights Reserved. 
+ +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Initialization ---------------------------------------------------------- + +source("util-init.R") +source("tests/testing-utils.R") + + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Logging ----------------------------------------------------------------- + +library("methods") # to prevent weird error during logger initialization (see #153) +library("logging") +logging::basicConfig(level = "WARN") +options(mc.cores = 1L) + + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Debug information ------------------------------------------------------- + +logging::loginfo("Session information:") +sessionInfo() + + +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Run tests in subfolder 'tests' ------------------------------------------ + +logging::loginfo("Generating coverage report.") + +## load packages 'testthat' and 'patrick' +library("testthat") +requireNamespace("patrick") +requireNamespace("covr") + +## define paths +test.dir = c("./tests") +code.dir = c(".") +excluded.code.files = c("./tests.R", "./coverage.R", "./showcase.R", "./install.R", "./util-init.R") +excluded.test.files = c() + +## obtain files +test.files = unlist(sapply(test.dir, list.files, pattern = "\\.R$", full.names = TRUE)) +test.files = test.files[!test.files %in% excluded.test.files] +code.files = unlist(sapply(code.dir, list.files, pattern = "\\.R$", full.names = TRUE)) +code.files = code.files[!code.files %in% excluded.code.files] + +## receive codecov API token from GitHub repo secrets via command line +CODECOV_TOKEN = commandArgs(trailingOnly = TRUE)[1] + +## generate coverage report +report = covr::file_coverage(source_files = code.files, test_files = test.files) +covr::codecov(coverage = report, token = CODECOV_TOKEN) diff --git a/install.R b/install.R index 5a8d5743..8f943e76 100644 --- a/install.R +++ 
b/install.R @@ -20,6 +20,7 @@ ## Copyright 2019 by Anselm Fehnker ## Copyright 2021 by Christian Hechtl ## Copyright 2024 by Leo Sendelbach +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. ## ## Adapted from https://github.com/siemens/codeface/blob/be382e9171fb91b4aa99b99b09b2ef64a6dba0d5/packages.r @@ -47,7 +48,8 @@ packages = c( "rTensor", "Matrix", "fastmap", - "purrr" + "purrr", + "covr" ) diff --git a/tests/test-data.R b/tests/test-data.R index aa665ac4..b57a9cd5 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -489,9 +489,9 @@ test_that("Cut data and check for right data path", { project.data = ProjectData$new(project.configuration) project.data = project.data$get.data.cut.to.same.date(data.sources = c("mails", "commits")) - expected = "./codeface-data/results/testing/test_feature/feature" + expected = "./tests/codeface-data/results/testing/test_feature/feature" result = project.data$get.data.path() - expect_identical(result, expected, info = "RangeData data path.") + # expect_identical(result, expected, info = "RangeData data path.") commit.data = project.data$get.commits.unfiltered() @@ -514,10 +514,10 @@ test_that("Create RangeData objects from Codeface ranges and check data path", { range.paths = run.lapply(data, "get.data.path") range.paths = unlist(range.paths, use.names = FALSE) - expected.paths = c("./codeface-data/results/testing/test_feature/feature/001--v1-v2", - "./codeface-data/results/testing/test_feature/feature/002--v2-v3") + expected.paths = c("./tests/codeface-data/results/testing/test_feature/feature/001--v1-v2", + "./tests/codeface-data/results/testing/test_feature/feature/002--v2-v3") - expect_identical(range.paths, expected.paths, "RangeData data paths") + # expect_identical(range.paths, expected.paths, "RangeData data paths") }) test_that("Compare two ProjectData Objects with commit.interactions", { From cb1cf42b2136f35b7e85239d36d5f91ff05d8cd7 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Tue, 30 Apr 2024 
15:05:47 +0200 Subject: [PATCH 040/130] Fix installation for Matrix package on R 3.6 The recent package version of Matrix is not compatible any more with R versions < 4.4.0, but the Matrix package version that is automatically shipped with R 3.6 is older than the minimum package version of Matrix that coronet requires. To circumvent this problem, specifically install Matrix version 1.3-4 from the package archives if the minimum version required by coronet is not met. This change should not affect recent R versions, which automatically ship a sufficient version of Matrix. Signed-off-by: Thomas Bock --- install.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/install.R b/install.R index 8f943e76..56e6ef2a 100644 --- a/install.R +++ b/install.R @@ -16,7 +16,7 @@ ## Copyright 2015 by Wolfgang Mauerer ## Copyright 2015-2017 by Claus Hunsen ## Copyright 2017 by Thomas Bock -## Copyright 2020-2023 by Thomas Bock +## Copyright 2020-2024 by Thomas Bock ## Copyright 2019 by Anselm Fehnker ## Copyright 2021 by Christian Hechtl ## Copyright 2024 by Leo Sendelbach @@ -77,10 +77,12 @@ if (length(p) > 0) { Matrix.version = installed.packages()[rownames(installed.packages()) == "Matrix", "Version"] if (compareVersion(Matrix.version, "1.3.0") == -1) { print("WARNING: Matrix version 1.3.0 or higher is necessary for using coronet. 
Re-install package Matrix...") - install.packages("Matrix", dependencies = NA, verbose = TRUE, quiet = TRUE) + #install.packages("Matrix", dependencies = NA, verbose = TRUE, quiet = TRUE) + matrix.1.3.4.url = "https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.3-4.tar.gz" + install.packages(matrix.1.3.4.url, repos = NULL, dependencies = NA, verbose = TRUE, quiet = TRUE) Matrix.version = installed.packages()[rownames(installed.packages()) == "Matrix", "Version"] if (compareVersion(Matrix.version, "1.3.0") == -1) { - print("WARNING: Re-installation of package Matrix did not end up in the necessary packge version.") + print("WARNING: Re-installation of package Matrix did not end up in the necessary package version.") } } } From b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 13 May 2024 21:25:30 +0200 Subject: [PATCH 041/130] Use codecov's GitHub action to upload coverage reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should make the coverage-report uploading also possible inside of PRs that originate from forks of the main coronet repo (see, PR #262). 
Signed-off-by: Maximilian Löffler --- .github/workflows/pull_request.yml | 22 ++++++++++++++++------ coverage.R | 7 ++----- install.R | 5 +++-- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aafc36d9..a9d34d5d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -64,16 +64,26 @@ jobs: run: Rscript install.R - name: Run Tests - id: tests + id: run_tests run: Rscript tests.R - name: Run Showcase run: Rscript showcase.R if: always() - - name: Generate Coverage - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - run: Rscript coverage.R ${CODECOV_TOKEN} - if: matrix.r-version == 'latest' && steps.tests.outcome == 'success' + - name: Generate Coverage Report + id: gen_coverage + run: Rscript coverage.R + if: matrix.r-version == 'latest' && steps.run_tests.outcome == 'success' + + - name: Upload Report to CodeCov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./cobertura.xml + disable_search: true + fail_ci_if_error: true + verbose: true + if: matrix.r-version == 'latest' && steps.gen_coverage.outcome == 'success' + diff --git a/coverage.R b/coverage.R index 56419b51..f871b7b3 100644 --- a/coverage.R +++ b/coverage.R @@ -59,9 +59,6 @@ test.files = test.files[!test.files %in% excluded.test.files] code.files = unlist(sapply(code.dir, list.files, pattern = "\\.R$", full.names = TRUE)) code.files = code.files[!code.files %in% excluded.code.files] -## receive codecov API token from GitHub repo secrets via command line -CODECOV_TOKEN = commandArgs(trailingOnly = TRUE)[1] - -## generate coverage report +## generate and save coverage report report = covr::file_coverage(source_files = code.files, test_files = test.files) -covr::codecov(coverage = report, token = CODECOV_TOKEN) +covr::to_cobertura(report) diff --git a/install.R b/install.R index 56e6ef2a..cabc8352 100644 --- a/install.R +++ b/install.R @@ 
-37,18 +37,19 @@ packages = c( "sqldf", "data.table", "reshape2", - "testthat", - "patrick", "ggplot2", "ggraph", "markovchain", "lubridate", "viridis", "jsonlite", + "xml2", "rTensor", "Matrix", "fastmap", "purrr", + "testthat", + "patrick", "covr" ) From c815d18dc6266d620a7a145493417b87ac08679e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 13 May 2024 22:54:25 +0200 Subject: [PATCH 042/130] Include correct coverage status badges for master and dev branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- CONTRIBUTING.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f5622658..7608d161 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -114,9 +114,8 @@ In our development process, we pursue the following idea: - The current development will be performed on the branch `dev`, i.e., all incoming pull requests are against this branch. 
The current build status is as follows: -- `master`: ![Build Status](https://github.com/se-sic/coronet/actions/workflows/pull_request.yml/badge.svg?branch=master) -- `dev`: ![Build Status](https://github.com/se-sic/coronet/actions/workflows/pull_request.yml/badge.svg?branch=dev) -- `test coverage`: ![Coverage Status](https://codecov.io/gh/MaLoefUDS/coronet/graph/badge.svg?token=OU3SR362X9) +- `master`: ![Build Status](https://github.com/se-sic/coronet/actions/workflows/pull_request.yml/badge.svg?branch=master) [![Coverage Status](https://codecov.io/gh/se-sic/coronet/branch/master/graph/badge.svg?token=2dhAb3ScLy)](https://codecov.io/gh/se-sic/coronet) +- `dev`: ![Build Status](https://github.com/se-sic/coronet/actions/workflows/pull_request.yml/badge.svg?branch=dev) [![Coverage Status](https://codecov.io/gh/se-sic/coronet/branch/dev/graph/badge.svg?token=2dhAb3ScLy)](https://codecov.io/gh/se-sic/coronet) ### Pull Requests From e8093525fdaf46e54f2f7fcc6358ca7892e795e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 13 May 2024 23:15:40 +0200 Subject: [PATCH 043/130] Introduce a constant for the data path prefix in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new constant in 'testing-utils.R' that describes a prefix of the path leading to the testing data files. This constant is necessary since the 'coverage.R' run of all tests demands a different path prefix than the 'tests.R' run of all tests. 
Signed-off-by: Maximilian Löffler --- coverage.R | 3 +++ tests/test-data.R | 12 ++++++------ tests/testing-utils.R | 9 ++++++++- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/coverage.R b/coverage.R index f871b7b3..9e66cbfc 100644 --- a/coverage.R +++ b/coverage.R @@ -59,6 +59,9 @@ test.files = test.files[!test.files %in% excluded.test.files] code.files = unlist(sapply(code.dir, list.files, pattern = "\\.R$", full.names = TRUE)) code.files = code.files[!code.files %in% excluded.code.files] +## adjust data path prefix when generating coverage reports +DATA.PATH.PREFIX = "./tests" + ## generate and save coverage report report = covr::file_coverage(source_files = code.files, test_files = test.files) covr::to_cobertura(report) diff --git a/tests/test-data.R b/tests/test-data.R index b57a9cd5..e6136f54 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -19,7 +19,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -489,9 +489,9 @@ test_that("Cut data and check for right data path", { project.data = ProjectData$new(project.configuration) project.data = project.data$get.data.cut.to.same.date(data.sources = c("mails", "commits")) - expected = "./tests/codeface-data/results/testing/test_feature/feature" + expected = paste0(DATA.PATH.PREFIX, "/codeface-data/results/testing/test_feature/feature") result = project.data$get.data.path() - # expect_identical(result, expected, info = "RangeData data path.") + expect_identical(result, expected, info = "RangeData data path.") commit.data = project.data$get.commits.unfiltered() @@ -514,10 +514,10 @@ test_that("Create RangeData objects from Codeface ranges and check data path", { range.paths = run.lapply(data, "get.data.path") range.paths = unlist(range.paths, use.names = FALSE) - expected.paths = c("./tests/codeface-data/results/testing/test_feature/feature/001--v1-v2", - "./tests/codeface-data/results/testing/test_feature/feature/002--v2-v3") + expected.paths = c(paste0(DATA.PATH.PREFIX, "/codeface-data/results/testing/test_feature/feature/001--v1-v2"), + paste0(DATA.PATH.PREFIX, "/codeface-data/results/testing/test_feature/feature/002--v2-v3")) - # expect_identical(range.paths, expected.paths, "RangeData data paths") + expect_identical(range.paths, expected.paths, "RangeData data paths") }) test_that("Compare two ProjectData Objects with commit.interactions", { diff --git a/tests/testing-utils.R b/tests/testing-utils.R index 567cdd78..a3783c39 100644 --- a/tests/testing-utils.R +++ b/tests/testing-utils.R @@ -13,6 +13,7 @@ ## ## Copyright 2022 by Jonathan Baumann ## Copyright 2024 by Leo Sendelbach +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -21,6 +22,12 @@ requireNamespace("patrick") requireNamespace("igraph") +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Constants --------------------------------------------------------------- + +## The path prefix for data files +DATA.PATH.PREFIX = "." + #' Construct the 'cross product' of two patrick::cases objects. #' Each case of the first object is combined with each case of the second, #' test names are joined with a comma. @@ -150,4 +157,4 @@ assert.sparse.matrices.equal = function(matrix.expected, matrix.actual) { for (i in seq_len(expected.size)) { expect_equal(matrix.expected[i], matrix.actual[i]) } -} \ No newline at end of file +} From 32d04823e2007c63d2a43ce59bea3057327c19a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Fri, 17 May 2024 19:00:11 +0200 Subject: [PATCH 044/130] Decouple coverage report generation from upload to codecov in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to rerun only the upload if it fails because of rate limiting. 
Signed-off-by: Maximilian Löffler --- .github/workflows/pull_request.yml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a9d34d5d..3ddb30f4 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -76,14 +76,30 @@ jobs: run: Rscript coverage.R if: matrix.r-version == 'latest' && steps.run_tests.outcome == 'success' + - name: Store coverage report + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: cobertura.xml + if: matrix.r-version == 'latest' && steps.gen_coverage.outcome == 'success' + + upload-coverage-report: + name: Upload coverage report + needs: build + runs-on: ubuntu-latest + + steps: + - name: Load coverage report + uses: actions/download-artifact@v4 + with: + name: coverage-report + - name: Upload Report to CodeCov uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./cobertura.xml + file: cobertura.xml disable_search: true fail_ci_if_error: true verbose: true - if: matrix.r-version == 'latest' && steps.gen_coverage.outcome == 'success' - From 1e8c23bc3d8563dfa335df84318aca2c1e794d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 15 May 2024 19:59:48 +0200 Subject: [PATCH 045/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 7df8b15f..6c6804a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,11 +9,14 @@ - Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, 
fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) - Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) - Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) +- Add line-based code coverage reports into CI pipeline. Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) ### Changed/Improved ### Fixed +- Ensure the correct installation of a compatible `Matrix` version for R 3.6 (PR #262, cb1cf42b2136f35b7e85239d36d5f91ff05d8cd7) + ## 4.4 ### Announcement From 1088395f46b84028c8d7c463ca86b5dc38500c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Sat, 27 Apr 2024 11:35:23 +0200 Subject: [PATCH 046/130] Allow 'split.basis' to be a vector in 'split.data.time.based' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow to explicitly specify 'split.basis' as a vector when using 'split.data.time.based'. The resulting splits will then be constructed from the union of elements from all datasources in 'split.basis'. 
Signed-off-by: Maximilian Löffler --- util-split.R | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/util-split.R b/util-split.R index d68f9cae..1b54f897 100644 --- a/util-split.R +++ b/util-split.R @@ -52,7 +52,8 @@ requireNamespace("lubridate") # for date conversion #' time-sized windows for all ranges. If set, the \code{time.period} and \code{bins} parameters are ignored; #' consequently, \code{sliding.window} does not make sense then either. #' [default: NULL] -#' @param split.basis the data name to use as the basis for split bins, either 'commits', 'mails', or 'issues' +#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues' +#' or an arbitrary combination of them #' [default: "commits"] #' @param sliding.window logical indicating whether the splitting should be performed using a sliding-window approach #' [default: FALSE] @@ -65,6 +66,14 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = number.windows = NULL, split.basis = c("commits", "mails", "issues"), sliding.window = FALSE, project.conf.new = NULL) { + # ensure 'split.basis' defaults to 'commits' if not defined + # and allow it to be a vector if explicitly wanted + if(!hasArg("split.basis")) { + split.basis = match.arg.or.default(split.basis, several.ok = FALSE, default = "commits") + } else { + split.basis = match.arg.or.default(split.basis, several.ok = TRUE) + } + # validate existence and type of the 'bins' parameter if (!is.null(bins) && !lubridate::is.POSIXct(bins)) { dates = parallel::mclapply(unlist(bins), get.date.from.string) @@ -99,6 +108,9 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = split.data.by.bins = function(project.data, activity.amount, bins, split.basis = c("commits", "mails", "issues"), sliding.window) { + ## get basis for splitting process + split.basis = match.arg(split.basis) + # validate type of the 'bins' 
parameter if (is.null(bins) || !is.list(bins)) { logging::logerror("The bins parameter needs to be of type list, (is %s)", class(bins)) @@ -887,7 +899,8 @@ split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, r #' @param split.by.time logical indicating whether splitting is done time-based or activity-bins-based #' @param number.windows see \code{number.windows} from \code{split.data.time.based} #' [default: NULL] -#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', or 'issues' +#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues' +#' or an arbitrary combination of them #' [default: "commits"] #' @param sliding.window logical indicating whether the splitting should be performed using a sliding-window approach #' [default: FALSE] @@ -903,13 +916,13 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli number.windows = NULL, split.basis = c("commits", "mails", "issues"), sliding.window = FALSE, project.conf.new = NULL) { - ## get basis for splitting process - split.basis = match.arg(split.basis) - ## if the data used by the split basis is not present, load it automatically - if (!(split.basis %in% project.data$get.cached.data.sources("only.unfiltered"))) { - function.name = DATASOURCE.TO.UNFILTERED.ARTIFACT.FUNCTION[[split.basis]] - project.data[[function.name]]() + for (i in seq_along(split.basis)) { + data.source = split.basis[i] + if (!(data.source %in% project.data$get.cached.data.sources("only.unfiltered"))) { + function.name = DATASOURCE.TO.UNFILTERED.ARTIFACT.FUNCTION[[data.source]] + project.data[[function.name]]() + } } ## get actual raw data @@ -945,7 +958,9 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli ## if bins are NOT given explicitly if (is.null(bins)) { ## get bins based on split.basis - bins = split.get.bins.time.based(data[[split.basis]][["date"]], 
splitting.length, number.windows)$bins + dates = project.data$get.data.timestamps(split.basis) + dates = get.date.from.unix.timestamp(unname(unlist(dates))) + bins = split.get.bins.time.based(dates, splitting.length, number.windows)$bins bins.labels = head(bins, -1) ## logging logging::loginfo("Splitting data '%s' into time ranges of %s based on '%s' data.", From e1f79fc9e40cd6f41c946be42db364b2101cfe10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Sat, 27 Apr 2024 11:40:11 +0200 Subject: [PATCH 047/130] Test 'split.data.time.based' with multiple datasources in 'split.basis' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- tests/test-split-data-time-based.R | 47 ++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/test-split-data-time-based.R b/tests/test-split-data-time-based.R index 67945105..b5da4e3e 100644 --- a/tests/test-split-data-time-based.R +++ b/tests/test-split-data-time-based.R @@ -770,6 +770,53 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis "pasta, synchronicity: TRUE" = list(test.pasta = TRUE, test.synchronicity = TRUE) )) + +## +## Tests for split.data.time.based(..., split.basis = c('mails', 'issues'), with and without sliding windows +## + +patrick::with_parameters_test_that("Split a data object time-based (split.basis = c('mails', 'issues'))", { + + ## configuration objects + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + net.conf = NetworkConf$new() + + ## data object + project.data = ProjectData$new(proj.conf) + + # remove really old mail data + mail.data = project.data$get.mails() + mail.data = mail.data[-(1:12), ] + project.data$set.mails(mail.data) + + # check mail date bounds + expect_equal(min(mail.data$date), as.POSIXct("2016-07-12 15:58:40")) + expect_equal(max(mail.data$date), as.POSIXct("2016-07-12 16:05:37")) + + # keep issue 
data that roughly overlaps the mail data + issue.data = project.data$get.issues() + issue.data = issue.data[-(1:12), ] + issue.data = issue.data[-(8:12), ] + project.data$set.issues(issue.data) + + # check issue date bounds + expect_equal(min(issue.data$date), as.POSIXct("2016-07-12 15:59:25")) + expect_equal(max(issue.data$date), as.POSIXct("2016-07-12 16:06:01")) + + # split by 'mails' and 'issues' without sliding window + results = split.data.time.based(project.data, time.period = "1 min", + split.basis = c("mails", "issues"), sliding.window = test.sliding.window) + + # bins should be union of both sources + expect_equal(min(attr(results, "bins")), min(c(issue.data$date, mail.data$date))) + expect_equal(max(attr(results, "bins")), max(c(issue.data$date, mail.data$date)) + 1) + +}, patrick::cases( + "sliding.windows: FALSE" = list(test.sliding.window = FALSE), + "sliding.windoww: TRUE" = list(test.sliding.window = TRUE) +)) + + ## * * bins ---------------------------------------------------------------- ## From a232fae260fa470e2428f302d108ce850c3e7e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 6 May 2024 13:09:01 +0200 Subject: [PATCH 048/130] Adhere to coding and documentation standards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- tests/test-split-data-time-based.R | 2 +- util-split.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test-split-data-time-based.R b/tests/test-split-data-time-based.R index b5da4e3e..c3f31012 100644 --- a/tests/test-split-data-time-based.R +++ b/tests/test-split-data-time-based.R @@ -803,7 +803,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis expect_equal(min(issue.data$date), as.POSIXct("2016-07-12 15:59:25")) expect_equal(max(issue.data$date), as.POSIXct("2016-07-12 16:06:01")) - # split by 'mails' and 'issues' without sliding window + # split by 
'mails' and 'issues' results = split.data.time.based(project.data, time.period = "1 min", split.basis = c("mails", "issues"), sliding.window = test.sliding.window) diff --git a/util-split.R b/util-split.R index 1b54f897..462a2c32 100644 --- a/util-split.R +++ b/util-split.R @@ -52,7 +52,7 @@ requireNamespace("lubridate") # for date conversion #' time-sized windows for all ranges. If set, the \code{time.period} and \code{bins} parameters are ignored; #' consequently, \code{sliding.window} does not make sense then either. #' [default: NULL] -#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues' +#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues', #' or an arbitrary combination of them #' [default: "commits"] #' @param sliding.window logical indicating whether the splitting should be performed using a sliding-window approach @@ -67,7 +67,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = sliding.window = FALSE, project.conf.new = NULL) { # ensure 'split.basis' defaults to 'commits' if not defined - # and allow it to be a vector if explicitly wanted + # and allow it to contain multiple data sources if explicitly wanted if(!hasArg("split.basis")) { split.basis = match.arg.or.default(split.basis, several.ok = FALSE, default = "commits") } else { @@ -899,7 +899,7 @@ split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, r #' @param split.by.time logical indicating whether splitting is done time-based or activity-bins-based #' @param number.windows see \code{number.windows} from \code{split.data.time.based} #' [default: NULL] -#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues' +#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues', #' or an arbitrary combination of them #' [default: 
"commits"] #' @param sliding.window logical indicating whether the splitting should be performed using a sliding-window approach @@ -960,7 +960,7 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli ## get bins based on split.basis dates = project.data$get.data.timestamps(split.basis) dates = get.date.from.unix.timestamp(unname(unlist(dates))) - bins = split.get.bins.time.based(dates, splitting.length, number.windows)$bins + bins = split.get.bins.time.based(dates, splitting.length, number.windows)[["bins"]] bins.labels = head(bins, -1) ## logging logging::loginfo("Splitting data '%s' into time ranges of %s based on '%s' data.", From 0bb187fec0fd801d7634bf8d5180525770f6ab0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 13 May 2024 17:35:32 +0200 Subject: [PATCH 049/130] Concretize the expected bins when splitting by multiple datasources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- tests/test-split-data-time-based.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test-split-data-time-based.R b/tests/test-split-data-time-based.R index c3f31012..f4488bf1 100644 --- a/tests/test-split-data-time-based.R +++ b/tests/test-split-data-time-based.R @@ -807,9 +807,21 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis results = split.data.time.based(project.data, time.period = "1 min", split.basis = c("mails", "issues"), sliding.window = test.sliding.window) - # bins should be union of both sources - expect_equal(min(attr(results, "bins")), min(c(issue.data$date, mail.data$date))) - expect_equal(max(attr(results, "bins")), max(c(issue.data$date, mail.data$date)) + 1) + # define bins for 'test.sliding.window' = TRUE + expected.bins = get.date.from.string(c("2016-07-12 15:58:40", "2016-07-12 15:59:10", "2016-07-12 15:59:40", + "2016-07-12 16:00:10", 
"2016-07-12 16:00:40", "2016-07-12 16:01:10", + "2016-07-12 16:01:40", "2016-07-12 16:02:10", "2016-07-12 16:02:40", + "2016-07-12 16:03:10", "2016-07-12 16:03:40", "2016-07-12 16:04:10", + "2016-07-12 16:04:40", "2016-07-12 16:05:10", "2016-07-12 16:05:40", + "2016-07-12 16:06:02")) + + if (!test.sliding.window) { + # define bins for 'test.sliding.window' = FALSE + # remove every second sliding bin but the last one + expected.bins = expected.bins[c(seq(1, length(expected.bins), by = 2), length(expected.bins))] + } + + expect_equal(attr(results, "bins"), expected.bins) }, patrick::cases( "sliding.windows: FALSE" = list(test.sliding.window = FALSE), From 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 13 May 2024 17:47:29 +0200 Subject: [PATCH 050/130] Improve the documentation of the 'split.basis' parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Concretize the roxygen documentation for 'split.data.by.bins' and 'split.data.by.time.or.bins' regarding the 'split.basis' parameter * Reword comments in 'split.data.activity.based' to not include 'split basis' but rather 'activity type' for consistency with the parameter name * Remove unnecessary default value for 'split.basis' in 'split.data.by.time.or.bins' since defaults are now handled in all wrapper functions Signed-off-by: Maximilian Löffler --- util-split.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/util-split.R b/util-split.R index 462a2c32..ed09a7ec 100644 --- a/util-split.R +++ b/util-split.R @@ -98,7 +98,9 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = #' \code{bins}: Dates defining the start of bins (the last date defines the end of the last bin, in an #' *exclusive* manner). #' The expected format of \code{bins} is produced by \code{split.get.bins.activity.based}. 
-#' @param split.basis the data name to use as the basis for split bins, either 'commits', 'mails', or 'issues' +#' @param split.basis the data source that was used to obtain \code{bins} from \code{split.get.bins.activity.based}, +#' either 'commits', 'mails', or 'issues'. \code{split.basis} is necessary to associate +#' \code{bins$vector} with the correct data elements. #' [default: "commits"] #' @param sliding.window logical indicating whether a sliding-window approach was used when obtaining the \code{bins}. #' @@ -195,7 +197,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits", activity.amount = 5000, number.windows = NULL, sliding.window = FALSE, project.conf.new = NULL) { - ## get basis for splitting process + ## get activity type for splitting process activity.type = match.arg(activity.type) ## get actual raw data @@ -207,13 +209,13 @@ split.data.activity.based = function(project.data, activity.type = c("commits", }) names(data) = data.sources - ## if the data used by the split basis is not present, load it automatically + ## if the data used by the splitting activity type is not present, load it automatically if (!(activity.type %in% project.data$get.cached.data.sources("only.unfiltered"))) { function.name = DATASOURCE.TO.UNFILTERED.ARTIFACT.FUNCTION[[activity.type]] project.data[[function.name]]() } - ## define ID columns for mails and commits + ## define ID columns for commits, mails, and issues id.column = list( commits = "hash", mails = "message.id", @@ -264,7 +266,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits", logging::loginfo("Splitting data '%s' into activity ranges of %s %s (%s windows).", project.data$get.class.name(), activity.amount, activity.type, number.windows) - ## get bins based on 'split.basis'. Here the 'include.duplicate.ids' parameter flag must be set, to + ## get bins based on 'activity.type'. 
Here the 'include.duplicate.ids' parameter flag must be set, to ## retrieve bins which map every event to a bin including events with non-unique ids. This is important ## to ensure that every range really has 'activity.amount' many entries after splitting logging::logdebug("Getting activity-based bins.") @@ -899,9 +901,8 @@ split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, r #' @param split.by.time logical indicating whether splitting is done time-based or activity-bins-based #' @param number.windows see \code{number.windows} from \code{split.data.time.based} #' [default: NULL] -#' @param split.basis the data source to use as the basis for split bins, either 'commits', 'mails', 'issues', -#' or an arbitrary combination of them -#' [default: "commits"] +#' @param split.basis either formatted as the \code{split.basis} from \code{split.data.time.based} +#' or from \code{split.data.by.bins}. #' @param sliding.window logical indicating whether the splitting should be performed using a sliding-window approach #' [default: FALSE] #' @param project.conf.new the new project config to construct the \code{RangeData} objects. 
@@ -913,8 +914,8 @@ split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, r #' @seealso split.data.time.based #' @seealso split.data.by.bins split.data.by.time.or.bins = function(project.data, splitting.length, bins, split.by.time, - number.windows = NULL, split.basis = c("commits", "mails", "issues"), - sliding.window = FALSE, project.conf.new = NULL) { + number.windows = NULL, split.basis, sliding.window = FALSE, + project.conf.new = NULL) { ## if the data used by the split basis is not present, load it automatically for (i in seq_along(split.basis)) { From 1561d4b3c5fe54c778efab8484b9b26b3595320a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Thu, 16 May 2024 11:30:05 +0200 Subject: [PATCH 051/130] Update 'showcase.R' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- showcase.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/showcase.R b/showcase.R index 4a2c9a72..d46ad439 100644 --- a/showcase.R +++ b/showcase.R @@ -281,6 +281,7 @@ run.lapply(data, "get.data.path.callgraph") ## * Data-based splitting -------------------------------------------------- +## split time-based using commits as the data source to split by (with sliding windows) cf.data = split.data.time.based(x.data, time.period = "18 months", split.basis = "commits", sliding.window = TRUE) for (range in names(cf.data)) { y.data = cf.data[[range]] @@ -289,6 +290,15 @@ for (range in names(cf.data)) { } print(run.lapply(cf.data, "get.class.name")) +## split time-based using commits and issues as the data sources to split by (without sliding windows) +cf.data = split.data.time.based(x.data, time.period = "18 month", split.basis = c("commits", "issues")) +for (range in names(cf.data)) { + y.data = cf.data[[range]] + y = NetworkBuilder$new(project.data = y.data, network.conf = net.conf) + plot.network(y$get.bipartite.network()) +} +print(run.lapply(cf.data, 
"get.class.name")) + mybins = c("2012-07-10 15:58:00", "2012-07-15 16:02:00", "2012-07-20 16:04:00", "2012-07-25 16:06:30") cf.data = split.data.time.based(x.data, bins = mybins) for (range in names(cf.data)) { From 210f29243e7772fd8f2e58aaa4e55daa784bb6c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Thu, 23 May 2024 11:43:06 +0200 Subject: [PATCH 052/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 6c6804a1..e1bac1c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,7 @@ - Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) - Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) - Add line-based code coverage reports into CI pipeline. 
Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) +- Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) ### Changed/Improved From 06d4d6ac72ceb647c972db861a0943d4a321b802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 27 May 2024 15:13:15 +0200 Subject: [PATCH 053/130] Add a missing whitespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- util-split.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-split.R b/util-split.R index ed09a7ec..b4073194 100644 --- a/util-split.R +++ b/util-split.R @@ -68,7 +68,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins = # ensure 'split.basis' defaults to 'commits' if not defined # and allow it to contain multiple data sources if explicitly wanted - if(!hasArg("split.basis")) { + if (!hasArg("split.basis")) { split.basis = match.arg.or.default(split.basis, several.ok = FALSE, default = "commits") } else { split.basis = match.arg.or.default(split.basis, several.ok = TRUE) From 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 27 May 2024 16:21:06 +0200 Subject: [PATCH 054/130] Replace recently deprecated igraph methods to mitigate warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- showcase.R | 2 +- tests/test-core-peripheral.R | 6 +- tests/test-networks-artifact.R | 10 +- tests/test-networks-author.R | 22 
++-- tests/test-networks-bipartite.R | 21 ++-- tests/test-networks-covariates.R | 140 +++++++++++----------- tests/test-networks-equal-constructions.R | 9 +- tests/test-networks-misc.R | 49 ++++---- tests/test-networks-multi-relation.R | 16 +-- tests/test-networks-multi.R | 4 +- tests/test-networks.R | 56 ++++----- tests/test-split-misc.R | 8 +- util-bulk.R | 9 +- util-misc.R | 2 +- util-motifs.R | 7 +- util-networks-covariates.R | 5 +- util-networks-misc.R | 5 +- util-networks.R | 88 +++++++------- util-plot.R | 28 ++--- util-split.R | 6 +- 20 files changed, 250 insertions(+), 243 deletions(-) diff --git a/showcase.R b/showcase.R index d46ad439..74da2497 100644 --- a/showcase.R +++ b/showcase.R @@ -407,7 +407,7 @@ plot.print.network(g, labels = TRUE) lay = matrix(c( 20, 179, 693, 552, 956, 1091, 124, 317, 516, 615, 803, 1038, 245, 175, 255, 185, 253, 225, 73, 8, 75, 0, 96, 86), nrow = 12, byrow = FALSE) # for sample graph -g = igraph::set.graph.attribute(g, "layout", lay) +g = igraph::set_graph_attr(g, "layout", lay) plot.print.network(g, labels = TRUE) ## get the plot object and modify it before plotting diff --git a/tests/test-core-peripheral.R b/tests/test-core-peripheral.R index c9397d6f..f56ea325 100644 --- a/tests/test-core-peripheral.R +++ b/tests/test-core-peripheral.R @@ -17,7 +17,7 @@ ## Copyright 2022 by Thomas Bock ## Copyright 2019 by Christian Hechtl ## Copyright 2021 by Christian Hechtl -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -301,7 +301,7 @@ test_that("Core classification of cochange author networks with vertices but no ## create network with one author and no edges authors = data.frame(author.name = "A", kind = TYPE.AUTHOR, type = TYPE.AUTHOR) edges = create.empty.edge.list() - network = igraph::graph.data.frame(edges, directed = TRUE, vertices = authors) + network = igraph::graph_from_data_frame(edges, directed = TRUE, vertices = authors) ## classify the authors into core/peripheral classification = get.author.class.by.type(network, type = "network.eigen") @@ -311,7 +311,7 @@ test_that("Core classification of cochange author networks with vertices but no ## create network with several authors and no edges authors = data.frame(author.name = LETTERS[1:5], kind = TYPE.AUTHOR, type = TYPE.AUTHOR) edges = create.empty.edge.list() - network = igraph::graph.data.frame(edges, directed = TRUE, vertices = authors) + network = igraph::graph_from_data_frame(edges, directed = TRUE, vertices = authors) ## classify the authors into core/peripheral classification = get.author.class.by.type(network, type = "network.eigen") diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 79251c60..2a717080 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -56,7 +56,7 @@ test_that("Network construction of the undirected artifact-cochange network", { relation = "cochange" ) ## 3) build expected network - network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) ## @@ -156,7 +156,7 @@ patrick::with_parameters_test_that("Network construction of an issue-based artif network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) ## build expected network - network.expected = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network.expected = 
igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) ## build network network.built = network.builder$get.artifact.network() @@ -204,7 +204,7 @@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o ) ## build expected network - network.expected = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network.expected = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) ## test assert.networks.equal(network.built, network.expected) @@ -254,7 +254,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -303,7 +303,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 8f9dd11b..863f38a8 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -230,7 +230,7 @@ test_that("Network construction of the undirected author-cochange network", { relation = "cochange" ) ## 3) build expected network - 
network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) + network.expected = igraph::graph_from_data_frame(data, directed = FALSE, vertices = authors) ## @@ -314,7 +314,7 @@ test_that("Network construction of the undirected but temorally ordered author-c ) ## build expected network - network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices = authors) + network.expected = igraph::graph_from_data_frame(data, directed = FALSE, vertices = authors) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -355,7 +355,7 @@ test_that("Network construction of the directed author-cochange network", { ) ## build expected network - network.expected = igraph::graph.data.frame(data, directed = TRUE, vertices = authors) + network.expected = igraph::graph_from_data_frame(data, directed = TRUE, vertices = authors) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -401,7 +401,7 @@ test_that("Network construction of the directed author-cochange network without ) ## build expected network - network.expected = igraph::graph.data.frame(data, directed = TRUE, vertices = authors) + network.expected = igraph::graph_from_data_frame(data, directed = TRUE, vertices = authors) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -427,7 +427,7 @@ test_that("Network construction of the undirected simplified author-cochange net type = TYPE.AUTHOR) ## make test independent of igraph version - date.attr = igraph::get.edge.attribute(network.built, "date") + date.attr = igraph::edge_attr(network.built, "date") date.conversion.function = ifelse(all(sapply(date.attr, lubridate::is.POSIXct)), get.date.from.unix.timestamp, identity) @@ -462,7 +462,7 @@ test_that("Network construction of the undirected simplified author-cochange net data[["artifact"]] = unclass(data[["artifact"]]) ## build expected network - network.expected = igraph::graph.data.frame(data, directed = FALSE, vertices 
= authors) + network.expected = igraph::graph_from_data_frame(data, directed = FALSE, vertices = authors) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -586,7 +586,7 @@ test_that("Network construction of the undirected author-issue network with all ) ## build expected network - network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -645,7 +645,7 @@ test_that("Network construction of the undirected author-issue network with just relation = "issue") ## build expected network - network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -673,7 +673,7 @@ test_that("Network construction with only untracked files (no edges expected)", ## build expected network (two vertices, no edges) vertices = list(name = c("Karl", "Thomas"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR) network.expected = create.empty.network(directed = FALSE, add.attributes = TRUE) - network.expected = igraph::add.vertices(network.expected, nv = max(lengths(vertices)), attr = vertices) + network.expected = igraph::add_vertices(network.expected, nv = max(lengths(vertices)), attr = vertices) ## test expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -721,10 +721,10 @@ patrick::with_parameters_test_that("Network construction with commit-interaction type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = 
igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) -)) \ No newline at end of file +)) diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index c07e0c2b..646bad3a 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -18,6 +18,7 @@ ## Copyright 2018 by Jakob Kronawitter ## Copyright 2018-2019 by Anselm Fehnker ## Copyright 2021 by Johannes Hostert +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. @@ -81,7 +82,7 @@ test_that("Construction of the bipartite network for the feature artifact with a relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -134,7 +135,7 @@ test_that("Construction of the bipartite network for the file artifact with auth relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -187,7 +188,7 @@ test_that("Construction of the bipartite network for the function artifact with relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph_from_data_frame(network.expected.data, directed = 
net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -237,7 +238,7 @@ test_that("Construction of the bipartite network for the featureexpression artif relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -306,7 +307,7 @@ test_that("Construction of the bipartite network for the feature artifact with a relation = "issue" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = igraph::graph_from_data_frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -358,7 +359,7 @@ test_that("Construction of the directed bipartite network for the feature artifa relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -411,7 +412,7 @@ test_that("Construction of the directed bipartite network for the file artifact relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) 
expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -465,7 +466,7 @@ test_that("Construction of the directed bipartite network for the function artif relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -517,7 +518,7 @@ test_that("Construction of the directed bipartite network for the featureexpress relation = "cochange" ) ## 3) construct expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -560,7 +561,7 @@ test_that("Network construction with only untracked files (no edges and artifact network.expected = construct.network.from.edge.list(vertices = vertices, edge.list = edges, network.conf = net.conf, directed = net.conf$get.value("author.directed")) ## 4) remove edge again - network.expected = igraph::delete.edges(network.expected, 1) + network.expected = igraph::delete_edges(network.expected, 1) ## test expect_true(igraph::identical_graphs(network.built, network.expected)) diff --git a/tests/test-networks-covariates.R b/tests/test-networks-covariates.R index d3c06863..92084ea4 100644 --- a/tests/test-networks-covariates.R +++ b/tests/test-networks-covariates.R @@ -21,7 +21,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021-2022 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -461,7 +461,7 @@ test_that("Test add.vertex.attribute.author.commit.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "commit.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "commit.count") expect_identical(expected.attributes[[level]], actual.attributes) }) @@ -489,7 +489,7 @@ test_that("Test add.vertex.attribute.author.commit.count.committer.and.author", networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "commit.count.committer.and.author") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "commit.count.committer.and.author") expect_identical(expected.attributes[[level]], actual.attributes) }) @@ -517,7 +517,7 @@ test_that("Test add.vertex.attribute.author.commit.count.committer.or.author", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "commit.count.committer.or.author") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "commit.count.committer.or.author") expect_identical(expected.attributes[[level]], actual.attributes) }) @@ -544,7 +544,7 @@ test_that("Test add.vertex.attribute.author.mail.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "mail.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "mail.count") expect_identical(expected.attributes[[level]], actual.attributes) }) @@ -571,7 +571,7 @@ test_that("Test add.vertex.attribute.author.mail.thread.count", { 
networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "mail.thread.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "mail.thread.count") expect_identical(expected.attributes[[level]], actual.attributes) }) @@ -624,7 +624,7 @@ test_that("Test add.vertex.attribute.author.issue.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -637,7 +637,7 @@ test_that("Test add.vertex.attribute.author.issue.count", { issue.type = "pull.requests", name = "pull.request.count" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pull.request.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pull.request.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -649,7 +649,7 @@ test_that("Test add.vertex.attribute.author.issue.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -688,7 +688,7 @@ test_that("Test add.vertex.attribute.author.issues.commented.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) - actual.attributes = lapply(networks.with.attr, 
igraph::get.vertex.attribute, name = "issues.commented.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issues.commented.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -701,7 +701,7 @@ test_that("Test add.vertex.attribute.author.issues.commented.count", { issue.type = "pull.requests", name = "pull.requests.commented.count" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pull.requests.commented.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pull.requests.commented.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -713,7 +713,7 @@ test_that("Test add.vertex.attribute.author.issues.commented.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issues.commented.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issues.commented.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -751,7 +751,7 @@ test_that("Test add.vertex.attribute.author.issue.creation.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.creation.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.creation.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -764,7 +764,7 @@ test_that("Test add.vertex.attribute.author.issue.creation.count", { issue.type = "pull.requests", name = "pull.request.creation.count" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pull.request.creation.count") + 
actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pull.request.creation.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -776,7 +776,7 @@ test_that("Test add.vertex.attribute.author.issue.creation.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.creation.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.creation.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -814,7 +814,7 @@ test_that("Test add.vertex.attribute.author.issue.comment.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.comment.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.comment.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -827,7 +827,7 @@ test_that("Test add.vertex.attribute.author.issue.comment.count", { issue.type = "pull.requests", name = "pull.request.comment.count" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pull.request.comment.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pull.request.comment.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -839,7 +839,7 @@ test_that("Test add.vertex.attribute.author.issue.comment.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, issue.type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.comment.count") + actual.attributes = lapply(networks.with.attr, 
igraph::vertex_attr, name = "issue.comment.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -864,7 +864,7 @@ test_that("Test add.vertex.attribute.author.email", { networks.and.data[["networks"]], networks.and.data[["project.data"]] ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "author.email") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "author.email") expect_identical(expected.attributes, actual.attributes) }) @@ -892,7 +892,7 @@ test_that("Test add.vertex.attribute.author.artifact.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "artifact.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "artifact.count") expect_identical(expected.attributes[[level]], actual.attributes) }) @@ -979,7 +979,7 @@ test_that("Test add.vertex.attribute.author.first.activity with multiple types a activity.types = c("mails", "commits", "issues"), name = "first.activity", aggregation.level = level, default.value = NA, combine.activity.types = TRUE ) - actual.attributes = lapply(networks.with.attributes, igraph::get.vertex.attribute, name = "first.activity") + actual.attributes = lapply(networks.with.attributes, igraph::vertex_attr, name = "first.activity") expect_equal(expected.attributes[[level]], actual.attributes) }) @@ -1005,7 +1005,7 @@ test_that("Test add.vertex.attribute.author.first.activity with multiple types a activity.types = c("mails", "commits", "issues"), name = "first.activity", aggregation.level = level, default.value = NA, combine.activity.types = FALSE ) - actual.attributes = lapply(networks.with.attributes, igraph::get.vertex.attribute, name = "first.activity") + actual.attributes = lapply(networks.with.attributes, igraph::vertex_attr, name = "first.activity") 
expect_equal(expected.attributes[[level]], actual.attributes) }) @@ -1037,7 +1037,7 @@ test_that("Test add.vertex.attribute.author.first.activity with one type and com activity.types = c("mails"), name = "first.activity", aggregation.level = level, default.value = NA, combine.activity.types = FALSE ) - actual.attributes = lapply(networks.with.attributes, igraph::get.vertex.attribute, name = "first.activity") + actual.attributes = lapply(networks.with.attributes, igraph::vertex_attr, name = "first.activity") expect_equal(expected.attributes[[level]], actual.attributes) }) @@ -1057,7 +1057,7 @@ test_that("Test add.vertex.attribute.author.active.ranges with computation over networks.and.data[["networks"]], networks.and.data[["project.data"]], combine.activity.types = TRUE ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "active.ranges") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "active.ranges") # adjust prepared expected attributes to the current use case expected.attributes = lapply(get.expected.active.ranges(), function(active.ranges) { @@ -1086,7 +1086,7 @@ test_that("Test default values of add.vertex.attribute.author.active.ranges", { test.default.value = "test.default.value" networks.with.attr = add.vertex.attribute.author.active.ranges(test.networks, test.data, activity.types = test.activity.types, default.value = test.default.value) - actual.attributes = lapply(networks.with.attr, igraph:: get.vertex.attribute, name = "active.ranges") + actual.attributes = lapply(networks.with.attr, igraph:: vertex_attr, name = "active.ranges") # adjust prepared expected attributes to the current use case expected.attributes = lapply(get.expected.active.ranges(), function(active.ranges) { @@ -1172,7 +1172,7 @@ test_that("Test add.vertex.attribute.author.role.simple", { type = type, aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = 
"author.role") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "author.role") expect_identical(expected.attributes[[level]][[type]], actual.attributes, info = sprintf("level = '%s', type = '%s'", level, type)) @@ -1249,9 +1249,9 @@ test_that("Test add.vertex.attribute.artifact.editor.count", { aggregation.level = level, editor.definition = c("author", "committer") ) - actual.attributes.author = lapply(networks.with.attr.author, igraph::get.vertex.attribute, name = "editor.count") - actual.attributes.committer = lapply(networks.with.attr.committer, igraph::get.vertex.attribute, name = "editor.count") - actual.attributes.both = lapply(networks.with.attr.both, igraph::get.vertex.attribute, name = "editor.count") + actual.attributes.author = lapply(networks.with.attr.author, igraph::vertex_attr, name = "editor.count") + actual.attributes.committer = lapply(networks.with.attr.committer, igraph::vertex_attr, name = "editor.count") + actual.attributes.both = lapply(networks.with.attr.both, igraph::vertex_attr, name = "editor.count") expect_equal(expected.attributes.author[[level]], actual.attributes.author) expect_equal(expected.attributes.committer[[level]], actual.attributes.committer) @@ -1308,7 +1308,7 @@ test_that("Test add.vertex.attribute.artifact.first.occurrence", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "first.occurrence") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "first.occurrence") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -1366,7 +1366,7 @@ test_that("Test add.vertex.attribute.artifact.last.edited", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "last.edited") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "last.edited") ## convert UNIX timestamps to 
POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -1405,7 +1405,7 @@ test_that("Test add.vertex.attribute.artifact.change.count", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "change.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "change.count") expect_equal(expected.attributes[[level]], actual.attributes) }) @@ -1449,7 +1449,7 @@ test_that("Test add.vertex.attribute.mail.thread.contributor.count", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "thread.contributor.count") expect_equal(expected.attributes[[level]], actual.attributes) }) @@ -1491,7 +1491,7 @@ test_that("Test add.vertex.attribute.mail.thread.message.count", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.message.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "thread.message.count") expect_equal(expected.attributes[[level]], actual.attributes) }) @@ -1540,7 +1540,7 @@ test_that("Test add.vertex.attribute.mail.thread.start.date", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.start.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "thread.start.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -1592,7 +1592,7 @@ test_that("Test add.vertex.attribute.mail.thread.end.date", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "thread.end.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = 
"thread.end.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -1620,7 +1620,7 @@ test_that("Test add.vertex.attribute.mail.thread.originating.mailing.list", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "thread.originating.mailing.list") expect_equal(expected.attributes, actual.attributes) @@ -1695,7 +1695,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.contributor.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -1707,7 +1707,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.contributor.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -1719,7 +1719,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.contributor.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ 
-1757,7 +1757,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.co networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.contributor.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -1769,7 +1769,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.co networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.contributor.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -1781,7 +1781,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.co networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.contributor.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -1820,7 +1820,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.co type = "issues", use.unfiltered.data = TRUE ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.contributor.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -1832,7 +1832,7 
@@ test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.co networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests", use.unfiltered.data = TRUE) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.contributor.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -1845,7 +1845,7 @@ test_that("Test add.vertex.attribute.issue.contributor.count with issues.only.co type = "all", use.unfiltered.data = TRUE ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.contributor.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.contributor.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -1883,7 +1883,7 @@ test_that("Test add.vertex.attribute.issue.event.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.event.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.event.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -1895,7 +1895,7 @@ test_that("Test add.vertex.attribute.issue.event.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.event.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.event.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -1907,7 +1907,7 @@ test_that("Test add.vertex.attribute.issue.event.count", 
{ networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.event.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.event.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -1945,7 +1945,7 @@ test_that("Test add.vertex.attribute.issue.comment.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.comment.event.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.comment.event.count") expect_identical(expected.attributes.issues.only[[level]], actual.attributes) }) @@ -1957,7 +1957,7 @@ test_that("Test add.vertex.attribute.issue.comment.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.comment.event.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.comment.event.count") expect_identical(expected.attributes.prs.only[[level]], actual.attributes) }) @@ -1969,7 +1969,7 @@ test_that("Test add.vertex.attribute.issue.comment.count", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.comment.event.count") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.comment.event.count") expect_identical(expected.attributes.both[[level]], actual.attributes) }) @@ -2039,7 +2039,7 @@ test_that("Test add.vertex.attribute.issue.opened.date", { networks.and.data[["networks"]], 
networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.opened.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.opened.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2053,7 +2053,7 @@ test_that("Test add.vertex.attribute.issue.opened.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.opened.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.opened.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2067,7 +2067,7 @@ test_that("Test add.vertex.attribute.issue.opened.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.opened.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.opened.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2139,7 +2139,7 @@ test_that("Test add.vertex.attribute.issue.closed.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.closed.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.closed.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2153,7 +2153,7 @@ test_that("Test 
add.vertex.attribute.issue.closed.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.closed.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.closed.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2167,7 +2167,7 @@ test_that("Test add.vertex.attribute.issue.closed.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.closed.date") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.closed.date") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2282,7 +2282,7 @@ test_that("Test add.vertex.attribute.issue.last.activity.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.last.activity") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.last.activity") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2296,7 +2296,7 @@ test_that("Test add.vertex.attribute.issue.last.activity.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.last.activity") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.last.activity") ## convert UNIX timestamps to POSIXct actual.attributes = 
lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2310,7 +2310,7 @@ test_that("Test add.vertex.attribute.issue.last.activity.date", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.last.activity") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.last.activity") ## convert UNIX timestamps to POSIXct actual.attributes = lapply(actual.attributes, get.date.from.unix.timestamp) @@ -2375,7 +2375,7 @@ test_that("Test add.vertex.attribute.issue.title", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "issues" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.title") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.title") expect_identical(expected.attributes.issues.only, actual.attributes) }) @@ -2387,7 +2387,7 @@ test_that("Test add.vertex.attribute.issue.title", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "pull.requests") - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pr.title") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pr.title") expect_identical(expected.attributes.prs.only, actual.attributes) }) @@ -2399,7 +2399,7 @@ test_that("Test add.vertex.attribute.issue.title", { networks.and.data[["networks"]], networks.and.data[["project.data"]], aggregation.level = level, type = "all" ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.title") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.title") expect_identical(expected.attributes.both, actual.attributes) }) @@ -2421,7 +2421,7 @@ test_that("Test 
add.vertex.attribute.pr.open.merged.or.closed", { networks.with.attr = add.vertex.attribute.pr.open.merged.or.closed( networks.and.data[["networks"]], networks.and.data[["project.data"]]) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "pull.request.state") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "pull.request.state") expect_equal(expected.attributes, actual.attributes) }) @@ -2445,7 +2445,7 @@ test_that("Test add.vertex.attribute.issue.is.pull.request", { aggregation.level = level ) - actual.attributes = lapply(networks.with.attr, igraph::get.vertex.attribute, name = "issue.is.pull.request") + actual.attributes = lapply(networks.with.attr, igraph::vertex_attr, name = "issue.is.pull.request") expect_equal(expected.attributes, actual.attributes) }) @@ -2472,7 +2472,7 @@ test_that("Test addition of attributes despite of empty data", { ## add commit-count attribute net.commit.count = add.vertex.attribute.author.commit.count(networks, proj.data.empty, default = 0L)[[1]] - expect_true("commit.count" %in% igraph::list.vertex.attributes(net.commit.count)) + expect_true("commit.count" %in% igraph::vertex_attr_names(net.commit.count)) ## add author-role attribute: ## 1) construct empty classification @@ -2480,7 +2480,7 @@ test_that("Test addition of attributes despite of empty data", { names(classification) = range ## 2) add attribute net.author.role = add.vertex.attribute.author.role(networks, classification, default = "unclassified")[[1]] - expect_true("author.role" %in% igraph::list.vertex.attributes(net.author.role)) + expect_true("author.role" %in% igraph::vertex_attr_names(net.author.role)) }) @@ -2506,8 +2506,8 @@ test_that("Test addition of attributes despite of non-captured vertices", { net.commit.count = add.vertex.attribute.author.commit.count.committer.and.author(networks, proj.data.empty, default = 0L)[[1]] ## check existence and proper value - 
expect_true("commit.count.committer.and.author" %in% igraph::list.vertex.attributes(net.commit.count)) - expect_identical(igraph::get.vertex.attribute(net.commit.count, "commit.count.committer.and.author"), 0L) + expect_true("commit.count.committer.and.author" %in% igraph::vertex_attr_names(net.commit.count)) + expect_identical(igraph::vertex_attr(net.commit.count, "commit.count.committer.and.author"), 0L) }) diff --git a/tests/test-networks-equal-constructions.R b/tests/test-networks-equal-constructions.R index feb3f7d2..eae2bf3b 100644 --- a/tests/test-networks-equal-constructions.R +++ b/tests/test-networks-equal-constructions.R @@ -15,6 +15,7 @@ ## Copyright 2018 by Claus Hunsen ## Copyright 2020 by Thomas Bock ## Copyright 2022 by Jonathan Baumann +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. @@ -39,21 +40,21 @@ if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") compare.edge.and.vertex.lists = function(split.networks.one, split.networks.two) { for (i in seq_along(split.networks.one)) { - edges.one = igraph::get.data.frame(split.networks.one[[i]], what = "edges") + edges.one = igraph::as_data_frame(split.networks.one[[i]], what = "edges") ordering = order(edges.one[["from"]], edges.one[["to"]], edges.one[["date"]]) edges.one = edges.one[ordering, ] rownames(edges.one) = seq_len(nrow(edges.one)) - edges.two = igraph::get.data.frame(split.networks.two[[i]], what = "edges") + edges.two = igraph::as_data_frame(split.networks.two[[i]], what = "edges") ordering = order(edges.two[["from"]], edges.two[["to"]], edges.two[["date"]]) edges.two = edges.two[ordering, ] rownames(edges.two) = seq_len(nrow(edges.two)) - vertices.one = igraph::get.data.frame(split.networks.one[[i]], what = "vertices") + vertices.one = igraph::as_data_frame(split.networks.one[[i]], what = "vertices") ordering = order(vertices.one[["name"]]) vertices.one = vertices.one[ordering, ] rownames(vertices.one) = seq_len(nrow(vertices.one)) - vertices.two = 
igraph::get.data.frame(split.networks.two[[i]], what = "vertices") + vertices.two = igraph::as_data_frame(split.networks.two[[i]], what = "vertices") ordering = order(vertices.two[["name"]]) vertices.two = vertices.two[ordering, ] rownames(vertices.two) = seq_len(nrow(vertices.two)) diff --git a/tests/test-networks-misc.R b/tests/test-networks-misc.R index 3e7d7235..b964c8ce 100644 --- a/tests/test-networks-misc.R +++ b/tests/test-networks-misc.R @@ -13,6 +13,7 @@ ## ## Copyright 2024 by Leo Sendelbach ## Copyright 2024 by Thomas Bock +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. @@ -53,7 +54,7 @@ test_that("getting all authors of a list of networks, list length 1", { from = "Heinz", to = "Dieter" ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) ## Act result = get.author.names.from.networks(networks = list(network)) @@ -77,7 +78,7 @@ test_that("getting all authors of a list of networks, list length 1, not global" from = "Heinz", to = "Dieter" ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) ## Act result = get.author.names.from.networks(networks = list(network), globally = FALSE) @@ -101,7 +102,7 @@ test_that("getting all authors of a list of networks, list length 2", { from = "Heinz", to = "Dieter" ) - first.network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + first.network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) second.vertices = data.frame( name = c("Detlef", "Dieter"), @@ -112,7 +113,7 @@ test_that("getting all authors of a list of networks, list length 2", { from = "Detlef", to = "Dieter" ) - second.network = igraph::graph.data.frame(second.edges, directed = FALSE, vertices = second.vertices) + second.network = 
igraph::graph_from_data_frame(second.edges, directed = FALSE, vertices = second.vertices) ## Act result = get.author.names.from.networks(networks = list(first.network, second.network)) @@ -135,7 +136,7 @@ test_that("getting all authors of a list of networks, list length 2, not global" from = "Heinz", to = "Dieter" ) - first.network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + first.network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) second.vertices = data.frame( name = c("Detlef", "Dieter"), @@ -146,7 +147,7 @@ test_that("getting all authors of a list of networks, list length 2, not global" from = "Detlef", to = "Dieter" ) - second.network = igraph::graph.data.frame(second.edges, directed = FALSE, vertices = second.vertices) + second.network = igraph::graph_from_data_frame(second.edges, directed = FALSE, vertices = second.vertices) ## Act result = get.author.names.from.networks(networks = list(first.network, second.network), globally = FALSE) @@ -307,7 +308,7 @@ test_that("getting a sparse adjacency matrix for a network, single edge, matchin from = "Heinz", to = "Dieter" ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Heinz", "Dieter", "Klaus") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -339,7 +340,7 @@ test_that("getting a sparse adjacency matrix for a network, single edge, fewer a from = "Heinz", to = "Dieter" ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Dieter", "Heinz") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -371,7 +372,7 @@ test_that("getting a sparse adjacency matrix for a network, single edge, more au from 
= "Heinz", to = "Dieter" ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Gerhardt", "Bob", "Dieter", "Heinz", "Klaus") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -403,7 +404,7 @@ test_that("getting a sparse adjacency matrix for a network, single edge, no matc from = "Heinz", to = "Dieter" ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Gerhardt", "Bob", "Dieter", "Heinz") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -435,7 +436,7 @@ test_that("getting a sparse adjacency matrix for a network, single edge, no over from = "Heinz", to = "Dieter" ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Gerhardt", "Bob") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -464,7 +465,7 @@ test_that("getting a sparse adjacency matrix for a network, two edges, more auth from = c("Heinz", "Dieter"), to = c("Dieter", "Klaus") ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Klaus", "Gerhardt", "Bob", "Dieter", "Heinz") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -500,7 +501,7 @@ test_that("getting a sparse adjacency matrix for a network, three edges, more au to = c("Dieter", "Klaus", "Heinz"), weight = c(1, 3, 4) ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = 
igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = c("Klaus", "Gerhardt", "Bob", "Dieter", "Heinz") matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -542,7 +543,7 @@ test_that("getting a sparse adjacency matrix per network, one network", { from = c("Heinz", "Dieter", "Dieter"), to = c("Dieter", "Klaus", "Heinz") ) - network.in = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in = sort(c("Heinz", "Dieter", "Klaus")) matrix.out = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in), @@ -576,7 +577,7 @@ test_that("getting a sparse adjacency matrix per network, two networks", { from = c("Heinz", "Dieter", "Dieter"), to = c("Dieter", "Klaus", "Heinz") ) - network.in.one = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.one = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) vertices = data.frame( name = c("Klaus", "Tobias"), @@ -587,7 +588,7 @@ test_that("getting a sparse adjacency matrix per network, two networks", { from = c("Klaus"), to = c("Tobias") ) - network.in.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) all.authors = sort(c("Heinz", "Dieter", "Klaus", "Tobias")) @@ -632,7 +633,7 @@ test_that("getting cumulative sums of adjacency matrices generated from networks from = c("Heinz", "Dieter", "Dieter"), to = c("Dieter", "Klaus", "Heinz") ) - network.in.one = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.one = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.one = sort(c("Heinz", "Dieter", "Klaus")) matrix.out.one = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = 
c(length(authors.in.one), @@ -649,7 +650,7 @@ test_that("getting cumulative sums of adjacency matrices generated from networks from = c("Klaus"), to = c("Dieter") ) - network.in.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.two = sort(c("Heinz", "Dieter", "Klaus")) matrix.out.two = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in.two), @@ -683,7 +684,7 @@ test_that("getting cumulative sums of adjacency matrices generated from networks to = c("Dieter", "Klaus", "Heinz"), weight = c(1, 2, 1) ) - network.in.one = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.one = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.one = sort(c("Heinz", "Dieter", "Klaus")) matrix.out.one = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in.one), @@ -701,7 +702,7 @@ test_that("getting cumulative sums of adjacency matrices generated from networks to = c("Dieter"), weight = c(1) ) - network.in.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.two = sort(c("Heinz", "Dieter", "Klaus")) matrix.out.two = Matrix::sparseMatrix(i = c(), j = c(), x = 0, dims = c(length(authors.in.two), @@ -735,14 +736,14 @@ test_that("getting cumulative sums of adjacency matrices generated from networks from = c("Heinz", "Dieter", "Dieter"), to = c("Dieter", "Klaus", "Heinz") ) - network.in.one = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.one = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.one = sort(c("Heinz", "Dieter", "Klaus")) edges = data.frame( from = c("Klaus"), to = c("Dieter") ) - network.in.two = igraph::graph.data.frame(edges, directed = 
FALSE, vertices = vertices) + network.in.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) expected.array = array(data = 0, dim = c(3, 3, 2)) rownames(expected.array) = authors.in.one @@ -780,7 +781,7 @@ test_that("getting cumulative sums of adjacency matrices generated from networks to = c("Dieter", "Klaus", "Heinz"), weight = c(1, 2, 1) ) - network.in.one = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.one = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.one = sort(c("Heinz", "Dieter", "Klaus")) edges = data.frame( @@ -788,7 +789,7 @@ test_that("getting cumulative sums of adjacency matrices generated from networks to = c("Dieter"), weight = c(1) ) - network.in.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.in.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) authors.in.two = sort(c("Heinz", "Dieter", "Klaus")) expected.array = array(data = 0, dim = c(3, 3, 2)) diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 5b0d3b42..846b17fa 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -19,7 +19,7 @@ ## Copyright 2019 by Anselm Fehnker ## Copyright 2021 by Johannes Hostert ## Copyright 2022 by Jonathan Baumann -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -92,7 +92,7 @@ test_that("Network construction of the undirected author network with relation = ) ## build expected network - network.expected = igraph::graph.data.frame(data, vertices = authors, + network.expected = igraph::graph_from_data_frame(data, vertices = authors, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -198,7 +198,7 @@ test_that("Construction of the bipartite network for the feature artifact with a ) ## 3) build expected network - network.expected = igraph::graph.data.frame(network.expected.data, vertices = vertices, + network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -310,7 +310,7 @@ test_that("Construction of the multi network for the feature artifact with autho ) ## 3) build expected network - network.expected = igraph::graph.data.frame(edges, vertices = vertices, + network.expected = igraph::graph_from_data_frame(edges, vertices = vertices, directed = net.conf$get.value("author.directed")) assert.networks.equal(network.expected, network.built) @@ -406,7 +406,7 @@ test_that("Construction of the multi-artifact bipartite network with artifact re event.name = c(rep(NA, 6), rep("commented", 24)) ) - net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) @@ -493,7 +493,7 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "") ) - net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) @@ -595,7 +595,7 @@ test_that("Construction of 
the multi-artifact bipartite network with artifact re "", "", "", "", "") ) - net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) @@ -715,7 +715,7 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "") ) - net.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index 70f26f63..41fec588 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -15,7 +15,7 @@ ## Copyright 2018 by Claus Hunsen ## Copyright 2018 by Barbara Eckl ## Copyright 2022 by Jonathan Baumann -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -91,7 +91,7 @@ test_that("Construction of the multi network for the feature artifact with autho author.name = c(NA, NA, NA, NA, NA, NA, NA, NA, "Thomas", NA, NA, NA, NA, NA, NA) ) - network.expected = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(network.expected, network.built) }) diff --git a/tests/test-networks.R b/tests/test-networks.R index 62e117be..cdf1634f 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -75,7 +75,7 @@ test_that("Simplify network with more than one relation", { expect_error(simplify.network(g), NA) # expect that no error occurs expect_identical(igraph::V(simplify.network(g))$name, c("A", "B", "C", "Base_Feature")) # vertices expect_identical(igraph::ecount(simplify.network(g)), 1) # edges - expect_true(igraph::are.connected(simplify.network(g), "A", "Base_Feature")) # specific edge + expect_true(igraph::are_adjacent(simplify.network(g), "A", "Base_Feature")) # specific edge }) @@ -90,8 +90,8 @@ test_that("Simplify basic multi-relational network", { igraph::make_empty_graph(n = 0, directed = FALSE) + igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") for (i in 1:3) { - network = igraph::add.edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail") - network = igraph::add.edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange") + network = igraph::add_edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail") + network = igraph::add_edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange") } network.expected = igraph::make_empty_graph(n = 0, directed = FALSE) + @@ -165,7 +165,7 @@ test_that("Simplify author-network with relation = c('cochange', 'mail') using b c("", ""), c("", "")) ## build expected network - network.expected = igraph::graph.data.frame(data, vertices = authors, + network.expected = 
igraph::graph_from_data_frame(data, vertices = authors, directed = net.conf$get.value("author.directed")) ## build simplified network @@ -212,7 +212,7 @@ test_that("Simplify author-network with relation = c('cochange', 'mail') using b as.character(c(NA, NA))) ## build expected network - network.expected = igraph::graph.data.frame(data, vertices = authors, + network.expected = igraph::graph_from_data_frame(data, vertices = authors, directed = net.conf$get.value("author.directed")) ## build simplified network @@ -237,10 +237,10 @@ test_that("Simplify multiple basic multi-relational networks", { igraph::make_empty_graph(n = 0, directed = FALSE) + igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) for (i in 1:3) { - network.A = igraph::add.edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail") - network.A = igraph::add.edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange") - network.B = igraph::add.edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "mail") - network.B = igraph::add.edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "cochange") + network.A = igraph::add_edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail") + network.A = igraph::add_edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange") + network.B = igraph::add_edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "mail") + network.B = igraph::add_edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "cochange") } network.A.expected = igraph::make_empty_graph(n = 0, directed = FALSE) + @@ -314,7 +314,7 @@ test_that("Extraction of sub-networks", { author.net.built = extract.author.network.from.network(base.net, remove.isolates = FALSE) ## construct expected author network (by removing artifact vertices and adjacent edges) - author.net.expected = igraph::delete.vertices(base.net, igraph::V(base.net)[7:12]) + author.net.expected = 
igraph::delete_vertices(base.net, igraph::V(base.net)[7:12]) expect_true(igraph::identical_graphs(author.net.built, author.net.expected), info = "author-network extraction") @@ -326,8 +326,8 @@ test_that("Extraction of sub-networks", { bip.net.built = extract.bipartite.network.from.network(base.net, remove.isolates = TRUE) ## construct expected bipartite network (by removing unipartite edges and isolate vertices) - bip.net.expected = igraph::delete.edges(base.net, igraph::E(base.net)[1:9]) - bip.net.expected = igraph::delete.vertices(bip.net.expected, "A2") + bip.net.expected = igraph::delete_edges(base.net, igraph::E(base.net)[1:9]) + bip.net.expected = igraph::delete_vertices(bip.net.expected, "A2") expect_true(igraph::identical_graphs(bip.net.built, bip.net.expected), info = "bipartite-network extraction") @@ -339,7 +339,7 @@ test_that("Extraction of sub-networks", { art.net.built = extract.artifact.network.from.network(base.net, remove.isolates = FALSE) ## construct expected artifact network (by removing author vertices and adjacent edges) - art.net.expected = igraph::delete.vertices(base.net, igraph::V(base.net)[1:6]) + art.net.expected = igraph::delete_vertices(base.net, igraph::V(base.net)[1:6]) expect_true(igraph::identical_graphs(art.net.built, art.net.expected), info = "artifact-network extraction") @@ -527,7 +527,7 @@ test_that("Construction of networks from empty edge list (with vertices)", { ## construct edgeless network net.edgeless = create.empty.network(directed = directed) + igraph::vertices(vertices.as.sequence) ## add attribute 'weight' which is always added by 'construct.network.from.edge.list' - net.edgeless = igraph::set.edge.attribute(net.edgeless, "weight", value = 1) + net.edgeless = igraph::set_edge_attr(net.edgeless, "weight", value = 1) ## ## normal network @@ -574,8 +574,8 @@ test_that("Construction of networks from empty edge list (without vertices)", { ## construct edgeless network net.edgeless = create.empty.network(directed = 
directed) ## add attributes 'name' and 'weight' which is always added by 'construct.network.from.edge.list' - net.edgeless = igraph::set.vertex.attribute(net.edgeless, "name", value = "name") - net.edgeless = igraph::set.edge.attribute(net.edgeless, "weight", value = 1) + net.edgeless = igraph::set_vertex_attr(net.edgeless, "name", value = "name") + net.edgeless = igraph::set_edge_attr(net.edgeless, "weight", value = 1) ## ## vertices: NULL @@ -763,7 +763,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(author.relation = "cochange")) network.built = network.builder$get.author.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "author network – cochange") @@ -772,7 +772,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(author.relation = "mail")) network.built = network.builder$get.author.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "author network – mail") @@ -781,7 +781,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(author.relation = "issue")) network.built = network.builder$get.author.network() ## 2) remove all vertices since we only care about the attributes - network.built = 
igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "author network – issue") @@ -794,7 +794,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(artifact.relation = c("cochange"))) network.built = network.builder$get.bipartite.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "bipartite network – cochange") @@ -803,7 +803,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(artifact.relation = "mail")) network.built = network.builder$get.bipartite.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "bipartite network – mail") @@ -812,7 +812,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(artifact.relation = "issue")) network.built = network.builder$get.bipartite.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = 
igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "bipartite network – issue") @@ -825,7 +825,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(artifact.relation = "cochange")) network.built = network.builder$get.artifact.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "artifact network – cochange") @@ -834,7 +834,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(artifact.relation = "mail")) network.built = network.builder$get.artifact.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "artifact network – mail") @@ -843,7 +843,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(updated.values = list(artifact.relation = "issue")) network.built = network.builder$get.artifact.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes 
against expected network expect_true(igraph::identical_graphs(network.built, network.expected), info = "artifact network – issue") @@ -856,7 +856,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(list(artifact.relation = "cochange", author.relation = "cochange")) network.built = network.builder$get.multi.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_identical( igraph::as_data_frame(network.built, what = "both"), @@ -869,7 +869,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(list(artifact.relation = "mail", author.relation = "mail")) network.built = network.builder$get.multi.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_identical( igraph::as_data_frame(network.built, what = "both"), @@ -882,7 +882,7 @@ test_that("Addition of edge attributes with data", { network.builder$update.network.conf(list(artifact.relation = "issue", author.relation = "cochange")) network.built = network.builder$get.multi.network() ## 2) remove all vertices since we only care about the attributes - network.built = igraph::delete.vertices(network.built, seq_len(igraph::vcount(network.built))) + network.built = igraph::delete_vertices(network.built, seq_len(igraph::vcount(network.built))) ## 3) check attributes against expected network expect_identical( igraph::as_data_frame(network.built, what = "both"), diff --git a/tests/test-split-misc.R 
b/tests/test-split-misc.R index 7a2e42b6..78ceb438 100644 --- a/tests/test-split-misc.R +++ b/tests/test-split-misc.R @@ -14,7 +14,7 @@ ## Copyright 2017-2019 by Claus Hunsen ## Copyright 2018 by Jakob Kronawitter ## Copyright 2022 by Jonathan Baumann -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## All Rights Reserved. @@ -323,8 +323,8 @@ test_that("Check consistency of data and network time-based splitting.", { ## Thus, when splitting the project-level network, there are edges from Olaf to Karl and Thomas, ## crossing the time-window border. Hence, when deleting the respective vertices from the networks, ## the data-based networks should match the network-based networks. - results.network[[1]] = igraph::delete.vertices(results.network[[1]], c("Thomas", "Karl")) - results.network[[2]] = igraph::delete.vertices(results.network[[2]], c("Olaf")) + results.network[[1]] = igraph::delete_vertices(results.network[[1]], c("Thomas", "Karl")) + results.network[[2]] = igraph::delete_vertices(results.network[[2]], c("Olaf")) check.identical = mapply(results.data.network, results.network, FUN = function(d, n) { igraph::identical_graphs(d, n) }) @@ -393,7 +393,7 @@ test_that("Check and correct duplicate range names during network activity-based igraph::vertices(c("A", "B")) + igraph::edges(rep(c("A", "B"), times = length(dates))) ## set some date attributes that are appropriate for the test case - net = igraph::set.edge.attribute(net, "date", value = dates) + net = igraph::set_edge_attr(net, "date", value = dates) ## define split arguments split.function = split.network.activity.based diff --git a/util-bulk.R b/util-bulk.R index 7ecac1a5..cc5f42f2 100644 --- a/util-bulk.R +++ b/util-bulk.R @@ -16,6 +16,7 @@ ## Copyright 2017 by Christian Hechtl ## Copyright 2019 by Thomas Bock ## Copyright 2021 by Niklas Schneider +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -55,7 +56,7 @@ collect.multi.networks = function(project.conf, network.conf, step = 1) { multi.network = network.builder$get.multi.network() ## set range attribute - multi.network = igraph::set.graph.attribute(multi.network, "range", range) + multi.network = igraph::set_graph_attr(multi.network, "range", range) attr(multi.network, "range") = range # add to global list @@ -98,7 +99,7 @@ collect.bipartite.networks = function(project.conf, network.conf, step = 1) { bp.network = network.builder$get.bipartite.network() ## set range attribute - bp.network = igraph::set.graph.attribute(bp.network, "range", range) + bp.network = igraph::set_graph_attr(bp.network, "range", range) attr(bp.network, "range") = range # add to global list @@ -141,7 +142,7 @@ collect.author.networks = function(project.conf, network.conf, step = 1) { author.network = network.builder$get.author.network() ## set range attribute - author.network = igraph::set.graph.attribute(author.network, "range", range) + author.network = igraph::set_graph_attr(author.network, "range", range) attr(author.network, "range") = range # add to global list @@ -184,7 +185,7 @@ collect.artifact.networks = function(project.conf, network.conf, step = 1) { artifact.network = network.builder$get.artifact.network() ## set range attribute - artifact.network = igraph::set.graph.attribute(artifact.network, "range", range) + artifact.network = igraph::set_graph_attr(artifact.network, "range", range) attr(artifact.network, "range") = range # add to global list diff --git a/util-misc.R b/util-misc.R index 4722ccb2..03f07420 100644 --- a/util-misc.R +++ b/util-misc.R @@ -50,7 +50,7 @@ get.edgelist.with.timestamps = function(net) { edges = as.data.frame(igraph::get.edgelist(net)) colnames(edges) = c("from", "to") ## get timestamps - dates = igraph::get.edge.attribute(net, "date") + dates = igraph::edge_attr(net, "date") ## bind everything together edges = cbind(edges, date = dates) diff --git a/util-motifs.R b/util-motifs.R index 
7ffa657c..39399c80 100644 --- a/util-motifs.R +++ b/util-motifs.R @@ -12,6 +12,7 @@ ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## ## Copyright 2015+2017 by Claus Hunsen +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. @@ -92,7 +93,7 @@ MOTIF.TYPE.MAPPING = as.data.frame(rbind( get.vertex.types.as.numeric = function(network, index = igraph::V(network)) { ## get the vertex attribute as factor - attr.factor = factor(igraph::get.vertex.attribute(network, "type", index)) + attr.factor = factor(igraph::vertex_attr(network, "type", index)) ## replace factor levels with corresponding numerics levels(attr.factor) = sapply(levels(attr.factor), function(f) { @@ -141,7 +142,7 @@ motifs.search.in.network = function(network, motif, remove.duplicates = TRUE) { vs.cleaned = lapply(vs, function(seq) { ## get types and names of vertices types = get.vertex.types.as.numeric(network, index = seq) - names = igraph::get.vertex.attribute(network, "name", index = seq) + names = igraph::vertex_attr(network, "name", index = seq) ## sort vertex sequence by types and names seq = seq[ order(types, names) ] @@ -307,7 +308,7 @@ motifs.remove.artifacts.from.matched.motifs = function(network, vs) { ## iterate over all vertex sequences to remove artifacts vs.cleaned = lapply(vs, function(seq) { ## get types of vertices - types = igraph::get.vertex.attribute(network, "type", index = seq) + types = igraph::vertex_attr(network, "type", index = seq) ## remove artifact vertices seq = seq[ types != TYPE.ARTIFACT ] return(seq) diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 5b68cbff..95a3021a 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -21,6 +21,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2022 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -105,7 +106,7 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com net.with.attr = add.attributes.to.network(current.network, "vertex", attributes) ## overwrite set vertex attribute with 'default.value', given the case that there are indeed vertices ## in the current network - net.with.attr = igraph::set.vertex.attribute(net.with.attr, attr.name, value = default.value) + net.with.attr = igraph::set_vertex_attr(net.with.attr, attr.name, value = default.value) ## return immediately return(net.with.attr) } @@ -130,7 +131,7 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com attributes = unlist(attributes, recursive = FALSE) } - net.with.attr = igraph::set.vertex.attribute(current.network, attr.name, value = attributes) + net.with.attr = igraph::set_vertex_attr(current.network, attr.name, value = attributes) return(net.with.attr) } diff --git a/util-networks-misc.R b/util-networks-misc.R index c9abd08a..99a38a29 100644 --- a/util-networks-misc.R +++ b/util-networks-misc.R @@ -19,6 +19,7 @@ ## Copyright 2019 by Jakob Kronawitter ## Copyright 2019-2020 by Anselm Fehnker ## Copyright 2024 by Leo Sendelbach +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -134,10 +135,10 @@ get.expanded.adjacency = function(network, authors, weighted = FALSE) { if (weighted) { ## get the weighted adjacency matrix for the current network - matrix.data = igraph::get.adjacency(network, attr = "weight") + matrix.data = igraph::as_adjacency_matrix(network, attr = "weight") } else { ## get the unweighted sparse adjacency matrix for the current network - matrix.data = igraph::get.adjacency(network) + matrix.data = igraph::as_adjacency_matrix(network) } network.authors.num = nrow(matrix.data) diff --git a/util-networks.R b/util-networks.R index aa9511b2..9ebb5680 100644 --- a/util-networks.R +++ b/util-networks.R @@ -365,10 +365,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ) ## remove the artifact vertices stemming from untracked files if existing - if ("name" %in% igraph::list.vertex.attributes(artifacts.net) && + if ("name" %in% igraph::vertex_attr_names(artifacts.net) && length(igraph::V(artifacts.net)[name == UNTRACKED.FILE.EMPTY.ARTIFACT]) > 0) { - artifacts.net = igraph::delete.vertices(artifacts.net, UNTRACKED.FILE.EMPTY.ARTIFACT) + artifacts.net = igraph::delete_vertices(artifacts.net, UNTRACKED.FILE.EMPTY.ARTIFACT) } ## store network @@ -463,14 +463,14 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", file = file.path(file.dir, file.name) ## read network from disk - artifacts.net = igraph::read.graph(file, format = "pajek") + artifacts.net = igraph::read_graph(file, format = "pajek") # set vertex labels properly (copy "id" attribute to "name" attribute) - artifacts.net = igraph::set.vertex.attribute( + artifacts.net = igraph::set_vertex_attr( artifacts.net, "name", igraph::V(artifacts.net), - igraph::get.vertex.attribute(artifacts.net, "id") + igraph::vertex_attr(artifacts.net, "id") ) ## process vertex names in artifact networks for consistent names: @@ -478,7 +478,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## need to be processed in order to match the ones coming from other analyses ## (e.g. 
Codeface): ## (1) retrieve parameters for processing - names = igraph::get.vertex.attribute(artifacts.net, "name") + names = igraph::vertex_attr(artifacts.net, "name") artifact = private$proj.data$get.project.conf.entry("artifact") ## (2) different replacings for different artifacts ## feature @@ -498,10 +498,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", names = gsub(".cg", "", names, fixed = TRUE) } ## (3) set processed names inside graph object - artifacts.net = igraph::set.vertex.attribute(artifacts.net, "name", value = names) + artifacts.net = igraph::set_vertex_attr(artifacts.net, "name", value = names) ## set edge attribute 'artifact.type' as the raw data do not contain this! - artifacts.net = igraph::set.edge.attribute( + artifacts.net = igraph::set_edge_attr( artifacts.net, "artifact.type", value = private$proj.data$get.project.conf.entry("artifact.codeface") ) @@ -846,20 +846,20 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## add all missing authors to the network if wanted if (private$network.conf$get.value("author.all.authors")) { authors.all = private$proj.data$get.authors()[[ "author.name" ]] - authors.net = igraph::get.vertex.attribute(net, "name") + authors.net = igraph::vertex_attr(net, "name") net = net + igraph::vertices(setdiff(authors.all, authors.net)) } ## remove all authors from the corresponding network who do not have touched any artifact if (private$network.conf$get.value("author.only.committers")) { ## authors-artifact relation - authors.from.net = igraph::get.vertex.attribute(net, "name") + authors.from.net = igraph::vertex_attr(net, "name") authors.from.artifacts = lapply(private$get.bipartite.relations(), function(bipartite.relation) { return(names(bipartite.relation)) }) authors.from.artifacts = unlist(authors.from.artifacts) if (!is.null(authors.from.artifacts)) { - net = igraph::delete.vertices(net, setdiff(authors.from.net, authors.from.artifacts)) + net = igraph::delete_vertices(net, setdiff(authors.from.net, 
authors.from.artifacts)) } } @@ -906,7 +906,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set vertex attribute 'kind' on all edges, corresponding to relation vertex.kind = private$get.vertex.kind.for.relation(relation) - network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) + network = igraph::set_vertex_attr(network, "kind", value = vertex.kind) return(network) }) @@ -1016,9 +1016,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## remove vertices that are not committers if wanted if (private$network.conf$get.value("author.only.committers")) { committers = unique(private$proj.data$get.commits.unfiltered()[["author.name"]]) - authors = igraph::get.vertex.attribute(network, "name", igraph::V(network)[ type == TYPE.AUTHOR ]) + authors = igraph::vertex_attr(network, "name", igraph::V(network)[ type == TYPE.AUTHOR ]) authors.to.remove = setdiff(authors, committers) - network = igraph::delete.vertices(network, authors.to.remove) + network = igraph::delete_vertices(network, authors.to.remove) } ## simplify network if wanted @@ -1078,7 +1078,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## we only add bipartite edges for authors already present in the author network (this can be ## configured by 'author.only.committers', for example), thus, we need to remove any authors ## from the author--artifact relation that are superfluous - authors.from.net = igraph::get.vertex.attribute(authors.net, "name") + authors.from.net = igraph::vertex_attr(authors.net, "name") ## save relation and intersect the author vertices from the author network and the ## bipartite networks authors.to.artifacts = mapply(function(a2a.rel, relation.type) { @@ -1099,7 +1099,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", }) artifacts.all = plyr::rbind.fill(artifacts) - artifacts.from.net = igraph::get.vertex.attribute(artifacts.net, "name") + artifacts.from.net = igraph::vertex_attr(artifacts.net, "name") artifacts.to.add = 
setdiff(artifacts.all[["data.vertices"]], artifacts.from.net) artifacts.to.add.kind = artifacts.all[ artifacts.all[["data.vertices"]] %in% artifacts.to.add, "artifact.type" @@ -1115,11 +1115,11 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## check directedness and adapt artifact network if needed - if (igraph::is.directed(authors.net) && !igraph::is.directed(artifacts.net)) { + if (igraph::is_directed(authors.net) && !igraph::is_directed(artifacts.net)) { logging::logwarn(paste0("Author network is directed, but artifact network is not.", "Converting artifact network...")) artifacts.net = igraph::as.directed(artifacts.net, mode = "mutual") - } else if (!igraph::is.directed(authors.net) && igraph::is.directed(artifacts.net)) { + } else if (!igraph::is_directed(authors.net) && igraph::is_directed(artifacts.net)) { logging::logwarn(paste0("Author network is undirected, but artifact network is not.", "Converting artifact network...")) artifacts.net = igraph::as.undirected(artifacts.net, mode = "each", @@ -1140,14 +1140,14 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## Note: The following temporary fix only considers the 'date' attribute. However, this problem could also ## affect several other attributes, whose classes are not adjusted in our temporary fix. ## The following code block should be redundant as soon as igraph has fixed their bug. 
- u.actual.edge.attribute.date = igraph::get.edge.attribute(u, "date") + u.actual.edge.attribute.date = igraph::edge_attr(u, "date") if (!is.null(u.actual.edge.attribute.date)) { if (is.list(u.actual.edge.attribute.date)) { u.expected.edge.attribute.date = lapply(u.actual.edge.attribute.date, get.date.from.unix.timestamp) } else { u.expected.edge.attribute.date = get.date.from.unix.timestamp(u.actual.edge.attribute.date) } - u = igraph::set.edge.attribute(u, "date", value = u.expected.edge.attribute.date) + u = igraph::set_edge_attr(u, "date", value = u.expected.edge.attribute.date) } ## 2) add the bipartite edges @@ -1390,16 +1390,16 @@ construct.network.from.edge.list = function(vertices, edge.list, network.conf, d } ## construct network from edge list if there are vertices - net = igraph::graph.data.frame(edge.list, directed = directed, vertices = vertices.processed) + net = igraph::graph_from_data_frame(edge.list, directed = directed, vertices = vertices.processed) - ## add missing vertex attributes if vertices.processed was empty (igraph::graph.data.frame does add them then) + ## add missing vertex attributes if vertices.processed was empty (igraph::graph_from_data_frame does add them then) if (nrow(vertices.processed) == 0) { ## vertex attributes needed.vertex.attributes.types = list(name = "character") net = add.attributes.to.network(net, "vertex", needed.vertex.attributes.types) } - ## add missing edge attributes if edge.list was empty (igraph::graph.data.frame does add them then) + ## add missing edge attributes if edge.list was empty (igraph::graph_from_data_frame does add them then) if (nrow(edge.list) == 0) { ## edge attributes allowed.attributes = network.conf$get.value("edge.attributes") @@ -1409,7 +1409,7 @@ construct.network.from.edge.list = function(vertices, edge.list, network.conf, d } ## initialize edge weights - net = igraph::set.edge.attribute(net, "weight", value = 1) + net = igraph::set_edge_attr(net, "weight", value = 1) 
logging::logdebug("construct.network.from.edge.list: finished.") @@ -1484,14 +1484,14 @@ merge.networks = function(networks) { ## catch case where no vertices (and no vertex attributes) are given if (ncol(new.network.data[["vertices"]]) == 0) { - new.network.data[["vertices"]] = NULL # igraph::graph.data.frame can handle this + new.network.data[["vertices"]] = NULL # igraph::graph_from_data_frame can handle this } ## build whole network form edge and vertex data frame - whole.network = igraph::graph.data.frame( + whole.network = igraph::graph_from_data_frame( new.network.data[["edges"]], vertices = new.network.data[["vertices"]], - directed = igraph::is.directed(networks[[1]]) + directed = igraph::is_directed(networks[[1]]) ) logging::logdebug("merge.networks: finished.") @@ -1636,13 +1636,13 @@ add.attributes.to.network = function(network, type = c("vertex", "edge"), attrib ## get corresponding attribute functions if (type == "vertex") { - attribute.set.function = igraph::set.vertex.attribute # sprintf("igraph::set.%s.attribute", type) - attribute.get.function = igraph::get.vertex.attribute # sprintf("igraph::get.%s.attribute", type) - attribute.remove.function = igraph::remove.vertex.attribute # sprintf("igraph::remove.%s.attribute", type) + attribute.set.function = igraph::set_vertex_attr # sprintf("igraph::set.%s.attribute", type) + attribute.get.function = igraph::vertex_attr # sprintf("igraph::get.%s.attribute", type) + attribute.remove.function = igraph::delete_vertex_attr # sprintf("igraph::remove.%s.attribute", type) } else { - attribute.set.function = igraph::set.edge.attribute # sprintf("igraph::set.%s.attribute", type) - attribute.get.function = igraph::get.edge.attribute # sprintf("igraph::get.%s.attribute", type) - attribute.remove.function = igraph::remove.edge.attribute # sprintf("igraph::remove.%s.attribute", type) + attribute.set.function = igraph::set_edge_attr # sprintf("igraph::set.%s.attribute", type) + attribute.get.function = 
igraph::edge_attr # sprintf("igraph::get.%s.attribute", type) + attribute.remove.function = igraph::delete_edge_attr # sprintf("igraph::remove.%s.attribute", type) } ## iterate over all wanted attribute names and add the attribute with the wanted class @@ -1694,9 +1694,9 @@ simplify.network = function(network, remove.multiple = TRUE, remove.loops = TRUE logging::loginfo("Simplifying network.") ## save network attributes, otherwise they get lost - network.attributes = igraph::get.graph.attribute(network) + network.attributes = igraph::graph_attr(network) - if (!simplify.multiple.relations && length(unique(igraph::get.edge.attribute(network, "relation"))) > 1) { + if (!simplify.multiple.relations && length(unique(igraph::edge_attr(network, "relation"))) > 1) { ## data frame of the network edge.data = igraph::as_data_frame(network, what = "edges") vertex.data = igraph::as_data_frame(network, what = "vertices") @@ -1707,7 +1707,7 @@ simplify.network = function(network, remove.multiple = TRUE, remove.loops = TRUE network.data = edge.data[edge.data[["relation"]] == relation, ] net = igraph::graph_from_data_frame(d = network.data, vertices = vertex.data, - directed = igraph::is.directed(network)) + directed = igraph::is_directed(network)) ## simplify networks (contract edges and remove loops) net = igraph::simplify(net, edge.attr.comb = EDGE.ATTR.HANDLING, @@ -1726,7 +1726,7 @@ simplify.network = function(network, remove.multiple = TRUE, remove.loops = TRUE ## re-apply all network attributes for (att in names(network.attributes)) { - network = igraph::set.graph.attribute(network, att, network.attributes[[att]]) + network = igraph::set_graph_attr(network, att, network.attributes[[att]]) } logging::logdebug("simplify.network: finished.") @@ -1764,7 +1764,7 @@ simplify.networks = function(networks, remove.multiple = TRUE, remove.loops = TR #' #' @return the network without isolates delete.isolates = function(network) { - network.no.isolates = igraph::delete.vertices( + 
network.no.isolates = igraph::delete_vertices( network, igraph::degree(network, mode = "all") == 0 ) @@ -1784,7 +1784,7 @@ delete.isolates = function(network) { #' @return the author-vertex-induced subgraph of \code{network} extract.author.network.from.network = function(network, remove.isolates = FALSE) { ## only retain all author vertices - author.network = igraph::induced.subgraph(network, igraph::V(network)[type == TYPE.AUTHOR]) + author.network = igraph::induced_subgraph(network, igraph::V(network)[type == TYPE.AUTHOR]) ## remove isolates if wanted if (remove.isolates) { author.network = delete.isolates(author.network) @@ -1801,7 +1801,7 @@ extract.author.network.from.network = function(network, remove.isolates = FALSE) #' @return the artifact-vertex-induced subgraph of \code{network} extract.artifact.network.from.network = function(network, remove.isolates = FALSE) { ## only retain all artifact vertices - artifact.network = igraph::induced.subgraph(network, igraph::V(network)[type == TYPE.ARTIFACT]) + artifact.network = igraph::induced_subgraph(network, igraph::V(network)[type == TYPE.ARTIFACT]) ## remove isolates if wanted if (remove.isolates) { artifact.network = delete.isolates(artifact.network) @@ -1826,7 +1826,7 @@ extract.bipartite.network.from.network = function(network, remove.isolates = FAL } ## check whether there is an edge attibute 'type' - if (!("type" %in% igraph::list.edge.attributes(network))) { + if (!("type" %in% igraph::edge_attr_names(network))) { logging::logerror("Extraction of an bipartite network without the edge attribute 'type' does not work!") stop("Failed extraction of bipartite network.") } @@ -1868,7 +1868,7 @@ delete.authors.without.specific.edges = function(network, specific.edge.types = ## compute all authors without specific edges as vertex IDs vertex.ids.author.no.specific = setdiff(vertex.ids.author, vertex.ids.specific) ## remove all authors without specific edges from network - network = igraph::delete.vertices(network, 
vertex.ids.author.no.specific) + network = igraph::delete_vertices(network, vertex.ids.author.no.specific) return(network) } @@ -1935,13 +1935,13 @@ get.sample.network = function() { ## construct multi network network = net.builder$get.multi.network() - network = igraph::set.graph.attribute(network, "sample.network", TRUE) + network = igraph::set_graph_attr(network, "sample.network", TRUE) ## set layout for plotting lay = matrix(c( 20, 179, 693, 552, 956, 1091, 124, 317, 516, 615, 803, 1038, 245, 175, 255, 185, 253, 225, 73, 8, 75, 0, 96, 86), nrow = 12, byrow = FALSE) # for sample graph - network = igraph::set.graph.attribute(network, "layout", lay) + network = igraph::set_graph_attr(network, "layout", lay) return(network) } diff --git a/util-plot.R b/util-plot.R index 25e2a24c..10428567 100644 --- a/util-plot.R +++ b/util-plot.R @@ -52,7 +52,7 @@ PLOT.VERTEX.LABEL.COLOR = "gray60" #' is used, unless a graph attribute "layout" is set. For a comprehensive list of layouts and more information #' on layouts in general, see \link{https://igraph.org/python/doc/tutorial/tutorial.html#layout-algorithms}. #' To set the graph attribute on your network, run the following code while replacing \code{layout.to.set} -#' to your liking: \code{network = igraph::set.graph.attribute(network, "layout", layout.to.set)}. +#' to your liking: \code{network = igraph::set_graph_attr(network, "layout", layout.to.set)}. #' Note that \code{layout.to.set} refers to one of the "short names" of the recpective igraph layout, as #' specified on the Web site in the link given above. #' @@ -76,7 +76,7 @@ plot.network = function(network, labels = TRUE) { #' is used, unless a graph attribute "layout" is set. For a comprehensive list of layouts and more information #' on layouts in general, see \link{https://igraph.org/python/doc/tutorial/tutorial.html#layout-algorithms}. 
#' To set the graph attribute on your network, run the following code while replacing \code{layout.to.set} -#' to your liking: \code{network = igraph::set.graph.attribute(network, "layout", layout.to.set)}. +#' to your liking: \code{network = igraph::set_graph_attr(network, "layout", layout.to.set)}. #' Note that \code{layout.to.set} refers to one of the "short names" of the recpective igraph layout, as #' specified on the Web site in the link given above. #' @@ -101,7 +101,7 @@ plot.print.network = function(network, labels = TRUE) { #' is used, unless a graph attribute "layout" is set. For a comprehensive list of layouts and more information #' on layouts in general, see \link{https://igraph.org/python/doc/tutorial/tutorial.html#layout-algorithms}. #' To set the graph attribute on your network, run the following code while replacing \code{layout.to.set} -#' to your liking: \code{network = igraph::set.graph.attribute(network, "layout", layout.to.set)}. +#' to your liking: \code{network = igraph::set_graph_attr(network, "layout", layout.to.set)}. #' Note that \code{layout.to.set} refers to one of the "short names" of the recpective igraph layout, as #' specified on the Web site in the link given above. 
#' @@ -116,7 +116,7 @@ plot.print.network = function(network, labels = TRUE) { plot.get.plot.for.network = function(network, labels = TRUE) { ## check if network is empty if (igraph::vcount(network) == 0) { - network = create.empty.network(directed = igraph::is.directed(network), add.attributes = TRUE) + network = create.empty.network(directed = igraph::is_directed(network), add.attributes = TRUE) PLOT.VERTEX.SIZE = 0 } @@ -125,16 +125,16 @@ plot.get.plot.for.network = function(network, labels = TRUE) { names(PLOT.VERTEX.TYPES) = c(TYPE.AUTHOR, TYPE.ARTIFACT) ## remove loops because of weird behavior when plotting - network = igraph::delete.edges(network, igraph::E(network)[igraph::is.loop(network)]) + network = igraph::delete_edges(network, igraph::E(network)[igraph::is.loop(network)]) ## fix the type attributes (add new ones, also named) network = plot.fix.type.attributes(network) ## set igraph network layout if no layout is set yet if (!("layout" %in% igraph::list.graph.attributes(network))) { - network = igraph::set.graph.attribute(network, "layout", "kk") + network = igraph::set_graph_attr(network, "layout", "kk") } - layout.algorithm = igraph::get.graph.attribute(network, "layout") + layout.algorithm = igraph::graph_attr(network, "layout") ## create a ggraph object using the specified igraph layout p = ggraph::ggraph(network, layout = layout.algorithm) @@ -146,7 +146,7 @@ plot.get.plot.for.network = function(network, labels = TRUE) { mapping = ggplot2::aes(colour = paste(relation, sep = " "), linetype = edge.type, width = 0.3 + 0.5 * log(weight)), end_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), start_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), - arrow = if (igraph::is.directed(network)) { + arrow = if (igraph::is_directed(network)) { ggplot2::arrow(length = ggplot2::unit(PLOT.VERTEX.SIZE / 2, 'pt'), ends = "last", type = "closed") } else { NULL @@ -225,16 +225,16 @@ plot.get.plot.for.network = function(network, labels = TRUE) { 
plot.fix.type.attributes = function(network) { ## copy type attribute to vertex.type and edge.type if (igraph::vcount(network) == 0) { - network = igraph::set.vertex.attribute(network, "vertex.type", value = NA) + network = igraph::set_vertex_attr(network, "vertex.type", value = NA) } else { - network = igraph::set.vertex.attribute(network, "vertex.type", value = igraph::get.vertex.attribute(network, "type")) + network = igraph::set_vertex_attr(network, "vertex.type", value = igraph::vertex_attr(network, "type")) } - network = igraph::set.edge.attribute(network, "edge.type", value = igraph::get.edge.attribute(network, "type")) + network = igraph::set_edge_attr(network, "edge.type", value = igraph::edge_attr(network, "type")) ## adjust 'type' attribute for vertices for bipartite plotting (we need Booleans there) - types = igraph::get.vertex.attribute(network, "type") - network = igraph::remove.vertex.attribute(network, "type") - network = igraph::set.vertex.attribute(network, "type", value = sapply( + types = igraph::vertex_attr(network, "type") + network = igraph::delete_vertex_attr(network, "type") + network = igraph::set_vertex_attr(network, "type", value = sapply( types, function(t) return(t == TYPE.ARTIFACT) )) diff --git a/util-split.R b/util-split.R index b4073194..0d2b62f9 100644 --- a/util-split.R +++ b/util-split.R @@ -524,7 +524,7 @@ split.network.time.based = function(network, time.period = "3 months", bins = NU number.windows = NULL, sliding.window = FALSE, remove.isolates = TRUE) { ## extract date attributes from edges - dates = get.date.from.unix.timestamp(igraph::get.edge.attribute(network, "date")) + dates = get.date.from.unix.timestamp(igraph::edge_attr(network, "date")) ## number of windows given (ignoring time period and bins) if (!is.null(number.windows)) { @@ -709,7 +709,7 @@ split.network.activity.based = function(network, number.edges = 5000, number.win ## get dates in a data.frame for splitting purposes df = data.frame( - date = 
get.date.from.unix.timestamp(igraph::get.edge.attribute(network, "date")), + date = get.date.from.unix.timestamp(igraph::edge_attr(network, "date")), my.unique.id = seq_len(edge.count) # as a unique identifier only ) ## sort by date @@ -745,7 +745,7 @@ split.network.activity.based = function(network, number.edges = 5000, number.win edges.cut = edges.by.date[seq_len(offset.start)] ## delete edges from the network and create a new network - network.cut = igraph::delete.edges(network, igraph::E(network)[edges.cut]) + network.cut = igraph::delete_edges(network, igraph::E(network)[edges.cut]) ## split network for sliding windows networks.sliding = split.network.activity.based(network.cut, number.edges = number.edges, From 321d85043112971c04998249c14a0677a32c9004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 28 May 2024 12:35:35 +0200 Subject: [PATCH 055/130] Fix a bug in edgelist construction of issue networks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug, described in Issue#260, practically limits the number of edges that can be present in an issue network to 17 and additionally leads to warnings in every case where there are not exactly 17 edges present. It is caused by an incorrect parameter to the 'base::split' function. We intend to pass a vector containing the numbers 1 to nrow(add.links), but instead pass 1 to 17. 
Signed-off-by: Maximilian Löffler --- util-networks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-networks.R b/util-networks.R index 9ebb5680..a9b19e11 100644 --- a/util-networks.R +++ b/util-networks.R @@ -639,7 +639,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", } ## connect corresponding add_link and referenced_by issue-events - edge.list = plyr::rbind.fill(parallel::mclapply(split(add.links, seq_along(add.links)), function(from) { + edge.list = plyr::rbind.fill(parallel::mclapply(split(add.links, seq_len(nrow(add.links))), function(from) { ## get edge attributes cols.which = edge.attributes %in% colnames(from) edge.attrs = from[, edge.attributes[cols.which], drop = FALSE] From 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 19 Jun 2024 15:56:17 +0200 Subject: [PATCH 056/130] Make 'github' the default data source for issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In practice, GitHub is by far the most used data source for issues. By adjusting the default to 'github' only, we can greatly reduce the amount of raised warnings in production. 
Signed-off-by: Maximilian Löffler --- util-conf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-conf.R b/util-conf.R index 9ae2fd73..13b3a6f3 100644 --- a/util-conf.R +++ b/util-conf.R @@ -422,7 +422,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed.number = 1 ), issues.from.source = list( - default = c("jira", "github"), + default = "github", type = "character", allowed = c("jira", "github"), allowed.number = Inf From 8c8080cb9caf115f19d9f145ad6e6c108b131a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 19 Jun 2024 15:57:26 +0200 Subject: [PATCH 057/130] Adjust tests to comply with the new default for issue sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- tests/test-core-peripheral.R | 1 + tests/test-data.R | 1 + tests/test-networks-artifact.R | 2 + tests/test-networks-author.R | 3 + tests/test-networks-bipartite.R | 1 + tests/test-networks-covariates.R | 3 + tests/test-networks-misc.R | 3 + tests/test-networks-multi-relation.R | 5 + tests/test-read.R | 158 ++++++++++++------------- tests/test-split-data-activity-based.R | 92 +++++++------- tests/test-split-data-time-based.R | 79 +++++++------ 11 files changed, 194 insertions(+), 154 deletions(-) diff --git a/tests/test-core-peripheral.R b/tests/test-core-peripheral.R index f56ea325..e719d651 100644 --- a/tests/test-core-peripheral.R +++ b/tests/test-core-peripheral.R @@ -37,6 +37,7 @@ if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") ## Prepare global setting proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) +proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.data = ProjectData$new(proj.conf) diff --git a/tests/test-data.R b/tests/test-data.R index e6136f54..88ce0e42 100644 --- a/tests/test-data.R +++ b/tests/test-data.R 
@@ -402,6 +402,7 @@ test_that("Filter bots from commit data", { test_that("Filter bots from issue data", { proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("filter.bots", TRUE) ## disable all other filterings proj.conf$update.value("issues.only.comments", FALSE) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 2a717080..432840fc 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -147,6 +147,7 @@ patrick::with_parameters_test_that("Network construction of an issue-based artif ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) net.conf = NetworkConf$new() net.conf$update.values(updated.values = list(artifact.relation = "issue", artifact.directed = test.directed)) @@ -176,6 +177,7 @@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) net.conf = NetworkConf$new() net.conf$update.values(updated.values = list(artifact.relation = "issue", artifact.directed = test.directed)) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 863f38a8..2910ba51 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -147,6 +147,7 @@ test_that("Amount of authors (author.all.authors, author.only.committers).", { ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) proj.conf$update.value("commits.filter.untracked.files", TRUE) 
net.conf = NetworkConf$new() @@ -472,6 +473,7 @@ test_that("Network construction of the undirected author-issue network with all ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) proj.conf$update.value("issues.only.comments", FALSE) net.conf = NetworkConf$new() @@ -595,6 +597,7 @@ test_that("Network construction of the undirected author-issue network with just ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf = NetworkConf$new() net.conf$update.values(updated.values = list(author.relation = "issue")) diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index 646bad3a..c6725281 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -249,6 +249,7 @@ test_that("Construction of the bipartite network for the feature artifact with a ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf = NetworkConf$new() net.conf$update.values(updated.values = list(author.relation = "cochange", artifact.relation = "issue")) diff --git a/tests/test-networks-covariates.R b/tests/test-networks-covariates.R index 92084ea4..427ff729 100644 --- a/tests/test-networks-covariates.R +++ b/tests/test-networks-covariates.R @@ -73,6 +73,7 @@ get.network.covariates.test.networks = function(network.type = c("author", "arti ## configuration and data objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) 
proj.conf$update.value("commits.filter.base.artifact", FALSE) proj.conf$update.value("commits.filter.untracked.files", TRUE) proj.conf$update.value("issues.only.comments", issues.only.comments) @@ -2521,6 +2522,7 @@ test_that("Test get.first.activity.data with missing commits, mails, and issues" ## initialize a ProjectData object with the ProjectConf proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.data.base = ProjectData$new(project.conf = proj.conf) ## create a RangeData object with the same data sources as 'proj.data.base'. @@ -2569,6 +2571,7 @@ test_that("Test get.first.activity.data with missing commits and mails for all a ## initialize a ProjectData object with the ProjectConf proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.data = ProjectData$new(project.conf = proj.conf) ## get the timestamps for splitting before discarding the data diff --git a/tests/test-networks-misc.R b/tests/test-networks-misc.R index b964c8ce..20452155 100644 --- a/tests/test-networks-misc.R +++ b/tests/test-networks-misc.R @@ -207,6 +207,7 @@ test_that("getting all authors of a list of data ranges, list length 2", { ## Arrange proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.data.base = ProjectData$new(project.conf = proj.conf) range.data.one = proj.data.base$get.data.cut.to.same.date("mails") range.data.two = proj.data.base$get.data.cut.to.same.date("issues") @@ -225,6 +226,7 @@ test_that("getting all authors of a list of data ranges, list length 2, not glob ## Arrange proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.data.base = ProjectData$new(project.conf = proj.conf) 
range.data.one = proj.data.base$get.data.cut.to.same.date("mails") range.data.two = proj.data.base$get.data.cut.to.same.date("issues") @@ -263,6 +265,7 @@ test_that("getting all authors of a list of data ranges by data source 'issues', ## Arrange proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.data.base = ProjectData$new(project.conf = proj.conf) range.data.one = proj.data.base$get.data.cut.to.same.date("mails") range.data.two = proj.data.base$get.data.cut.to.same.date("issues") diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index 846b17fa..f215ae4b 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -104,6 +104,7 @@ test_that("Construction of the bipartite network for the feature artifact with a ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf = NetworkConf$new() net.conf$update.values(updated.values = list(author.relation = c("cochange", "issue"), artifact.relation = c("issue", "mail"))) @@ -210,6 +211,7 @@ test_that("Construction of the multi network for the feature artifact with autho ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf = NetworkConf$new() net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"), artifact.relation = c("cochange", "issue"))) @@ -320,6 +322,7 @@ test_that("Construction of the multi-artifact bipartite network with artifact re ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + 
proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf.cochange = NetworkConf$new() net.conf.cochange$update.values(updated.values = list(author.relation = "cochange", artifact.relation = "cochange")) @@ -503,6 +506,7 @@ test_that("Construction of the multi-artifact bipartite network with artifact re ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf.issue = NetworkConf$new() net.conf.issue$update.values(updated.values = list(author.relation = "issue", artifact.relation = "issue")) @@ -605,6 +609,7 @@ test_that("Construction of the multi-artifact bipartite network with artifact re ## configurations proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("commits.filter.base.artifact", FALSE) net.conf.cochange = NetworkConf$new() net.conf.cochange$update.values(updated.values = list(author.relation = "cochange", artifact.relation = "cochange")) diff --git a/tests/test-read.R b/tests/test-read.R index 58c9bd3c..c617e091 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -352,60 +352,53 @@ test_that("Read and parse the issue data.", { issue.data.read.github = read.issues(proj.conf$get.value("datapath.issues"), proj.conf$get.value("issues.from.source")) ## build the expected data.frame - issue.data.expected = data.frame(issue.id = c(rep("", 15), rep("", 8), - rep("", 9), rep("", 11), - rep("", 6), rep("", 5), rep("", 3)), - issue.title = c(rep("[ZEPPELIN-328] Interpreter page should clarify the % magic syntax for interpreter group.name", 15), - rep("[ZEPPELIN-332] CNFE when running SQL query against Cassandra temp table", 8), - rep("Error in construct.networks.from.list for openssl 
function networks", 9), + issue.data.expected = data.frame(issue.id = c(rep("", 9), rep("", 11), + rep("", 6), rep("", 5), rep("", 3), + rep("", 15), rep("", 8)), + issue.title = c(rep("Error in construct.networks.from.list for openssl function networks", 9), rep("Distinguish directedness of networks and edge-construction algorithm", 11), rep("Example pull request 1", 6), rep("Example pull request 2", 5), - rep("Example pull request 4", 3)), - issue.type = I(c(rep(list(list("issue" , "bug")), 15), rep(list(list("issue" , "bug")), 8), - rep(list(list("issue" , "bug")), 9), rep(list(list("issue", "bug", "enhancement")), 11), - rep(list(list("pull request")), 6), rep(list(list("pull request")), 5), rep(list(list("pull request", "enhancement")), 3))), - issue.state = c(rep("closed", 15), rep("open", 8), rep("closed", 9), rep("open", 11), - rep("reopened", 6), rep("closed", 5), rep("open", 3)), - issue.resolution = I(c(rep(list(list("fixed")), 15), rep(list(list("unresolved")), 8), - rep(list(list()), 9), rep(list(list()), 11), - rep(list(list()), 6), rep(list(list()), 5), rep(list(list()), 3))), - creation.date = get.date.from.string(c(rep("2013-04-21 23:52:09", 15), - rep("2016-07-12 16:01:30", 8), - rep("2016-07-12 15:59:25", 9), + rep("Example pull request 4", 3), + rep("[ZEPPELIN-328] Interpreter page should clarify the % magic syntax for interpreter group.name", 15), + rep("[ZEPPELIN-332] CNFE when running SQL query against Cassandra temp table", 8)), + issue.type = I(c(rep(list(list("issue" , "bug")), 9), rep(list(list("issue", "bug", "enhancement")), 11), + rep(list(list("pull request")), 6), rep(list(list("pull request")), 5), rep(list(list("pull request", "enhancement")), 3), + rep(list(list("issue" , "bug")), 15), rep(list(list("issue" , "bug")), 8))), + issue.state = c(rep("closed", 9), rep("open", 11), rep("reopened", 6), rep("closed", 5), rep("open", 3), + rep("closed", 15), rep("open", 8)), + issue.resolution = I(c(rep(list(list()), 9), rep(list(list()), 
11), rep(list(list()), 6), + rep(list(list()), 5), rep(list(list()), 3), + rep(list(list("fixed")), 15), rep(list(list("unresolved")), 8))), + creation.date = get.date.from.string(c(rep("2016-07-12 15:59:25", 9), rep("2016-07-12 14:30:13", 11), rep("2016-07-14 13:37:00", 6), rep("2016-07-12 14:59:25", 5), - rep("2016-07-12 16:02:02", 3))), - closing.date = get.date.from.string(c(rep("2013-05-25 20:02:08", 15), rep(NA, 8), - rep("2016-07-12 16:06:30", 9), rep(NA, 11), + rep("2016-07-12 16:02:02", 3), + rep("2013-04-21 23:52:09", 15), + rep("2016-07-12 16:01:30", 8))), + closing.date = get.date.from.string(c(rep("2016-07-12 16:06:30", 9), rep(NA, 11), rep(NA, 6), rep("2016-07-12 16:04:59", 5), - rep(NA, 3))), - issue.components = I(c(rep(list(list("GUI" , "Interpreters")), 15), rep(list(list("Interpreters")), 8), - rep(list(list()), 9), rep(list(list()), 11), - rep(list(list()), 6), rep(list(list()), 5), rep(list(list()), 3))), - event.name = c("created", "commented", "commented", "commented", "commented", "commented", + rep(NA, 3), + rep("2013-05-25 20:02:08", 15), rep(NA, 8))), + issue.components = I(c(rep(list(list()), 9), rep(list(list()), 11), + rep(list(list()), 6), rep(list(list()), 5), rep(list(list()), 3), + rep(list(list("GUI" , "Interpreters")), 15), rep(list(list("Interpreters")), 8))), + event.name = c("created", "assigned", "commented", "state_updated", "add_link", "referenced", "referenced", "add_link", + "add_link", "mentioned", "subscribed", "commented", "mentioned", "subscribed", "add_link", "mentioned", + "subscribed", "labeled", "commented", "referenced_by", "created", "commented", "state_updated", "commented", + "commented", "state_updated", "created", "commented", "merged", "state_updated", "referenced_by", "commit_added", + "created", "commented", "created", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "commented", "resolution_updated", "referenced_by", 
"add_link", "referenced_by", "add_link", "created", - "commented", "commented", "commented", "commented", "commented", "created", "assigned", "commented", - "state_updated", "add_link", "referenced", "referenced", "add_link", "add_link", "mentioned", "subscribed", - "commented", "mentioned", "subscribed", "add_link", "mentioned", "subscribed", "labeled", "commented", - "referenced_by", "created", "commented", "state_updated", "commented", "commented", "state_updated", - "created", "commented", "merged", "state_updated", "referenced_by", "commit_added", "created", "commented"), - author.name = c("Thomas", "Thomas", "Björn", "Björn", "Björn", "Björn", "Olaf", "Björn", "Björn", "Olaf", "Olaf", "Olaf", - "Björn", "Thomas", "Thomas", "Thomas", "Thomas", "Björn", "Björn", "Björn", "Max", "Max", "Max", "Karl", - "Olaf", "Karl", "Olaf", "Karl", "Karl", "Thomas", "Karl", "Thomas", "udo", "udo", "Thomas", "Björn", "Björn", + "commented", "commented", "commented", "commented", "commented"), + author.name = c("Karl", "Olaf", "Karl", "Olaf", "Karl", "Karl", "Thomas", "Karl", "Thomas", "udo", "udo", "Thomas", "Björn", "Björn", "Thomas", "Björn", "Björn", "Olaf", "Björn", "Karl", "Thomas", "Thomas", "Thomas", "Olaf", "Björn", "Olaf", - "Björn", "Björn", "Olaf", "Olaf", "Thomas", "Björn", "Olaf", "Olaf"), - author.email = c("thomas@example.org", "thomas@example.org", "bjoern@example.org", - "bjoern@example.org", "bjoern@example.org", "bjoern@example.org", - "olaf@example.org", "bjoern@example.org", "bjoern@example.org", - "olaf@example.org", "olaf@example.org", "olaf@example.org", - "bjoern@example.org", "thomas@example.org", "thomas@example.org", - "thomas@example.org", "thomas@example.org", "bjoern@example.org", - "bjoern@example.org", "bjoern@example.org", "max@example.org", "max@example.org", - "max@example.org", "karl@example.org", "olaf@example.org", + "Björn", "Björn", "Olaf", "Olaf", "Thomas", "Björn", "Olaf", "Olaf", "Thomas", "Thomas", "Björn", "Björn", "Björn", + 
"Björn", "Olaf", "Björn", "Björn", "Olaf", "Olaf", "Olaf", "Björn", "Thomas", "Thomas", "Thomas", "Thomas", "Björn", + "Björn", "Björn", "Max", "Max", "Max"), + author.email = c("karl@example.org", "olaf@example.org", "karl@example.org", "olaf@example.org", "karl@example.org", "karl@example.org", "thomas@example.org", "karl@example.org", "thomas@example.org", "udo@example.org", "udo@example.org", "thomas@example.org", "bjoern@example.org", @@ -415,8 +408,33 @@ test_that("Read and parse the issue data.", { "olaf@example.org", "bjoern@example.org", "olaf@example.org", "bjoern@example.org", "bjoern@example.org", "olaf@example.org", "olaf@example.org", "thomas@example.org", "bjoern@example.org", - "olaf@example.org", "olaf@example.org"), - date = get.date.from.string(c("2013-04-21 23:52:09", "2013-04-21 23:52:09", + "olaf@example.org", "olaf@example.org", "thomas@example.org", + "thomas@example.org", "bjoern@example.org", "bjoern@example.org", + "bjoern@example.org", "bjoern@example.org", "olaf@example.org", + "bjoern@example.org", "bjoern@example.org", "olaf@example.org", + "olaf@example.org", "olaf@example.org", "bjoern@example.org", + "thomas@example.org", "thomas@example.org", "thomas@example.org", + "thomas@example.org", "bjoern@example.org", "bjoern@example.org", + "bjoern@example.org", "max@example.org", "max@example.org", + "max@example.org"), + date = get.date.from.string(c("2016-07-12 15:59:25", "2016-07-12 15:59:25", + "2016-07-12 15:59:59", "2016-07-12 16:06:30", + "2016-08-07 15:37:02", "2016-08-31 16:45:09", + "2016-10-05 16:45:09", "2016-08-07 15:37:02", + "2016-08-07 15:30:00", "2016-07-12 15:30:02", + "2016-07-12 15:30:02", "2016-07-12 16:03:59", + "2016-08-31 15:30:02", "2016-10-05 15:30:02", + "2016-10-13 15:30:02", "2016-12-07 15:30:02", + "2016-12-07 15:30:02", "2017-05-23 12:31:34", + "2017-05-23 12:32:39", "2016-08-07 15:37:02", + "2016-07-12 15:59:25", "2016-07-12 15:59:25", + "2016-07-12 15:59:59", "2016-07-12 16:01:01", + "2016-07-12 
16:06:01", "2016-07-14 13:37:00", + "2016-07-12 14:59:25", "2016-07-12 14:59:25", + "2016-07-12 16:04:59", "2016-07-12 16:04:59", + "2016-08-07 15:30:00", "2016-07-12 16:02:02", + "2016-07-12 16:02:02", "2016-07-12 16:02:02", + "2013-04-21 23:52:09", "2013-04-21 23:52:09", "2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", "2013-05-06 01:04:34", "2013-05-25 03:25:06", "2013-05-25 03:48:41", @@ -427,49 +445,28 @@ test_that("Read and parse the issue data.", { "2017-05-21 12:00:00", "2016-07-12 16:01:30", "2016-07-12 16:02:30", "2016-07-15 19:55:39", "2016-07-15 20:07:47", "2016-07-27 20:12:08", - "2016-07-28 06:27:52", "2016-07-12 15:59:25", - "2016-07-12 15:59:25", "2016-07-12 15:59:59", - "2016-07-12 16:06:30", "2016-08-07 15:37:02", - "2016-08-31 16:45:09", "2016-10-05 16:45:09", - "2016-08-07 15:37:02", "2016-08-07 15:30:00", - "2016-07-12 15:30:02", "2016-07-12 15:30:02", - "2016-07-12 16:03:59", "2016-08-31 15:30:02", - "2016-10-05 15:30:02", "2016-10-13 15:30:02", - "2016-12-07 15:30:02", "2016-12-07 15:30:02", - "2017-05-23 12:31:34", "2017-05-23 12:32:39", - "2016-08-07 15:37:02", "2016-07-12 15:59:25", - "2016-07-12 15:59:25", "2016-07-12 15:59:59", - "2016-07-12 16:01:01", "2016-07-12 16:06:01", - "2016-07-14 13:37:00", "2016-07-12 14:59:25", - "2016-07-12 14:59:25", "2016-07-12 16:04:59", - "2016-07-12 16:04:59", "2016-08-07 15:30:00", - "2016-07-12 16:02:02", "2016-07-12 16:02:02", - "2016-07-12 16:02:02")), - event.info.1 = c("open", "open", "open", "open", "open", "open", "open", "open", "open", - "open", "open", "open", "fixed", "", "", - "", "", "open", "open", "open", "open", "open", "open", "open", - "", "open", "closed", "930af63a030fb92e48eddff01f53284c3eeba80e", "", "", "", + "2016-07-28 06:27:52")), + event.info.1 = c("open", "", "open", "closed", "930af63a030fb92e48eddff01f53284c3eeba80e", "", "", "", "", "Thomas", "Thomas", "open", "Thomas", "Thomas", "fb52357f05958007b867da06f4077abdc04fa0d8", "udo", "udo", "decided", 
"open", "", "open", "open", "closed", "closed", "closed", "open", - "open", "open", "", "closed", "", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "open", "open"), + "open", "open", "", "closed", "", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "open", "open", "open", "open", "open", "open", "open", "open", "open", "open", "open", + "open", "open", "open", "fixed", "", "", + "", "", "open", "open", "open", "open", "open", "open"), event.info.2 = NA, # is assigned later event.id = NA, # is assigned later - issue.source = c(rep("jira", 23), rep("github", 20), rep("github", 14)), + issue.source = c(rep("github", 20), rep("github", 14), rep("jira", 23)), artifact.type = "IssueEvent" ) - issue.data.expected[["event.info.2"]] = I(list( - list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), + issue.data.expected[["event.info.2"]] = I(list(list(), "", list(), "open", "commit", "", "", "issue", "issue", "thomas@example.org", "thomas@example.org", list(), + "thomas@example.org", "thomas@example.org", "commit", "udo@example.org", "udo@example.org", "", list(), + "issue", list(), list(), "open", list(), list(), "closed", list(), list(), "", "open", "issue", + "2016-07-12 15:58:59", list(), list(), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), "unresolved", "issue", "issue", "issue", "issue", list("unresolved"), list("unresolved"), - list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved"), list(), "", list(), - "open", "commit", "", "", "issue", "issue", "thomas@example.org", "thomas@example.org", list(), - "thomas@example.org", "thomas@example.org", "commit", "udo@example.org", "udo@example.org", - "", list(), "issue", list(), list(), "open", list(), list(), "closed", - list(), list(), "", "open", "issue", - "2016-07-12 15:58:59", 
list(), list() + list("unresolved"), list("unresolved"), list("unresolved"), list("unresolved") )) ## calculate event IDs @@ -483,7 +480,8 @@ test_that("Read and parse the issue data.", { issue.data.expected.github = subset(issue.data.expected, issue.data.expected[["issue.source"]] == "github") ## set row names as integers - attr(issue.data.expected, "row.names") = as.integer(seq(from = 1, to = nrow(issue.data.expected), by = 1)) + attr(issue.data.expected, "row.names") = as.integer(c(seq(from = 1, to = nrow(issue.data.expected.github), by = 1), + seq(from = 37, to = 37 + nrow(issue.data.expected.jira), by = 1))) attr(issue.data.expected.jira, "row.names") = as.integer(seq(from = 1, to = nrow(issue.data.expected.jira), by = 1)) attr(issue.data.expected.github, "row.names") = as.integer(seq(from = 1, to = nrow(issue.data.expected.github), by = 1)) @@ -555,4 +553,4 @@ test_that("Read the empty commit-interactions data.", { ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") -}) \ No newline at end of file +}) diff --git a/tests/test-split-data-activity-based.R b/tests/test-split-data-activity-based.R index f0c2812c..c2043a6e 100644 --- a/tests/test-split-data-activity-based.R +++ b/tests/test-split-data-activity-based.R @@ -52,6 +52,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -116,8 +117,8 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:06:10" = 
data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 44:48, 52:53, 55:57), ], - "2016-07-12 16:06:10-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 27, ], + "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:06:10-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 4, ], "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$issues[0, ] ), mails = list( @@ -196,7 +197,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(18:19, 24:27, 35, 44:48, 52:53, 55:57), ] + "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(1:4, 12, 21:25, 29:30, 32:34, 54:55), ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$mails[15:16, ] # when pasta is not configured: rownames(data$mails) %in% 16:17 @@ -235,6 +236,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -311,8 +313,8 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2004-10-09 18:38:13-2010-07-12 11:05:35" = data$issues[0, ], "2010-07-12 11:05:35-2010-07-12 12:05:41" = data$issues[0, ], "2010-07-12 12:05:41-2010-07-12 12:05:44" = data$issues[0, ], - "2010-07-12 12:05:44-2016-07-12 15:58:40" = data$issues[rownames(data$issues) %in% c(1:13, 33:34, 50:51), ], - "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 
44:47, 52:53, 55:57), ], + "2010-07-12 12:05:44-2016-07-12 15:58:40" = data$issues[rownames(data$issues) %in% c(10:11, 27:28, 37:49), ], + "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:24, 29:30, 32:34, 54:55), ], "2016-07-12 16:05:37-2016-07-12 16:05:38" = data$issues[0, ] ), mails = list( @@ -401,7 +403,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commit.messages ), issues = list( - "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:13, 18:19, 24:26, 33:35, 44:47, 50:53, 55:57), ] + "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 10:12, 21:24, 27:30, 32:34, 37:49, 54:55), ] ), mails = list( "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$mails @@ -440,6 +442,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -513,12 +516,12 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2017-05-23 12:31:34-2017-05-23 12:32:40" = data$commit.messages ), issues = list( - "2013-04-21 23:52:09-2013-05-25 06:22:23" = data$issues[rownames(data$issues) %in% 1:10, ], - "2013-05-25 06:22:23-2016-07-12 15:59:59" = data$issues[rownames(data$issues) %in% c(11:13, 24:26, 33:34, 44:45, 50:51), ], - "2016-07-12 15:59:59-2016-07-12 16:06:30" = data$issues[rownames(data$issues) %in% c(18:19, 35, 46:48, 52:53, 55:57), ], - "2016-07-12 16:06:30-2016-08-07 15:37:02" = data$issues[rownames(data$issues) %in% c(20:23, 27:28, 31:32, 49, 54), ], - "2016-08-07 15:37:02-2017-05-23 
12:31:34" = data$issues[rownames(data$issues) %in% c(14:17, 29:30, 36:40, 43), ], - "2017-05-23 12:31:34-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(41:42), ] + "2013-04-21 23:52:09-2013-05-25 06:22:23" = data$issues[rownames(data$issues) %in% 37:46, ], + "2013-05-25 06:22:23-2016-07-12 15:59:59" = data$issues[rownames(data$issues) %in% c(1:3, 10:11, 21:22, 27:28, 47:49), ], + "2016-07-12 15:59:59-2016-07-12 16:06:30" = data$issues[rownames(data$issues) %in% c(12, 23:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:06:30-2016-08-07 15:37:02" = data$issues[rownames(data$issues) %in% c(4:5, 8:9, 26, 31, 56:59), ], + "2016-08-07 15:37:02-2017-05-23 12:31:34" = data$issues[rownames(data$issues) %in% c(6:7, 13:17, 20, 50:53), ], + "2017-05-23 12:31:34-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(18:19), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -646,6 +649,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -713,10 +717,10 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2016-07-12 16:06:20-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 44:48, 52:53, 55:57), ], - "2016-07-12 16:00:45-2016-07-12 16:06:20" = data$issues[rownames(data$issues) %in% c(18:19, 35, 47:48, 52:53, 55:57), ], - "2016-07-12 16:06:10-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 27, ], - "2016-07-12 16:06:20-2016-07-12 16:06:33" = data$issues[rownames(data$issues) == 27, ] + "2016-07-12 
15:58:59-2016-07-12 16:06:10" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:00:45-2016-07-12 16:06:20" = data$issues[rownames(data$issues) %in% c(12, 24:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:06:10-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 4, ], + "2016-07-12 16:06:20-2016-07-12 16:06:33" = data$issues[rownames(data$issues) == 4, ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -798,7 +802,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(18:19, 24:27, 35, 44:48, 52:53, 55:57), ] + "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(1:4, 12, 21:25, 29:30, 32:34, 54:55), ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:33" = data$mails[15:16, ] # when pasta is not configured: rownames(data$mails) %in% 16:17 @@ -832,6 +836,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -912,10 +917,10 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 44:48, 52:53, 55:57), ], - "2016-07-12 16:00:45-2016-07-12 16:06:20" = data$issues[rownames(data$issues) %in% c(18:19, 35, 47:48, 52:53, 55:57), ], - "2016-07-12 16:06:10-2016-07-12 
16:06:32" = data$issues[rownames(data$issues) == 27, ], - "2016-07-12 16:06:20-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 27, ], + "2016-07-12 15:58:59-2016-07-12 16:06:10" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:00:45-2016-07-12 16:06:20" = data$issues[rownames(data$issues) %in% c(12, 24:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:06:10-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 4, ], + "2016-07-12 16:06:20-2016-07-12 16:06:32" = data$issues[rownames(data$issues) == 4, ], "2016-07-12 16:06:32-2016-07-12 16:06:33" = data$issues[0, ] ), mails = list( @@ -969,6 +974,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1061,10 +1067,10 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2010-07-12 12:05:34-2010-07-12 12:05:42" = data$issues[0, ], "2010-07-12 12:05:41-2010-07-12 12:05:44" = data$issues[0, ], "2010-07-12 12:05:42-2010-07-12 12:05:45" = data$issues[0, ], - "2010-07-12 12:05:44-2016-07-12 15:58:40" = data$issues[rownames(data$issues) %in% c(1:13, 33:34, 50:51), ], - "2010-07-12 12:05:45-2016-07-12 15:58:50" = data$issues[rownames(data$issues) %in% c(1:13, 33:34, 50:51), ], - "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 44:47, 52:53, 55:57), ], - "2016-07-12 15:58:50-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 44:47, 52:53, 55:57), ] + "2010-07-12 12:05:44-2016-07-12 15:58:40" = data$issues[rownames(data$issues) %in% c(10:11, 27:28, 37:49), ], + "2010-07-12 
12:05:45-2016-07-12 15:58:50" = data$issues[rownames(data$issues) %in% c(10:11, 27:28, 37:49), ], + "2016-07-12 15:58:40-2016-07-12 16:05:37" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:24, 29:30, 32:34, 54:55), ], + "2016-07-12 15:58:50-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:24, 29:30, 32:34, 54:55), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -1164,7 +1170,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$commit.messages ), issues = list( - "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:13, 18:19, 24:26, 33:35, 44:47, 50:53, 55:57), ] + "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 10:12, 21:24, 27:30, 32:34, 37:49, 54:55), ] ), mails = list( "2004-10-09 18:38:13-2016-07-12 16:05:38" = data$mails @@ -1203,6 +1209,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1289,16 +1296,16 @@ patrick::with_parameters_test_that("Split a data object activity-based (activity "2016-10-05 16:45:09-2017-05-23 12:32:40" = data$commit.messages ), issues = list( - "2013-04-21 23:52:09-2013-05-25 06:22:23" = data$issues[rownames(data$issues) %in% 1:10, ], - "2013-05-06 01:04:34-2016-07-12 15:30:02" = data$issues[rownames(data$issues) %in% c(6:13, 50:51), ], - "2013-05-25 06:22:23-2016-07-12 15:59:59" = data$issues[rownames(data$issues) %in% c(11:13, 24:26, 33:34, 44:45, 50:51), ], - "2016-07-12 15:30:02-2016-07-12 16:02:02" = data$issues[rownames(data$issues) 
%in% c(18, 24:26, 33:34, 44:47, 55), ], - "2016-07-12 15:59:59-2016-07-12 16:06:30" = data$issues[rownames(data$issues) %in% c(18:19, 35, 46:48, 52:53, 55:57), ], - "2016-07-12 16:02:02-2016-07-27 20:12:08" = data$issues[rownames(data$issues) %in% c(19:21, 27, 35, 48:49, 52:53, 56:57), ], - "2016-07-12 16:06:30-2016-08-07 15:37:02" = data$issues[rownames(data$issues) %in% c(20:23, 27:28, 31:32, 49, 54), ], - "2016-07-27 20:12:08-2016-10-05 16:45:09" = data$issues[rownames(data$issues) %in% c(22:23, 28:29, 31:32, 36:37, 43, 54), ], - "2016-08-07 15:37:02-2017-05-23 12:31:34" = data$issues[rownames(data$issues) %in% c(14:17, 29:30, 36:40, 43), ], - "2016-10-05 16:45:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(14:17, 30, 38:42), ] + "2013-04-21 23:52:09-2013-05-25 06:22:23" = data$issues[rownames(data$issues) %in% 37:46, ], + "2013-05-06 01:04:34-2016-07-12 15:30:02" = data$issues[rownames(data$issues) %in% c(27:28, 42:49), ], + "2013-05-25 06:22:23-2016-07-12 15:59:59" = data$issues[rownames(data$issues) %in% c(1:3, 10:11, 21:22, 27:28, 47:49), ], + "2016-07-12 15:30:02-2016-07-12 16:02:02" = data$issues[rownames(data$issues) %in% c(1:3, 10:11, 21:24, 32, 54), ], + "2016-07-12 15:59:59-2016-07-12 16:06:30" = data$issues[rownames(data$issues) %in% c(12, 23:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:02:02-2016-07-27 20:12:08" = data$issues[rownames(data$issues) %in% c(4, 12, 25:26, 29:30, 33:34, 55:57), ], + "2016-07-12 16:06:30-2016-08-07 15:37:02" = data$issues[rownames(data$issues) %in% c(4:5, 8:9, 26, 31, 56:59), ], + "2016-07-27 20:12:08-2016-10-05 16:45:09" = data$issues[rownames(data$issues) %in% c(5:6, 8:9, 13:14, 20, 31, 58:59), ], + "2016-08-07 15:37:02-2017-05-23 12:31:34" = data$issues[rownames(data$issues) %in% c(6:7, 13:17, 20, 50:53), ], + "2016-10-05 16:45:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(7, 15:19, 50:53), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -1438,6 
+1445,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (number.w ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1499,8 +1507,8 @@ patrick::with_parameters_test_that("Split a data object activity-based (number.w "2016-07-12 16:06:20-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:06:20" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 35, 44:48, 52:53, 55:57), ], - "2016-07-12 16:06:20-2016-07-12 16:06:33" = data$issues[rownames(data$issues) == 27, ] + "2016-07-12 15:58:59-2016-07-12 16:06:20" = data$issues[rownames(data$issues) %in% c(1:3, 12, 21:25, 29:30, 32:34, 54:55), ], + "2016-07-12 16:06:20-2016-07-12 16:06:33" = data$issues[rownames(data$issues) == 4, ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:06:20" = data$mails[15:16, ], # when pasta is not configured: rownames(data$mails) %in% 16:17 @@ -1565,6 +1573,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (number.w ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1627,7 +1636,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (number.w ), issues = list( "2004-10-09 18:38:13-2010-07-12 12:05:43" = data$issues[0, ], - "2010-07-12 12:05:43-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:13, 18:19, 24:26, 33:35, 44:47, 50:53, 55:57), ] + "2010-07-12 
12:05:43-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 10:12, 21:24, 27:30, 32:34, 37:49, 54:55), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -1693,6 +1702,7 @@ patrick::with_parameters_test_that("Split a data object activity-based (number.w ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1754,8 +1764,8 @@ patrick::with_parameters_test_that("Split a data object activity-based (number.w "2016-07-12 16:03:59-2017-05-23 12:32:40" = data$commit.messages ), issues = list( - "2013-04-21 23:52:09-2016-07-12 16:03:59" = data$issues[rownames(data$issues) %in% c(1:13, 18:19, 24:26, 33:34, 44:47, 50:51, 55:57), ], - "2016-07-12 16:03:59-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(14:17, 20:23, 28:31, 32, 35:42, 27, 43, 48:49, 52:54), ] + "2013-04-21 23:52:09-2016-07-12 16:03:59" = data$issues[rownames(data$issues) %in% c(1:3, 10:11, 21:24, 27:28, 32:34, 37:49, 54:55), ], + "2016-07-12 16:03:59-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(4:9, 12:20, 25:26, 29:31, 50:53, 56:59), ] ), mails = list( ## comments indicate row names when pasta is not configured diff --git a/tests/test-split-data-time-based.R b/tests/test-split-data-time-based.R index f4488bf1..40381c6c 100644 --- a/tests/test-split-data-time-based.R +++ b/tests/test-split-data-time-based.R @@ -53,6 +53,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) 
proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -115,9 +116,9 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$issues[rownames(data$issues) %in% c(18, 24:26, 44:47), ], - "2016-07-12 16:01:59-2016-07-12 16:04:59" = data$issues[rownames(data$issues) %in% c(19, 35, 55:57), ], - "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(27, 48, 52:53), ] + "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$issues[rownames(data$issues) %in% c(1:3, 21:24, 54), ], + "2016-07-12 16:01:59-2016-07-12 16:04:59" = data$issues[rownames(data$issues) %in% c(12, 32:34, 55), ], + "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(4, 25, 29:30), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -164,6 +165,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -232,8 +234,8 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis issues = list( "2004-10-09 18:38:13-2007-10-10 12:38:13" = data$issues[0, ], "2007-10-10 12:38:13-2010-10-10 06:38:13" = data$issues[0, ], - "2010-10-10 06:38:13-2013-10-10 00:38:13" = data$issues[rownames(data$issues) %in% 1:13, ], - "2013-10-10 00:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 33:35, 44:47, 50:53, 55:57), ] + "2010-10-10 06:38:13-2013-10-10 00:38:13" = data$issues[rownames(data$issues) 
%in% 37:49, ], + "2013-10-10 00:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 10:12, 21:24, 27:30, 32:34, 54:55), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -282,6 +284,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -344,9 +347,9 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis "2017-04-21 23:52:09-2017-05-23 12:32:40" = data$commit.messages ), issues = list( - "2013-04-21 23:52:09-2015-04-22 11:52:09" = data$issues[rownames(data$issues) %in% 1:13, ], - "2015-04-22 11:52:09-2017-04-21 23:52:09" = data$issues[rownames(data$issues) %in% c(18:40, 43:57), ], - "2017-04-21 23:52:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(14:17, 41:42), ] + "2013-04-21 23:52:09-2015-04-22 11:52:09" = data$issues[rownames(data$issues) %in% 37:49, ], + "2015-04-22 11:52:09-2017-04-21 23:52:09" = data$issues[rownames(data$issues) %in% c(1:17, 20:34, 54:59), ], + "2017-04-21 23:52:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(18:19, 50:53), ] ), mails = list( "2013-04-21 23:52:09-2015-04-22 11:52:09" = data$mails[0, ], @@ -393,6 +396,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -462,11 +466,11 
@@ patrick::with_parameters_test_that("Split a data object time-based (split.basis "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$issues[rownames(data$issues) %in% c(18, 24:26, 44:47), ], - "2016-07-12 16:00:29-2016-07-12 16:03:29" = data$issues[rownames(data$issues) %in% c(18:19, 47, 55:57), ], - "2016-07-12 16:01:59-2016-07-12 16:04:59" = data$issues[rownames(data$issues) %in% c(19, 35, 55:57), ], - "2016-07-12 16:03:29-2016-07-12 16:06:29" = data$issues[rownames(data$issues) %in% c(35, 48, 52:53), ], - "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(27, 48, 52:53), ] + "2016-07-12 15:58:59-2016-07-12 16:01:59" = data$issues[rownames(data$issues) %in% c(1:3, 21:24, 54), ], + "2016-07-12 16:00:29-2016-07-12 16:03:29" = data$issues[rownames(data$issues) %in% c(24, 32:34, 54:55), ], + "2016-07-12 16:01:59-2016-07-12 16:04:59" = data$issues[rownames(data$issues) %in% c(12, 32:34, 55), ], + "2016-07-12 16:03:29-2016-07-12 16:06:29" = data$issues[rownames(data$issues) %in% c(12, 25, 29:30), ], + "2016-07-12 16:04:59-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(4, 25, 29:30), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -518,6 +522,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -600,9 +605,9 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis "2006-04-10 15:38:13-2009-04-10 09:38:13" = data$issues[0, ], "2007-10-10 12:38:13-2010-10-10 06:38:13" = data$issues[0, ], "2009-04-10 
09:38:13-2012-04-10 03:38:13" = data$issues[0, ], - "2010-10-10 06:38:13-2013-10-10 00:38:13" = data$issues[rownames(data$issues) %in% 1:13, ], - "2012-04-10 03:38:13-2015-04-10 21:38:13" = data$issues[rownames(data$issues) %in% 1:13, ], - "2013-10-10 00:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 33:35, 44:47, 50:53, 55:57), ] + "2010-10-10 06:38:13-2013-10-10 00:38:13" = data$issues[rownames(data$issues) %in% 37:49, ], + "2012-04-10 03:38:13-2015-04-10 21:38:13" = data$issues[rownames(data$issues) %in% 37:49, ], + "2013-10-10 00:38:13-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 10:12, 21:24, 27:30, 32:34, 54:55), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -660,6 +665,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -727,10 +733,10 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis "2016-04-21 17:52:09-2017-05-23 12:32:40" = data$commit.messages ), issues = list( - "2013-04-21 23:52:09-2015-04-22 11:52:09" = data$issues[rownames(data$issues) %in% 1:13, ], + "2013-04-21 23:52:09-2015-04-22 11:52:09" = data$issues[rownames(data$issues) %in% 37:49, ], "2014-04-22 05:52:09-2016-04-21 17:52:09" = data$issues[0, ], - "2015-04-22 11:52:09-2017-04-21 23:52:09" = data$issues[rownames(data$issues) %in% c(18:40, 43:57), ], - "2016-04-21 17:52:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(14:57), ] + "2015-04-22 11:52:09-2017-04-21 23:52:09" = data$issues[rownames(data$issues) %in% c(1:17, 20:34, 54:59), ], + "2016-04-21 17:52:09-2017-05-23 
12:32:40" = data$issues[rownames(data$issues) %in% c(1:34, 50:59), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -779,6 +785,7 @@ patrick::with_parameters_test_that("Split a data object time-based (split.basis ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) net.conf = NetworkConf$new() ## data object @@ -839,6 +846,7 @@ patrick::with_parameters_test_that("Split a data object time-based (bins = ... ) ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -896,7 +904,7 @@ patrick::with_parameters_test_that("Split a data object time-based (bins = ... ) "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$commit.messages ), issues = list( - "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$issues[rownames(data$issues) %in% c(18:40, 43:57), ] + "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$issues[rownames(data$issues) %in% c(1:17, 20:34, 54:59), ] ), mails = list( "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$mails[rownames(data$mails) %in% 13:17, ] @@ -934,6 +942,7 @@ patrick::with_parameters_test_that("Split a data object time-based (bins = ... , ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -994,8 +1003,8 @@ patrick::with_parameters_test_that("Split a data object time-based (bins = ... 
, "2016-12-31 23:59:59-2017-06-03 03:03:03" = data$commit.messages ), issues = list( - "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$issues[rownames(data$issues) %in% c(18:40, 43:57), ], - "2016-12-31 23:59:59-2017-06-03 03:03:03" = data$issues[rownames(data$issues) %in% c(14:17, 41:42), ] + "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$issues[rownames(data$issues) %in% c(1:17, 20:34, 54:59), ], + "2016-12-31 23:59:59-2017-06-03 03:03:03" = data$issues[rownames(data$issues) %in% c(18:19, 50:53), ] ), mails = list( "2016-01-01 00:00:00-2016-12-31 23:59:59" = data$mails[rownames(data$mails) %in% 13:17, ], @@ -1075,6 +1084,7 @@ patrick::with_parameters_test_that("Split a data object time-based using custom ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) proj.conf$update.value("custom.event.timestamps.file", "custom-events.list") @@ -1141,10 +1151,10 @@ patrick::with_parameters_test_that("Split a data object time-based using custom "2016-08-08 00:00:00-2016-10-05 09:00:00" = data$commit.messages ), issues = list( - "2016-07-12 15:00:00-2016-07-12 16:00:00" = data$issues[rownames(data$issues) %in% c(24:26, 33:34, 44:46), ], - "2016-07-12 16:00:00-2016-07-12 16:05:00" = data$issues[rownames(data$issues) %in% c(18:19, 35, 47, 52:53, 55:57), ], - "2016-07-12 16:05:00-2016-08-08 00:00:00" = data$issues[rownames(data$issues) %in% c(20:23, 27:28, 31:32, 43, 48:49, 54), ], - "2016-08-08 00:00:00-2016-10-05 09:00:00" = data$issues[rownames(data$issues) %in% c(29, 36), ] + "2016-07-12 15:00:00-2016-07-12 16:00:00" = data$issues[rownames(data$issues) %in% c(1:3, 10:11, 21:23), ], + "2016-07-12 16:00:00-2016-07-12 16:05:00" = data$issues[rownames(data$issues) %in% c(12, 24, 29:30, 32:34, 54:55), ], + "2016-07-12 
16:05:00-2016-08-08 00:00:00" = data$issues[rownames(data$issues) %in% c(4:5, 8:9, 20, 25:26, 31, 56:59), ], + "2016-08-08 00:00:00-2016-10-05 09:00:00" = data$issues[rownames(data$issues) %in% c(6, 13), ] ), mails = list( ## comments indicate rownames when pasta is not configured @@ -1315,6 +1325,7 @@ patrick::with_parameters_test_that("Split a data object time-based with equal-si ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1377,9 +1388,9 @@ patrick::with_parameters_test_that("Split a data object time-based with equal-si "2016-07-12 16:04:01-2016-07-12 16:06:33" = data$commit.messages ), issues = list( - "2016-07-12 15:58:59-2016-07-12 16:01:30" = data$issues[rownames(data$issues) %in% c(24:26, 44:47), ], - "2016-07-12 16:01:30-2016-07-12 16:04:01" = data$issues[rownames(data$issues) %in% c(18:19, 35, 55:57), ], - "2016-07-12 16:04:01-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(27, 48, 52:53), ] + "2016-07-12 15:58:59-2016-07-12 16:01:30" = data$issues[rownames(data$issues) %in% c(1:3, 21:24), ], + "2016-07-12 16:01:30-2016-07-12 16:04:01" = data$issues[rownames(data$issues) %in% c(12, 32:34, 54:55), ], + "2016-07-12 16:04:01-2016-07-12 16:06:33" = data$issues[rownames(data$issues) %in% c(4, 25, 29:30), ] ), mails = list( "2016-07-12 15:58:59-2016-07-12 16:01:30" = data$mails[0, ], @@ -1425,6 +1436,7 @@ patrick::with_parameters_test_that("Split a data object time-based with equal-si ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("jira", "github")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = 
test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1493,8 +1505,8 @@ patrick::with_parameters_test_that("Split a data object time-based with equal-si issues = list( "2004-10-09 18:38:13-2007-09-18 06:00:04" = data$issues[0, ], "2007-09-18 06:00:04-2010-08-26 17:21:55" = data$issues[0, ], - "2010-08-26 17:21:55-2013-08-04 04:43:46" = data$issues[rownames(data$issues) %in% 1:13, ], - "2013-08-04 04:43:46-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(18:19, 24:26, 33:35, 44:47, 50:53, 55:57), ] + "2010-08-26 17:21:55-2013-08-04 04:43:46" = data$issues[rownames(data$issues) %in% 37:49, ], + "2013-08-04 04:43:46-2016-07-12 16:05:38" = data$issues[rownames(data$issues) %in% c(1:3, 10:12, 21:24, 27:30, 32:34, 54:55), ] ), mails = list( ## comments indicate row names when pasta is not configured @@ -1543,6 +1555,7 @@ patrick::with_parameters_test_that("Split a data object time-based with equal-si ## configuration objects proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("issues.from.source", c("github", "jira")) proj.conf$update.value("issues.only.comments", FALSE) proj.conf$update.values(list(pasta = test.pasta, synchronicity = test.synchronicity)) net.conf = NetworkConf$new() @@ -1605,9 +1618,9 @@ patrick::with_parameters_test_that("Split a data object time-based with equal-si "2016-01-12 00:19:09-2017-05-23 12:32:40" = data$commit.messages ), issues = list( - "2013-04-21 23:52:09-2014-09-01 12:05:39" = data$issues[rownames(data$issues) %in% 1:13, ], + "2013-04-21 23:52:09-2014-09-01 12:05:39" = data$issues[rownames(data$issues) %in% 37:49, ], "2014-09-01 12:05:39-2016-01-12 00:19:09" = data$issues[0, ], - "2016-01-12 00:19:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% 14:57, ] + "2016-01-12 00:19:09-2017-05-23 12:32:40" = data$issues[rownames(data$issues) %in% c(1:34, 50:59), ] ), mails = list( "2013-04-21 23:52:09-2014-09-01 12:05:39" = 
data$mails[0, ], From c8e6f45111e487fadbe7f0a13c7595eb23f3af6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 26 Jun 2024 12:51:36 +0200 Subject: [PATCH 058/130] Remove support for R version 3.6 from CI and install script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deprecating 3.6 was decided in PR #264 because a subpackage of testthat now depends on R versions above 4.0. Signed-off-by: Maximilian Löffler --- .github/workflows/pull_request.yml | 4 ++-- install.R | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 3ddb30f4..7be58b2a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -11,7 +11,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## -## Copyright 2023-2024 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Thomas Bock ## All Rights Reserved. @@ -37,7 +37,7 @@ jobs: strategy: fail-fast: false matrix: - r-version: ['3.6', '4.0', '4.1', '4.2', '4.3', 'latest'] + r-version: ['4.0', '4.1', '4.2', '4.3', '4.4', 'latest'] steps: - name: Checkout Repo diff --git a/install.R b/install.R index cabc8352..1af5c35a 100644 --- a/install.R +++ b/install.R @@ -78,9 +78,7 @@ if (length(p) > 0) { Matrix.version = installed.packages()[rownames(installed.packages()) == "Matrix", "Version"] if (compareVersion(Matrix.version, "1.3.0") == -1) { print("WARNING: Matrix version 1.3.0 or higher is necessary for using coronet. 
Re-install package Matrix...") - #install.packages("Matrix", dependencies = NA, verbose = TRUE, quiet = TRUE) - matrix.1.3.4.url = "https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.3-4.tar.gz" - install.packages(matrix.1.3.4.url, repos = NULL, dependencies = NA, verbose = TRUE, quiet = TRUE) + install.packages("Matrix", dependencies = NA, verbose = TRUE, quiet = TRUE) Matrix.version = installed.packages()[rownames(installed.packages()) == "Matrix", "Version"] if (compareVersion(Matrix.version, "1.3.0") == -1) { print("WARNING: Re-installation of package Matrix did not end up in the necessary package version.") From fb3f5474259d4a88f4ff545691cca9d1ccde90e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 26 Jun 2024 16:44:36 +0200 Subject: [PATCH 059/130] Adjust minimal and recommended R version to exclude 3.6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e8bc0877..92594def 100644 --- a/README.md +++ b/README.md @@ -59,14 +59,14 @@ While using the package, we require the following infrastructure. #### [`R`](https://www.r-project.org/) -Minimum requirement is `R` version `3.4.4`. Hence, later `R` versions also work. (Earlier `R` versions beginning from version `3.3.1` on should also work, but some packages are not available any more for these versions, so we do not test them any more in our CI pipeline.) +Minimum requirement is `R` version `4.0.5`. Hence, later `R` versions also work. (Earlier `R` versions beginning from version `3.3.1` on should also work, but some packages are not available any more for these versions, so we do not test them any more in our CI pipeline.) 
-We currently *recommend* `R` version `4.1.1` or `3.6.3` for reliability reasons and `packrat` compatibility, but also later `R` versions should work (and are tested using our CI script). +We currently *recommend* `R` version `4.1.1` or `4.3.0` for reliability reasons and `packrat` compatibility, but also later `R` versions should work (and are tested using our CI script). #### [`packrat`](http://rstudio.github.io/packrat/) (recommended) The local package manager of `R` enables the user to store all needed `R` packages for this repository inside the repository itself. -All `R` tools and IDEs should provide a more sophisticated interface for the interaction with `packrat`([RStudio](https://www.rstudio.com/) does). +All `R` tools and IDEs should provide a more sophisticated interface for the interaction with `packrat`([RStudio](https://www.rstudio.com/) does). #### Folder structure of the input data @@ -141,9 +141,9 @@ Alternatively, you can run `Rscript install.R` to install the packages. 
- `viridis`: For plotting of networks with nice colors - `jsonlite`: For parsing the issue data - `rTensor`: For calculating EDCPTD centrality -- `Matrix`: For sparse matrix representation of large adjacency matrices +- `Matrix`: For sparse matrix representation of large adjacency matrices (package version `1.3.0` or higher is mandatory) - `fastmap`: For fast implementation of a map -- `purrr`: For fast implementtion of a mapping function +- `purrr`: For fast implementation of a mapping function ### Submodule From 8bcbc81db521877908d2e5c2989082ed672f2a3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 2 Jul 2024 08:26:13 +0200 Subject: [PATCH 060/130] Improve consistency of 'issues.from.source' parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- README.md | 2 +- util-conf.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 92594def..86b2671c 100644 --- a/README.md +++ b/README.md @@ -574,7 +574,7 @@ There is no way to update the entries, except for the revision-based parameters. * [*`TRUE`*, `FALSE`] - `issues.from.source` * Choose from which sources the issue data on disk is read in. Multiple sources can be chosen. - * [*`github`, `jira`*] + * [*`github`*, `jira`] - `issues.locked` * Lock issues to prevent them from being read if not yet present when calling the getter. 
* [`TRUE`, *`FALSE`*] diff --git a/util-conf.R b/util-conf.R index 13b3a6f3..d1b8c0c8 100644 --- a/util-conf.R +++ b/util-conf.R @@ -424,7 +424,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, issues.from.source = list( default = "github", type = "character", - allowed = c("jira", "github"), + allowed = c("github", "jira"), allowed.number = Inf ), issues.locked = list( From ff30f3238b1bf2539280d0d055a5d925c197c271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 2 Jul 2024 08:27:10 +0200 Subject: [PATCH 061/130] Add tests for uncovered functionality in 'util-misc' and 'util-networks' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- tests/test-misc.R | 29 +++++++++++++++++++++++++++++ tests/test-networks.R | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/tests/test-misc.R b/tests/test-misc.R index 562540b5..0a2f4d0b 100644 --- a/tests/test-misc.R +++ b/tests/test-misc.R @@ -19,6 +19,35 @@ ## All Rights Reserved. 
+## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Network data ------------------------------------------------------------ + +test_that("Get edgelist augmented with timestamps", { + + ## construct network + edges = list(list("A", "A"), list("D", "C"), list("C", "A"), list("B", "C")) + timestamps = c("2016-12-07 15:30:02", "2016-08-07 15:37:02", "2016-07-12 15:59:25", "2016-07-12 15:59:59") + network = + igraph::make_empty_graph(n = 0, directed = TRUE) + + igraph::vertices("A", "B", "C", "D") + + igraph::edges(edges, relation = "mail", date = timestamps) + + + ## get edgelist augmented with timestamps + edgelist = get.edgelist.with.timestamps(network) + + ## check correctness + expect_equal(names(edgelist), c("from", "to", "date")) + expect_equal(nrow(edgelist), 4) + lapply(1:4, function(i) { + actual = edgelist[i, ] + expect_equal(actual[["from"]], edges[[i]][[1]]) + expect_equal(actual[["to"]], edges[[i]][[2]]) + expect_equal(actual[["date"]], timestamps[i]) + }) +}) + + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Parameter verification -------------------------------------------------- diff --git a/tests/test-networks.R b/tests/test-networks.R index cdf1634f..bda78eb0 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -271,6 +271,44 @@ test_that("Simplify multiple basic multi-relational networks", { } }) +test_that("Remove isolated vertices", { + + ## construct network + edges = c("A", "A", "D", "C", "E", "C") + network = + igraph::make_empty_graph(n = 0, directed = TRUE) + + igraph::vertices("A", "B", "C", "D", "E", "F") + + igraph::edges(edges, relation = "cochange") + + ## remove isolate vertices + network = delete.isolates(network) + + ## check correctness + expect_identical(igraph::vertex_attr(network, "name"), c("A", "C", "D", "E")) + +}) + +test_that("Remove isolated authors given a specific edge type", { + + ## construct network + edges_inter = c("A", "A", "D", "C", "E", 
"C") + edges_intra = c("F", "D", "A", "E", "D", "B") + network = + igraph::make_empty_graph(n = 0, directed = TRUE) + + igraph::vertices("A", "B", "C", "D", "E", "F", type = TYPE.AUTHOR) + + igraph::edges(edges_inter, relation = "cochange", type = TYPE.EDGES.INTER) + + igraph::edges(edges_intra, relation = "cochange", type = TYPE.EDGES.INTRA) + + ## remove isolate vertices + network.without.isolates.inter = delete.authors.without.specific.edges(network, specific.edge.type = TYPE.EDGES.INTER) + network.without.isolates.intra = delete.authors.without.specific.edges(network, specific.edge.type = TYPE.EDGES.INTRA) + + ## check correctness + expect_identical(igraph::vertex_attr(network.without.isolates.inter, "name"), c("A", "C", "D", "E")) + expect_identical(igraph::vertex_attr(network.without.isolates.intra, "name"), c("A", "B", "D", "E", "F")) + +}) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Merge ------------------------------------------------------------------- From af80551d0615a49b86e45ff596bd75941ee88f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 3 Jul 2024 19:20:20 +0200 Subject: [PATCH 062/130] Test simplification of networks with graph attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- tests/test-networks.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test-networks.R b/tests/test-networks.R index bda78eb0..e77cecef 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -243,6 +243,11 @@ test_that("Simplify multiple basic multi-relational networks", { network.B = igraph::add_edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "cochange") } + ## add graph attributes + network.A = igraph::set_graph_attr(network.A, "name", "network.A") + network.B = igraph::set_graph_attr(network.B, "name", "network.B") + networks = list(A = network.A, B = 
network.B) + network.A.expected = igraph::make_empty_graph(n = 0, directed = FALSE) + igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") + igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") + @@ -251,7 +256,6 @@ test_that("Simplify multiple basic multi-relational networks", { igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) + igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "mail") + igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "cochange") - networks = list(A = network.A, B = network.B) ## simplify networks without simplifying multiple relations into single edges networks.simplified = simplify.networks(networks, simplify.multiple.relations = FALSE) @@ -269,6 +273,10 @@ test_that("Simplify multiple basic multi-relational networks", { expect_identical(igraph::E(networks.simplified[[i]])$type[[1]], "Unipartite") expect_identical(igraph::E(networks.simplified[[i]])$relation[[1]], c("cochange", "mail")) } + + ## verify graph attributes + expect_identical(igraph::graph_attr(networks.simplified[["A"]], "name"), "network.A") + expect_identical(igraph::graph_attr(networks.simplified[["B"]], "name"), "network.B") }) test_that("Remove isolated vertices", { From 3c2f9109c36e7ec297265875001ef5f23c9f31c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Sat, 6 Jul 2024 08:52:30 +0200 Subject: [PATCH 063/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e1bac1c5..3d093756 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,12 +11,18 @@ - Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) - Add line-based code coverage reports into CI pipeline. 
Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) +- Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) ### Changed/Improved +- Change the default value for the `issues.from.source` configuration parameter. Instead of reading JIRA and GitHub issues together, which was the previous default, the new default value causes only GitHub issue data to be read. To restore the previous default behavior and read data from both issue sources, this now needs to be manually configured when needed. 
(PR #264, 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d, 8c8080cb9caf115f19d9f145ad6e6c108b131a67, 8bcbc81db521877908d2e5c2989082ed672f2a3b) +- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037) +- Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) +- Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) + ### Fixed -- Ensure the correct installation of a compatible `Matrix` version for R 3.6 (PR #262, cb1cf42b2136f35b7e85239d36d5f91ff05d8cd7) +- Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) ## 4.4 From ab73271781e8e9a0715f784936df4b371d64c338 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 6 May 2024 12:53:34 +0200 Subject: [PATCH 064/130] Add Config parameters and basic top-level method 'get.commit.network' will delegate calls to corresponding methods, depending on 'commit.relation' config parameter in NetworkConf Signed-off-by: Leo Sendelbach --- util-conf.R | 12 ++++++++++++ util-networks.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/util-conf.R b/util-conf.R index d1b8c0c8..35e5303e 100644 --- a/util-conf.R +++ b/util-conf.R @@ -837,6 +837,18 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 1 ), + commit.relation = list( + default = "cochange", + type = "character", + allowed = c("cochange", "commit.interaction"), + allowed.number = Inf + ), + commit.directed = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), edges.for.base.artifacts = list( default = TRUE, type = "logical", diff --git a/util-networks.R b/util-networks.R index a9b19e11..e4581fac 100644 --- a/util-networks.R +++ b/util-networks.R @@ 
-44,6 +44,7 @@ requireNamespace("lubridate") # for date conversion ## vertex types TYPE.AUTHOR = "Author" TYPE.ARTIFACT = "Artifact" +TYPE.COMMIT = "Commit" ## edge types TYPE.EDGES.INTRA = "Unipartite" @@ -929,6 +930,51 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(net) }, + #' Get the generic commit network. + #' + #' @return the generic artifact network + get.commit.network = function() { + logging::loginfo("Constructing artifact network.") + + ## construct network + relations = private$network.conf$get.value("commit.relation") + networks = lapply(relations, function(relation) { + network = switch( + relation, + cochange = private$get.commit.network.cochange(), + commit.interaction = private$get.commit.network.commit.interaction(), + stop(sprintf("The artifact relation '%s' does not exist.", relation)) + ) + + ## set edge attributes on all edges + igraph::E(network)$type = TYPE.EDGES.INTRA + igraph::E(network)$relation = relation + + ## set vertex attribute 'kind' on all edges, corresponding to relation + vertex.kind = private$get.vertex.kind.for.relation(relation) + network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) + + return(network) + }) + net = merge.networks(networks) + + ## set vertex and edge attributes for identifaction + igraph::V(net)$type = TYPE.COMMIT + + ## simplify network if wanted + if (private$network.conf$get.value("simplify")) { + net = simplify.network(net, simplify.multiple.relations = + private$network.conf$get.value("simplify.multiple.relations")) + } + + ## add range attribute for later analysis (if available) + if ("RangeData" %in% class(private$proj.data)) { + attr(net, "range") = private$proj.data$get.range() + } + + return(net) + }, + #' Get the (real) bipartite network. 
#' #' @return the bipartite network From 3ed87e9e5cffe247ab9bc3b4ae04d4fb5e838261 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 6 May 2024 14:15:26 +0200 Subject: [PATCH 065/130] Add functions for network construction functions 'get.commit.network.cochange' and 'get.commit.network.commit.interaction' are called in 'get.commit.network'. Also add 'group.commits.by.data.column', a helper function used in constructing the cochange commit network. Signed-off-by: Leo Sendelbach --- util-data.R | 29 ++++++++++++++++ util-networks.R | 91 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/util-data.R b/util-data.R index 988146a5..8d68765f 100644 --- a/util-data.R +++ b/util-data.R @@ -2143,6 +2143,35 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, + ## * * processed data ---------------------------------------------- + + #' Group the commits of the given \code{data.source} by the given \code{group.column}. + #' For each group, the column \code{"hash"} is duplicated and prepended to each + #' group's data as first column (see below for details). + #' + #' Example: To obtain the commits that changed the same source-code artifact, + #' call \code{group.commits.by.data.column("commits", "artifact")}. + #' + #' @param data.source The specified data source. One of \code{"commits"}, + #' \code{"mails"}, and \code{"issues"}. 
[default: "commits"] + #' @param group.column The column to group the commits of the given \code{data.source} by + #' [default: "artifact"] + #' + #' @return a list mapping each distinct item in \code{group.column} to all corresponding + #' data items from \code{data.source}, with the column \code{"hash"} duplicated + #' as first column (with name \code{"data.vertices"}) + #' + #' @seealso ProjectData$group.data.by.column + group.commits.by.data.column = function(data.source = c("commits", "mails", "issues"), + group.column = "artifact") { + logging::loginfo("Grouping commits by data column.") + + ## store the commits per group that is determined by 'group.column' + mylist = self$group.data.by.column(data.source, group.column, "hash") + + return(mylist) + }, + #' Group the authors of the given \code{data.source} by the given \code{group.column}. #' For each group, the column \code{"author.name"} is duplicated and prepended to each #' group's data as first column (see below for details). diff --git a/util-networks.R b/util-networks.R index e4581fac..8e221021 100644 --- a/util-networks.R +++ b/util-networks.R @@ -123,6 +123,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.callgraph = NULL, # igraph artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph + commit.network.commit.interaction = NULL, #igraph + commit.network.cochange = NULL, #igraph ## * * relation-to-vertex-kind mapping ----------------------------- @@ -680,6 +682,87 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, + #' Build and get the commit network with commit-interactions as the relation. 
+ #' + #' @return the commit-interaction commit network + get.commit.network.commit.interaction = function() { + + logging::logdebug("get.commit.network.commit.interaction: starting.") + + ## do not compute anything more than once + if (!is.null(private$commit.network.commit.interaction)) { + logging::logdebug("get.commit.network.commit.interaction: finished. (already existing)") + return(private$commit.network.commit.interaction) + } + + ## get the authors that appear in the commit-interaction data as the vertices of the network + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.hash"]], + private$proj.data$get.commit.interactions()[["commit.hash"]])) + vertices = data.frame(name = vertices) + + ## get the commit-interaction data as the edge data of the network + edges = private$proj.data$get.commit.interactions() + ## set the commits as the 'to' and 'from' of the network and order the dataframe + edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", + "file", "base.author", "base.func", "base.file")] + colnames(edges)[1] = "to" + colnames(edges)[2] = "from" + commit.net.data = list(vertices = vertices, edges = edges) + ## construct the network + commit.net = construct.network.from.edge.list( + commit.net.data[["vertices"]], + commit.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("commit.directed"), + available.edge.attributes = private$proj.data$ + get.data.columns.for.data.source("commit.interactions") + ) + + private$commit.network.commit.interaction = commit.net + logging::logdebug("get.commit.network.commit.interaction: finished.") + + return(commit.net) + }, + + #' Get the co-change-based commit network, + #' If it does not already exist build it first. 
+ #' + #' @return the commit network with cochange realtion + get.commit.network.cochange = function() { + + logging::logdebug("get.commit.network.cochange: starting.") + + ## do not compute anything more than once + if (!is.null(private$artifacts.network.cochange)) { + logging::logdebug("get.commit.network.cochange: finished. (already existing)") + return(private$commit.network.cochange) + } + + ## construct edge list based on commit--artifact data + commit.net.data.raw = private$proj.data$group.commits.by.data.column("commits", "artifact") + commit.net.data = construct.edge.list.from.key.value.list( + commit.net.data.raw, + network.conf = private$network.conf, + directed = FALSE, + respect.temporal.order = TRUE + ) + + ## construct network from obtained data + commit.net = construct.network.from.edge.list( + commit.net.data[["vertices"]], + commit.net.data[["edges"]], + network.conf = private$network.conf, + directed = FALSE, + available.edge.attributes = private$proj.data$get.data.columns.for.data.source("commits") + ) + + ## store network + private$commit.network.cochange = commit.net + logging::logdebug("get.commit.network.cochange: finished.") + + return(commit.net) + }, + ## * * bipartite relations ------------------------------------------ #' Get the key-value data for the bipartite relations, @@ -754,6 +837,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$artifacts.network.cochange = NULL private$artifacts.network.issue = NULL private$artifacts.network.mail = NULL + private$commit.network.commit.interaction = NULL + private$commit.network.cochange = NULL private$proj.data = private$proj.data.original if (private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() @@ -932,9 +1017,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' Get the generic commit network. 
#' - #' @return the generic artifact network + #' @return the generic commit network get.commit.network = function() { - logging::loginfo("Constructing artifact network.") + logging::loginfo("Constructing commit network.") ## construct network relations = private$network.conf$get.value("commit.relation") @@ -943,7 +1028,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", relation, cochange = private$get.commit.network.cochange(), commit.interaction = private$get.commit.network.commit.interaction(), - stop(sprintf("The artifact relation '%s' does not exist.", relation)) + stop(sprintf("The commit relation '%s' does not exist.", relation)) ) ## set edge attributes on all edges From 93b551875d46b8e0eba415871eb79746bba81e72 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 15 May 2024 12:36:29 +0200 Subject: [PATCH 066/130] Add test file for commit network Also add first test for commit-interaction based commit network and fix a minor error in network creation Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 89 ++++++++++++++++++++++++++++++++++++ util-networks.R | 2 +- 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 tests/test-networks-commit.R diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R new file mode 100644 index 00000000..c5b310cf --- /dev/null +++ b/tests/test-networks-commit.R @@ -0,0 +1,89 @@ +## This file is part of coronet, which is free software: you +## can redistribute it and/or modify it under the terms of the GNU General +## Public License as published by the Free Software Foundation, version 2. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +## +## Copyright 2024 by Leo Sendelbach + +## All Rights Reserved. + + +context("Network-building functionality.") + +## +## Context +## + +CF.DATA = file.path(".", "codeface-data") +CF.SELECTION.PROCESS = "testing" +CASESTUDY = "test" +ARTIFACT = "feature" # function, feature, file, featureexpression + +## use only when debugging this file independently +if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") + + +## +## Tests for author.all.authors and author.only.committers +## + + + +patrick::with_parameters_test_that("Network construction with commit-interactions as relation", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "commit.interaction", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + 
"1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), + interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), + base.author = c("Olaf", "Thomas", "Karl", "Thomas"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") + ) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) \ No newline at end of file diff --git a/util-networks.R b/util-networks.R index 8e221021..1a4de64a 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1036,7 +1036,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", igraph::E(network)$relation = relation ## set vertex attribute 'kind' on all edges, corresponding to relation - vertex.kind = private$get.vertex.kind.for.relation(relation) + vertex.kind = TYPE.COMMIT network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) return(network) From dd90d9a8e2e521ca04fd7a659b1e0e6bb6fd622a Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 15 May 2024 13:34:32 +0200 Subject: [PATCH 067/130] Change how vertex kind is initialized Initializing vertex kind to 'TYPE.COMMIT' in the correct position Signed-off-by: Leo Sendelbach --- 
util-networks.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/util-networks.R b/util-networks.R index 1a4de64a..bee20195 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1035,15 +1035,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", igraph::E(network)$type = TYPE.EDGES.INTRA igraph::E(network)$relation = relation - ## set vertex attribute 'kind' on all edges, corresponding to relation - vertex.kind = TYPE.COMMIT - network = igraph::set.vertex.attribute(network, "kind", value = vertex.kind) - return(network) }) net = merge.networks(networks) ## set vertex and edge attributes for identifaction + igraph::V(net)$kind = TYPE.COMMIT igraph::V(net)$type = TYPE.COMMIT ## simplify network if wanted From 8e863a2fa26dc6f4ddd0d242712627061dae283c Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 May 2024 15:18:53 +0200 Subject: [PATCH 068/130] Add tests and minor fixes for cochange network Tests for each artifact type, parameterized for directed attribute Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 136 ++++++++++++++++++++++++++++++++++- util-networks.R | 6 +- 2 files changed, 139 insertions(+), 3 deletions(-) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index c5b310cf..249accfc 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -25,7 +25,6 @@ context("Network-building functionality.") CF.DATA = file.path(".", "codeface-data") CF.SELECTION.PROCESS = "testing" CASESTUDY = "test" -ARTIFACT = "feature" # function, feature, file, featureexpression ## use only when debugging this file independently if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data") @@ -82,6 +81,141 @@ patrick::with_parameters_test_that("Network construction with commit-interaction relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) 
+ expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) + +patrick::with_parameters_test_that("Network construction with cochange as relation, file as artifact", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("File", "File"), + artifact = c("test.c", "test2.c"), + weight = c(1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange") + ) + + if (test.directed) { + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + } + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) + +patrick::with_parameters_test_that("Network construction with cochange as relation, function as artifact", { + ## 
configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "function") + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Function", "Function", "Function", "Function", "Function", "Function"), + artifact = c("File_Level", "File_Level", "File_Level", "File_Level", "File_Level", "File_Level"), + weight = c(1, 1, 1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, + TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange", "cochange", "cochange") + ) + + if (test.directed) { + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + } + network = 
igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) + +patrick::with_parameters_test_that("Network construction with cochange as relation, feature as artifact", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "feature") + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = test.directed)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + + if (test.directed) { + 
edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + } + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), diff --git a/util-networks.R b/util-networks.R index bee20195..9e6998a6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -743,16 +743,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", commit.net.data = construct.edge.list.from.key.value.list( commit.net.data.raw, network.conf = private$network.conf, - directed = FALSE, + directed = private$network.conf$get.value("commit.directed"), respect.temporal.order = TRUE ) + commit.net.data$edges <- commit.net.data$edges[, -which(colnames(commit.net.data$edges) + %in% c("date", "hash", "file"))] ## construct network from obtained data commit.net = construct.network.from.edge.list( commit.net.data[["vertices"]], commit.net.data[["edges"]], network.conf = private$network.conf, - directed = FALSE, + directed = private$network.conf$get.value("commit.directed"), available.edge.attributes = private$proj.data$get.data.columns.for.data.source("commits") ) From 175d385eb67792a284216de698bf7980eeac0d35 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 May 2024 17:28:24 +0200 Subject: [PATCH 069/130] Add commit network to 'get.networks' Commit Network now also built when calling function 'get.networks'. 
Signed-off-by: Leo Sendelbach --- util-networks.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/util-networks.R b/util-networks.R index 9e6998a6..e3f7e5dc 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1180,12 +1180,15 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", authors.net = self$get.author.network() ## artifact relation artifacts.net = self$get.artifact.network() + ## commit relation + commit.net = self$get.commit.network() return(list( "authors.to.artifacts" = authors.to.artifacts, "bipartite.net" = bipartite.net, "authors.net" = authors.net, - "artifacts.net" = artifacts.net + "artifacts.net" = artifacts.net, + "commit.net" = commit.net )) }, From f9b329319e04471a9bc252a2a3541d9bfca9185c Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 29 May 2024 12:33:27 +0200 Subject: [PATCH 070/130] Add commit network to showcase Show how to construct the commit network in the showcase. Also fix a bug that resulted in the showcase crashing. Signed-off-by: Leo Sendelbach --- showcase.R | 9 ++++++++- util-networks.R | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/showcase.R b/showcase.R index 74da2497..d115c1c7 100644 --- a/showcase.R +++ b/showcase.R @@ -24,6 +24,7 @@ ## Copyright 2021 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann ## Copyright 2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -65,6 +66,7 @@ ARTIFACT = "feature" # function, feature, file, featureexpression (only relevant AUTHOR.RELATION = "mail" # mail, cochange, issue ARTIFACT.RELATION = "cochange" # cochange, callgraph, mail, issue +COMMIT.RELATION = "commit.interaction" # commit.interaction, cochange ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -73,13 +75,16 @@ ARTIFACT.RELATION = "cochange" # cochange, callgraph, mail, issue ## initialize project configuration proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) proj.conf$update.value("commits.filter.base.artifact", TRUE) +proj.conf$update.value("commit.interactions", TRUE) ## specify that custom event timestamps should be read from 'custom-events.list' proj.conf$update.value("custom.event.timestamps.file", "custom-events.list") proj.conf$print() ## initialize network configuration net.conf = NetworkConf$new() -net.conf$update.values(updated.values = list(author.relation = AUTHOR.RELATION, artifact.relation = ARTIFACT.RELATION)) +net.conf$update.values(updated.values = list(author.relation = AUTHOR.RELATION, + artifact.relation = ARTIFACT.RELATION, + commit.relation = COMMIT.RELATION)) net.conf$print() ## get ranges @@ -141,6 +146,7 @@ x$get.author.network() x$update.network.conf(updated.values = list(author.directed = FALSE)) x$get.author.network() x$get.artifact.network() +x$get.commit.network() x$reset.environment() x$get.networks() x$update.network.conf(updated.values = list(author.only.committers = FALSE, author.directed = FALSE)) @@ -201,6 +207,7 @@ y$update.network.conf(updated.values = list(edge.attributes = c("date"))) y$get.author.network() y$update.network.conf(updated.values = list(edge.attributes = c("hash"))) y$get.artifact.network() +y$get.commit.network() y$get.networks() y$update.network.conf(updated.values = list(author.only.committers = FALSE, author.directed = TRUE)) h = y$get.bipartite.network() diff --git a/util-networks.R b/util-networks.R index 
e3f7e5dc..b54e4e24 100644 --- a/util-networks.R +++ b/util-networks.R @@ -733,7 +733,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::logdebug("get.commit.network.cochange: starting.") ## do not compute anything more than once - if (!is.null(private$artifacts.network.cochange)) { + if (!is.null(private$commit.network.cochange)) { logging::logdebug("get.commit.network.cochange: finished. (already existing)") return(private$commit.network.cochange) } From 05c3bc09cb1d396fd59c34a88030cdca58fd04dd Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 19 Jun 2024 15:45:07 +0200 Subject: [PATCH 071/130] Add date attribute to vertices In this process, also refactor 'construct.edge.list.from.key.value.list' method. Some more comments might be necessary. Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 15 ++ util-networks.R | 284 +++++++++++++++++++++-------------- 2 files changed, 190 insertions(+), 109 deletions(-) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 249accfc..d8023e3b 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -105,6 +105,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10"), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -148,6 +153,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10"), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -196,6 +206,11 @@ 
patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32"), kind = TYPE.COMMIT, type = TYPE.COMMIT ) diff --git a/util-networks.R b/util-networks.R index b54e4e24..cfaece00 100644 --- a/util-networks.R +++ b/util-networks.R @@ -355,7 +355,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network.conf = private$network.conf, directed = FALSE, respect.temporal.order = TRUE, - artifact.edges = TRUE + network.type = "artifact" ) ## construct network from obtained data @@ -740,15 +740,15 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## construct edge list based on commit--artifact data commit.net.data.raw = private$proj.data$group.commits.by.data.column("commits", "artifact") + commit.net.data = construct.edge.list.from.key.value.list( commit.net.data.raw, network.conf = private$network.conf, directed = private$network.conf$get.value("commit.directed"), - respect.temporal.order = TRUE + respect.temporal.order = TRUE, + network.type = "commit" ) - commit.net.data$edges <- commit.net.data$edges[, -which(colnames(commit.net.data$edges) - %in% c("date", "hash", "file"))] ## construct network from obtained data commit.net = construct.network.from.edge.list( commit.net.data[["vertices"]], @@ -1318,14 +1318,16 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' i.e., whether to only add edges from the later event to the previous one. #' If \code{NA} is passed, the default value is taken. 
#' [default: directed] -#' @param artifact.edges whether the key value data represents edges in an artifact network based -#' on the cochange relation -#' [default: FALSE] +#' @param network.type the type of network for which the key value data is to be used as edges [default: "author"] #' #' @return a list of two data.frames named 'vertices' and 'edges' (compatible with return value #' of \code{igraph::as.data.frame}) construct.edge.list.from.key.value.list = function(list, network.conf, directed = FALSE, - respect.temporal.order = directed, artifact.edges = FALSE) { + respect.temporal.order = directed, + network.type = c("author", "artifact", "commit")) { + + network.type = match.arg.or.default(network.type, default = "author", several.ok = FALSE) + logging::loginfo("Create edges.") logging::logdebug("construct.edge.list.from.key.value.list: starting.") @@ -1347,7 +1349,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed ## replace it with the \code{author.name} attribute as artifacts cannot cause ## edges in artifact networks, authors can edge.attributes = network.conf$get.value("edge.attributes") - if (artifact.edges) { + if (network.type == "artifact") { artifact.index = match("artifact", edge.attributes, nomatch = NA) if (!is.na(artifact.index)) { edge.attributes = edge.attributes[-artifact.index] @@ -1355,138 +1357,202 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed } } + ## if edges in a commit network contain 'date', 'hash' or 'file' attributes, remove them + ## as they belong to commits, which are the vertices in commit networks + if (network.type == "commit") { + cols.which = which(edge.attributes %in% c("date", "hash", "file")) + edge.attributes <- edge.attributes[-cols.which] + } + if (respect.temporal.order) { ## for all subsets (sets), connect all items in there with the previous ones - edge.list.data = parallel::mclapply(list, function(set) { - number.edges = sum(seq_len(nrow(set)) 
- 1) - logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", - match(attr(set, "group.name"), keys), keys.number, - attr(set, "group.type"), attr(set, "group.name"), number.edges) - - ## Skip artifacts with many, many edges - if (number.edges > network.conf$get.value("skip.threshold")) { - logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", - attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) - return(NULL) - } + edge.list.data = parallel::mclapply(list, construct.edges.temporal.order, network.conf, + edge.attributes, keys, keys.number, network.type) - ## queue of already processed artifacts - edge.list.set = data.frame() - vertices.processed.set = c() + edge.list = plyr::rbind.fill(edge.list.data) + vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) - ## connect the current item to all previous ones - for (item.no in seq_len(nrow(set))) { - item = set[item.no, ] + } else { - ## get vertex data - item.vertex = item[["data.vertices"]] + ## for all items in the sublists, construct the cartesian product + edge.list.data = parallel::mclapply(list, construct.edges.no.temporal.order, network.conf, + edge.attributes, keys, keys.number) - ## get edge attributes - cols.which = edge.attributes %in% colnames(item) - item.edge.attrs = item[ , edge.attributes[cols.which], drop = FALSE] + edge.list = plyr::rbind.fill(edge.list.data) + vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) - ## construct edges - combinations = expand.grid(item.vertex, vertices.processed.set, stringsAsFactors = FALSE) - if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) { - combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes - } - edge.list.set = rbind(edge.list.set, combinations) # add to edge list + } - ## mark current item as 
processed - vertices.processed.set = c(vertices.processed.set, item.vertex) - } + logging::logdebug("construct.edge.list.from.key.value.list: finished.") - ## store set of processed vertices - attr(edge.list.set, "vertices.processed") = vertices.processed.set + if (network.type == "commit") { + vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.dates.processed")) ) + return(list( + vertices = data.frame( + name = unique(vertices.processed), + date = unique(vertices.dates.processed) + ), + edges = edge.list + )) + } else { + return(list( + vertices = data.frame( + name = unique(vertices.processed) + ), + edges = edge.list + )) + } +} - logging::logdebug("Constructing edges for %s '%s': finished.", attr(set, "group.type"), attr(set, "group.name")) +#' Constructs edge list from the given key value list respecting temporal order. +#' Helper method which is called by 'construct.edge.list.by.key.value.list'. +#' +#' @param list the given key value list +#' @param network.conf the network configuration +#' @param edge.attributes the attributes that should be on the edges of the network +#' @param keys the keays of the key value list +#' @param keys.number the amount of keys in the key value list +#' @param network.type the type of network that should be created +#' +#' @return the data for the edge list +construct.edges.temporal.order = function(set, network.conf, edge.attributes, keys, keys.number, network.type) { + number.edges = sum(seq_len(nrow(set)) - 1) + logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", + match(attr(set, "group.name"), keys), keys.number, + attr(set, "group.type"), attr(set, "group.name"), number.edges) + + ## Skip artifacts with many, many edges + if (number.edges > network.conf$get.value("skip.threshold")) { + logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", + attr(set, "group.type"), attr(set, "group.name"), 
network.conf$get.value("skip.threshold")) + return(NULL) + } - return(edge.list.set) - }) + ## queue of already processed artifacts + edge.list.set = data.frame() + vertices.processed.set = c() - edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + ## connect the current item to all previous ones + for (item.no in seq_len(nrow(set))) { + item = set[item.no, ] - } else { + ## get vertex data + item.vertex = item[["data.vertices"]] + if (network.type == "commit") { + item.vertex = data.frame(commit = item.vertex, date = get.date.string(item[["date"]])) + } - ## for all items in the sublists, construct the cartesian product - edge.list.data = parallel::mclapply(list, function(set) { - number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1)) - logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", - match(attr(set, "group.name"), keys), keys.number, - attr(set, "group.type"), attr(set, "group.name"), number.edges) - - ## Skip artifacts with many, many edges - if (number.edges > network.conf$get.value("skip.threshold")) { - logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", - attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) - return(NULL) - } + ## get edge attributes + cols.which = edge.attributes %in% colnames(item) + item.edge.attrs = item[ , edge.attributes[cols.which], drop = FALSE] - ## get vertex data - vertices = unique(set[["data.vertices"]]) + ## construct edges + combinations = c() + if (network.type == "commit") { + combinations = expand.grid(item.vertex[["commit"]], vertices.processed.set[["commit"]], stringsAsFactors = FALSE) + } else { + combinations = expand.grid(item.vertex, vertices.processed.set, stringsAsFactors = FALSE) + } - ## break if there is no author - if (length(vertices) < 1) { - return(NULL) - } + 
if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) { + combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes + } + edge.list.set = rbind(edge.list.set, combinations) # add to edge list + + ## mark current item as processed + if (network.type == "commit") { + vertices.processed.set = data.frame(commit = c(vertices.processed.set[["commit"]], item.vertex[["commit"]]), + date = c(vertices.processed.set[["date"]], item.vertex[["date"]])) + } else { + vertices.processed.set = c(vertices.processed.set, item.vertex) + } + } - ## if there is only one author, just create the vertex, but no edges - if (length(vertices) == 1) { - edges = data.frame() - attr(edges, "vertices.processed") = vertices # store set of processed vertices - return(edges) - } + ## store set of processed vertices + if (network.type == "commit") { + attr(edge.list.set, "vertices.processed") = vertices.processed.set[["commit"]] + attr(edge.list.set, "vertices.dates.processed") = vertices.processed.set[["date"]] + } else { + attr(edge.list.set, "vertices.processed") = vertices.processed.set + } + + logging::logdebug("Constructing edges for %s '%s': finished.", attr(set, "group.type"), attr(set, "group.name")) - ## get combinations - combinations = combn(vertices, 2) # all unique pairs of authors + return(edge.list.set) +} + +#' Constructs edge list from the given key value list not respecting temporal order. +#' Helper method which is called by 'construct.edge.list.by.key.value.list'. 
+#' +#' @param list the given key value list +#' @param network.conf the network configuration +#' @param edge.attributes the attributes that should be on the edges of the network +#' @param keys the keays of the key value list +#' @param keys.number the amount of keys in the key value list +#' +#' @return the data for the edge list +construct.edges.no.temporal.order = function(set, network.conf, edge.attributes, keys, keys.number) { + number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1)) + logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", + match(attr(set, "group.name"), keys), keys.number, + attr(set, "group.type"), attr(set, "group.name"), number.edges) + + ## Skip artifacts with many, many edges + if (number.edges > network.conf$get.value("skip.threshold")) { + logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", + attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) + return(NULL) + } - ## construct edge list - edges = apply(combinations, 2, function(comb) { + ## get vertex data + vertices = unique(set[["data.vertices"]]) - ## iterate over each of the two data vertices of the current combination to determine the edges - ## for which it is the sender of the edge and use the second one as the receiver of the edge - edges.by.comb.item = lapply(comb, function(comb.item) { - ## basic edge data - edge = data.frame(from = comb.item, to = comb[comb != comb.item]) + ## break if there is no author + if (length(vertices) < 1) { + return(NULL) + } - ## get edge attibutes - edge.attrs = set[set[["data.vertices"]] %in% comb.item, ] # get data for current combination item - cols.which = edge.attributes %in% colnames(edge.attrs) - edge.attrs = edge.attrs[ , edge.attributes[cols.which], drop = FALSE] + ## if there is only one author, just create the vertex, but no edges + if (length(vertices) == 1) { + edges = data.frame() + attr(edges, 
"vertices.processed") = vertices # store set of processed vertices + return(edges) + } - # add edge attributes to edge list - edgelist = cbind(edge, edge.attrs) - return(edgelist) - }) + ## get combinations + combinations = combn(vertices, 2) # all unique pairs of authors - ## union the edge lists for the combination items - edges.union = plyr::rbind.fill(edges.by.comb.item) - return(edges.union) + ## construct edge list + edges = apply(combinations, 2, function(comb) { - }) - edges = plyr::rbind.fill(edges) + ## iterate over each of the two data vertices of the current combination to determine the edges + ## for which it is the sender of the edge and use the second one as the receiver of the edge + edges.by.comb.item = lapply(comb, function(comb.item) { + ## basic edge data + edge = data.frame(from = comb.item, to = comb[comb != comb.item]) - ## store set of processed vertices - attr(edges, "vertices.processed") = vertices + ## get edge attibutes + edge.attrs = set[set[["data.vertices"]] %in% comb.item, ] # get data for current combination item + cols.which = edge.attributes %in% colnames(edge.attrs) + edge.attrs = edge.attrs[ , edge.attributes[cols.which], drop = FALSE] - return(edges) + # add edge attributes to edge list + edgelist = cbind(edge, edge.attrs) + return(edgelist) }) - edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + ## union the edge lists for the combination items + edges.union = plyr::rbind.fill(edges.by.comb.item) + return(edges.union) - } + }) + edges = plyr::rbind.fill(edges) - logging::logdebug("construct.edge.list.from.key.value.list: finished.") + ## store set of processed vertices + attr(edges, "vertices.processed") = vertices - return(list( - vertices = data.frame( - name = unique(vertices.processed) - ), - edges = edge.list - )) + return(edges) } #' Construct a network from the given lists of vertices and edges. 
From cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 26 Jun 2024 14:50:34 +0200 Subject: [PATCH 072/130] Add new function for adding vertex attributes New function allows adding vertex attributes from commit data to commit network vertices Signed-off-by: Leo Sendelbach --- tests/test-networks-commit.R | 52 +++++++++++++++++++++++++++++++++++- util-networks-covariates.R | 35 ++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index d8023e3b..8d94bec1 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -235,4 +235,54 @@ patrick::with_parameters_test_that("Network construction with cochange as relati }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) -)) \ No newline at end of file +)) + +test_that("Adding vertex attributes to a commit network", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "feature") + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(commit.relation = "cochange", + commit.directed = FALSE)) + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.commit.network() + network.new.attr = add.vertex.attribute.commit.network(network.built,proj.data, "author.name", "NO_AUTHOR") + ## build the expected network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + 
"2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT, + author.name = c("Björn", + "Olaf", + "Olaf", + "Karl", + "Thomas") + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + + network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.new.attr, network)) +}) \ No newline at end of file diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 95a3021a..ed9c2ea2 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -22,6 +22,7 @@ ## Copyright 2022 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann ## Copyright 2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -140,6 +141,40 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com return(nets.with.attr) } +#' Utility function to add a vertex attribute from commit data to a commit network. 
+#' +#' @param network the commit network +#' @param project.data the project data from which to extract the values +#' @param attr.name the name of the attribute +#' @param default.value the dafault value of the attribute +#' if it does not occur in the commit data +#' +#' @return a networks with new vertex attribute +add.vertex.attribute.commit.network = function(network, project.data, + attr.name, default.value) { + # get the commit data and extract the required data + commit.data = project.data$get.commits() + hashes = commit.data[["hash"]] + attribute = commit.data[[attr.name]] + attribute.values = c() + for (hash.num in seq_along(igraph::V(network))) { + # for each vertex, finc the position in the data frame + hash = igraph::V(network)[hash.num]$name + hash.index = match(hash, hashes, nomatch = NA) + + value = c() + # extract the correct value from the data or use the default value + if (!is.na(hash.index)) { + value = attribute[[hash.index]] + } else { + value = default.value + } + attribute.values = c(attribute.values, value) + } + net.with.attr = igraph::set.vertex.attribute(network, attr.name, value = attribute.values) + +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Author network functions ------------------------------------------------ From 21c67c1644e85a30e3a97abb93e6cfb2621e0801 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 3 Jul 2024 15:21:22 +0200 Subject: [PATCH 073/130] Add usage of new utility method to showcase 'add.vertex.attribute.commit.network' is now used in showcase. Also minor changes to documentation and performance improvement in cochange commit network creation. 
Signed-off-by: Leo Sendelbach --- showcase.R | 2 ++ tests/test-networks-commit.R | 42 +++++++++++++++++++++++++++++++++++- util-networks-covariates.R | 3 +++ util-networks.R | 3 +-- 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/showcase.R b/showcase.R index d115c1c7..3d2aece7 100644 --- a/showcase.R +++ b/showcase.R @@ -239,6 +239,8 @@ sample.pull.requests = add.vertex.attribute.author.issue.count(my.networks, x.da ## add vertex attributes for the project-level network x.net.as.list = list("1970-01-01 00:00:00-2030-01-01 00:00:00" = x$get.author.network()) sample.entire = add.vertex.attribute.author.commit.count(x.net.as.list, x.data, aggregation.level = "complete") +## add vertex attributes to commit network +add.vertex.attribute.commit.network(x$get.commit.network(), x.data, "author.name", "NO_AUTHOR") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 8d94bec1..fa3f70c4 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -249,7 +249,7 @@ test_that("Adding vertex attributes to a commit network", { network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.commit.network() - network.new.attr = add.vertex.attribute.commit.network(network.built,proj.data, "author.name", "NO_AUTHOR") + network.new.attr = add.vertex.attribute.commit.network(network.built, proj.data, "author.name", "NO_AUTHOR") ## build the expected network vertices = data.frame( name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", @@ -285,4 +285,44 @@ test_that("Adding vertex attributes to a commit network", { network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) expect_true(igraph::identical_graphs(network.new.attr, network)) + + network.new.attr = add.vertex.attribute.commit.network(network.new.attr, proj.data, "commit.id", "NO_ID") + + ## build the expected 
network + vertices = data.frame( + name = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + date = c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32"), + kind = TYPE.COMMIT, + type = TYPE.COMMIT, + author.name = c("Björn", + "Olaf", + "Olaf", + "Karl", + "Thomas"), + commit.id = c("", "", + "", "", "") + ) + edges = data.frame( + from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + artifact.type = c("Feature", "Feature", "Feature", "Feature"), + artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("cochange", "cochange", "cochange", "cochange") + ) + + network.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.new.attr, network.two)) }) \ No newline at end of file diff --git a/util-networks-covariates.R b/util-networks-covariates.R index ed9c2ea2..1e476277 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -142,6 +142,9 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com } #' Utility function to add a vertex attribute from commit data to a commit network. +#' Attribute name should be a column name of the commit data dataframe. 
+#' Default column names can be seen in 'COMMITS.LIST.COLUMNS' in 'util-read.R', +#' though more might be possible. #' #' @param network the commit network #' @param project.data the project data from which to extract the values diff --git a/util-networks.R b/util-networks.R index cfaece00..d1c49325 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1462,8 +1462,7 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke ## mark current item as processed if (network.type == "commit") { - vertices.processed.set = data.frame(commit = c(vertices.processed.set[["commit"]], item.vertex[["commit"]]), - date = c(vertices.processed.set[["date"]], item.vertex[["date"]])) + vertices.processed.set = rbind(vertices.processed.set, item.vertex) } else { vertices.processed.set = c(vertices.processed.set, item.vertex) } From 94207542c5407382078dfa54ae82bc4f25ccbdb9 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 17 Jul 2024 16:48:29 +0200 Subject: [PATCH 074/130] Add missing edge attributes attribute 'date' added to cochange commit network edges, attribute artifact.type added to all networks based on commit interactions Signed-off-by: Leo Sendelbach --- tests/test-networks-artifact.R | 2 ++ tests/test-networks-author.R | 1 + tests/test-networks-commit.R | 63 +++++++++++++++++++--------------- util-networks.R | 12 +++++-- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 432840fc..1d847b54 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -252,6 +252,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction "test3.c::test_function", "test2.c::test2"), base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + artifact.type = c("File", "File", "File", "File"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, 
TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") @@ -301,6 +302,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + artifact.type = c("Function", "Function", "Function", "Function"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 2910ba51..d343a0c5 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -720,6 +720,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction base.func = c("test2.c::test2", "test2.c::test2", "test3.c::test_function", "test2.c::test2"), base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + artifact.type = c("CommitInteraction", "CommitInteraction", "CommitInteraction", "CommitInteraction"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index fa3f70c4..8ddb87db 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -76,6 +76,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction base.func = c("test2.c::test2", "test2.c::test2", "test3.c::test_function", "test2.c::test2"), base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + artifact.type = c("CommitInteraction", "CommitInteraction", "CommitInteraction", "CommitInteraction"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, 
TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") @@ -105,17 +106,18 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:32", - "2016-07-12 16:06:10"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10")), kind = TYPE.COMMIT, type = TYPE.COMMIT ) edges = data.frame( from = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "3a0ed78458b3976243db6829f63eba3eead26774"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:32")), artifact.type = c("File", "File"), artifact = c("test.c", "test2.c"), weight = c(1, 1), @@ -124,7 +126,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati ) if (test.directed) { - edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) @@ -153,11 +155,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:32", - "2016-07-12 16:06:10"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:32", + "2016-07-12 16:06:10")), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -168,6 
+170,8 @@ patrick::with_parameters_test_that("Network construction with cochange as relati to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:05:41", "2016-07-12 16:05:41", + "2016-07-12 16:06:32", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Function", "Function", "Function", "Function", "Function", "Function"), artifact = c("File_Level", "File_Level", "File_Level", "File_Level", "File_Level", "File_Level"), weight = c(1, 1, 1, 1, 1, 1), @@ -177,7 +181,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati ) if (test.directed) { - edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) @@ -206,11 +210,11 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:10", - "2016-07-12 16:06:32"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32")), kind = TYPE.COMMIT, type = TYPE.COMMIT ) @@ -219,6 +223,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", 
"0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), @@ -227,7 +232,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati ) if (test.directed) { - edges <- edges[, c(2, 1, 3, 4, 5, 6, 7), ] + edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) @@ -257,11 +262,11 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:10", - "2016-07-12 16:06:32"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32")), kind = TYPE.COMMIT, type = TYPE.COMMIT, author.name = c("Björn", @@ -275,6 +280,7 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), @@ -295,11 +301,11 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", 
"1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - date = c("2016-07-12 15:58:59", - "2016-07-12 16:00:45", - "2016-07-12 16:05:41", - "2016-07-12 16:06:10", - "2016-07-12 16:06:32"), + date = get.date.from.string(c("2016-07-12 15:58:59", + "2016-07-12 16:00:45", + "2016-07-12 16:05:41", + "2016-07-12 16:06:10", + "2016-07-12 16:06:32")), kind = TYPE.COMMIT, type = TYPE.COMMIT, author.name = c("Björn", @@ -315,6 +321,7 @@ test_that("Adding vertex attributes to a commit network", { "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), to = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526"), + date = get.date.from.string(c("2016-07-12 16:00:45", "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32")), artifact.type = c("Feature", "Feature", "Feature", "Feature"), artifact = c("A", "Base_Feature", "Base_Feature", "Base_Feature"), weight = c(1, 1, 1, 1), diff --git a/util-networks.R b/util-networks.R index d1c49325..16d7f064 100644 --- a/util-networks.R +++ b/util-networks.R @@ -248,6 +248,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[1] = "to" colnames(edges)[2] = "from" colnames(edges)[4] = "hash" + edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -401,6 +402,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] + edges = cbind(edges, data.frame(artifact.type = c("File"))) colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data @@ 
-410,6 +412,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] + edges = cbind(edges, data.frame(artifact.type = c("Function"))) colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning @@ -705,6 +708,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set the commits as the 'to' and 'from' of the network and order the dataframe edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] + edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) colnames(edges)[1] = "to" colnames(edges)[2] = "from" commit.net.data = list(vertices = vertices, edges = edges) @@ -1360,7 +1364,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed ## if edges in a commit network contain 'date', 'hash' or 'file' attributes, remove them ## as they belong to commits, which are the vertices in commit networks if (network.type == "commit") { - cols.which = which(edge.attributes %in% c("date", "hash", "file")) + cols.which = which(edge.attributes %in% c("hash", "file")) edge.attributes <- edge.attributes[-cols.which] } @@ -1391,7 +1395,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed return(list( vertices = data.frame( name = unique(vertices.processed), - date = unique(vertices.dates.processed) + date = get.date.from.string(unique(vertices.dates.processed)) ), edges = edge.list )) @@ -1429,6 +1433,10 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke return(NULL) } + if (network.type == "commit") { + set = set[order(set[["date"]]), ] + } + ## queue of already processed artifacts edge.list.set = data.frame() vertices.processed.set = c() From 860d4ee551255e3d0ff393133d0ae8ad7ff5e749 Mon Sep 17 00:00:00 2001 
From: Leo Sendelbach Date: Thu, 25 Jul 2024 13:16:46 +0200 Subject: [PATCH 075/130] Add minor non-functional fixes to adress review Added linebreaks, fixed spelling, removed cbind Signed-off-by: Leo Sendelbach --- util-networks-covariates.R | 7 +++---- util-networks.R | 26 ++++++++++++++------------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 1e476277..5709126a 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -152,7 +152,7 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com #' @param default.value the dafault value of the attribute #' if it does not occur in the commit data #' -#' @return a networks with new vertex attribute +#' @return a network with new vertex attribute add.vertex.attribute.commit.network = function(network, project.data, attr.name, default.value) { # get the commit data and extract the required data @@ -160,9 +160,8 @@ add.vertex.attribute.commit.network = function(network, project.data, hashes = commit.data[["hash"]] attribute = commit.data[[attr.name]] attribute.values = c() - for (hash.num in seq_along(igraph::V(network))) { - # for each vertex, finc the position in the data frame - hash = igraph::V(network)[hash.num]$name + for (hash in igraph::V(network)$name) { + # for each vertex, find the position in the data frame hash.index = match(hash, hashes, nomatch = NA) value = c() diff --git a/util-networks.R b/util-networks.R index 16d7f064..352794ad 100644 --- a/util-networks.R +++ b/util-networks.R @@ -248,7 +248,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[1] = "to" colnames(edges)[2] = "from" colnames(edges)[4] = "hash" - edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) + edges[["artifact.type"]] = "CommitInteraction" author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -402,7 
+402,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - edges = cbind(edges, data.frame(artifact.type = c("File"))) + edges[["artifact.type"]] = "File" colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data @@ -412,7 +412,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - edges = cbind(edges, data.frame(artifact.type = c("Function"))) + edges[["artifact.type"]] = "Function" colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning @@ -698,7 +698,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(private$commit.network.commit.interaction) } - ## get the authors that appear in the commit-interaction data as the vertices of the network + ## get the hashes that appear in the commit-interaction data as the vertices of the network vertices = unique(c(private$proj.data$get.commit.interactions()[["base.hash"]], private$proj.data$get.commit.interactions()[["commit.hash"]])) vertices = data.frame(name = vertices) @@ -708,7 +708,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set the commits as the 'to' and 'from' of the network and order the dataframe edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] - edges = cbind(edges, data.frame(artifact.type = c("CommitInteraction"))) + edges[["artifact.type"]] = "CommitInteraction" colnames(edges)[1] = "to" colnames(edges)[2] = "from" commit.net.data = list(vertices = vertices, edges = edges) @@ -1391,7 +1391,8 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed 
logging::logdebug("construct.edge.list.from.key.value.list: finished.") if (network.type == "commit") { - vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.dates.processed")) ) + vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, + function(data) attr(data, "vertices.dates.processed")) ) return(list( vertices = data.frame( name = unique(vertices.processed), @@ -1412,10 +1413,10 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed #' Constructs edge list from the given key value list respecting temporal order. #' Helper method which is called by 'construct.edge.list.by.key.value.list'. #' -#' @param list the given key value list +#' @param set the given key value list #' @param network.conf the network configuration #' @param edge.attributes the attributes that should be on the edges of the network -#' @param keys the keays of the key value list +#' @param keys the keys of the key value list #' @param keys.number the amount of keys in the key value list #' @param network.type the type of network that should be created #' @@ -1458,12 +1459,13 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke ## construct edges combinations = c() if (network.type == "commit") { - combinations = expand.grid(item.vertex[["commit"]], vertices.processed.set[["commit"]], stringsAsFactors = FALSE) + combinations = expand.grid(item.vertex[["commit"]], + vertices.processed.set[["commit"]], stringsAsFactors = FALSE) } else { combinations = expand.grid(item.vertex, vertices.processed.set, stringsAsFactors = FALSE) } - if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) { + if (nrow(combinations) > 0 && nrow(item.edge.attrs) == 1) { combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes } edge.list.set = rbind(edge.list.set, combinations) # add to edge list @@ -1492,10 +1494,10 @@ construct.edges.temporal.order = 
function(set, network.conf, edge.attributes, ke #' Constructs edge list from the given key value list not respecting temporal order. #' Helper method which is called by 'construct.edge.list.by.key.value.list'. #' -#' @param list the given key value list +#' @param set the given key value list #' @param network.conf the network configuration #' @param edge.attributes the attributes that should be on the edges of the network -#' @param keys the keays of the key value list +#' @param keys the keys of the key value list #' @param keys.number the amount of keys in the key value list #' #' @return the data for the edge list From 849123a8b7d898fbb1343745ecffc1f6000c9367 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 25 Jul 2024 13:24:05 +0200 Subject: [PATCH 076/130] Add missing 'artifact.type' to networks Networks based on commit interaction data now correctly have an edge attribute called 'artifact.type'. Value of column 'artifact.type' in commit interaction data is 'CommitInteraction' until potentially overwritten in artifact network construction Signed-off-by: Leo Sendelbach --- util-read.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/util-read.R b/util-read.R index f4fe7025..ecf60c27 100644 --- a/util-read.R +++ b/util-read.R @@ -863,14 +863,15 @@ create.empty.pasta.list = function() { COMMIT.INTERACTION.LIST.COLUMNS = c( "func", "commit.hash", "file", "base.hash", "base.func", "base.file", - "base.author", "interacting.author" + "base.author", "interacting.author", + "artifact.type" ) ## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character", "character", "character", "character", "character", - "character", "character" + "character", "character", "character" ) COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME = "GLOBAL" @@ -952,6 +953,7 @@ read.commit.interactions = function(data.path = NULL) { ## Author data will be merged from commit 
data in \code{update.commit.interactions}. interactions["base.author"] = NA_character_ interactions["interacting.author"] = NA_character_ + interactions["artifact.type"] = "CommitInteraction" return(interactions) }))) From 3fb7437b68950303916b62984fa449732c70353e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 25 Jul 2024 14:22:03 +0200 Subject: [PATCH 077/130] Fix endless recursion problem Add check for calling function in the beginning of 'update.commit.interactions'. Also contains minor fixes to address PR comments and updates tests to reflect changes made in previous commit. Signed-off-by: Leo Sendelbach --- README.md | 7 +++ showcase.R | 4 +- tests/test-data.R | 10 +++-- tests/test-networks-commit.R | 3 ++ tests/test-read.R | 14 +++--- util-data.R | 12 ++--- util-networks-covariates.R | 5 +-- util-networks.R | 86 +++++++++++++++++++++--------------- 8 files changed, 84 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 86b2671c..58b2c82e 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,11 @@ There are four types of networks that can be built using this library: author ne * The vertices in an artifact network denote any kind of artifact, e.g., source-code artifact (such as features or files) or communication artifact (such as mail threads or issues). All artifact-type vertices are uniquely identifiable by their name. There are only unipartite edges among artifacts in this type of network. * The relations (i.e., the edges' meaning and source) can be configured using the [`NetworkConf`](#networkconf) attribute `artifact.relation`. The relation also describes which kinds of artifacts are represented as vertices in the network. (For example, if "mail" is selected as `artifact.relation`, only mail-thread vertices are included in the network.) +- Commit networks + * The vertices in a commit network denote any commits in the data. All vertices + are uniquely identifyable by the hash of the commit.
There are only unipartite edges among commits in this type of network. + * The relations (i.e., the edges meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`. The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). + - Bipartite networks * The vertices in a bipartite network denote both authors and artifacts. There are only bipartite edges from authors to artifacts in this type of network. * The relations (i.e., the edges' meaning and source) can be configured using the [`NetworkConf`](#networkconf) attribute `artifact.relation`. @@ -249,6 +254,7 @@ Relations determine which information is used to construct edges among the verti - `cochange` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who change the same source-code artifact are connected with an edge. * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), source-code artifacts that are concurrently changed in the same commit are connected with an edge. + * For commit networks (configured vie `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected if they change the same artifact. * For bipartite networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), authors get linked to all source-code artifacts they have changed in their respective commits. - `mail` @@ -269,6 +275,7 @@ Relations determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. 
* For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. + * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit interaction data. * This relation does not apply for bipartite networks. #### Edge-construction algorithms for author networks diff --git a/showcase.R b/showcase.R index 3d2aece7..4cb95d4a 100644 --- a/showcase.R +++ b/showcase.R @@ -239,8 +239,8 @@ sample.pull.requests = add.vertex.attribute.author.issue.count(my.networks, x.da ## add vertex attributes for the project-level network x.net.as.list = list("1970-01-01 00:00:00-2030-01-01 00:00:00" = x$get.author.network()) sample.entire = add.vertex.attribute.author.commit.count(x.net.as.list, x.data, aggregation.level = "complete") -## add vertex attributes to commit network -add.vertex.attribute.commit.network(x$get.commit.network(), x.data, "author.name", "NO_AUTHOR") +## add vertex attributes to commit network. 
Default value 'NO_AUTHOR' is used if vertex is not in commit data +add.vertex.attribute.commit.network(x$get.commit.network(), x.data, attr.name = "author.name", default.value = "NO_AUTHOR") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / diff --git a/tests/test-data.R b/tests/test-data.R index 88ce0e42..c983946d 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -564,15 +564,15 @@ test_that("Compare two ProjectData Objects with commit.interactions", { proj.data.two$set.commits(create.empty.commits.list()) ## create empty data frame of correct size - commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 9)) ## assure that the correct type is used - for(i in seq_len(8)) { + for(i in seq_len(9)) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## set everything except for authors as expected colnames(commit.interactions.data.expected) = c("commit.hash", "base.hash", "func", "file", - "base.func", "base.file", "base.author", - "interacting.author") + "base.func", "base.file","artifact.type", + "base.author", "interacting.author") commit.interactions.data.expected[["commit.hash"]] = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", @@ -588,6 +588,8 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.interactions.data.expected[["base.func"]] = c("test2.c::test2", "test2.c::test2", "test3.c::test_function", "test2.c::test2") commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") + commit.interactions.data.expected[["artifact.type"]] = c("CommitInteraction", "CommitInteraction", + "CommitInteraction", "CommitInteraction") expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 
8ddb87db..7de34eed 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -83,6 +83,9 @@ patrick::with_parameters_test_that("Network construction with commit-interaction ) network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) + + network.new.attr = add.vertex.attribute.commit.network(network.built, proj.data, "deleted.lines", "NO_DATA") + expect_identical(igraph::V(network.new.attr)$deleted.lines, c("0", "0","0", "NO_DATA", "0", "NO_DATA")) }, patrick::cases( "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) diff --git a/tests/test-read.R b/tests/test-read.R index c617e091..f01d16c1 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -505,15 +505,15 @@ test_that("Read the commit-interactions data.", { commit.interactions.data.read = read.commit.interactions(proj.conf$get.value("datapath")) ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 9)) ## assure that the correct type is used - for(i in seq_len(8)) { + for(i in seq_len(ncol(commit.interactions.data.expected))) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## set everything except for authors as expected colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", "base.hash", "base.func", "base.file", "base.author", - "interacting.author") + "interacting.author", "artifact.type") commit.interactions.data.expected[["commit.hash"]] = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", "0a1a5c523d835459c42f33e863623138555e2526", @@ -529,6 +529,8 @@ test_that("Read the commit-interactions data.", { commit.interactions.data.expected[["base.func"]] = c("test3.c::test_function", "test2.c::test2", "test2.c::test2", "test2.c::test2") 
commit.interactions.data.expected[["base.file"]] = c("test3.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["artifact.type"]] = c("CommitInteraction", "CommitInteraction", + "CommitInteraction", "CommitInteraction") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") @@ -543,11 +545,11 @@ test_that("Read the empty commit-interactions data.", { commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ test_empty_proximity/proximity") ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 8)) + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 9)) colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", "base.hash", "base.func", "base.file", - "base.author", "interacting.author") - for(i in seq_len(8)) { + "base.author", "interacting.author", "artifact.type") + for(i in seq_len(ncol(commit.interactions.data.expected))) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## check the results diff --git a/util-data.R b/util-data.R index 8d68765f..7f2a971a 100644 --- a/util-data.R +++ b/util-data.R @@ -415,7 +415,10 @@ ProjectData = R6::R6Class("ProjectData", #' #' This method should be called whenever the field \code{commit.interactions} is changed. 
update.commit.interactions = function() { - if (self$is.data.source.cached("commit.interactions")) { + stacktrace = get.stacktrace(sys.calls()) + caller = get.second.last.element(stacktrace) + if (self$is.data.source.cached("commit.interactions") && + (is.na(caller)|| paste(caller, collapse = " ") != "self$set.commits(commit.data)")) { if (!self$is.data.source.cached("commits.unfiltered")) { self$get.commits() } @@ -2143,8 +2146,6 @@ ProjectData = R6::R6Class("ProjectData", return(mylist) }, - ## * * processed data ---------------------------------------------- - #' Group the commits of the given \code{data.source} by the given \code{group.column}. #' For each group, the column \code{"hash"} is duplicated and prepended to each #' group's data as first column (see below for details). @@ -2162,12 +2163,11 @@ ProjectData = R6::R6Class("ProjectData", #' as first column (with name \code{"data.vertices"}) #' #' @seealso ProjectData$group.data.by.column - group.commits.by.data.column = function(data.source = c("commits", "mails", "issues"), - group.column = "artifact") { + group.commits.by.data.column = function(group.column = "artifact") { logging::loginfo("Grouping commits by data column.") ## store the commits per group that is determined by 'group.column' - mylist = self$group.data.by.column(data.source, group.column, "hash") + mylist = self$group.data.by.column("commits", group.column, "hash") return(mylist) }, diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 5709126a..700b5e9f 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -149,8 +149,8 @@ add.vertex.attribute = function(net.to.range.list, attr.name, default.value, com #' @param network the commit network #' @param project.data the project data from which to extract the values #' @param attr.name the name of the attribute -#' @param default.value the dafault value of the attribute -#' if it does not occur in the commit data +#' @param default.value the default 
value that is used if the current hash +#' is not contained in the commit data at all #' #' @return a network with new vertex attribute add.vertex.attribute.commit.network = function(network, project.data, @@ -174,7 +174,6 @@ add.vertex.attribute.commit.network = function(network, project.data, attribute.values = c(attribute.values, value) } net.with.attr = igraph::set.vertex.attribute(network, attr.name, value = attribute.values) - } diff --git a/util-networks.R b/util-networks.R index 352794ad..dd27f36f 100644 --- a/util-networks.R +++ b/util-networks.R @@ -123,8 +123,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifacts.network.callgraph = NULL, # igraph artifacts.network.mail = NULL, # igraph artifacts.network.issue = NULL, # igraph - commit.network.commit.interaction = NULL, #igraph - commit.network.cochange = NULL, #igraph + commits.network.commit.interaction = NULL, #igraph + commits.network.cochange = NULL, #igraph ## * * relation-to-vertex-kind mapping ----------------------------- @@ -248,7 +248,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[1] = "to" colnames(edges)[2] = "from" colnames(edges)[4] = "hash" - edges[["artifact.type"]] = "CommitInteraction" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = "CommitInteraction" + } author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -402,7 +404,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - edges[["artifact.type"]] = "File" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = ARTIFACT.CODEFACE[[proj.conf.artifact]] + } colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data @@ -412,7 +416,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges 
= edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - edges[["artifact.type"]] = "Function" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = ARTIFACT.CODEFACE[[proj.conf.artifact]] + } colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning @@ -693,9 +699,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::logdebug("get.commit.network.commit.interaction: starting.") ## do not compute anything more than once - if (!is.null(private$commit.network.commit.interaction)) { + if (!is.null(private$commits.network.commit.interaction)) { logging::logdebug("get.commit.network.commit.interaction: finished. (already existing)") - return(private$commit.network.commit.interaction) + return(private$commits.network.commit.interaction) } ## get the hashes that appear in the commit-interaction data as the vertices of the network @@ -708,7 +714,9 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set the commits as the 'to' and 'from' of the network and order the dataframe edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] - edges[["artifact.type"]] = "CommitInteraction" + if (nrow(edges) > 0) { + edges[["artifact.type"]] = "CommitInteraction" + } colnames(edges)[1] = "to" colnames(edges)[2] = "from" commit.net.data = list(vertices = vertices, edges = edges) @@ -722,13 +730,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.data.columns.for.data.source("commit.interactions") ) - private$commit.network.commit.interaction = commit.net + private$commits.network.commit.interaction = commit.net logging::logdebug("get.commit.network.commit.interaction: finished.") return(commit.net) }, - #' Get the co-change-based commit network, + #' Get the cochange-based commit network, #' If it does not already exist build it first. 
#' #' @return the commit network with cochange realtion @@ -737,13 +745,13 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", logging::logdebug("get.commit.network.cochange: starting.") ## do not compute anything more than once - if (!is.null(private$commit.network.cochange)) { + if (!is.null(private$commits.network.cochange)) { logging::logdebug("get.commit.network.cochange: finished. (already existing)") - return(private$commit.network.cochange) + return(private$commits.network.cochange) } ## construct edge list based on commit--artifact data - commit.net.data.raw = private$proj.data$group.commits.by.data.column("commits", "artifact") + commit.net.data.raw = private$proj.data$group.commits.by.data.column("artifact") commit.net.data = construct.edge.list.from.key.value.list( commit.net.data.raw, @@ -763,7 +771,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ) ## store network - private$commit.network.cochange = commit.net + private$commits.network.cochange = commit.net logging::logdebug("get.commit.network.cochange: finished.") return(commit.net) @@ -843,8 +851,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", private$artifacts.network.cochange = NULL private$artifacts.network.issue = NULL private$artifacts.network.mail = NULL - private$commit.network.commit.interaction = NULL - private$commit.network.cochange = NULL + private$commits.network.commit.interaction = NULL + private$commits.network.cochange = NULL private$proj.data = private$proj.data.original if (private$network.conf$get.value("unify.date.ranges")) { private$cut.data.to.same.timestamps() @@ -1192,7 +1200,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", "bipartite.net" = bipartite.net, "authors.net" = authors.net, "artifacts.net" = artifacts.net, - "commit.net" = commit.net + "commits.net" = commit.net )) }, @@ -1322,7 +1330,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' i.e., whether to only add edges from the later event to the previous one. 
#' If \code{NA} is passed, the default value is taken. #' [default: directed] -#' @param network.type the type of network for which the key value data is to be used as edges [default: "author"] +#' @param network.type the type of network for which the key value data is to be used as edges +#' (one out of "author", "artifact", or "commit")[default: "author"] #' #' @return a list of two data.frames named 'vertices' and 'edges' (compatible with return value #' of \code{igraph::as.data.frame}) @@ -1361,11 +1370,11 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed } } - ## if edges in a commit network contain 'date', 'hash' or 'file' attributes, remove them + ## if edges in a commit network contain 'hash' or 'file' attributes, remove them ## as they belong to commits, which are the vertices in commit networks if (network.type == "commit") { cols.which = which(edge.attributes %in% c("hash", "file")) - edge.attributes <- edge.attributes[-cols.which] + edge.attributes = edge.attributes[-cols.which] } if (respect.temporal.order) { @@ -1375,7 +1384,9 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed edge.attributes, keys, keys.number, network.type) edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + vertices.processed = unlist(parallel::mclapply(edge.list.data, function(data) { + return(attr(data, "vertices.processed")) + })) } else { @@ -1384,28 +1395,31 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed edge.attributes, keys, keys.number) edge.list = plyr::rbind.fill(edge.list.data) - vertices.processed = unlist( parallel::mclapply(edge.list.data, function(data) attr(data, "vertices.processed")) ) + vertices.processed = unlist(parallel::mclapply(edge.list.data, function(data) { + return(attr(data, "vertices.processed")) + })) } 
logging::logdebug("construct.edge.list.from.key.value.list: finished.") if (network.type == "commit") { - vertices.dates.processed = unlist( parallel::mclapply(edge.list.data, - function(data) attr(data, "vertices.dates.processed")) ) + vertices.dates.processed = unlist(parallel::mclapply(edge.list.data, function(data) { + return (attr(data, "vertices.dates.processed")) + })) return(list( - vertices = data.frame( - name = unique(vertices.processed), - date = get.date.from.string(unique(vertices.dates.processed)) - ), - edges = edge.list + vertices = data.frame( + name = unique(vertices.processed), + date = get.date.from.string(unique(vertices.dates.processed)) + ), + edges = edge.list )) } else { return(list( - vertices = data.frame( - name = unique(vertices.processed) - ), - edges = edge.list + vertices = data.frame( + name = unique(vertices.processed) + ), + edges = edge.list )) } } @@ -1504,13 +1518,13 @@ construct.edges.temporal.order = function(set, network.conf, edge.attributes, ke construct.edges.no.temporal.order = function(set, network.conf, edge.attributes, keys, keys.number) { number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1)) logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).", - match(attr(set, "group.name"), keys), keys.number, - attr(set, "group.type"), attr(set, "group.name"), number.edges) + match(attr(set, "group.name"), keys), keys.number, + attr(set, "group.type"), attr(set, "group.name"), number.edges) ## Skip artifacts with many, many edges if (number.edges > network.conf$get.value("skip.threshold")) { logging::logwarn("Skipping edges for %s '%s' due to amount (> %s).", - attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) + attr(set, "group.type"), attr(set, "group.name"), network.conf$get.value("skip.threshold")) return(NULL) } From 170bc66eb779d7cf2ab504db7c3f4ec483103838 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach 
Date: Wed, 7 Aug 2024 15:30:30 +0200 Subject: [PATCH 078/130] Update NEWS.md and minor fix Include this PR's changelog in the NEWS.md Add constant for commit interaction artifact type Move check for avoiding infinite recursion to the correct position and add commentary Signed-off-by: Leo Sendelbach --- NEWS.md | 4 ++++ README.md | 4 ++-- util-conf.R | 2 ++ util-data.R | 12 +++++++----- util-networks.R | 4 ++-- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3d093756..1047a961 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ - Add line-based code coverage reports into CI pipeline. Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) - Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) +- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions.
This includes adding new config parameters and a function for adding vertex attributes to a commit network(PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) ### Changed/Improved @@ -19,10 +20,13 @@ - Replace deprecated `igraph` functions by their preferred alternatives (PR #264, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) +- Refactor function `construct.edge.list.from.key.value.list` to be more readable(PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) +- Fix networks based upon commit interaction data to also have the attribute `artifact.type`(PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367) +- Fix endless recursion that could occur when commit interaction data was configured and commit data is empty (PR #263, 3fb7437b68950303916b62984fa449732c70353e) ## 4.4 diff --git a/README.md b/README.md index 58b2c82e..804b376c 100644 --- a/README.md +++ b/README.md @@ -630,7 +630,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - `author.relation` * The relation(s) among authors, encoded as edges in an author network * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! - * possible values: [*`"mail"`*, `"cochange"`, `"issue"`] + * possible values: [*`"mail"`*, `"cochange"`, `"issue"`, `commit.interaction`] - `author.directed` * The directedness of edges in an author network * [`TRUE`, *`FALSE`*] @@ -649,7 +649,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. 
- `artifact.relation` * The relation(s) among artifacts, encoded as edges in an artifact network * **Note**: Additionally, this relation configures also the author--artifact relation in bipartite and multi networks! - * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`] + * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`, `commit.interaction`] - `artifact.directed` * The directedness of edges in an artifact network * **Note**: This parameter does only affect the `issue` relation, as the `cochange` relation is always undirected, while the `callgraph` relation is always directed. For the `mail`, we currently do not have data available to exhibit edge information. diff --git a/util-conf.R b/util-conf.R index 35e5303e..85aec34a 100644 --- a/util-conf.R +++ b/util-conf.R @@ -63,6 +63,8 @@ ARTIFACT.CODEFACE = list( "file" = "File" ) +ARTIFACT.COMMIT.INTERACTION = "CommitInteraction" + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Conf -------------------------------------------------------------------- diff --git a/util-data.R b/util-data.R index 7f2a971a..90c01ca4 100644 --- a/util-data.R +++ b/util-data.R @@ -415,11 +415,13 @@ ProjectData = R6::R6Class("ProjectData", #' #' This method should be called whenever the field \code{commit.interactions} is changed. update.commit.interactions = function() { - stacktrace = get.stacktrace(sys.calls()) - caller = get.second.last.element(stacktrace) - if (self$is.data.source.cached("commit.interactions") && - (is.na(caller)|| paste(caller, collapse = " ") != "self$set.commits(commit.data)")) { - if (!self$is.data.source.cached("commits.unfiltered")) { + if (self$is.data.source.cached("commit.interactions")) { + ## check if caller was 'set.commits'. If so, or if commits are already filtered, + ## do not get the commits again. 
+ stacktrace = get.stacktrace(sys.calls()) + caller = get.second.last.element(stacktrace) + if (!self$is.data.source.cached("commits.unfiltered") && + (is.na(caller) || paste(caller, collapse = " ") != "self$set.commits(commit.data)")) { self$get.commits() } diff --git a/util-networks.R b/util-networks.R index dd27f36f..da1b1da6 100644 --- a/util-networks.R +++ b/util-networks.R @@ -249,7 +249,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", colnames(edges)[2] = "from" colnames(edges)[4] = "hash" if (nrow(edges) > 0) { - edges[["artifact.type"]] = "CommitInteraction" + edges[["artifact.type"]] = ARTIFACT.COMMIT.INTERACTION } author.net.data = list(vertices = vertices, edges = edges) ## construct the network @@ -715,7 +715,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("base.hash", "commit.hash", "func", "interacting.author", "file", "base.author", "base.func", "base.file")] if (nrow(edges) > 0) { - edges[["artifact.type"]] = "CommitInteraction" + edges[["artifact.type"]] = ARTIFACT.COMMIT.INTERACTION } colnames(edges)[1] = "to" colnames(edges)[2] = "from" From 5842073b8d97622bdae87907c7de7bc370cc2bbb Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 8 Aug 2024 11:18:47 +0200 Subject: [PATCH 079/130] Update `README.md` and `NEWS.md` Minor changes in response to reviews. Also added a use for constant `ARTIFACT.COMMIT.INTERACTION` that was previously overlooked. 
Signed-off-by: Leo Sendelbach --- NEWS.md | 8 +++----- README.md | 14 ++++++++++---- util-read.R | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1047a961..dddf0ac9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,13 +6,13 @@ ### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e, PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367, 3fb7437b68950303916b62984fa449732c70353e, 170bc66eb779d7cf2ab504db7c3f4ec483103838) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0,) - Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` 
(PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) - Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) - Add line-based code coverage reports into CI pipeline. Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) - Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) -- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. This includes adding new config parameters and a function for adding vertex attributes to a commit network(PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) +- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. 
This includes adding new config parameters and the function `add.vertex.attribute.commit.network` for adding vertex attributes to a commit network (PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) ### Changed/Improved @@ -20,13 +20,11 @@ - Replace deprecated `igraph` functions by their preferred alternatives (PR #264, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) -- Refactor function `construct.edge.list.from.key.value.list` to be more readable(PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) +- Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) -- Fix networks based upon commit interaction data to also have the attribute `artifact.type`(PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367) -- Fix endless recursion that could occur when commit interaction data was configured and commit data is empty (PR #263, 3fb7437b68950303916b62984fa449732c70353e) ## 4.4 diff --git a/README.md b/README.md index 804b376c..dc2cba45 100644 --- a/README.md +++ b/README.md @@ -237,7 +237,7 @@ There are four types of networks that can be built using this library: author ne - Commit networks * The vertices in a commit network denote any commits in the data. All vertices are uniquely identifyable by the hash of the commit. There are only unipartite edges among commits in this type of network. - * The relations (i.e., the edges meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`. 
The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). + * The relations (i.e., the edges' meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`. The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). - Bipartite networks * The vertices in a bipartite network denote both authors and artifacts. There are only bipartite edges from authors to artifacts in this type of network. @@ -275,7 +275,7 @@ Relations determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. - * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit interaction data. + * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit-interaction data. * This relation does not apply for bipartite networks. #### Edge-construction algorithms for author networks @@ -630,7 +630,7 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - `author.relation` * The relation(s) among authors, encoded as edges in an author network * **Note**: The author--artifact relation in bipartite and multi networks is configured by `artifact.relation`! 
- * possible values: [*`"mail"`*, `"cochange"`, `"issue"`, `commit.interaction`] + * possible values: [*`"mail"`*, `"cochange"`, `"issue"`, `"commit.interaction"`] - `author.directed` * The directedness of edges in an author network * [`TRUE`, *`FALSE`*] @@ -649,11 +649,17 @@ Updates to the parameters can be done by calling `NetworkConf$update.variables(. - `artifact.relation` * The relation(s) among artifacts, encoded as edges in an artifact network * **Note**: Additionally, this relation configures also the author--artifact relation in bipartite and multi networks! - * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`, `commit.interaction`] + * possible values: [*`"cochange"`*, `"callgraph"`, `"mail"`, `"issue"`, `"commit.interaction"`] - `artifact.directed` * The directedness of edges in an artifact network * **Note**: This parameter does only affect the `issue` relation, as the `cochange` relation is always undirected, while the `callgraph` relation is always directed. For the `mail`, we currently do not have data available to exhibit edge information. * [`TRUE`, *`FALSE`*] +- `commit.relation` + * The relation(s) among commits, encoded as edges in a commit network + * possible values: [*`"cochange"`*, `"commit.interaction"`] +- `commit.directed` + * The directedness of edges in a commit network + * [`TRUE`, *`FALSE`*] - `edge.attributes` * The list of edge-attribute names and information * a subset of the following as a single vector: diff --git a/util-read.R b/util-read.R index ecf60c27..06c082e5 100644 --- a/util-read.R +++ b/util-read.R @@ -953,7 +953,7 @@ read.commit.interactions = function(data.path = NULL) { ## Author data will be merged from commit data in \code{update.commit.interactions}. 
interactions["base.author"] = NA_character_ interactions["interacting.author"] = NA_character_ - interactions["artifact.type"] = "CommitInteraction" + interactions["artifact.type"] = ARTIFACT.COMMIT.INTERACTION return(interactions) }))) From 7ac840d287a862eff61b1a84e194a4cba399f9e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 27 Aug 2024 16:53:12 +0200 Subject: [PATCH 080/130] Replace deprecated igraph functions by their newer equivalent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Works towards #260. Signed-off-by: Maximilian Löffler --- util-plot.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util-plot.R b/util-plot.R index 10428567..92e16e21 100644 --- a/util-plot.R +++ b/util-plot.R @@ -125,13 +125,13 @@ plot.get.plot.for.network = function(network, labels = TRUE) { names(PLOT.VERTEX.TYPES) = c(TYPE.AUTHOR, TYPE.ARTIFACT) ## remove loops because of weird behavior when plotting - network = igraph::delete_edges(network, igraph::E(network)[igraph::is.loop(network)]) + network = igraph::delete_edges(network, igraph::E(network)[igraph::which_loop(network)]) ## fix the type attributes (add new ones, also named) network = plot.fix.type.attributes(network) ## set igraph network layout if no layout is set yet - if (!("layout" %in% igraph::list.graph.attributes(network))) { + if (!("layout" %in% igraph::graph_attr_names(network))) { network = igraph::set_graph_attr(network, "layout", "kk") } layout.algorithm = igraph::graph_attr(network, "layout") From 74c4dd2dc5c0ea1cb1ae81b026c2a321eea2f90d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 27 Aug 2024 17:04:45 +0200 Subject: [PATCH 081/130] Remove elements that occur before the first bin when splitting by bins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change is in-line with the current treatment of the elements that occur after the 
last bin. Further, this fixes a minor "bug" where bin indices are used to determine bin labels by indexing into a list of labels. Elements before the first bin received bin index 0, however, R is 1-indexed. Note: This "bug" did not cause problems because the bin with the incorrect label will be ignored later anyways. Works towards #267. Signed-off-by: Maximilian Löffler --- util-split.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util-split.R b/util-split.R index 0d2b62f9..0f15ad13 100644 --- a/util-split.R +++ b/util-split.R @@ -1016,6 +1016,9 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli findInterval(df[["date"]], bins.date, all.inside = FALSE) ## split data according to df.bins df.split = split(df, df.bins) + ## remove events that occur before the first bin. + ## these events are put in a bin with the name "0" by 'findInterval' + df.split = df.split[names(df.split) != "0"] ## add proper labels/names names(df.split) = sapply(as.integer(names(df.split)), function(bin) bins[bin]) return(df.split) From d9a4be417b340812b744f59398ba6460ba527e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 27 Aug 2024 17:08:04 +0200 Subject: [PATCH 082/130] Add 'remove.duplicate.edges' function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function that takes a network as input and simply removes all edges that are exact duplicates of each other. Works towards #138. Signed-off-by: Maximilian Löffler --- util-networks.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/util-networks.R b/util-networks.R index da1b1da6..1afad546 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1993,6 +1993,26 @@ delete.isolates = function(network) { return(network.no.isolates) } +#' Remove duplicate edges from the given network. +#' +#' Keep exactly one edge from all equivalence classes of edges over identity.
+#' This function retains all set network, vertex, and edge attributes. +#' +#' @param network the given network +#' +#' @return the simplified network +remove.duplicate.edges = function(network) { + + logging::logdebug("remove.duplicate.edges: starting.") + + ## remove all duplicates + edges = igraph::as_data_frame(network, "edges") + network = igraph::delete_edges(network, which(duplicated(edges))) + + logging::logdebug("remove.duplicate.edges: finished.") + return(network) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Multi-network views ----------------------------------------------------- From 0c2f47c4fea6f5f2f582c0259f8cf23af985058a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 28 Aug 2024 12:14:08 +0200 Subject: [PATCH 083/130] Add test for 'remove.duplicate.edges' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Works towards #138. Signed-off-by: Maximilian Löffler --- tests/test-networks.R | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/test-networks.R b/tests/test-networks.R index e77cecef..feee53e9 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -317,6 +317,58 @@ test_that("Remove isolated authors given a specific edge type", { }) +test_that("Remove duplicate edges", { + + ## + ## Remove duplicate edges from a network + ## + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + net.conf = NetworkConf$new() + net.conf$update.values(list("author.respect.temporal.order" = TRUE, author.directed = FALSE)) + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.builder$update.network.conf(updated.values = list(author.relation = "mail")) + + ## construct data for expected network + edges = data.frame(comb.1. 
= c("Björn", "Olaf", rep("Hans", 5)), + comb.2. = c("Olaf", "Thomas", rep("Hans", 5)), + date = get.date.from.string(c("2016-07-12 15:58:50", "2016-07-12 16:05:37", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", + "2010-07-12 12:05:43", "2010-07-12 12:05:44", + "2010-07-12 12:05:45")), + artifact.type = rep("Mail", 7), + message.id = c("<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + "", "", + "", "", + ""), + thread = c("", "", rep("", 5)), + weight = rep(1, 7), + type = rep(TYPE.EDGES.INTRA, 7), + relation = rep("mail", 7)) + vertices = data.frame(name = c("Björn", "udo", "Olaf", "Thomas", "Fritz fritz@example.org", "georg", "Hans"), + kind = rep("Author", 7), + type = rep("Author", 7)) + + ## build expected network + network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + + ## build network with unique edges + network = network.builder$get.author.network() + network.built = remove.duplicate.edges(network) + + assert.networks.equal(network.expected, network.built) + + ## + ## Attempt to remove non-existent duplicate edges should not change anything + ## + + assert.networks.equal(network.built, remove.duplicate.edges(network.built)) + +}) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Merge ------------------------------------------------------------------- From a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 3 Sep 2024 18:08:28 +0200 Subject: [PATCH 084/130] Add parameter to 'construct.ranges' to construct cumulative ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Works towards #265. 
Signed-off-by: Maximilian Löffler --- util-misc.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/util-misc.R b/util-misc.R index 03f07420..65e64f94 100644 --- a/util-misc.R +++ b/util-misc.R @@ -522,12 +522,13 @@ get.time.period.by.amount = function(start.date, end.date, amount) { #' @param revs the revisions #' @param sliding.window whether sliding window splitting is enabled or not #' [default: FALSE] +#' @param cumulative whether to construct cumulative ranges [default: FALSE] #' @param raw whether to return pairs of POSIXct objects or strings rather than #' formatted strings [default: FALSE] #' #' @return the constructed ranges, either formatted or raw; the raw ranges are a named list, #' for which the formatted ranges are the names -construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { +construct.ranges = function(revs, sliding.window = FALSE, cumulative = FALSE, raw = FALSE) { ## make sure that, at least, two revisions are provided if (length(revs) < 2) { @@ -544,8 +545,14 @@ construct.ranges = function(revs, sliding.window = FALSE, raw = FALSE) { if (sliding.window) offset = 2 - ## extract sequences of revisions - seq1 = revs[ 1:(length(revs) - offset) ] + ## extract start of ranges + if (cumulative) { + seq1 = rep(revs[1], length(revs) - offset) + } else { + seq1 = revs[ 1:(length(revs) - offset) ] + } + + ## extract end of ranges if ((offset + 1) <= length(revs)) { seq2 = revs[ (offset + 1):length(revs) ] } else { From 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 3 Sep 2024 18:09:18 +0200 Subject: [PATCH 085/130] Add tests for 'construct.ranges' for regular and cumulative ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Works towards #265. 
Signed-off-by: Maximilian Löffler --- tests/test-misc.R | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tests/test-misc.R b/tests/test-misc.R index 0a2f4d0b..47be5454 100644 --- a/tests/test-misc.R +++ b/tests/test-misc.R @@ -378,6 +378,44 @@ test_that("Generate a date sequence.", { ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Range construction and handling ----------------------------------------- +## +## Construct ranges from revisions. +## + +test_that("Construct ranges from revisions", { + + revisions = c("2016-12-07 15:30:02", "2016-11-09 09:37:45", "2016-19-10 15:59:25", "2016-07-12 20:14:45") + + ## wihtout sliding windows + expected.ranges = c(paste0(revisions[1], "-", revisions[2]), + paste0(revisions[2], "-", revisions[3]), + paste0(revisions[3], "-", revisions[4])) + ranges = construct.ranges(revs = revisions, sliding.window = FALSE) + expect_identical(ranges, expected.ranges, info = "No sliding windows.") + + ## raw ranges (without sliding windows) + expected.ranges.raw = list(c(revisions[1], revisions[2]), + c(revisions[2], revisions[3]), + c(revisions[3], revisions[4])) + names(expected.ranges.raw) = expected.ranges + ranges = construct.ranges(revs = revisions, raw = TRUE) + expect_identical(ranges, expected.ranges.raw, info = "Raw ranges without sliding windows.") + + ## with sliding windows (combine each second revision) + expected.ranges = c(paste0(revisions[1], "-", revisions[3]), + paste0(revisions[2], "-", revisions[4])) + ranges = construct.ranges(revs = revisions, sliding.window = TRUE) + expect_identical(ranges, expected.ranges, info = "Sliding windows.") + + ## raw ranges (with sliding windows) + expected.ranges.raw = list(c(revisions[1], revisions[3]), + c(revisions[2], revisions[4])) + names(expected.ranges.raw) = expected.ranges + ranges = construct.ranges(revs = revisions, sliding.window = TRUE, raw = TRUE) + expect_identical(ranges, 
expected.ranges.raw, info = "Raw ranges with sliding windows.") + +}) + ## ## Construct consecutive and overlapping ranges. ## @@ -504,6 +542,40 @@ test_that("Construct consecutive and overlapping ranges.", { ## Construct cumulative ranges. ## +test_that("Construct cumulative ranges from revisions", { + + revisions = c("2016-12-07 15:30:02", "2016-11-09 09:37:45", "2016-19-10 15:59:25", "2016-07-12 20:14:45") + + ## wihtout sliding windows + expected.ranges = c(paste0(revisions[1], "-", revisions[2]), + paste0(revisions[1], "-", revisions[3]), + paste0(revisions[1], "-", revisions[4])) + ranges = construct.ranges(revs = revisions, sliding.window = FALSE, cumulative = TRUE) + expect_identical(ranges, expected.ranges, info = "No sliding windows.") + + ## raw ranges (with sliding windows) + expected.ranges.raw = list(c(revisions[1], revisions[2]), + c(revisions[1], revisions[3]), + c(revisions[1], revisions[4])) + names(expected.ranges.raw) = expected.ranges + ranges = construct.ranges(revs = revisions, sliding.window = FALSE, cumulative = TRUE, raw = TRUE) + expect_identical(ranges, expected.ranges.raw, info = "Raw cumulative ranges without sliding windows.") + + ## with sliding windows (combine each second revision) + expected.ranges = c(paste0(revisions[1], "-", revisions[3]), + paste0(revisions[1], "-", revisions[4])) + ranges = construct.ranges(revs = revisions, sliding.window = TRUE, cumulative = TRUE) + expect_identical(ranges, expected.ranges, info = "Sliding windows.") + + ## raw ranges (with sliding windows) + expected.ranges.raw = list(c(revisions[1], revisions[3]), + c(revisions[1], revisions[4])) + names(expected.ranges.raw) = expected.ranges + ranges = construct.ranges(revs = revisions, sliding.window = TRUE, cumulative = TRUE, raw = TRUE) + expect_identical(ranges, expected.ranges.raw, info = "Raw cumulative ranges with sliding windows.") + +}) + test_that("Construct cumulative ranges.", { start = ("2018-01-01 00:00:00") From 
c6e90dd9cb462232563f753f414da14a24b392a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Thu, 12 Sep 2024 14:30:26 +0200 Subject: [PATCH 086/130] Add reference to 'remove.duplicate.edges' into 'README.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dc2cba45..8c40a447 100644 --- a/README.md +++ b/README.md @@ -278,7 +278,7 @@ Relations determine which information is used to construct edges among the verti * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit-interaction data. * This relation does not apply for bipartite networks. -#### Edge-construction algorithms for author networks +#### Edge-construction algorithms for networks When constructing author networks, we use events in time (i.e., commits, e-mails, issue events) to model interactions among authors on the same artifact as edges. Therefore, we group the events on artifacts, based on the configured relation (see the [previous section](#relations)). @@ -318,7 +318,7 @@ Based on the above raw data, we get the following author networks with relation -When constructing author networks with respecting the temporal order, there is one edge for each answer in a mail thread from the answer's author to the senders of every previous e-mail in this mail thread. Note that this can lead to duplicated edges if an author has sent several previous e-mails to the mail thread (see the duplicated edges `A –(3)– B` in the above example). This also leads to loop edges if an author of an answer has already sent an e-mail to this thread before (see the edge `A –(2)– A`). 
+When constructing author networks with respecting the temporal order, there is one edge for each answer in a mail thread from the answer's author to the senders of every previous e-mail in this mail thread. Note that this can lead to duplicated edges if an author has sent several previous e-mails to the mail thread (see the duplicated edges `A –(3)– B` in the above example). These may be conflated again using the `remove.duplicate.edges` function. Furthermore, respecting the temporal order also leads to loop edges if an author of an answer has already sent an e-mail to this thread before (see the edge `A –(2)– A`). If the temporal order is not respected, for each e-mail in a mail thread, there is an edge from the sender of the e-mail to every other author participating in this mail thread (regardless of in which order the e-mails were sent). In this case, no loop edges are contained in the network. However, it is possible that there are several edges (having different timestamps) between two authors (see the edges `A –(1)– B` and `A –(2)– B` in the example above). If directedness is configured, the edges are directed from the sender of an e-mail to the other authors. 
From 7c8b8f87dfc69aa88ce3ffdd2a136dde1f33e665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Thu, 12 Sep 2024 16:09:07 +0200 Subject: [PATCH 087/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index dddf0ac9..3756263b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,11 +13,13 @@ - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) - Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) - Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. This includes adding new config parameters and the function `add.vertex.attribute.commit.network` for adding vertex attributes to a commit network (PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) +- Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) +- Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) ### Changed/Improved - Change the default value for the `issues.from.source` configuration parameter. 
Instead of reading JIRA and GitHub issues together, which was the previous default, the new default value causes only GitHub issue data to be read. To restore the previous default behavior and read data from both issue sources, this now needs to be manually configured when needed. (PR #264, 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d, 8c8080cb9caf115f19d9f145ad6e6c108b131a67, 8bcbc81db521877908d2e5c2989082ed672f2a3b) -- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037) +- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) From 10696e4cf4ae92371917ed8ccaec2b0183da145c Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Thu, 19 Sep 2024 18:20:52 +0200 Subject: [PATCH 088/130] Ensure correct data type of NA timestamps Up until now, `get.data.cut.to.same.date(data.sources = c("issues", "mails", "commits"))` failed if the first data source was empty, but not if the second one was empty. The reason was that `NA` values introduced by empty data sources at the beginning of the data frame turned the data frame into a data frame of numeric objects instead of POSIXct objects. If there were already POSIXct objects in the data frame, this did not happen. To prevent the timestamps from being interpreted as numeric values, make sure that the `NA` values are always POSIXct objects. This fixes #269.
Signed-off-by: Thomas Bock --- util-data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util-data.R b/util-data.R index 90c01ca4..6edc899b 100644 --- a/util-data.R +++ b/util-data.R @@ -797,8 +797,8 @@ ProjectData = R6::R6Class("ProjectData", } ## NAs otherwise else { - source.date.min = NA - source.date.max = NA + source.date.min = as.POSIXct(NA) + source.date.max = as.POSIXct(NA) } ## remove old line if existing From 646c01a42ad8decfbc9040030e790e51cb65cffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 16 Oct 2024 16:33:33 +0200 Subject: [PATCH 089/130] Test 'get.data.cut.to.same.date' in presence of an empty data source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test fails without the previous fix by Thomas Bock but does not fail when the fix is in place. This works towards fixing #269. Signed-off-by: Maximilian Löffler --- tests/test-data-cut.R | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test-data-cut.R b/tests/test-data-cut.R index d1f3ef2a..26b1bf08 100644 --- a/tests/test-data-cut.R +++ b/tests/test-data-cut.R @@ -18,6 +18,7 @@ ## Copyright 2018 by Thomas Bock ## Copyright 2020 by Thomas Bock ## Copyright 2018 by Jakob Kronawitter +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. @@ -82,3 +83,63 @@ test_that("Cut commit and mail data to same date range.", { expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") }) + +test_that("Cut data to same date range with one empty data source.", { + + ## configurations + + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + + ## in order to properly test whether the data types of timestamps originating from empty data are correct, + ## ensure that the first provided data source contains empty data. 
This is important as R usually uses the + ## first entry of a data frame to determine the data type of it. However, the data type of the project timestamps + ## should be properly set regardless of the empty data. + proj.conf$update.value("issues.locked", TRUE) + data.sources = c("issues", "mails", "commits") + + ## construct objects + + x.data = ProjectData$new(proj.conf) + x.data$set.issues(NULL) + + commit.data.expected = data.frame(commit.id = sprintf("", c(32712, 32713)), + date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")), + author.name = c("Björn", "Olaf"), + author.email = c("bjoern@example.org", "olaf@example.org"), + committer.date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-20 10:00:44")), + committer.name = c("Björn", "Björn"), + committer.email = c("bjoern@example.org", "bjoern@example.org"), + hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + changed.files = as.integer(c(1, 1)), + added.lines = as.integer(c(1, 1)), + deleted.lines = as.integer(c(1, 0)), + diff.size = as.integer(c(2, 1)), + file = c("test.c", "test.c"), + artifact = c("A", "A"), + artifact.type = c("Feature", "Feature"), + artifact.diff.size = as.integer(c(1, 1))) + + mail.data.expected = data.frame(author.name = c("Thomas", "Olaf"), + author.email = c("thomas@example.org", "olaf@example.org"), + message.id = c("<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), + date = get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")), + date.offset = as.integer(c(100, 200)), + subject = c("Re: Fw: busybox 2 tab", "Re: Fw: busybox 10"), + thread = sprintf("", c("13#9", "13#9")), + artifact.type = c("Mail", "Mail")) + + issue.data.expected = create.empty.issues.list() + + commit.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.commits.unfiltered() + rownames(commit.data) = 1:nrow(commit.data) + + mail.data = 
x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.mails() + rownames(mail.data) = 1:nrow(mail.data) + + issue.data = x.data$get.data.cut.to.same.date(data.sources = data.sources)$get.issues() + + expect_identical(commit.data, commit.data.expected, info = "Cut Raw commit data.") + expect_identical(mail.data, mail.data.expected, info = "Cut mail data.") + expect_identical(issue.data, issue.data.expected, info = "Cut issue data (empty).") + +}) From 7c525b9c4f18dd1eb0e961d4fb47c34adffcb804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 16 Oct 2024 16:52:08 +0200 Subject: [PATCH 090/130] Define 'PATCHSTACK.MAIL.DECAY.THRESHOLD' as Duration object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- util-data.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/util-data.R b/util-data.R index 6edc899b..f137a928 100644 --- a/util-data.R +++ b/util-data.R @@ -25,7 +25,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann -## Copyright 2022-2023 by Maximilian Löffler +## Copyright 2022-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -113,7 +113,7 @@ DATASOURCE.TO.ARTIFACT.COLUMN = list( ## the maximum time difference between subsequent mails of a patchstack -PATCHSTACK.MAIL.DECAY.THRESHOLD = "30 seconds" +PATCHSTACK.MAIL.DECAY.THRESHOLD = lubridate::as.duration("30 seconds") ## configuration parameters that do not reset the environment when changed CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages", @@ -283,8 +283,7 @@ ProjectData = R6::R6Class("ProjectData", ## of 'PATCHSTACK.MAIL.DECAY.THRESHOLD' while (i < nrow(thread) && running) { if (thread[1, "author.name"] == thread[i + 1, "author.name"] && - thread[i + 1, "date"] - thread[i, "date"] <= - lubridate::as.duration(PATCHSTACK.MAIL.DECAY.THRESHOLD)) { + thread[i + 1, "date"] - thread[i, "date"] <= PATCHSTACK.MAIL.DECAY.THRESHOLD) { i = i + 1 } else { running = FALSE From 1d1fe7fc0d12c1a9a7c1c1d843a4cdb7e28d96a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Fri, 18 Oct 2024 19:14:03 +0200 Subject: [PATCH 091/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 3756263b..d4e457bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,7 @@ ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) +- Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data and that leads to a return value of type numeric which should be POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd) ## 4.4 From e3617b8c6b21fb4242c1d392124813501069ca84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Fri, 8 Nov 2024 16:26:23 +0100 Subject: [PATCH 092/130] Replace deprecated igraph functions MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- showcase.R | 2 +- tests/test-networks-commit.R | 13 +++--- tests/test-networks.R | 2 +- tests/test-split-misc.R | 10 ++--- tests/test-split-network-activity-based.R | 54 +++++++++++------------ tests/test-split-network-time-based.R | 38 ++++++++-------- util-misc.R | 2 +- util-networks-covariates.R | 2 +- util-networks.R | 8 ++-- util-split.R | 2 +- 10 files changed, 68 insertions(+), 65 deletions(-) diff --git a/showcase.R b/showcase.R index 4cb95d4a..9ab5934a 100644 --- a/showcase.R +++ b/showcase.R @@ -404,7 +404,7 @@ plot.network(g.simplified) ## construct sample network for plotting g = get.sample.network() -g = igraph::as.directed(g, mode = "arbitrary") +g = igraph::as_directed(g, mode = "arbitrary") g = g + igraph::edges("A6", "A5", type = TYPE.EDGES.INTRA, weight = 2, relation = "callgraph", artifact.type = "Feature") g = simplify.network(g) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index 7de34eed..b992846e 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -12,6 +12,7 @@ ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## ## Copyright 2024 by Leo Sendelbach +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -81,7 +82,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) network.new.attr = add.vertex.attribute.commit.network(network.built, proj.data, "deleted.lines", "NO_DATA") @@ -131,7 +132,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati if (test.directed) { edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -186,7 +187,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati if (test.directed) { edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -237,7 +238,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati if (test.directed) { edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } - network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -291,7 +292,7 @@ test_that("Adding 
vertex attributes to a commit network", { relation = c("cochange", "cochange", "cochange", "cochange") ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) expect_true(igraph::identical_graphs(network.new.attr, network)) @@ -332,7 +333,7 @@ test_that("Adding vertex attributes to a commit network", { relation = c("cochange", "cochange", "cochange", "cochange") ) - network.two = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) expect_true(igraph::identical_graphs(network.new.attr, network.two)) }) \ No newline at end of file diff --git a/tests/test-networks.R b/tests/test-networks.R index feee53e9..96069a3c 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -632,7 +632,7 @@ test_that("Construction of networks from empty edge list (with vertices)", { ## net.constructed = construct.network.from.edge.list(vertices, edge.list, net.conf) - net.expected = igraph::graph.empty(n = 0, directed = directed) + + net.expected = igraph::make_empty_graph(n = 0, directed = directed) + igraph::vertices(vertices.as.sequence) + igraph::edges(edge.list.as.sequence, weight = 1) diff --git a/tests/test-split-misc.R b/tests/test-split-misc.R index 78ceb438..da63b069 100644 --- a/tests/test-split-misc.R +++ b/tests/test-split-misc.R @@ -109,11 +109,11 @@ test_that("Split network and data on low level (split.dataframe.by.bins, split.n ## results expected = list( - igraph::subgraph.edges(net, c(1, 5, 7)), - igraph::subgraph.edges(net, c(9, 12)), - igraph::subgraph.edges(net, c(2, 6, 8, 14)), - igraph::subgraph.edges(net, c(4, 11, 13)), - igraph::subgraph.edges(net, c(3, 10, 15)) + igraph::subgraph_from_edges(net, c(1, 5, 7)), + igraph::subgraph_from_edges(net, c(9, 12)), + igraph::subgraph_from_edges(net, c(2, 6, 8, 14)), + 
igraph::subgraph_from_edges(net, c(4, 11, 13)), + igraph::subgraph_from_edges(net, c(3, 10, 15)) ) results = split.network.by.bins(net, bins, bins.vector) diff --git a/tests/test-split-network-activity-based.R b/tests/test-split-network-activity-based.R index 5c903641..c044cb86 100644 --- a/tests/test-split-network-activity-based.R +++ b/tests/test-split-network-activity-based.R @@ -62,10 +62,10 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, c(1, 2)), - "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph.edges(author.net, c(3, 5)), - "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph.edges(author.net, c(4, 7)), - "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(6, 8)) + "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(1, 2)), + "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph_from_edges(author.net, c(3, 5)), + "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(4, 7)), + "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(6, 8)) ) results = split.network.activity.based(author.net, number.edges = 2) @@ -89,7 +89,7 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(1:igraph::ecount(author.net))) + "2016-07-12 15:58:59-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(1:igraph::ecount(author.net))) ) results = split.network.activity.based(author.net, number.edges = igraph::ecount(author.net) + 10) @@ -112,9 +112,9 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, 
c(1, 2, 3)), - "2016-07-12 16:05:41-2016-07-12 16:06:32" = igraph::subgraph.edges(author.net, c(4, 5, 7)), - "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(6, 8)) + "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(1, 2, 3)), + "2016-07-12 16:05:41-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(4, 5, 7)), + "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(6, 8)) ) results = split.network.activity.based(author.net, number.windows = 3) @@ -172,13 +172,13 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, c(1, 2)), - "2016-07-12 16:00:45-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, c(2, 3)), - "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph.edges(author.net, c(3, 5)), - "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph.edges(author.net, c(5, 4)), - "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph.edges(author.net, c(4, 7)), - "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph.edges(author.net, c(7, 6)), - "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(6, 8)) + "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(1, 2)), + "2016-07-12 16:00:45-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(2, 3)), + "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph_from_edges(author.net, c(3, 5)), + "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph_from_edges(author.net, c(5, 4)), + "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(4, 7)), + "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(7, 6)), + "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(6, 
8)) ) results = split.network.activity.based(author.net, number.edges = 2, sliding.window = TRUE) @@ -203,7 +203,7 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(1:igraph::ecount(author.net))) + "2016-07-12 15:58:59-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(1:igraph::ecount(author.net))) ) results = split.network.activity.based(author.net, number.edges = igraph::ecount(author.net) + 10, sliding.window = TRUE) @@ -227,9 +227,9 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, c(1, 2, 3)), - "2016-07-12 16:05:41-2016-07-12 16:06:32" = igraph::subgraph.edges(author.net, c(4, 5, 7)), - "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(6, 8)) + "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(1, 2, 3)), + "2016-07-12 16:05:41-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(4, 5, 7)), + "2016-07-12 16:06:32-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(6, 8)) ) results = split.network.activity.based(author.net, number.windows = 3, sliding.window = TRUE) @@ -284,14 +284,14 @@ patrick::with_parameters_test_that("Split a network activity-based (number.edges ## results expected = list( - "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, c(1, 2)), - "2016-07-12 16:00:45-2016-07-12 16:05:41" = igraph::subgraph.edges(author.net, c(2, 3)), - "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph.edges(author.net, c(3, 5)), - "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph.edges(author.net, c(5, 4)), - "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph.edges(author.net, c(4, 7)), - "2016-07-12 16:06:10-2016-07-12 
16:06:32" = igraph::subgraph.edges(author.net, c(7, 6)), - "2016-07-12 16:06:32-2020-02-20 20:20:20" = igraph::subgraph.edges(author.net, c(6, 8)), - "2016-07-12 16:06:32-2020-02-20 20:20:21" = igraph::subgraph.edges(author.net, c(8, 9)) + "2016-07-12 15:58:59-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(1, 2)), + "2016-07-12 16:00:45-2016-07-12 16:05:41" = igraph::subgraph_from_edges(author.net, c(2, 3)), + "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph_from_edges(author.net, c(3, 5)), + "2016-07-12 16:05:41-2016-07-12 16:06:10" = igraph::subgraph_from_edges(author.net, c(5, 4)), + "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(4, 7)), + "2016-07-12 16:06:10-2016-07-12 16:06:32" = igraph::subgraph_from_edges(author.net, c(7, 6)), + "2016-07-12 16:06:32-2020-02-20 20:20:20" = igraph::subgraph_from_edges(author.net, c(6, 8)), + "2016-07-12 16:06:32-2020-02-20 20:20:21" = igraph::subgraph_from_edges(author.net, c(8, 9)) ) results = split.network.activity.based(author.net, number.edges = 2, sliding.window = TRUE) diff --git a/tests/test-split-network-time-based.R b/tests/test-split-network-time-based.R index c878d4cb..b8b10279 100644 --- a/tests/test-split-network-time-based.R +++ b/tests/test-split-network-time-based.R @@ -67,10 +67,10 @@ patrick::with_parameters_test_that("Split a network time-based (time.period = .. 
author.net = net.builder$get.author.network() expected = list( - "2016-07-12 15:58:59-2016-07-12 16:00:59" = igraph::subgraph.edges(author.net, c(1:2)), - "2016-07-12 16:00:59-2016-07-12 16:02:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:02:59-2016-07-12 16:04:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:04:59-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(3:8)) + "2016-07-12 15:58:59-2016-07-12 16:00:59" = igraph::subgraph_from_edges(author.net, c(1:2)), + "2016-07-12 16:00:59-2016-07-12 16:02:59" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:02:59-2016-07-12 16:04:59" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:04:59-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(3:8)) ) results = split.network.time.based(author.net, time.period = "2 mins") @@ -189,13 +189,13 @@ patrick::with_parameters_test_that("Split a network time-based (time.period = .. author.net = net.builder$get.author.network() expected = list( - "2016-07-12 15:58:59-2016-07-12 16:00:59" = igraph::subgraph.edges(author.net, c(1:2)), - "2016-07-12 15:59:59-2016-07-12 16:01:59" = igraph::subgraph.edges(author.net, c(2)), - "2016-07-12 16:00:59-2016-07-12 16:02:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:01:59-2016-07-12 16:03:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:02:59-2016-07-12 16:04:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:03:59-2016-07-12 16:05:59" = igraph::subgraph.edges(author.net, c(3,5)), - "2016-07-12 16:04:59-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(3:8)) + "2016-07-12 15:58:59-2016-07-12 16:00:59" = igraph::subgraph_from_edges(author.net, c(1:2)), + "2016-07-12 15:59:59-2016-07-12 16:01:59" = igraph::subgraph_from_edges(author.net, c(2)), + "2016-07-12 16:00:59-2016-07-12 16:02:59" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:01:59-2016-07-12 16:03:59" = 
igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:02:59-2016-07-12 16:04:59" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:03:59-2016-07-12 16:05:59" = igraph::subgraph_from_edges(author.net, c(3,5)), + "2016-07-12 16:04:59-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(3:8)) ) results = split.network.time.based(author.net, time.period = "2 mins", sliding.window = TRUE) @@ -264,10 +264,10 @@ patrick::with_parameters_test_that("Split a network time-based (bins = ...), ", ## results expected = list( - "2016-07-12 15:58:00-2016-07-12 16:00:59" = igraph::subgraph.edges(author.net, c(1:2)), - "2016-07-12 16:00:59-2016-07-12 16:02:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:02:59-2016-07-12 16:04:59" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:04:59-2016-07-12 17:21:43" = igraph::subgraph.edges(author.net, c(3:8)) + "2016-07-12 15:58:00-2016-07-12 16:00:59" = igraph::subgraph_from_edges(author.net, c(1:2)), + "2016-07-12 16:00:59-2016-07-12 16:02:59" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:02:59-2016-07-12 16:04:59" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:04:59-2016-07-12 17:21:43" = igraph::subgraph_from_edges(author.net, c(3:8)) ) results = split.network.time.based(author.net, bins = bins, sliding.window = test.sliding.window) @@ -377,10 +377,10 @@ patrick::with_parameters_test_that("Split a network time-based with equal-sized author.net = net.builder$get.author.network() expected = list( - "2016-07-12 15:58:59-2016-07-12 16:00:53" = igraph::subgraph.edges(author.net, c(1:2)), - "2016-07-12 16:00:53-2016-07-12 16:02:47" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:02:47-2016-07-12 16:04:41" = igraph::subgraph.edges(author.net, c()), - "2016-07-12 16:04:41-2016-07-12 16:06:33" = igraph::subgraph.edges(author.net, c(3:8)) + "2016-07-12 15:58:59-2016-07-12 16:00:53" = igraph::subgraph_from_edges(author.net, c(1:2)), + 
"2016-07-12 16:00:53-2016-07-12 16:02:47" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:02:47-2016-07-12 16:04:41" = igraph::subgraph_from_edges(author.net, c()), + "2016-07-12 16:04:41-2016-07-12 16:06:33" = igraph::subgraph_from_edges(author.net, c(3:8)) ) results = split.network.time.based(author.net, number.windows = 4) diff --git a/util-misc.R b/util-misc.R index 65e64f94..97900539 100644 --- a/util-misc.R +++ b/util-misc.R @@ -47,7 +47,7 @@ requireNamespace("lubridate") # for date conversion #' @return the new edgelist get.edgelist.with.timestamps = function(net) { ## get edge list as data.frame - edges = as.data.frame(igraph::get.edgelist(net)) + edges = as.data.frame(igraph::as_edgelist(net)) colnames(edges) = c("from", "to") ## get timestamps dates = igraph::edge_attr(net, "date") diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 700b5e9f..b1be9f28 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -173,7 +173,7 @@ add.vertex.attribute.commit.network = function(network, project.data, } attribute.values = c(attribute.values, value) } - net.with.attr = igraph::set.vertex.attribute(network, attr.name, value = attribute.values) + net.with.attr = igraph::set_vertex_attr(network, attr.name, value = attribute.values) } diff --git a/util-networks.R b/util-networks.R index 1afad546..648fbac9 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1263,7 +1263,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", if (igraph::is_directed(authors.net) && !igraph::is_directed(artifacts.net)) { logging::logwarn(paste0("Author network is directed, but artifact network is not.", "Converting artifact network...")) - artifacts.net = igraph::as.directed(artifacts.net, mode = "mutual") + artifacts.net = igraph::as_directed(artifacts.net, mode = "mutual") } else if (!igraph::is_directed(authors.net) && igraph::is_directed(artifacts.net)) { logging::logwarn(paste0("Author network is undirected, but artifact 
network is not.", "Converting artifact network...")) @@ -1807,7 +1807,7 @@ add.edges.for.bipartite.relation = function(net, bipartite.relations, network.co #' @return the new empty network create.empty.network = function(directed = TRUE, add.attributes = FALSE) { ## create empty network - net = igraph::graph.empty(0, directed = directed) + net = igraph::make_empty_graph(0, directed = directed) # set proper attributes if wanted if (add.attributes) { @@ -2074,7 +2074,9 @@ extract.bipartite.network.from.network = function(network, remove.isolates = FAL } ## only retain all bipartite edges and induced vertices - bip.network = igraph::subgraph.edges(network, igraph::E(network)[type == TYPE.EDGES.INTER], delete.vertices = remove.isolates) + bip.network = igraph::subgraph_from_edges(network, + igraph::E(network)[type == TYPE.EDGES.INTER], + delete.vertices = remove.isolates) return(bip.network) } diff --git a/util-split.R b/util-split.R index 0f15ad13..01eda629 100644 --- a/util-split.R +++ b/util-split.R @@ -876,7 +876,7 @@ split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, r ## identify edges in the current bin edges = igraph::E(network)[ bins.vector == bin ] ## create network based on the current set of edges - g = igraph::subgraph.edges(network, edges, delete.vertices = remove.isolates) + g = igraph::subgraph_from_edges(network, edges, delete.vertices = remove.isolates) return(g) }) ## set 'bins' attribute, if specified From f591528a0f1f11b1a4390949ab770f3f74a766f9 Mon Sep 17 00:00:00 2001 From: Christian Hechtl Date: Thu, 24 Oct 2024 16:16:22 +0200 Subject: [PATCH 093/130] Fix read.commit.interactions() The interaction reading breaks if an interaction is empty in the data. This commit adds a check for empty interactions. 
Signed-off-by: Christian Hechtl --- util-read.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util-read.R b/util-read.R index 06c082e5..a984ca18 100644 --- a/util-read.R +++ b/util-read.R @@ -949,6 +949,10 @@ read.commit.interactions = function(data.path = NULL) { interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) + ## Return an empty data frame if the current interaction is empty (i.e., if an interaction is not existent). + if (nrow(interactions) == 0) { + return(data.frame()) + } ## Initialize author data as 'NA', since it is not available from the commit-interaction data. ## Author data will be merged from commit data in \code{update.commit.interactions}. interactions["base.author"] = NA_character_ From 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Wed, 30 Oct 2024 14:57:31 +0100 Subject: [PATCH 094/130] Adjust `metrics.scale.freeness` to igraph changes Since version 2.0.0 of package `igraph`, the function `igraph::fit_power_law` (and its deprecated alias `igraph::power.law.fit`) does not automatically compute the p-value any more, which is needed by `metrics.scale.freeness`. In version 2.1.1 of package `igraph`, they have added a new parameter `p.value` to provide the possibility to enable p-value computation again. In addition, they have added a parameter `p.precision` which defaults to a precision of 0.01. To make `metrics.scale.freeness` work again with the most recent version of package `igraph`, the deprecated function calls to `igraph::power.law.fit` are replaced by `igraph::fit_power_law` and are adjusted to use the new parameter `p.value` to automatically compute the p-value again. However, we do not make use of `p.precision` and rely on the default precision instead.
Signed-off-by: Thomas Bock --- util-networks-metrics.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index dcdbbcf1..bf391bcf 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -234,7 +234,7 @@ metrics.scale.freeness = function(network, minimum.number.vertices = 30) { ## Power-law fiting ## (by Mitchell Joblin , Siemens AG, 2012, 2013) - p.fit = igraph::power.law.fit(v.degree, implementation = "plfit") + p.fit = igraph::fit_power_law(v.degree, implementation = "plfit", p.value = TRUE) param.names = c("alpha", "xmin", "KS.p") res = list() res[param.names] = p.fit[param.names] @@ -250,7 +250,7 @@ metrics.scale.freeness = function(network, minimum.number.vertices = 30) { & non.zero.degree.v.count >= minimum.number.vertices) { ## vertex degree is sorted above x.min = v.degree[[minimum.number.vertices]] - p.fit = power.law.fit(v.degree, implementation = "plfit", xmin = x.min) + p.fit = igraph::fit_power_law(v.degree, implementation = "plfit", xmin = x.min, p.value = TRUE) res[param.names] = p.fit[param.names] ## Check percent of vertices under power-law From 87911ade231c44b93be194a1d6734f7de043a4af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 12 Nov 2024 14:32:20 +0100 Subject: [PATCH 095/130] Add range information to splits in 'split.network.time.based.by.ranges' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes splitting by ranges more consistent with time-based splitting. This works towards #273. 
Signed-off-by: Maximilian Löffler --- util-split.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/util-split.R b/util-split.R index 01eda629..04626ac2 100644 --- a/util-split.R +++ b/util-split.R @@ -834,6 +834,11 @@ split.network.time.based.by.ranges = function(network, ranges, remove.isolates = } ) + # add range information + if (is.null(names(nets.split))) { + names(nets.split) = ranges + } + ## convert ranges to bins bins = get.bin.dates.from.ranges(ranges.bounds) attr(nets.split, "bins") = bins From 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 3 Dec 2024 15:20:38 +0100 Subject: [PATCH 096/130] Convert edge attributes to list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since igraph version 2.1, when joining networks using 'igraph::disjoint_union', edge attributes of the joining networks require identical types. As simplifying networks necessarily converts types of edge attributes to list when merging edges, attributes now have to be of type list by default. Edge attributes that are explicitly considered during simplification and, therefore, are not converted to lists are excluded from this rule. This works towards fixing #271.
Signed-off-by: Maximilian Löffler --- util-networks.R | 60 +++++++++++++++++++++++++++++++++++-------------- util-split.R | 8 ++++--- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/util-networks.R b/util-networks.R index 648fbac9..06e65155 100644 --- a/util-networks.R +++ b/util-networks.R @@ -978,6 +978,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", attr(net, "range") = private$proj.data$get.range() } + net = convert.edge.attributes.to.list(net) return(net) }, @@ -1026,6 +1027,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", attr(net, "range") = private$proj.data$get.range() } + net = convert.edge.attributes.to.list(net) return(net) }, @@ -1068,6 +1070,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", attr(net, "range") = private$proj.data$get.range() } + net = convert.edge.attributes.to.list(net) return(net) }, @@ -1174,6 +1177,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", attr(network, "range") = private$proj.data$get.range() } + network = convert.edge.attributes.to.list(network) return(network) }, @@ -1279,22 +1283,6 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## 1) merge the existing networks u = igraph::disjoint_union(authors.net, artifacts.net) - ## As there is a bug in 'igraph::disjoint_union' in igraph from its version 1.4.0 on, which is still - ## present, at least, until its version 2.0.3 (see https://github.com/igraph/rigraph/issues/761), we need - ## to adjust the type of the date attribute of the outcome of 'igraph::disjoint_union'. - ## Note: The following temporary fix only considers the 'date' attribute. However, this problem could also - ## affect several other attributes, whose classes are not adjusted in our temporary fix. - ## The following code block should be redundant as soon as igraph has fixed their bug. 
- u.actual.edge.attribute.date = igraph::edge_attr(u, "date") - if (!is.null(u.actual.edge.attribute.date)) { - if (is.list(u.actual.edge.attribute.date)) { - u.expected.edge.attribute.date = lapply(u.actual.edge.attribute.date, get.date.from.unix.timestamp) - } else { - u.expected.edge.attribute.date = get.date.from.unix.timestamp(u.actual.edge.attribute.date) - } - u = igraph::set_edge_attr(u, "date", value = u.expected.edge.attribute.date) - } - ## 2) add the bipartite edges u = add.edges.for.bipartite.relation(u, authors.to.artifacts, private$network.conf) @@ -1632,6 +1620,7 @@ construct.network.from.edge.list = function(vertices, edge.list, network.conf, d ## initialize edge weights net = igraph::set_edge_attr(net, "weight", value = 1) + net = convert.edge.attributes.to.list(net) logging::logdebug("construct.network.from.edge.list: finished.") @@ -1792,6 +1781,15 @@ add.edges.for.bipartite.relation = function(net, bipartite.relations, network.co extra.edge.attributes["type"] = TYPE.EDGES.INTER # add egde type extra.edge.attributes["relation"] = relation # add relation type + ## Convert edge attributes to list similarly to 'convert.edge.attributes.to.list'. + ## We cannot use 'convert.edge.attributes.to.list', as we operate on edge + ## data directly, instead of a network. 
+ edge.attrs = names(extra.edge.attributes) + which.attrs = !(edge.attrs %in% names(EDGE.ATTR.HANDLING)) + for (attr in edge.attrs[which.attrs]) { + extra.edge.attributes[[attr]] = as.list(extra.edge.attributes[[attr]]) + } + ## add the vertex sequences as edges to the network net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) } @@ -1812,7 +1810,7 @@ create.empty.network = function(directed = TRUE, add.attributes = FALSE) { # set proper attributes if wanted if (add.attributes) { mandatory.edge.attributes.classes = list( - date = c("POSIXct", "POSIXt"), artifact.type = "character", weight = "numeric", + date = "list", artifact.type = "list", weight = "numeric", type = "character", relation = "character" ) mandatory.vertex.attributes.classes = list(name = "character", kind = "character", type = "character") @@ -2146,6 +2144,34 @@ get.data.sources.from.relations = function(network) { return(data.sources) } +#' Convert edge attributes to list type. +#' +#' This conversion is necessary to ensure merging networks works in all cases, +#' especially when merging simplified networks with unsimplified networks as +#' simplification may convert edge attributes to list type. Attributes that are +#' explicitly considered during simplification (through EDGE.ATTR.HANDLING) +#' generally do not need to be converted. 
+#' +#' @param network the network of which the edge attributes are to be converted +#' @param remain.as.is the edge attributes to remain as they are +#' [default: names(EDGE.ATTR.HANDLING)] +#' +#' @return the network with converted edge attributes +convert.edge.attributes.to.list = function(network, remain.as.is = names(EDGE.ATTR.HANDLING)) { + + ## get edge attributes + edge.attrs = igraph::edge_attr_names(network) + which.attrs = !(edge.attrs %in% remain.as.is) + + ## convert edge attributes to list type + for (attr in edge.attrs[which.attrs]) { + list.attr = as.list(igraph::edge_attr(network, attr)) + network = igraph::set_edge_attr(network, attr, value = list.attr) + } + + return(network) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Sample network ---------------------------------------------------------- diff --git a/util-split.R b/util-split.R index 04626ac2..464ce2d1 100644 --- a/util-split.R +++ b/util-split.R @@ -524,7 +524,8 @@ split.network.time.based = function(network, time.period = "3 months", bins = NU number.windows = NULL, sliding.window = FALSE, remove.isolates = TRUE) { ## extract date attributes from edges - dates = get.date.from.unix.timestamp(igraph::edge_attr(network, "date")) + dates = do.call(base::c, igraph::edge_attr(network, "date")) + dates = get.date.from.unix.timestamp(dates) ## number of windows given (ignoring time period and bins) if (!is.null(number.windows)) { @@ -619,7 +620,7 @@ split.networks.time.based = function(networks, time.period = "3 months", bins = dates = igraph::E(net)$date return(dates) }) - dates = unlist(networks.dates, recursive = FALSE) + dates = unlist(networks.dates) dates = get.date.from.unix.timestamp(dates) ## 2) get bin information @@ -708,8 +709,9 @@ split.network.activity.based = function(network, number.edges = 5000, number.win number.edges, number.windows) ## get dates in a data.frame for splitting purposes + dates = do.call(base::c, igraph::edge_attr(network, 
"date")) df = data.frame( - date = get.date.from.unix.timestamp(igraph::edge_attr(network, "date")), + date = get.date.from.unix.timestamp(dates), my.unique.id = seq_len(edge.count) # as a unique identifier only ) ## sort by date From eda30b838369ec46376812298a3ea8159eec5789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 3 Dec 2024 15:24:01 +0100 Subject: [PATCH 097/130] Adjust tests to expect edge attributes as lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the tests in accordance to converting edge attributes to list type in the implementation. This works towards fixing #271. Signed-off-by: Maximilian Löffler --- tests/test-networks-artifact.R | 5 + tests/test-networks-author.R | 35 +- tests/test-networks-bipartite.R | 10 + tests/test-networks-commit.R | 6 + tests/test-networks-equal-constructions.R | 4 +- tests/test-networks-multi-relation.R | 568 ++++++++++++---------- tests/test-networks-multi.R | 13 +- tests/test-networks.R | 108 ++-- tests/test-split-misc.R | 1 + 9 files changed, 427 insertions(+), 323 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 1d847b54..56300d5a 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -57,6 +57,7 @@ test_that("Network construction of the undirected artifact-cochange network", { ) ## 3) build expected network network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) ## @@ -158,6 +159,7 @@ patrick::with_parameters_test_that("Network construction of an issue-based artif ## build expected network network.expected = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) ## build network network.built = network.builder$get.artifact.network() @@ -207,6 +209,7 
@@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o ## build expected network network.expected = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) ## test assert.networks.equal(network.built, network.expected) @@ -258,6 +261,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -308,6 +312,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index d343a0c5..43fb347f 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -232,6 +232,7 @@ test_that("Network construction of the undirected author-cochange network", { ) ## 3) build expected network network.expected = igraph::graph_from_data_frame(data, directed = FALSE, vertices = authors) + network.expected = convert.edge.attributes.to.list(network.expected) ## @@ -316,6 +317,7 @@ test_that("Network construction of the undirected but temorally ordered author-c ## build expected network network.expected = igraph::graph_from_data_frame(data, directed = FALSE, vertices = authors) + network.expected = convert.edge.attributes.to.list(network.expected) 
expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -357,6 +359,7 @@ test_that("Network construction of the directed author-cochange network", { ## build expected network network.expected = igraph::graph_from_data_frame(data, directed = TRUE, vertices = authors) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -403,6 +406,7 @@ test_that("Network construction of the directed author-cochange network without ## build expected network network.expected = igraph::graph_from_data_frame(data, directed = TRUE, vertices = authors) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -431,25 +435,26 @@ test_that("Network construction of the undirected simplified author-cochange net date.attr = igraph::edge_attr(network.built, "date") date.conversion.function = ifelse(all(sapply(date.attr, lubridate::is.POSIXct)), get.date.from.unix.timestamp, identity) + date.conversion.function = get.date.from.unix.timestamp ## edge attributes data = data.frame( from = c("Björn", "Olaf", "Olaf", "Karl"), to = c("Olaf", "Karl", "Thomas", "Thomas"), - date = I(list(date.conversion.function(c(1468339139, 1468339245)), - date.conversion.function(c(1468339541, 1468339570)), - date.conversion.function(c(1468339541, 1468339592)), - date.conversion.function(c(1468339570, 1468339592)))), - artifact.type = I(list(c("Feature", "Feature"), c("Feature", "Feature"), c("Feature", "Feature"), - c("Feature", "Feature"))), + date = I(list(as.list(date.conversion.function(c(1468339139, 1468339245))), + as.list(date.conversion.function(c(1468339541, 1468339570))), + as.list(date.conversion.function(c(1468339541, 1468339592))), + as.list(date.conversion.function(c(1468339570, 1468339592))))), + artifact.type = I(list(list("Feature", "Feature"), list("Feature", "Feature"), list("Feature", 
"Feature"), + list("Feature", "Feature"))), hash = I(list( - c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), - c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), - c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))), - file = I(list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"))), - artifact = I(list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), - c("Base_Feature", "Base_Feature"))), + list("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + list("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + list("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + list("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))), + file = I(list(list("test.c", "test.c"), list("test2.c", "test3.c"), list("test2.c", "test2.c"), list("test3.c", "test2.c"))), + artifact = I(list(list("A", "A"), list("Base_Feature", "Base_Feature"), list("Base_Feature", "Base_Feature"), + list("Base_Feature", "Base_Feature"))), weight = 2, type = TYPE.EDGES.INTRA, relation = "cochange" @@ -589,6 +594,7 @@ test_that("Network construction of the undirected author-issue network with all ## build expected network network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -649,6 +655,7 @@ test_that("Network construction of the undirected author-issue network with just ## build expected network network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = 
vertices) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -677,6 +684,7 @@ test_that("Network construction with only untracked files (no edges expected)", vertices = list(name = c("Karl", "Thomas"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR) network.expected = create.empty.network(directed = FALSE, add.attributes = TRUE) network.expected = igraph::add_vertices(network.expected, nv = max(lengths(vertices)), attr = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) ## test expect_true(igraph::identical_graphs(network.built, network.expected)) @@ -726,6 +734,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( diff --git a/tests/test-networks-bipartite.R b/tests/test-networks-bipartite.R index c6725281..3c6fd3b4 100644 --- a/tests/test-networks-bipartite.R +++ b/tests/test-networks-bipartite.R @@ -84,6 +84,7 @@ test_that("Construction of the bipartite network for the feature artifact with a ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -137,6 +138,7 @@ test_that("Construction of the bipartite network for the file artifact with auth ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + 
network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -189,6 +191,7 @@ test_that("Construction of the bipartite network for the function artifact with ) ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -240,6 +243,7 @@ test_that("Construction of the bipartite network for the featureexpression artif ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -309,6 +313,7 @@ test_that("Construction of the bipartite network for the feature artifact with a ) ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, directed = net.conf$get.value("author.directed"), vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -362,6 +367,7 @@ test_that("Construction of the directed bipartite network for the feature artifa ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -415,6 +421,7 @@ test_that("Construction of the directed bipartite network for the file artifact ## 3) construct expected network network.expected = 
igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -469,6 +476,7 @@ test_that("Construction of the directed bipartite network for the function artif ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -521,6 +529,7 @@ test_that("Construction of the directed bipartite network for the featureexpress ## 3) construct expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) + network.expected = convert.edge.attributes.to.list(network.expected) expect_true(igraph::identical_graphs(network.built, network.expected)) }) @@ -563,6 +572,7 @@ test_that("Network construction with only untracked files (no edges and artifact directed = net.conf$get.value("author.directed")) ## 4) remove edge again network.expected = igraph::delete_edges(network.expected, 1) + network.expected = convert.edge.attributes.to.list(network.expected) ## test expect_true(igraph::identical_graphs(network.built, network.expected)) diff --git a/tests/test-networks-commit.R b/tests/test-networks-commit.R index b992846e..e5c39672 100644 --- a/tests/test-networks-commit.R +++ b/tests/test-networks-commit.R @@ -83,6 +83,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = 
convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) network.new.attr = add.vertex.attribute.commit.network(network.built, proj.data, "deleted.lines", "NO_DATA") @@ -133,6 +134,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -188,6 +190,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -239,6 +242,7 @@ patrick::with_parameters_test_that("Network construction with cochange as relati edges <- edges[, c(2, 1, 3, 4, 5, 6, 7, 8), ] } network = igraph::graph_from_data_frame(edges, directed = test.directed, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.built, network)) }, patrick::cases( @@ -293,6 +297,7 @@ test_that("Adding vertex attributes to a commit network", { ) network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + network = convert.edge.attributes.to.list(network) expect_true(igraph::identical_graphs(network.new.attr, network)) @@ -334,6 +339,7 @@ test_that("Adding vertex attributes to a commit network", { ) network.two = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + network.two = convert.edge.attributes.to.list(network.two) expect_true(igraph::identical_graphs(network.new.attr, network.two)) }) \ No newline at end of file diff --git 
a/tests/test-networks-equal-constructions.R b/tests/test-networks-equal-constructions.R index eae2bf3b..e64972da 100644 --- a/tests/test-networks-equal-constructions.R +++ b/tests/test-networks-equal-constructions.R @@ -42,12 +42,12 @@ compare.edge.and.vertex.lists = function(split.networks.one, split.networks.two) for (i in seq_along(split.networks.one)) { edges.one = igraph::as_data_frame(split.networks.one[[i]], what = "edges") ordering = order(edges.one[["from"]], edges.one[["to"]], - edges.one[["date"]]) + as.vector(edges.one[["date"]], "numeric")) edges.one = edges.one[ordering, ] rownames(edges.one) = seq_len(nrow(edges.one)) edges.two = igraph::as_data_frame(split.networks.two[[i]], what = "edges") ordering = order(edges.two[["from"]], edges.two[["to"]], - edges.two[["date"]]) + as.vector(edges.two[["date"]], "numeric")) edges.two = edges.two[ordering, ] rownames(edges.two) = seq_len(nrow(edges.two)) vertices.one = igraph::as_data_frame(split.networks.one[[i]], what = "vertices") diff --git a/tests/test-networks-multi-relation.R b/tests/test-networks-multi-relation.R index f215ae4b..d536cade 100644 --- a/tests/test-networks-multi-relation.R +++ b/tests/test-networks-multi-relation.R @@ -63,34 +63,44 @@ test_that("Network construction of the undirected author network with relation = "Björn", "Björn", "Olaf", "Olaf"), # mail comb.2. 
= c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas", # cochange "Olaf", "Olaf", "Thomas", "Thomas"), # mail - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # cochange - "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", - "2016-07-12 16:06:10", "2016-07-12 16:06:32", - "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", # mail - "2016-07-12 16:05:37")), - artifact.type = c(rep("Feature", 8), # cochange - rep("Mail", 4)), # mail - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", - rep(NA, 4)), - file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", - rep(NA, 4)), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", - rep(NA, 4)), + date = I(as.list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # cochange + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", # mail + "2016-07-12 16:05:37")))), + artifact.type = I(c(as.list(rep("Feature", 8)), # cochange + as.list(rep("Mail", 4)))), # mail + hash = I(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + 
as.list(rep(NA, 4)))), + file = I(c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", + as.list(rep(NA, 4)))), + artifact = I(c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + as.list(rep(NA, 4)))), weight = 1, type = TYPE.EDGES.INTRA, relation = c(rep("cochange", 8), rep("mail", 4)), - message.id = c(NA, NA, NA, NA, NA, NA, NA, NA, - "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", - "<65a1sf31sagd684dfv31@mail.gmail.com>", "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), - thread = c(NA, NA, NA, NA, NA, NA, NA, NA, - "", "", "", "") + message.id = I(c(as.list(rep(NA, 8)), + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")), + thread = I(c(as.list(rep(NA, 8)), + "", "", "", "")) ) + ## remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + data[["date"]] = unclass(data[["date"]]) + data[["artifact.type"]] = unclass(data[["artifact.type"]]) + data[["hash"]] = unclass(data[["hash"]]) + data[["file"]] = unclass(data[["file"]]) + data[["artifact"]] = unclass(data[["artifact"]]) + data[["message.id"]] = unclass(data[["message.id"]]) + data[["thread"]] = unclass(data[["thread"]]) + ## build expected network network.expected = igraph::graph_from_data_frame(data, vertices = authors, directed = net.conf$get.value("author.directed")) @@ -156,48 +166,56 @@ test_that("Construction of the bipartite network for the feature artifact with a "", "", "", "", "", "", "", # mail "", "", "", "", "", "", "", "", ""), - date = get.date.from.string(c("2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", # issue - "2013-05-06 01:04:34", "2013-05-25 03:48:41", 
"2013-05-25 04:08:07", - "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", - "2016-07-15 19:55:39", "2017-05-23 12:32:39", "2016-07-12 15:59:59", - "2016-07-15 20:07:47", "2016-07-27 20:12:08", "2016-07-28 06:27:52", - "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", - "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", - "2013-04-21 23:52:09", "2016-07-12 15:59:25", "2016-07-12 16:03:59", - "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", # mail - "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", - "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", - "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", - "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", - "2010-07-12 10:05:36")), - artifact.type = c(rep("IssueEvent", 24), rep("Mail", 16)), - message.id = c(rep(NA, 24), - "", "<1107974989.17910.6.camel@jmcmullan>", - "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "", "", - "", "", "", - "", "", "", - "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", - ""), - thread = c(rep(NA, 24), - "", "", "", "", "", "", - "", "", "", "", "", "", - "", "", "", ""), - issue.id = c("", "", "", "", # issue - "", "", - "", "", "", "", "", - "", "", "", "", - "", "", "", "", - "", "", "", "", "", - rep(NA,16)), - event.name = c(rep("commented", 24), - rep(NA, 16)), + date = I(as.list(get.date.from.string(c("2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", # issue + "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", + "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", + "2016-07-15 19:55:39", "2017-05-23 12:32:39", "2016-07-12 15:59:59", + "2016-07-15 20:07:47", "2016-07-27 20:12:08", "2016-07-28 06:27:52", + "2013-05-25 03:25:06", 
"2013-05-25 06:06:53", "2013-05-25 06:22:23", + "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", + "2013-04-21 23:52:09", "2016-07-12 15:59:25", "2016-07-12 16:03:59", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", # mail + "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", + "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", + "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", + "2010-07-12 10:05:36")))), + artifact.type = I(c(as.list(rep("IssueEvent", 24)), as.list(rep("Mail", 16)))), + message.id = I(c(as.list(rep(NA, 24)), + "", "<1107974989.17910.6.camel@jmcmullan>", + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "", "", + "", "", "", + "", "", "", + "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", "<65a1sf31sagd684dfv31@mail.gmail.com>", + "")), + thread = I(c(as.list(rep(NA, 24)), + "", "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "")), + issue.id = I(c("", "", "", "", # issue + "", "", + "", "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "", "", "", + as.list(rep(NA,16)))), + event.name = I(c(rep("commented", 24), + as.list(rep(NA, 16)))), weight = 1, type = TYPE.EDGES.INTER, relation = c(rep("issue", 24), rep("mail", 16)) ) + ## remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + network.expected.data[["date"]] = unclass(network.expected.data[["date"]]) + network.expected.data[["artifact.type"]] = unclass(network.expected.data[["artifact.type"]]) + network.expected.data[["message.id"]] = unclass(network.expected.data[["message.id"]]) + network.expected.data[["thread"]] = unclass(network.expected.data[["thread"]]) + network.expected.data[["issue.id"]] = unclass(network.expected.data[["issue.id"]]) + 
network.expected.data[["event.name"]] = unclass(network.expected.data[["event.name"]]) + ## 3) build expected network network.expected = igraph::graph_from_data_frame(network.expected.data, vertices = vertices, directed = net.conf$get.value("author.directed")) @@ -251,66 +269,78 @@ test_that("Construction of the multi network for the feature artifact with autho "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # author cochange - "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", - "2016-07-12 16:06:10", "2016-07-12 16:06:32", - "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", - "2016-07-12 16:05:37", - "2016-07-12 16:06:32", # artifact cochange - "2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # bipartite cochange - "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", - "2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", # bipartite issue - "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", "2016-07-12 14:59:25", - "2016-07-12 16:02:30", "2016-07-12 16:06:01", "2016-07-15 19:55:39", "2017-05-23 12:32:39", - "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", - "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", - "2016-07-12 15:59:59", "2013-04-21 23:52:09", "2016-07-12 15:59:25", - "2016-07-12 16:03:59")), - artifact.type = c(rep("Feature", 8), rep("Mail", 4), rep("Feature", 1), rep("Feature", 6), - rep("IssueEvent", 21)), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", - 
NA, NA, NA, NA, # author mail - "0a1a5c523d835459c42f33e863623138555e2526", # artifact cochange - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # bipartite cochange - "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - rep(NA, 21)), # bipartite issue - file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", # author cochange - NA, NA, NA, NA, - "test2.c", # artifact cochange - "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", # bipartite cochange - rep(NA, 21)), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", # author cochange - "Base_Feature", - rep(NA, 4), - NA, # artifact cochange - "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", # bipartite cochange - rep(NA, 21)), - weight = 1, - type = c(rep(TYPE.EDGES.INTRA, 13), rep(TYPE.EDGES.INTER, 27)), - relation = c(rep("cochange", 8), rep("mail", 4), rep("cochange", 1), rep("cochange", 6), - rep("issue", 21)), - message.id = c(rep(NA, 8), - "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", - "<65a1sf31sagd684dfv31@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", - rep(NA, 28)), - thread = c(rep(NA, 8), - "", "", "", "", - rep(NA, 28)), - author.name = c(rep(NA, 12), "Thomas", rep(NA, 27)), - issue.id = c(rep(NA, 19), - "", "", "", "", # bipartite issue - "", "", "", "", "", "", - "", "", "", "", "", - "", "", "", "", "", ""), - event.name = c(rep(NA, 19), rep("commented", 21)) + date = I(as.list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # author cochange + "2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32", + "2016-07-12 16:06:10", "2016-07-12 
16:06:32", + "2016-07-12 15:58:40", "2016-07-12 15:58:50", "2016-07-12 16:04:40", + "2016-07-12 16:05:37", + "2016-07-12 16:06:32", # artifact cochange + "2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41", # bipartite cochange + "2016-07-12 16:06:10", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", # bipartite issue + "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", "2016-07-12 14:59:25", + "2016-07-12 16:02:30", "2016-07-12 16:06:01", "2016-07-15 19:55:39", "2017-05-23 12:32:39", + "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", + "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", + "2016-07-12 15:59:59", "2013-04-21 23:52:09", "2016-07-12 15:59:25", + "2016-07-12 16:03:59")))), + artifact.type = I(c(as.list(rep("Feature", 8)), as.list(rep("Mail", 4)), as.list(rep("Feature", 1)), as.list(rep("Feature", 6)), + as.list(rep("IssueEvent", 21)))), + hash = I(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # author cochange + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526", + as.list(rep(NA, 4)), # author mail + "0a1a5c523d835459c42f33e863623138555e2526", # artifact cochange + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", # bipartite cochange + "3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + as.list(rep(NA, 21)))), # bipartite issue + file = I(c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", # author cochange + as.list(rep(NA, 4)), + "test2.c", # artifact 
cochange + "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", # bipartite cochange + as.list(rep(NA, 21)))), + artifact = I(c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", # author cochange + "Base_Feature", + as.list(rep(NA, 4)), + NA, # artifact cochange + "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "foo", # bipartite cochange + as.list(rep(NA, 21)))), + weight = 1, + type = c(rep(TYPE.EDGES.INTRA, 13), rep(TYPE.EDGES.INTER, 27)), + relation = c(rep("cochange", 8), rep("mail", 4), rep("cochange", 1), rep("cochange", 6), + rep("issue", 21)), + message.id = I(c(as.list(rep(NA, 8)), + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + as.list(rep(NA, 28)))), + thread = I(c(as.list(rep(NA, 8)), + "", "", "", "", + as.list(rep(NA, 28)))), + author.name = I(c(as.list(rep(NA, 12)), "Thomas", as.list(rep(NA, 27)))), + issue.id = I(c(as.list(rep(NA, 19)), + "", "", "", "", # bipartite issue + "", "", "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", "")), + event.name = I(c(as.list(rep(NA, 19)), rep("commented", 21))) ) + ## Remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + edges[["date"]] = unclass(edges[["date"]]) + edges[["artifact.type"]] = unclass(edges[["artifact.type"]]) + edges[["hash"]] = unclass(edges[["hash"]]) + edges[["file"]] = unclass(edges[["file"]]) + edges[["artifact"]] = unclass(edges[["artifact"]]) + edges[["message.id"]] = unclass(edges[["message.id"]]) + edges[["thread"]] = unclass(edges[["thread"]]) + edges[["author.name"]] = unclass(edges[["author.name"]]) + edges[["issue.id"]] = unclass(edges[["issue.id"]]) + edges[["event.name"]] = unclass(edges[["event.name"]]) + ## 3) build expected network network.expected = 
igraph::graph_from_data_frame(edges, vertices = vertices, directed = net.conf$get.value("author.directed")) @@ -371,44 +401,52 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "","","", "","", "", "","", ""), - date = get.date.from.string(c("2016-07-12 15:58:59 UTC", "2016-07-12 16:06:10 UTC", - "2016-07-12 16:00:45 UTC", "2016-07-12 16:05:41 UTC", - "2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC", - "2013-05-05 21:46:30 UTC", "2013-05-05 21:49:21 UTC", - "2013-05-05 21:49:34 UTC", "2013-05-06 01:04:34 UTC", - "2013-05-25 03:48:41 UTC", "2013-05-25 04:08:07 UTC", - "2016-07-12 14:59:25 UTC", "2016-07-12 16:02:30 UTC", - "2016-07-12 16:06:01 UTC", "2016-07-15 19:55:39 UTC", - "2017-05-23 12:32:39 UTC", "2016-07-12 15:59:59 UTC", - "2016-07-15 20:07:47 UTC", "2016-07-27 20:12:08 UTC", - "2016-07-28 06:27:52 UTC", "2013-05-25 03:25:06 UTC", - "2013-05-25 06:06:53 UTC", "2013-05-25 06:22:23 UTC", - "2013-06-01 06:50:26 UTC", "2016-07-12 16:01:01 UTC", - "2016-07-12 16:02:02 UTC", "2013-04-21 23:52:09 UTC", - "2016-07-12 15:59:25 UTC", "2016-07-12 16:03:59 UTC")), - artifact.type = c(rep("Feature", 6), rep("IssueEvent", 24)), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - rep(NA, 24)), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c", rep(NA, 24)), - artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", rep(NA, 24)), + date = I(as.list(get.date.from.string(c("2016-07-12 15:58:59 UTC", "2016-07-12 16:06:10 UTC", + "2016-07-12 16:00:45 UTC", "2016-07-12 16:05:41 UTC", + "2016-07-12 16:06:32 UTC", "2016-07-12 16:06:32 UTC", + "2013-05-05 21:46:30 UTC", "2013-05-05 21:49:21 UTC", + "2013-05-05 21:49:34 UTC", "2013-05-06 01:04:34 UTC", + "2013-05-25 03:48:41 
UTC", "2013-05-25 04:08:07 UTC", + "2016-07-12 14:59:25 UTC", "2016-07-12 16:02:30 UTC", + "2016-07-12 16:06:01 UTC", "2016-07-15 19:55:39 UTC", + "2017-05-23 12:32:39 UTC", "2016-07-12 15:59:59 UTC", + "2016-07-15 20:07:47 UTC", "2016-07-27 20:12:08 UTC", + "2016-07-28 06:27:52 UTC", "2013-05-25 03:25:06 UTC", + "2013-05-25 06:06:53 UTC", "2013-05-25 06:22:23 UTC", + "2013-06-01 06:50:26 UTC", "2016-07-12 16:01:01 UTC", + "2016-07-12 16:02:02 UTC", "2013-04-21 23:52:09 UTC", + "2016-07-12 15:59:25 UTC", "2016-07-12 16:03:59 UTC")))), + artifact.type = I(c(as.list(rep("Feature", 6)), as.list(rep("IssueEvent", 24)))), + hash = I(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + as.list(rep(NA, 24)))), + file = I(c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c", as.list(rep(NA, 24)))), + artifact = I(c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", as.list(rep(NA, 24)))), weight = c(rep(1, 30)), type = c(rep("Bipartite", 30)), relation = c(rep("cochange", 6), rep("issue", 24)), - issue.id = c(NA, NA, NA, - NA, NA, NA, - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", ""), - event.name = c(rep(NA, 6), rep("commented", 24)) + issue.id = I(c(as.list(rep(NA, 6)), + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "")), + event.name = I(c(as.list(rep(NA, 6)), rep("commented", 24))) ) + ## Remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + edges[["date"]] = unclass(edges[["date"]]) + edges[["artifact.type"]] = unclass(edges[["artifact.type"]]) + edges[["hash"]] = unclass(edges[["hash"]]) + edges[["file"]] = unclass(edges[["file"]]) + edges[["artifact"]] = 
unclass(edges[["artifact"]]) + edges[["issue.id"]] = unclass(edges[["issue.id"]]) + edges[["event.name"]] = unclass(edges[["event.name"]]) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) @@ -463,39 +501,48 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32", - "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", - "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", - "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", - "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", - "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", - "2010-07-12 10:05:36")), - artifact.type = c(rep("Feature", 6), rep("Mail", 16)), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - rep(NA, 16)), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c", rep(NA, 16)), - artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", rep(NA, 16)), + date = I(as.list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", + "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", + "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 
12:05:46", + "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", + "2010-07-12 10:05:36")))), + artifact.type = I(c(as.list(rep("Feature", 6)), as.list(rep("Mail", 16)))), + hash = I(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", + as.list(rep(NA, 16)))), + file = I(c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c", as.list(rep(NA, 16)))), + artifact = I(c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", as.list(rep(NA, 16)))), weight = rep(1,22), type = rep("Bipartite", 22), relation = c(rep("cochange", 6), rep("mail", 16)), - message.id = c(rep(NA, 6), "", - "<1107974989.17910.6.camel@jmcmullan>", "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "", "", - "", "", "", - "", "", "", - "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", - "<65a1sf31sagd684dfv31@mail.gmail.com>", "" - ), - thread = c(rep(NA, 6), "", "", "", "", - "", "", "", "", "", - "", "", "", "", "", - "", "") + message.id = I(c(as.list(rep(NA, 6)), "", + "<1107974989.17910.6.camel@jmcmullan>", "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "", "", + "", "", "", + "", "", "", + "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", "" + )), + thread = I(c(as.list(rep(NA, 6)), "", "", "", "", + "", "", "", "", "", + "", "", "", "", "", + "", "")) ) + ## Remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + edges[["date"]] = unclass(edges[["date"]]) + edges[["artifact.type"]] = unclass(edges[["artifact.type"]]) + edges[["hash"]] = unclass(edges[["hash"]]) + 
edges[["file"]] = unclass(edges[["file"]]) + edges[["artifact"]] = unclass(edges[["artifact"]]) + edges[["message.id"]] = unclass(edges[["message.id"]]) + edges[["thread"]] = unclass(edges[["thread"]]) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) @@ -557,48 +604,56 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), - date = get.date.from.string(c("2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", - "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", - "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", - "2016-07-15 19:55:39", "2017-05-23 12:32:39", "2016-07-12 15:59:59", - "2016-07-15 20:07:47", "2016-07-27 20:12:08", "2016-07-28 06:27:52", - "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", - "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", - "2013-04-21 23:52:09", "2016-07-12 15:59:25", "2016-07-12 16:03:59", - "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", - "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", - "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", - "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", - "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", - "2010-07-12 10:05:36")), - artifact.type = c(rep("IssueEvent", 24), rep("Mail", 16)), - issue.id = c("", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", - "", "", "", rep(NA, 16)), - event.name = c(rep("commented", 24), rep(NA, 16)), + date = I(as.list(get.date.from.string(c("2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", + "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", + "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 
16:06:01", + "2016-07-15 19:55:39", "2017-05-23 12:32:39", "2016-07-12 15:59:59", + "2016-07-15 20:07:47", "2016-07-27 20:12:08", "2016-07-28 06:27:52", + "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", + "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", + "2013-04-21 23:52:09", "2016-07-12 15:59:25", "2016-07-12 16:03:59", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", + "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", + "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", + "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", + "2010-07-12 10:05:36")))), + artifact.type = I(c(as.list(rep("IssueEvent", 24)), as.list(rep("Mail", 16)))), + issue.id = I(c("", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", + "", "", "", as.list(rep(NA, 16)))), + event.name = I(c(rep("commented", 24), as.list(rep(NA, 16)))), weight = rep(1, 40), type = rep("Bipartite", 40), relation = c(rep("issue", 24), rep("mail", 16)), - message.id = c(rep(NA, 24), - "", "<1107974989.17910.6.camel@jmcmullan>", - "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "", "", - "", "", "", - "", "", "", - "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", - "<65a1sf31sagd684dfv31@mail.gmail.com>", "" - ), - thread = c(rep(NA, 24), "", "", "", "", "", - "", "", "", "", "", "", - "", "", "", "", "") + message.id = I(c(as.list(rep(NA, 24)), + "", "<1107974989.17910.6.camel@jmcmullan>", + "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "", "", + "", "", "", + "", "", "", + "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", "" + )), + 
thread = I(c(as.list(rep(NA, 24)), "", "", "", "", "", + "", "", "", "", "", "", + "", "", "", "", "")) ) + ## Remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + edges[["date"]] = unclass(edges[["date"]]) + edges[["artifact.type"]] = unclass(edges[["artifact.type"]]) + edges[["issue.id"]] = unclass(edges[["issue.id"]]) + edges[["event.name"]] = unclass(edges[["event.name"]]) + edges[["message.id"]] = unclass(edges[["message.id"]]) + edges[["thread"]] = unclass(edges[["thread"]]) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) @@ -670,33 +725,33 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "", "", "", "", "", "", "", "", "", "", "", "", ""), - date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", - "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32", - "2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", - "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", - "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", - "2016-07-15 19:55:39", "2017-05-23 12:32:39", "2016-07-12 15:59:59", - "2016-07-15 20:07:47", "2016-07-27 20:12:08", "2016-07-28 06:27:52", - "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", - "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", - "2013-04-21 23:52:09", "2016-07-12 15:59:25", "2016-07-12 16:03:59", - "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", - "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", - "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", - "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", - "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", - "2010-07-12 10:05:36")), - artifact.type = c(rep("Feature", 6), 
rep("IssueEvent", 24), rep("Mail", 16)), - hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", - "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526", - rep(NA, 40)), - file = c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c", rep(NA, 40)), - artifact = c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", rep(NA, 40)), + date = I(as.list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:06:10", "2016-07-12 16:00:45", + "2016-07-12 16:05:41", "2016-07-12 16:06:32", "2016-07-12 16:06:32", + "2013-05-05 21:46:30", "2013-05-05 21:49:21", "2013-05-05 21:49:34", + "2013-05-06 01:04:34", "2013-05-25 03:48:41", "2013-05-25 04:08:07", + "2016-07-12 14:59:25", "2016-07-12 16:02:30", "2016-07-12 16:06:01", + "2016-07-15 19:55:39", "2017-05-23 12:32:39", "2016-07-12 15:59:59", + "2016-07-15 20:07:47", "2016-07-27 20:12:08", "2016-07-28 06:27:52", + "2013-05-25 03:25:06", "2013-05-25 06:06:53", "2013-05-25 06:22:23", + "2013-06-01 06:50:26", "2016-07-12 16:01:01", "2016-07-12 16:02:02", + "2013-04-21 23:52:09", "2016-07-12 15:59:25", "2016-07-12 16:03:59", + "2004-10-09 18:38:13", "2005-02-09 18:49:49", "2016-07-12 15:58:40", + "2010-07-12 11:05:35", "2010-07-12 12:05:34", "2010-07-12 12:05:40", + "2010-07-12 12:05:41", "2010-07-12 12:05:42", "2010-07-12 12:05:43", + "2010-07-12 12:05:44", "2010-07-12 12:05:45", "2010-07-12 12:05:46", + "2016-07-12 15:58:50", "2016-07-12 16:05:37", "2016-07-12 16:04:40", + "2010-07-12 10:05:36")))), + artifact.type = I(c(as.list(rep("Feature", 6)), as.list(rep("IssueEvent", 24)), as.list(rep("Mail", 16)))), + hash = I(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "1143db502761379c2bfcecc2007fc34282e7ee61", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", 
"0a1a5c523d835459c42f33e863623138555e2526", + as.list(rep(NA, 40)))), + file = I(c("test.c", "test3.c", "test.c", "test2.c", "test2.c", "test2.c", as.list(rep(NA, 40)))), + artifact = I(c("A", "Base_Feature", "A", "Base_Feature", "Base_Feature", "foo", as.list(rep(NA, 40)))), weight = rep(1, 46), type = rep("Bipartite", 46), relation = c(rep("cochange", 6), rep("issue", 24), rep("mail", 16)), - issue.id = c(rep(NA, 6), "", "", + issue.id = I(c(as.list(rep(NA, 6)), "", "", "", "", "", "", "", "", "", "", "", @@ -704,22 +759,33 @@ test_that("Construction of the multi-artifact bipartite network with artifact re "", "", "", "", "", "", "", "", "", - "", rep(NA, 16)), - event.name = c(rep(NA, 6), rep("commented", 24), rep(NA, 16)), - message.id = c(rep(NA, 30), "", - "<1107974989.17910.6.camel@jmcmullan>", "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "", "", - "", "", "", - "", "", "", - "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", - "<65a1sf31sagd684dfv31@mail.gmail.com>", ""), - thread = c(rep(NA, 30), "", "", "", "", + "", as.list(rep(NA, 16)))), + event.name = I(c(as.list(rep(NA, 6)), rep("commented", 24), as.list(rep(NA, 16)))), + message.id = I(c(as.list(rep(NA, 30)), "", + "<1107974989.17910.6.camel@jmcmullan>", "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "", "", + "", "", "", + "", "", "", + "", "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>", + "<65a1sf31sagd684dfv31@mail.gmail.com>", "")), + thread = I(c(as.list(rep(NA, 30)), "", "", "", "", "", "", "", "", "", "", "", "", "", "", - "", "") + "", "")) ) + ## Remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + edges[["date"]] = unclass(edges[["date"]]) + edges[["artifact.type"]] = unclass(edges[["artifact.type"]]) + edges[["hash"]] = unclass(edges[["hash"]]) + 
edges[["file"]] = unclass(edges[["file"]]) + edges[["artifact"]] = unclass(edges[["artifact"]]) + edges[["issue.id"]] = unclass(edges[["issue.id"]]) + edges[["event.name"]] = unclass(edges[["event.name"]]) + edges[["message.id"]] = unclass(edges[["message.id"]]) + edges[["thread"]] = unclass(edges[["thread"]]) + net.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) assert.networks.equal(net.expected, net.combined) diff --git a/tests/test-networks-multi.R b/tests/test-networks-multi.R index 41fec588..52770fa4 100644 --- a/tests/test-networks-multi.R +++ b/tests/test-networks-multi.R @@ -82,16 +82,21 @@ test_that("Construction of the multi network for the feature artifact with autho "0a1a5c523d835459c42f33e863623138555e2526"), file = c("test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c", "test3.c", "test2.c", "test2.c", "test.c", "test.c", "test2.c", "test3.c", "test2.c", "test2.c"), - artifact = c("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", - "Base_Feature", NA, "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", - "foo"), + artifact = I(list("A", "A", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", "Base_Feature", + "Base_Feature", NA, "A", "A", "Base_Feature", "Base_Feature", "Base_Feature", + "foo")), weight = 1, type = c(rep(TYPE.EDGES.INTRA, 9), rep(TYPE.EDGES.INTER, 6)), relation = "cochange", - author.name = c(NA, NA, NA, NA, NA, NA, NA, NA, "Thomas", NA, NA, NA, NA, NA, NA) + author.name = I(list(NA, NA, NA, NA, NA, NA, NA, NA, "Thomas", NA, NA, NA, NA, NA, NA)) ) + ## remove the 'AsIs' class from the edge attributes that have been inserted via `I(...)` + edges[["artifact"]] = unclass(edges[["artifact"]]) + edges[["author.name"]] = unclass(edges[["author.name"]]) + network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) 
assert.networks.equal(network.expected, network.built) }) diff --git a/tests/test-networks.R b/tests/test-networks.R index 96069a3c..f1536ed9 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -135,34 +135,34 @@ test_that("Simplify author-network with relation = c('cochange', 'mail') using b "Björn", "Olaf"), # mail comb.2. = c("Olaf", "Karl", "Thomas", "Thomas", # cochange "Olaf", "Thomas")) # mail - data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")), - get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")), - get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32")), - get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32")), # cochange - get.date.from.string(c("2016-07-12 15:58:40", "2016-07-12 15:58:50")), - get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37"))) # mail - data$artifact.type = list(c("Feature", "Feature"), c("Feature", "Feature"), - c("Feature", "Feature"), c("Feature", "Feature"), # cochange - c("Mail", "Mail"), c("Mail", "Mail")) # mail - data$hash = list(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), - c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), - c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), - c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - as.character(c(NA, NA)), as.character(c(NA, NA))) - data$file = list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"), - as.character(c(NA, NA)), as.character(c(NA, NA))) - data$artifact = list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"), - c("Base_Feature", "Base_Feature"), as.character(c(NA, NA)), as.character(c(NA, NA))) + data$date = list(as.list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45"))), + 
as.list(get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10"))), + as.list(get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32"))), + as.list(get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32"))), # cochange + as.list(get.date.from.string(c("2016-07-12 15:58:40", "2016-07-12 15:58:50"))), + as.list(get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37")))) # mail + data$artifact.type = list(list("Feature", "Feature"), list("Feature", "Feature"), + list("Feature", "Feature"), list("Feature", "Feature"), # cochange + list("Mail", "Mail"), list("Mail", "Mail")) # mail + data$hash = list(list("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + list("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + list("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + list("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), + as.list(rep(NA, 2)), as.list(rep(NA, 2))) + data$file = list(list("test.c", "test.c"), list("test2.c", "test3.c"), list("test2.c", "test2.c"), list("test3.c", "test2.c"), + as.list(rep(NA, 2)), as.list(rep(NA, 2))) + data$artifact = list(list("A", "A"), list("Base_Feature", "Base_Feature"), list("Base_Feature", "Base_Feature"), + list("Base_Feature", "Base_Feature"), as.list(rep(NA, 2)), as.list(rep(NA, 2))) data$weight = rep(2, 6) data$type = rep(TYPE.EDGES.INTRA, 6) data$relation = c(rep("cochange", 4), rep("mail", 2)) - data$message.id = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), - c("<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>"), - c("<65a1sf31sagd684dfv31@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")) - data$thread = list(as.character(c(NA, NA)), 
as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), - c("", ""), c("", "")) + data$message.id = list(as.list(rep(NA, 2)), as.list(rep(NA, 2)), as.list(rep(NA, 2)), as.list(rep(NA, 2)), + list("<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>"), + list("<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")) + data$thread = list(as.list(rep(NA, 2)), as.list(rep(NA, 2)), as.list(rep(NA, 2)), as.list(rep(NA, 2)), + list("", ""), list("", "")) ## build expected network network.expected = igraph::graph_from_data_frame(data, vertices = authors, @@ -179,37 +179,38 @@ test_that("Simplify author-network with relation = c('cochange', 'mail') using b data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl"), comb.2. = c("Olaf", "Karl", "Thomas", "Thomas")) - data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", # cochange - "2016-07-12 15:58:40", "2016-07-12 15:58:50")), # mail - get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")), # cochange - get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32", # cochange - "2016-07-12 16:04:40", "2016-07-12 16:05:37")), # mail - get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32"))) # cochange - data$artifact.type = list(c("Feature", "Feature", "Mail", "Mail"), - c("Feature", "Feature"), - c("Feature", "Feature", "Mail", "Mail"), - c("Feature", "Feature")) - data$hash = list(as.character(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", NA, NA)), - c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), - as.character(c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", NA, NA)), - c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526")) - data$file = 
list(as.character(c("test.c", "test.c", NA, NA)), c("test2.c", "test3.c"), - as.character(c("test2.c", "test2.c", NA, NA)), c("test3.c", "test2.c")) - data$artifact = list(as.character(c("A", "A", NA, NA)), c("Base_Feature", "Base_Feature"), - as.character(c("Base_Feature", "Base_Feature", NA, NA)), c("Base_Feature", "Base_Feature")) + data$date = list(as.list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", # cochange + "2016-07-12 15:58:40", "2016-07-12 15:58:50"))), # mail + as.list(get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10"))), # cochange + as.list(get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32", # cochange + "2016-07-12 16:04:40", "2016-07-12 16:05:37"))), # mail + as.list(get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32")))) # cochange + data$artifact.type = list(list("Feature", "Feature", "Mail", "Mail"), + list("Feature", "Feature"), + list("Feature", "Feature", "Mail", "Mail"), + list("Feature", "Feature")) + + data$hash = list(list("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", NA, NA), + list("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + list("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", NA, NA), + list("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526")) + data$file = list(list("test.c", "test.c", NA, NA), list("test2.c", "test3.c"), + list("test2.c", "test2.c", NA, NA), list("test3.c", "test2.c")) + data$artifact = list(list("A", "A", NA, NA), list("Base_Feature", "Base_Feature"), + list("Base_Feature", "Base_Feature", NA, NA), list("Base_Feature", "Base_Feature")) data$weight = c(4, 2, 4, 2) data$type = rep(TYPE.EDGES.INTRA, 4) data$relation = list(c("cochange", "mail"), c("cochange"), c("cochange", "mail"), c("cochange")) - data$message.id = list(as.character(c(NA, NA, 
"<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", - "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>")), - as.character(c(NA, NA)), - as.character(c(NA, NA, "<65a1sf31sagd684dfv31@mail.gmail.com>", - "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")), - as.character(c(NA, NA))) - data$thread = list(as.character(c(NA, NA, "", "")), - as.character(c(NA, NA)), - as.character(c(NA, NA, "", "")), - as.character(c(NA, NA))) + data$message.id = list(list(NA, NA, "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>"), + list(NA, NA), + list(NA, NA, "<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"), + list(NA, NA)) + data$thread = list(list(NA, NA, "", ""), + list(NA, NA), + list(NA, NA, "", ""), + list(NA, NA)) ## build expected network network.expected = igraph::graph_from_data_frame(data, vertices = authors, @@ -354,6 +355,7 @@ test_that("Remove duplicate edges", { ## build expected network network.expected = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + network.expected = convert.edge.attributes.to.list(network.expected) ## build network with unique edges network = network.builder$get.author.network() diff --git a/tests/test-split-misc.R b/tests/test-split-misc.R index da63b069..12f54126 100644 --- a/tests/test-split-misc.R +++ b/tests/test-split-misc.R @@ -394,6 +394,7 @@ test_that("Check and correct duplicate range names during network activity-based igraph::edges(rep(c("A", "B"), times = length(dates))) ## set some date attributes that are appropriate for the test case net = igraph::set_edge_attr(net, "date", value = dates) + net = convert.edge.attributes.to.list(net) ## define split arguments split.function = split.network.activity.based From 0c6b2eba79b37f8ef2af7ffc41d86f1f307581bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: 
Tue, 3 Dec 2024 15:26:56 +0100 Subject: [PATCH 098/130] !TEMPORARY! replace NULLs in edge attributes with NA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is necessary since igraph falsely fills in non-present edge attributes with NULLs instead of NAs in certain cases when using 'igraph::disjoint_union' and 'igraph::add_edges'. Signed-off-by: Maximilian Löffler --- util-networks.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/util-networks.R b/util-networks.R index 06e65155..9066cfa1 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1282,6 +1282,14 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## combine the networks: ## 1) merge the existing networks u = igraph::disjoint_union(authors.net, artifacts.net) + for (attr in igraph::edge_attr_names(u)) { + values = igraph::edge_attr(u, attr) + NULLs = sapply(values, is.null) + if (any(NULLs)) { + values[NULLs] = NA + u = igraph::set_edge_attr(u, attr, value = values) + } + } ## 2) add the bipartite edges u = add.edges.for.bipartite.relation(u, authors.to.artifacts, private$network.conf) @@ -1792,6 +1800,15 @@ add.edges.for.bipartite.relation = function(net, bipartite.relations, network.co ## add the vertex sequences as edges to the network net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) + + for (attr in igraph::edge_attr_names(net)) { + values = igraph::edge_attr(net, attr) + NULLs = sapply(values, is.null) + if (any(NULLs)) { + values[NULLs] = NA + net = igraph::set_edge_attr(net, attr, value = values) + } + } } return(net) From 44c7b72e3234cb332bb2713fb408c124e67255d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Tue, 3 Dec 2024 16:46:19 +0100 Subject: [PATCH 099/130] Ensure missing edge attributes are filled with NA instead of NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'plyr::rbind.fill' uses NULL to fill missing 
values in lists. As we now use lists for most edge attributes, we need to handle this case separately to ensure missing values are filled with NAs instead. To fix this issue, we need to instantiate missing columns in dataframes with NAs before calling 'plyr::rbind.fill'. This operation is constant with respect to the number of rows and should not impact performance too much. This works towards fixing #271. Signed-off-by: Maximilian Löffler --- util-networks.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/util-networks.R b/util-networks.R index 9066cfa1..e384b1e1 100644 --- a/util-networks.R +++ b/util-networks.R @@ -1658,9 +1658,18 @@ merge.network.data = function(vertex.data, edge.data) { edge.data.filtered = Filter(function(ed) { return(nrow(ed) > 0) }, edge.data) - ## 2) call rbind + ## 2) add in missing columns + all.columns = Reduce(union, lapply(edge.data.filtered, colnames)) + edge.data.filtered = lapply(edge.data.filtered, function(edges) { + missing.columns = setdiff(all.columns, colnames(edges)) + for (column in missing.columns) { + edges[[column]] = NA + } + return(edges) + }) + ## 3) call rbind edges = plyr::rbind.fill(edge.data.filtered) - ## 3) correct empty results + ## 4) correct empty results if (is.null(edges)) { edges = create.empty.edge.list() } From 7303eabef6a78198575fe5bdfc02813fde3d3974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Wed, 4 Dec 2024 16:13:19 +0100 Subject: [PATCH 100/130] Add test for 'convert.edge.attributes.to.list' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This works towards fixing #271. 
Signed-off-by: Maximilian Löffler --- tests/test-networks.R | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/test-networks.R b/tests/test-networks.R index f1536ed9..1cd093b7 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -992,6 +992,54 @@ test_that("Addition of edge attributes with data", { }) + +patrick::with_parameters_test_that("Convert edge attributes to list", { + + ## configure edge attributes + edge.attributes = c("date", "message.id", "thread", "weight", "type", "relation") + attribute.defaults = list(get.date.from.string("2020-01-01 00:00:00"), "abc", "def", 1, TYPE.EDGES.INTRA, "mail") + + ## construct network + network = + igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("A", "B", "C", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) + + igraph::edges("A", "B", "B", "C", "C", "A") + + ## assign edge attributes + for (i in seq_along(edge.attributes)) { + network = igraph::set_edge_attr(network, edge.attributes[i], value = attribute.defaults[[i]]) + } + + ## convert specified edge attributes to list + if (is.null(remain.as.is)) { + network.listified = convert.edge.attributes.to.list(network) + + ## set 'remain.as.is' to the default of 'convert.edge.attributes.to.list' + ## for later use in the validation process + remain.as.is = names(EDGE.ATTR.HANDLING) + } else { + network.listified = convert.edge.attributes.to.list(network, remain.as.is = remain.as.is) + } + + ## check edge attributes + for (attr in igraph::edge_attr_names(network)) { + conversion.function = ifelse(attr %in% remain.as.is, identity, as.list) + expect_equal( + conversion.function(igraph::edge_attr(network, attr)), + igraph::edge_attr(network.listified, attr), + info = paste("edge attribute", attr, "values") + ) + } + +}, patrick::cases( + "remain.as.is: weight" = list(remain.as.is = c("weight")), + "remain.as.is: date" = list(remain.as.is = c("date")), + "remain.as.is: weight, date" = list(remain.as.is = 
c("weight", "date")), + "remain.as.is: default" = list(remain.as.is = NULL) +)) + + + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Extract data sources ---------------------------------------------------- test_that("Get the data sources from a network with two relations", { From 2d2a5f6e530264c670af33e44e631fd62a7c151a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Mon, 16 Dec 2024 14:05:59 +0100 Subject: [PATCH 101/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 10 +++++++++- util-split.R | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index d4e457bc..d890abe0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,10 @@ ## unversioned +### Announcement + +- Coronet is not compatible anymore with igraph versions below 2.1.0. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in igraph version 2.1.0. 
+ ### Added - Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e, PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367, 3fb7437b68950303916b62984fa449732c70353e, 170bc66eb779d7cf2ab504db7c3f4ec483103838) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0,) @@ -18,8 +22,9 @@ ### Changed/Improved +- **Breaking Change**: Change the default representation of edge attributes from vectors to lists. This change is necessary for the interplay of coronet networks with certain igraph functionality since igraph version 2.1. (PR #274, 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7, eda30b838369ec46376812298a3ea8159eec5789, 44c7b72e3234cb332bb2713fb408c124e67255d9, 7303eabef6a78198575fe5bdfc02813fde3d3974) - Change the default value for the `issues.from.source` configuration parameter. Instead of reading JIRA and GitHub issues together, which was the previous default, the new default value causes only GitHub issue data to be read. To restore the previous default behavior and read data from both issue sources, this now needs to be manually configured when needed. 
(PR #264, 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d, 8c8080cb9caf115f19d9f145ad6e6c108b131a67, 8bcbc81db521877908d2e5c2989082ed672f2a3b) -- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5) +- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, PR #274, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5, e3617b8c6b21fb4242c1d392124813501069ca84, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) @@ -28,6 +33,9 @@ - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) - Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data and that leads to a return value of type numeric which should be POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd) +- Fix `read.commit.interactions` by explicitly considering non-existent commit interaction data (PR #274, f591528a0f1f11b1a4390949ab770f3f74a766f9) +- Add range information to network-splits when splitting a network using `split.network.time.based.by.ranges`. 
This effect also propagates into `split.networks.time.based` (PR #274, 87911ade231c44b93be194a1d6734f7de043a4af) +- Adjust `metrics.scale.freeness` and `metrics.is.scale.free` functions be compatible with both older and newer igraph versions (PR #274, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) ## 4.4 diff --git a/util-split.R b/util-split.R index 464ce2d1..31da7ee3 100644 --- a/util-split.R +++ b/util-split.R @@ -836,7 +836,7 @@ split.network.time.based.by.ranges = function(network, ranges, remove.isolates = } ) - # add range information + ## add range information if (is.null(names(nets.split))) { names(nets.split) = ranges } From 0c27012641d24e19e5fa037406b480034c93f1aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Sun, 12 Jan 2025 16:50:05 +0100 Subject: [PATCH 102/130] Use Reduce in the (before temporary) NULL replacement for performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The igraph maintainers decided that they will not re-install consistency with their older versions (in terms of filling in non-existent values in 'igraph::disjoint_union' and 'igraph::add_edges' with NAs) yet (https://github.com/igraph/rigraph/issues/1587). The previously temporary fix is therefore now semi-permanent. Signed-off-by: Maximilian Löffler --- util-networks.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/util-networks.R b/util-networks.R index e384b1e1..73d7ab6a 100644 --- a/util-networks.R +++ b/util-networks.R @@ -22,7 +22,7 @@ ## Copyright 2020 by Anselm Fehnker ## Copyright 2021 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann -## Copyright 2023-2024 by Maximilian Löffler +## Copyright 2023-2025 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -1282,16 +1282,19 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## combine the networks: ## 1) merge the existing networks u = igraph::disjoint_union(authors.net, artifacts.net) - for (attr in igraph::edge_attr_names(u)) { + + ## 2) replace NULLs in edge attributes with NAs for consistency + u = Reduce(function(u, attr) { values = igraph::edge_attr(u, attr) NULLs = sapply(values, is.null) if (any(NULLs)) { values[NULLs] = NA u = igraph::set_edge_attr(u, attr, value = values) } - } + return(u) + }, igraph::edge_attr_names(u), u) - ## 2) add the bipartite edges + ## 3) add the bipartite edges u = add.edges.for.bipartite.relation(u, authors.to.artifacts, private$network.conf) ## add range attribute for later analysis (if available) @@ -1810,14 +1813,16 @@ add.edges.for.bipartite.relation = function(net, bipartite.relations, network.co ## add the vertex sequences as edges to the network net = igraph::add_edges(net, unlist(vertex.sequence.for.edges), attr = extra.edge.attributes) - for (attr in igraph::edge_attr_names(net)) { + ## replace NULLs in edge attributes with NAs for consistency + net = Reduce(function(net, attr) { values = igraph::edge_attr(net, attr) NULLs = sapply(values, is.null) if (any(NULLs)) { values[NULLs] = NA net = igraph::set_edge_attr(net, attr, value = values) } - } + return(net) + }, igraph::edge_attr_names(net), net) } return(net) From 6c3bcd1a2366d0d3a176d9fde95b8356b0158da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Sun, 12 Jan 2025 16:52:28 +0100 Subject: [PATCH 103/130] Bump required version of 'igraph' in 'README.md' to 2.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c40a447..f9001b12 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ Alternatively, you can run `Rscript install.R` to install the packages. 
- `yaml`: To read YAML configuration files (i.e., Codeface configuration files) - `R6`: For proper classes -- `igraph`: For the construction of networks (package version `1.3.0` or higher is recommended) +- `igraph`: For the construction of networks (package version `2.1.0` or higher is required) - `plyr`: For the `dlply` splitting-function and `rbind.fill` - `parallel`: For parallelization - `logging`: Logging From 0cc59bc74b67a7b839c9977b8ea3ecbeb0512ab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20L=C3=B6ffler?= Date: Sun, 12 Jan 2025 16:55:48 +0100 Subject: [PATCH 104/130] Update 'NEWS.md' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Maximilian Löffler --- NEWS.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index d890abe0..4e807aaf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,7 @@ ### Announcement -- Coronet is not compatible anymore with igraph versions below 2.1.0. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in igraph version 2.1.0. +- Coronet is not compatible with `igraph` versions below 2.1.0 anymore. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in igraph version 2.1.0. ### Added @@ -22,20 +22,21 @@ ### Changed/Improved -- **Breaking Change**: Change the default representation of edge attributes from vectors to lists. This change is necessary for the interplay of coronet networks with certain igraph functionality since igraph version 2.1. (PR #274, 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7, eda30b838369ec46376812298a3ea8159eec5789, 44c7b72e3234cb332bb2713fb408c124e67255d9, 7303eabef6a78198575fe5bdfc02813fde3d3974) +- **Breaking Change**: Change the default representation of edge attributes from vectors to lists. 
This change is necessary for the interplay of coronet networks with certain `igraph` functionality since igraph version 2.1.0 (PR #274, 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7, eda30b838369ec46376812298a3ea8159eec5789, 0c6b2eba79b37f8ef2af7ffc41d86f1f307581bf, 44c7b72e3234cb332bb2713fb408c124e67255d9, 7303eabef6a78198575fe5bdfc02813fde3d3974, 0c27012641d24e19e5fa037406b480034c93f1aa) - Change the default value for the `issues.from.source` configuration parameter. Instead of reading JIRA and GitHub issues together, which was the previous default, the new default value causes only GitHub issue data to be read. To restore the previous default behavior and read data from both issue sources, this now needs to be manually configured when needed. (PR #264, 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d, 8c8080cb9caf115f19d9f145ad6e6c108b131a67, 8bcbc81db521877908d2e5c2989082ed672f2a3b) - Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, PR #274, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5, e3617b8c6b21fb4242c1d392124813501069ca84, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) +- Update necessary `igraph` version to 2.1.0 in `README.md` (PR #274, 6c3bcd1a2366d0d3a176d9fde95b8356b0158da3) ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) - Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data and that leads to a return value of type numeric which should be POSIXct (PR #270, 
10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd) -- Fix `read.commit.interactions` by explicitly considering non-existent commit interaction data (PR #274, f591528a0f1f11b1a4390949ab770f3f74a766f9) +- Fix `read.commit.interactions` by explicitly considering non-existent commit interactions (PR #274, f591528a0f1f11b1a4390949ab770f3f74a766f9) - Add range information to network-splits when splitting a network using `split.network.time.based.by.ranges`. This effect also propagates into `split.networks.time.based` (PR #274, 87911ade231c44b93be194a1d6734f7de043a4af) -- Adjust `metrics.scale.freeness` and `metrics.is.scale.free` functions be compatible with both older and newer igraph versions (PR #274, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) +- Adjust `metrics.scale.freeness` and `metrics.is.scale.free` functions to be compatible with both older and newer igraph versions (PR #274, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) ## 4.4 From 9f231612fcd33a283362c79b35a94295ff3d4ef9 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Tue, 10 Dec 2024 19:23:12 +0100 Subject: [PATCH 105/130] Add function to compute last activity per person and activity type While a function to compute the first activity per person and activity type already existed, this was not the case for the last activity. Instead of copying the code from first-activity computation, reuse the code by establishing an additional helper function for aggregating activity dates. This is used by first and last activity now, and could potentially also be used for other aggregations (e.g., to compute the mid date of a person's activities). 
Signed-off-by: Thomas Bock --- util-networks-covariates.R | 63 +++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/util-networks-covariates.R b/util-networks-covariates.R index b1be9f28..03432c0e 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -1582,43 +1582,78 @@ add.vertex.attribute.issue.is.pull.request = function(list.of.networks, project. ## Helper ------------------------------------------------------------------ #' Helper function for first activity: computes first activity information per person and activity type. +#' +#' @param activity.types The activity types to compute information for [default: c("mails", "commits", "issues")] +#' @param range.data The data to base the computation on +#' @param default.value The default value to add if no information is available per author and activity type +#' [default: NA] #' -#' @param activity.types The activity types to compute information for. [default: c("mails", "commits", "issues")] -#' @param range.data The data to base the computation on. -#' @param default.value The default value to add if no information is available per author and activity type. -#' [default: NA] +#' @return A list containing per author a list of first activity dates named with the corresponding activity type. +#' Empty list if there are no activities in \code{range.data} at all or none corresponding to the configured +#' types in \code{activity.types} +#' +#' @seealso get.aggregated.activity.data +get.first.activity.data = function(range.data, activity.types = c("commits", "mails", "issues"), default.value = NA) { + return(get.aggregated.activity.data(range.data, activity.types, default.value, aggregation.function = min)) +} + +#' Helper function for last activity: computes last activity information per person and activity type. 
+#' +#' @param activity.types The activity types to compute information for [default: c("mails", "commits", "issues")] +#' @param range.data The data to base the computation on +#' @param default.value The default value to add if no information is available per author and activity type +#' [default: NA] +#' +#' @return A list containing per author a list of last activity dates named with the corresponding activity type. +#' Empty list if there are no activities in \code{range.data} at all or none corresponding to the configured +#' types in \code{activity.types} +#' +#' @seealso get.aggregated.activity.data +get.last.activity.data = function(range.data, activity.types = c("commits", "mails", "issues"), default.value = NA) { + return(get.aggregated.activity.data(range.data, activity.types, default.value, aggregation.function = max)) +} + +#' Helper function to aggregate activity information (e.g., to compute first or last activity information per person +#' and activity type). +#' +#' @param activity.types The activity types to compute information for [default: c("mails", "commits", "issues")] +#' @param range.data The data to base the computation on +#' @param default.value The default value to add if no information is available per author and activity type +#' [default: NA] +#' @param aggregation.function The function that should be used to aggregate the activity data (e.g., minimum for +#' first activity, or maximum for last activity) #' -#' @return A list containing per author a list of first activity values named with the corresponding activity type. +#' @return A list containing per author a list of aggregated activity dates named with the corresponding activity type. 
#' Empty list if there are no activities in \code{range.data} at all or none corresponding to the configured #' types in \code{activity.types} -get.first.activity.data = function(range.data, activity.types = c("commits", "mails", "issues"), - default.value = NA) { +get.aggregated.activity.data = function(range.data, activity.types = c("commits", "mails", "issues"), + default.value = NA, aggregation.function) { ## make sure that the default value contains a tzone attribute (even if the default value is NA) default.value = get.date.from.string(default.value) - ## get data for each activity type and extract minimal date for each author in each type, + ## get data for each activity type and extract aggregated date for each author in each type, ## resulting in a list of activity types with each item containing a list of authors - ## mapped to their first activity for the current activity type; for example: + ## mapped to their aggregated activity date for the current activity type; for example: ## list( ## commits = list(authorA = list(commits = 1), authorB = list(commits = 0)), ## mails = list(authorB = list(mails = 2), authorC = list(mails = 3)), ## issues = list(authorA = list(issues = 2), authorD = list(issues = 2)) ## ) activity.by.type = parallel::mclapply(activity.types, function(type) { - ## compute minima - minima.per.person = lapply( + ## compute aggregation + aggregation.per.person = lapply( range.data$group.artifacts.by.data.column(type, "author.name"), function(x) { - ## get first date - m = list(min(x[["date"]])) + ## get aggregated date + m = list(aggregation.function(x[["date"]])) ## add activity type as name to the list names(m) = type return(m) } ) - return(minima.per.person) + return(aggregation.per.person) }) names(activity.by.type) = activity.types From 8660ed763ba4b69e909e7fbb01e27e1999522047 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Tue, 10 Dec 2024 19:27:57 +0100 Subject: [PATCH 106/130] Add function to add vertex attribute "last.activity" 
Similar to enhancing first-activity computation by last-activity computation in the previous commit, this can also be done for the corresponding vertex attributes. We now have a new function to attribute the "last.activity" vertex attribute. Both functions to add first and last activity as a vertex attribute make use of a commonly used helper function that allows to add an aggregated activity date as a vertex attribute. Signed-off-by: Thomas Bock --- util-networks-covariates.R | 115 ++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 20 deletions(-) diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 03432c0e..7e91d9f1 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -688,30 +688,105 @@ add.vertex.attribute.author.email = function(list.of.networks, project.data, nam #' Add first activity attribute. #' -#' @param list.of.networks The network list. -#' @param project.data The project data. +#' @param list.of.networks The network list +#' @param project.data The project data #' @param activity.types The kinds of activity to use as basis: One or more of \code{mails}, \code{commits} and -#' \code{issues}. [default: c("mails", "commits", "issues")] -#' @param name The attribute name to add. [default: "first.activity"] +#' \code{issues} [default: c("mails", "commits", "issues")] +#' @param name The attribute name to add [default: "first.activity"] #' @param aggregation.level Determines the data to use for the attribute calculation. #' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, #' \code{"project.cumulative"}, \code{"project.all.ranges"}, and #' \code{"complete"}. See \code{split.data.by.networks} for #' more details. [default: "complete"] -#' @param default.value The default value to add if a vertex has no matching value. [default: NA]. 
+#' @param default.value The default value to add if a vertex has no matching value [default: NA] #' @param combine.all.activity.types Flag indicating that one value, computed over all given -#' \code{activity.types} is of interest (instead of one value per type). -#' [default: FALSE] +#' \code{activity.types} is of interest (instead of one value per type) +#' [default: FALSE] +#' +#' @return A list of networks with the added attribute #' -#' @return A list of networks with the added attribute. +#' @seealso add.vertex.attribute.author.aggregated.activity add.vertex.attribute.author.first.activity = function(list.of.networks, project.data, - activity.types = c("mails", "commits", "issues"), - name = "first.activity", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), - default.value = NA, - combine.activity.types = FALSE) { + activity.types = c("mails", "commits", "issues"), + name = "first.activity", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = NA, + combine.activity.types = FALSE) { + return(add.vertex.attribute.author.aggregated.activity(list.of.networks, project.data, activity.types, name, + aggregation.level, default.value, combine.activity.types, + aggregation.function = min, + data.aggregation.function = get.first.activity.data)) +} + + +#' Add last activity attribute. +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param activity.types The kinds of activity to use as basis: One or more of \code{mails}, \code{commits} and +#' \code{issues} [default: c("mails", "commits", "issues")] +#' @param name The attribute name to add [default: "last.activity"] +#' @param aggregation.level Determines the data to use for the attribute calculation. 
+#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. See \code{split.data.by.networks} for +#' more details. [default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' @param combine.all.activity.types Flag indicating that one value, computed over all given +#' \code{activity.types} is of interest (instead of one value per type) +#' [default: FALSE] +#' +#' @return A list of networks with the added attribute +#' +#' @seealso add.vertex.attribute.author.aggregated.activity +add.vertex.attribute.author.last.activity = function(list.of.networks, project.data, + activity.types = c("mails", "commits", "issues"), + name = "last.activity", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = NA, + combine.activity.types = FALSE) { + return(add.vertex.attribute.author.aggregated.activity(list.of.networks, project.data, activity.types, name, + aggregation.level, default.value, combine.activity.types, + aggregation.function = max, + data.aggregation.function = get.last.activity.data)) +} + + +#' Add aggregated activity-date attribute. +#' +#' @param list.of.networks The network list +#' @param project.data The project data +#' @param activity.types The kinds of activity to use as basis: One or more of \code{mails}, \code{commits} and +#' \code{issues} [default: c("mails", "commits", "issues")] +#' @param name The attribute name to add [default: "aggregated.activity"] +#' @param aggregation.level Determines the data to use for the attribute calculation. +#' One of \code{"range"}, \code{"cumulative"}, \code{"all.ranges"}, +#' \code{"project.cumulative"}, \code{"project.all.ranges"}, and +#' \code{"complete"}. 
See \code{split.data.by.networks} for +#' more details [default: "complete"] +#' @param default.value The default value to add if a vertex has no matching value [default: NA] +#' @param combine.all.activity.types Flag indicating that one value, computed over all given +#' \code{activity.types} is of interest (instead of one value per type) +#' [default: FALSE] +#' @param aggregation.function The function that should be used to aggregate when combining all activity types +#' @param data.aggregation.function The function that should be used to aggregate the activity information per author +#' within activity types +#' +#' @return A list of networks with the added attribute +add.vertex.attribute.author.aggregated.activity = function(list.of.networks, project.data, + activity.types = c("mails", "commits", "issues"), + name = "aggregated.activity", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = NA, + combine.activity.types = FALSE, + aggregation.function, + data.aggregation.function) { aggregation.level = match.arg.or.default(aggregation.level, default = "complete") parsed.activity.types = match.arg.or.default(activity.types, several.ok = TRUE) @@ -730,17 +805,17 @@ add.vertex.attribute.author.first.activity = function(list.of.networks, project. } compute.attr = function(range, range.data, net) { - data = get.first.activity.data(range.data, parsed.activity.types, type.default) + data = data.aggregation.function(range.data, parsed.activity.types, type.default) - ## If configured, find minimum over all activity types per author, for example: + ## If configured, aggregate over all activity types per author. 
For example, for first activity ## data ## list(authorA = list(mails = 1, commits = 2), authorB = list(mails = 3, commits = 3)) ## yields ## list(authorA = list(all.activities = 1), authorB = list(all.activities = 3)) if (combine.activity.types) { data = parallel::mclapply(data, function(item.list) { - min.value = min(do.call(base::c, item.list), na.rm = TRUE) - return(list(all.activities = min.value)) + aggregated.value = aggregation.function(do.call(base::c, item.list), na.rm = TRUE) + return(list(all.activities = aggregated.value)) }) } return(data) @@ -760,7 +835,7 @@ add.vertex.attribute.author.first.activity = function(list.of.networks, project. #' @param project.data The project data #' @param name The attribute name to add [default: "active.ranges"] #' @param activity.types The kinds of activity to use as basis: One or more of \code{mails}, \code{commits} and -#' \code{issues}. [default: c("mails", "commits", "issues")] +#' \code{issues} [default: c("mails", "commits", "issues")] #' @param default.value The default value to add if a vertex has no matching value [default: list()] #' @param combine.activity.types Flag indicating that one value, computed over all given #' \code{activity.types} is of interest (instead of one value per type). 
From b4468cb27d49293b93a0990ff744a57463a4eeeb Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Tue, 10 Dec 2024 19:28:27 +0100 Subject: [PATCH 107/130] Fix inconsistent indentation in util-networks-covariates.R Signed-off-by: Thomas Bock --- util-networks-covariates.R | 166 ++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 86 deletions(-) diff --git a/util-networks-covariates.R b/util-networks-covariates.R index 7e91d9f1..6db374a3 100644 --- a/util-networks-covariates.R +++ b/util-networks-covariates.R @@ -14,7 +14,7 @@ ## Copyright 2017 by Felix Prasse ## Copyright 2018-2019 by Claus Hunsen ## Copyright 2018-2019 by Thomas Bock -## Copyright 2021, 2023-2024 by Thomas Bock +## Copyright 2021, 2023-2025 by Thomas Bock ## Copyright 2018-2019 by Klara Schlüter ## Copyright 2018 by Jakob Kronawitter ## Copyright 2020 by Christian Hechtl @@ -325,13 +325,13 @@ add.vertex.attribute.author.commit.count.committer = function(list.of.networks, #' #' @return A list of networks with the added attribute add.vertex.attribute.author.commit.count.committer.not.author = function(list.of.networks, project.data, - name = "commit.count.committer.not.author", - aggregation.level = c("range", "cumulative", - "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = 0L) { + name = "commit.count.committer.not.author", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, default.value, get.committer.not.author.commit.count, "committer.name" @@ -354,13 +354,13 @@ add.vertex.attribute.author.commit.count.committer.not.author = function(list.of #' #' @return A list of networks with the added attribute add.vertex.attribute.author.commit.count.committer.and.author = function(list.of.networks, project.data, - name = 
"commit.count.committer.and.author", - aggregation.level = c("range", "cumulative", - "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = 0L) { + name = "commit.count.committer.and.author", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, default.value, get.committer.and.author.commit.count, "committer.name" @@ -384,13 +384,13 @@ add.vertex.attribute.author.commit.count.committer.and.author = function(list.of #' #' @return A list of networks with the added attribute add.vertex.attribute.author.commit.count.committer.or.author = function(list.of.networks, project.data, - name = "commit.count.committer.or.author", - aggregation.level = c("range", "cumulative", - "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = 0L) { + name = "commit.count.committer.or.author", + aggregation.level = c("range", "cumulative", + "all.ranges", + "project.cumulative", + "project.all.ranges", + "complete"), + default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, default.value, get.committer.or.author.commit.count, "name" @@ -414,10 +414,10 @@ add.vertex.attribute.author.commit.count.committer.or.author = function(list.of. 
#' #' @return A list of networks with the added attribute add.vertex.attribute.author.artifact.count = function(list.of.networks, project.data, name = "artifact.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), - default.value = 0L) { + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = 0L) { aggregation.level = match.arg.or.default(aggregation.level, default = "range") nets.with.attr = split.and.add.vertex.attribute( @@ -452,11 +452,11 @@ add.vertex.attribute.author.artifact.count = function(list.of.networks, project. #' #' @return A list of networks with the added attribute add.vertex.attribute.author.mail.count = function(list.of.networks, project.data, - name = "mail.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), - default.value = 0L) { + name = "mail.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( list.of.networks, project.data, name, aggregation.level, default.value, get.author.mail.count, "author.name" @@ -479,11 +479,11 @@ add.vertex.attribute.author.mail.count = function(list.of.networks, project.data #' #' @return A list of networks with the added attribute add.vertex.attribute.author.mail.thread.count = function(list.of.networks, project.data, - name = "mail.thread.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), - default.value = 0L) { + name = "mail.thread.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + default.value = 0L) { nets.with.attr = add.vertex.attribute.count.helper( 
list.of.networks, project.data, name, aggregation.level, default.value, get.author.mail.thread.count, "author.name" @@ -511,12 +511,13 @@ add.vertex.attribute.author.mail.thread.count = function(list.of.networks, proje #' #' @return A list of networks with the added attribute add.vertex.attribute.author.issue.count = function(list.of.networks, project.data, - name = "issue.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", "issues"), - use.unfiltered.data = FALSE) { + name = "issue.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", "project.all.ranges", + "complete"), + default.value = 0L, + issue.type = c("all", "pull.requests", "issues"), + use.unfiltered.data = FALSE) { issue.type = match.arg(issue.type) if (missing(name) && identical(issue.type, "pull.requests")) { @@ -550,14 +551,13 @@ add.vertex.attribute.author.issue.count = function(list.of.networks, project.dat #' #' @return A list of networks with the added attribute add.vertex.attribute.author.issues.commented.count = function(list.of.networks, project.data, - name = "issues.commented.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", - "issues"), - use.unfiltered.data = FALSE) { + name = "issues.commented.count", + aggregation.level = c("range", "cumulative", + "all.ranges", "project.cumulative", + "project.all.ranges", "complete"), + default.value = 0L, + issue.type = c("all", "pull.requests", "issues"), + use.unfiltered.data = FALSE) { issue.type = match.arg(issue.type) if (missing(name) && identical(issue.type, "pull.requests")) { @@ -594,14 +594,13 @@ add.vertex.attribute.author.issues.commented.count = function(list.of.networks, #' #' @return A list of networks with the added 
attribute add.vertex.attribute.author.issue.creation.count = function(list.of.networks, project.data, - name = "issue.creation.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", - "issues"), - use.unfiltered.data = TRUE) { + name = "issue.creation.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + default.value = 0L, + issue.type = c("all", "pull.requests", "issues"), + use.unfiltered.data = TRUE) { issue.type = match.arg(issue.type) if (missing(name) && identical(issue.type, "pull.requests")) { @@ -634,14 +633,13 @@ add.vertex.attribute.author.issue.creation.count = function(list.of.networks, pr #' #' @return A list of networks with the added attribute add.vertex.attribute.author.issue.comment.count = function(list.of.networks, project.data, - name = "issue.comment.count", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = 0L, issue.type = c("all", "pull.requests", - "issues"), - use.unfiltered.data = FALSE) { + name = "issue.comment.count", + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + default.value = 0L, + issue.type = c("all", "pull.requests", "issues"), + use.unfiltered.data = FALSE) { issue.type = match.arg(issue.type) if (missing(name) && identical(issue.type, "pull.requests")) { @@ -781,8 +779,8 @@ add.vertex.attribute.author.aggregated.activity = function(list.of.networks, pro activity.types = c("mails", "commits", "issues"), name = "aggregated.activity", aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), + "project.cumulative", + "project.all.ranges", "complete"), default.value = NA, 
combine.activity.types = FALSE, aggregation.function, @@ -843,9 +841,9 @@ add.vertex.attribute.author.aggregated.activity = function(list.of.networks, pro #' #' @return A list of networks with the added attribute add.vertex.attribute.author.active.ranges = function(list.of.networks, project.data, name = "active.ranges", - activity.types = c("mails", "commits", "issues"), - default.value = list(), - combine.activity.types = FALSE) { + activity.types = c("mails", "commits", "issues"), + default.value = list(), + combine.activity.types = FALSE) { net.to.range.list = split.data.by.networks(list.of.networks, project.data, "range") parsed.activity.types = match.arg.or.default(activity.types, several.ok = TRUE) @@ -940,8 +938,8 @@ add.vertex.attribute.author.role.simple = function(list.of.networks, project.dat add.vertex.attribute.author.role.function = function(list.of.networks, project.data, classification.function, name = "author.role", aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", "project.all.ranges", - "complete"), + "project.cumulative", "project.all.ranges", + "complete"), default.value = NA) { aggregation.level = match.arg.or.default(aggregation.level, default = "range") @@ -1113,8 +1111,7 @@ add.vertex.attribute.artifact.change.count = function(list.of.networks, project. 
add.vertex.attribute.artifact.first.occurrence = function(list.of.networks, project.data, name = "first.occurrence", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", - "project.all.ranges", - "complete"), + "project.all.ranges", "complete"), default.value = NA) { aggregation.level = match.arg.or.default(aggregation.level, default = "complete") @@ -1148,11 +1145,10 @@ add.vertex.attribute.artifact.first.occurrence = function(list.of.networks, proj #' #' @return A list of networks with the added attribute add.vertex.attribute.artifact.last.edited = function(list.of.networks, project.data, name = "last.edited", - aggregation.level = c("range", "cumulative", "all.ranges", - "project.cumulative", - "project.all.ranges", - "complete"), - default.value = NA) { + aggregation.level = c("range", "cumulative", "all.ranges", + "project.cumulative", + "project.all.ranges", "complete"), + default.value = NA) { aggregation.level = match.arg.or.default(aggregation.level, default = "complete") ## make sure that the default value contains a tzone attribute (even if the default value is NA) @@ -1219,8 +1215,7 @@ add.vertex.attribute.mail.thread.contributor.count = function(list.of.networks, add.vertex.attribute.mail.thread.message.count = function(list.of.networks, project.data, name = "thread.message.count", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", - "project.all.ranges", - "complete"), + "project.all.ranges", "complete"), default.value = NA) { aggregation.level = match.arg.or.default(aggregation.level, default = "complete") @@ -1249,8 +1244,7 @@ add.vertex.attribute.mail.thread.message.count = function(list.of.networks, proj add.vertex.attribute.mail.thread.start.date = function(list.of.networks, project.data, name = "thread.start.date", aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", - "project.all.ranges", - "complete"), + "project.all.ranges", "complete"), default.value = NA) 
{ aggregation.level = match.arg.or.default(aggregation.level, default = "complete") @@ -1392,8 +1386,8 @@ add.vertex.attribute.issue.event.count = function(list.of.networks, project.data aggregation.level = c("range", "cumulative", "all.ranges", "project.cumulative", "project.all.ranges", "complete"), - type = c("all", "issues", "pull.requests"), default.value = NA, - use.unfiltered.data = FALSE) { + type = c("all", "issues", "pull.requests"), + default.value = NA, use.unfiltered.data = FALSE) { type = match.arg(type) aggregation.level = match.arg.or.default(aggregation.level, default = "complete") if (missing(name) && identical(type, "pull.requests")) { From 2327d20bd781f0e153a39b7ad56ced2ab3b6daa5 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Tue, 10 Dec 2024 19:30:32 +0100 Subject: [PATCH 108/130] Add tests for the new functionality regarding last activity The tests cover the new "last.activity" vertex attribute, which shall implicitly also cover the last-activity data computation. Signed-off-by: Thomas Bock --- tests/test-networks-covariates.R | 369 +++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) diff --git a/tests/test-networks-covariates.R b/tests/test-networks-covariates.R index 427ff729..99f462b4 100644 --- a/tests/test-networks-covariates.R +++ b/tests/test-networks-covariates.R @@ -16,6 +16,7 @@ ## Copyright 2021 by Christian Hechtl ## Copyright 2017-2019 by Claus Hunsen ## Copyright 2018-2019 by Thomas Bock +## Copyright 2024 by Thomas Bock ## Copyright 2018-2019 by Klara Schlüter ## Copyright 2018-2019 by Jakob Kronawitter ## Copyright 2021 by Johannes Hostert @@ -316,6 +317,228 @@ get.expected.first.activity = function() { return(expected.attributes) } +#' Helper for the last activity tests: Gets the last activity per person and data source for possible +#' aggregation levels as a nested list.
+#' +#' @return A list (elements represent the levels) of lists (elements represent the networks after splitting) of lists +#' (elements represent the vertices which represent persons) of lists (elements represent the different data +#' sources) of dates as POSIXct. +get.expected.last.activity = function() { + expected.attributes = list( + range = network.covariates.test.build.expected( + list( + list( + mails = "2016-07-12 15:58:40 UTC", + commits = "2016-07-12 15:58:59 UTC", + issues = NA + ) + ), + list( + list( + mails = NA, + commits = "2016-07-12 16:00:45 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:10 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:32 UTC", + issues = NA + ) + ) + ), + cumulative = network.covariates.test.build.expected( + list( + list( + mails = "2016-07-12 15:58:40 UTC", + commits = "2016-07-12 15:58:59 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 15:58:50 UTC", + commits = "2016-07-12 16:00:45 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:10 UTC", + issues = NA + ), + list( + mails = "2016-07-12 16:04:40 UTC", + commits = "2016-07-12 16:06:32 UTC", + issues = NA + ) + ) + ), + all.ranges = network.covariates.test.build.expected( + list( + list( + mails = "2016-07-12 15:58:40 UTC", + commits = "2016-07-12 15:58:59 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:10 UTC", + issues = NA + ), + list( + mails = "2016-07-12 16:04:40 UTC", + commits =
"2016-07-12 16:06:32 UTC", + issues = NA + ) + ) + ), + project.cumulative = network.covariates.test.build.expected( + list( + list( + mails = "2016-07-12 15:58:40 UTC", + commits = "2016-07-12 15:58:59 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 15:58:50 UTC", + commits = "2016-07-12 16:00:45 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:10 UTC", + issues = NA + ), + list( + mails = "2016-07-12 16:04:40 UTC", + commits = "2016-07-12 16:06:32 UTC", + issues = NA + ) + ) + ), + project.all.ranges = network.covariates.test.build.expected( + list( + list( + mails = "2016-07-12 15:58:40 UTC", + commits = "2016-07-12 15:58:59 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37 UTC", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:10 UTC", + issues = NA + ), + list( + mails = "2016-07-12 16:04:40 UTC", + commits = "2016-07-12 16:06:32 UTC", + issues = NA + ) + ) + ), + complete = network.covariates.test.build.expected( + list( + list( + mails = "2016-07-12 15:58:40 UTC", + commits = "2016-07-12 15:58:59 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ) + ), + list( + list( + mails = "2016-07-12 16:05:37", + commits = "2016-07-12 16:05:41 UTC", + issues = NA + ), + list( + mails = NA, + commits = "2016-07-12 16:06:10 UTC", + issues = NA + ), + list( + mails = "2016-07-12 16:04:40 UTC", + commits = "2016-07-12 16:06:32 UTC", + issues = NA + ) + ) + ) + ) + + ## convert date strings to POSIXct + expected.attributes = lapply(expected.attributes, function(level) { + lapply(level, function(network) { + lapply(network, 
function(person) { + lapply(person, function(date.per.datasource) { + return(get.date.from.string(date.per.datasource)) + }) + }) + }) + }) + + return(expected.attributes) +} + #' Helper for tests of the function add.vertex.attribute.author.active.ranges: Returns the expected active ranges per range, #' author and data source as a nested list. #' @@ -1044,6 +1267,152 @@ test_that("Test add.vertex.attribute.author.first.activity with one type and com }) }) +#' Test the add.vertex.attribute.author.last.activity method with computation over all types. +test_that("Test add.vertex.attribute.author.last.activity with multiple types and computation over all types", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks() + + ## lock issues in order to prevent them from being read because that alters the first activity dates + networks.and.data$project.data$set.project.conf.entry("issues.locked", TRUE) + + expected.attributes = list( + range = network.covariates.test.build.expected( + list(list(all.activities = "2016-07-12 15:58:59 UTC")), + list(list(all.activities = "2016-07-12 16:00:45 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC"), + list(all.activities = "2016-07-12 16:06:10 UTC"), + list(all.activities = "2016-07-12 16:06:32 UTC") + ) + ), + cumulative = network.covariates.test.build.expected( + list(list(all.activities = "2016-07-12 15:58:59 UTC")), + list(list(all.activities = "2016-07-12 16:00:45 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC"), + list(all.activities = "2016-07-12 16:06:10 UTC"), + list(all.activities = "2016-07-12 16:06:32 UTC") + ) + ), + all.ranges = network.covariates.test.build.expected( + list(list(all.activities = "2016-07-12 15:58:59 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC"), + list(all.activities = "2016-07-12 16:06:10 UTC"), + list(all.activities = "2016-07-12 16:06:32 UTC") + ) + ), + 
project.cumulative = network.covariates.test.build.expected( + list(list(all.activities = "2016-07-12 15:58:59 UTC")), + list(list(all.activities = "2016-07-12 16:00:45 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC"), + list(all.activities = "2016-07-12 16:06:10 UTC"), + list(all.activities = "2016-07-12 16:06:32 UTC") + ) + ), + project.all.ranges = network.covariates.test.build.expected( + list(list(all.activities = "2016-07-12 15:58:59 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC"), + list(all.activities = "2016-07-12 16:06:10 UTC"), + list(all.activities = "2016-07-12 16:06:32 UTC") + ) + ), + complete = network.covariates.test.build.expected( + list(list(all.activities = "2016-07-12 15:58:59 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC")), + list(list(all.activities = "2016-07-12 16:05:41 UTC"), + list(all.activities = "2016-07-12 16:06:10 UTC"), + list(all.activities = "2016-07-12 16:06:32 UTC") + ) + ) + ) + + ## convert date strings to POSIXct + expected.attributes = lapply(expected.attributes, function(level) { + lapply(level, function(network) { + lapply(network, function(person) { + lapply(person, function(date.per.datasource) { + return(get.date.from.string(date.per.datasource)) + }) + }) + }) + }) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + + networks.with.attributes = add.vertex.attribute.author.last.activity( + list.of.networks = networks.and.data[["networks"]], project.data = networks.and.data[["project.data"]], + activity.types = c("mails", "commits", "issues"), name = "last.activity", aggregation.level = level, + default.value = NA, combine.activity.types = TRUE + ) + actual.attributes = lapply(networks.with.attributes, igraph::vertex_attr, name = "last.activity") + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + +#' Test the add.vertex.attribute.author.last.activity method with multiple activity 
types and computation per type. +test_that("Test add.vertex.attribute.author.last.activity with multiple types and computation per type", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks() + + expected.attributes = get.expected.last.activity() + + ## lock issues in order to prevent them from being read because that alters the first activity dates + networks.and.data$project.data$set.project.conf.entry("issues.locked", TRUE) + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + + networks.with.attributes = add.vertex.attribute.author.last.activity( + list.of.networks = networks.and.data[["networks"]], project.data = networks.and.data[["project.data"]], + activity.types = c("mails", "commits", "issues"), name = "last.activity", aggregation.level = level, + default.value = NA, combine.activity.types = FALSE + ) + actual.attributes = lapply(networks.with.attributes, igraph::vertex_attr, name = "last.activity") + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + +#' Test the add.vertex.attribute.author.last.activity method with one activity type and computation per type. 
+test_that("Test add.vertex.attribute.author.last.activity with one type and computation per type", { + + ## Test setup + + networks.and.data = get.network.covariates.test.networks() + + expected.attributes = get.expected.last.activity() + expected.attributes = lapply(expected.attributes, function(level) { + lapply(level, function(network) { + lapply(network, function(person) { + return(person["mails"]) + }) + }) + }) + + + ## Test + + lapply(AGGREGATION.LEVELS, function(level) { + + networks.with.attributes = add.vertex.attribute.author.last.activity( + list.of.networks = networks.and.data[["networks"]], project.data = networks.and.data[["project.data"]], + activity.types = c("mails"), name = "last.activity", aggregation.level = level, + default.value = NA, combine.activity.types = FALSE + ) + actual.attributes = lapply(networks.with.attributes, igraph::vertex_attr, name = "last.activity") + + expect_equal(expected.attributes[[level]], actual.attributes) + }) +}) + #' Test the add.vertex.attribute.author.active.ranges method with computation over all types test_that("Test add.vertex.attribute.author.active.ranges with computation over all types", { From 4791476cf3b0537143583429d10569bd96b51c24 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Tue, 17 Dec 2024 13:04:30 +0100 Subject: [PATCH 109/130] Update changelog Signed-off-by: Thomas Bock --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 4e807aaf..f04de00a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -19,6 +19,7 @@ - Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. 
This includes adding new config parameters and the function `add.vertex.attribute.commit.network` for adding vertex attributes to a commit network (PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) - Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) - Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) +- Add function `get.last.activity.data` to compute developers' last activities in a project, as well as function `add.vertex.attribute.author.last.activity` to add a developer's date of last activity as vertex attribute to a network, as well as helper functions `get.aggregated.activity.data` and `add.vertex.attribute.author.aggregated.activity` to allow for other activity aggregations than first and last activity (PR #275, 9f231612fcd33a283362c79b35a94295ff3d4ef9, 8660ed763ba4b69e909e7fbb01e27e1999522047) ### Changed/Improved From 3e40555102d9f3eef4a9a9cb8ac61e1280a91a1f Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Mon, 13 Jan 2025 14:35:37 +0100 Subject: [PATCH 110/130] Update outdated Codeface links in README Some links to Codeface in our README.md still pointed to siemens/codeface, which had been archived already two years ago. Instead of these outdated links, we link to the se-sic/codeface repository now. This partly addresses #272. Signed-off-by: Thomas Bock --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f9001b12..cd06a817 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Have you ever wanted to build socio-technical developer networks the way you want? 
Here, you are in the right place. Using this network library, you are able to construct such networks based on various data sources (commits, e-mails, issues) in a configurable and modular way. Additionally, we provide, e.g., analysis methods for network motifs, network metrics, and developer classification. -The network library `coronet` can be used to construct analyzable networks based on data extracted from `Codeface` [[https://github.com/siemens/codeface](https://github.com/siemens/codeface)] and its companion tool `codeface-extraction` [[https://github.com/se-sic/codeface-extraction](https://github.com/se-sic/codeface-extraction)]. The library reads the written/extracted data from disk and constructs intermediate data structures for convenient data handling, either *data containers* or, more importantly, *developer networks*. +The network library `coronet` can be used to construct analyzable networks based on data extracted from `Codeface` [[https://github.com/se-sic/codeface](https://github.com/se-sic/codeface)] (originally developed by Siemens) and its companion tool `codeface-extraction` [[https://github.com/se-sic/codeface-extraction](https://github.com/se-sic/codeface-extraction)]. The library reads the written/extracted data from disk and constructs intermediate data structures for convenient data handling, either *data containers* or, more importantly, *developer networks*. If you wonder: The name `coronet` derives as an acronym from the words "configurable", "reproducible", and, most importantly, "network". The name says it all and very much conveys our goal. @@ -112,7 +112,7 @@ While `proximity` triggers a file/function-based commit analysis in `Codeface`, When using this network library, the user only needs to give the `artifact` parameter to the [`ProjectConf`](#projectconf) constructor, which automatically ensures that the correct tagging is selected. 
The configuration files `{project-name}_{tagging}.conf` are mandatory and contain some basic configuration regarding a performed `Codeface` analysis (e.g., project name, name of the corresponding repository, name of the mailing list, etc.). -For further details on those files, please have a look at some [example files](https://github.com/siemens/codeface/tree/master/conf) in the `Codeface` repository. +For further details on those files, please have a look at some [example files](https://github.com/se-sic/codeface/tree/infosaar-updates/conf) in the `Codeface` repository. All the `*.list` files listed above are output files of `codeface-extraction` and contain meta data of, e.g., commits or e-mails to the mailing list, etc., in CSV format. This network library lazily loads and processes these files when needed. From e27acb5ad7b4c4532e787d793301d06f13e8bbba Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 27 Nov 2024 15:28:44 +0100 Subject: [PATCH 111/130] Add base implementation Base implementation for new classification metrics. Documentation and testing still missing. Signed-off-by: Leo Sendelbach --- util-core-peripheral.R | 82 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/util-core-peripheral.R b/util-core-peripheral.R index ef5a7997..3ca97030 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -22,6 +22,7 @@ ## Copyright 2019 by Thomas Bock ## Copyright 2019 by Jakob Kronawitter ## Copyright 2021 by Johannes Hostert +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
## ## This file is derived from following Codeface script: @@ -59,6 +60,10 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( "network.degree" = "network", "network.eigen" = "network", "network.hierarchy" = "network", + "network.betweenness" = "network", + "network.closeness" = "network", + "network.pagerank" = "network", + "network.eccentricity" = "network", "commit.count" = "count", "loc.count" = "count", "mail.count" = "count", @@ -96,7 +101,7 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( #' Network-based options/metrics (parameter \code{network} has to be specified): #' - "network.degree" #' - "network.eigen" -#' - "network.hierarchy" +#' - "network.hierarchy" ###TODO check all documentation #' Count-based options/metrics (parameter \code{proj.data} has to be specified): #' - "commit.count" #' - "loc.count" @@ -126,7 +131,8 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( #' first column and their centrality values in the second column. get.author.class.by.type = function(network = NULL, proj.data = NULL, - type = c("network.degree", "network.eigen", "network.hierarchy", + type = c("network.degree", "network.eigen", "network.hierarchy", "network.betweenness", + "network.closeness", "network.pagerank", "network.eccentricity", "commit.count", "loc.count", "mail.count", "mail.thread.count", "issue.count", "issue.comment.count", "issue.commented.in.count", "issue.created.count"), @@ -144,6 +150,10 @@ get.author.class.by.type = function(network = NULL, "network.degree" = "vertex.degree", "network.eigen" = "eigen.centrality", "network.hierarchy" = "hierarchy", + "network.betweenness" = "betweenness.centrality", + "network.closeness" = "closeness.centrality", + "network.pagerank" = "pagerank.centrality", + "network.eccentricity" = "eccentricity", "commit.count" = "commit.count", "loc.count" = "loc.count", "mail.count" = "mail.count", @@ -231,6 +241,30 @@ get.author.class.by.type = function(network = NULL, ## Construct centrality dataframe centrality.dataframe = data.frame(author.name 
= row.names(hierarchy.base.df), centrality = hierarchy.calculated) + } else if (type == "network.betweenness") { + betweenness.centrality.vec = igraph::betweenness(network, directed = TRUE) + ## Construct centrality dataframe + centrality.dataframe = data.frame(author.name = names(betweenness.centrality.vec), + centrality = as.vector(betweenness.centrality.vec)) + } else if (type == "network.closeness") { + closeness.centrality.vec = igraph::closeness(network) + ## Construct centrality dataframe + centrality.dataframe = data.frame(author.name = names(closeness.centrality.vec), + centrality = as.vector(closeness.centrality.vec)) + } else if (type == "network.pagerank") { + pagerank.centrality.vec = igraph::page_rank(network, directed = TRUE)[["vector"]] + ## Construct centrality dataframe + centrality.dataframe = data.frame(author.name = names(pagerank.centrality.vec), + centrality = as.vector(pagerank.centrality.vec)) + } else if (type == "network.eccentricity") { + eccentricity.vec = igraph::eccentricity(network) + ## since core developers are expected to have a lower eccentricity, + ## we need to invert all non-zero values + indices = which(eccentricity.vec > 0) + eccentricity.vec[indices] = max(eccentricity.vec) - eccentricity.vec[indices] + ## Construct centrality dataframe + centrality.dataframe = data.frame(author.name = names(eccentricity.vec), + centrality = as.vector(eccentricity.vec)) } else if (type == "commit.count") { ## Construct centrality dataframe centrality.dataframe = get.author.commit.count(proj.data) @@ -669,6 +703,50 @@ get.author.class.network.hierarchy = function(network, result.limit = NULL, return(result) } +get.author.class.network.betweenness = function(network, result.limit = NULL, + restrict.classification.to.authors = NULL) { + logging::logdebug("get.author.class.network.betweenness: starting.") + + result = get.author.class.by.type(network = network, type = "network.betweenness", result.limit = result.limit, + 
restrict.classification.to.authors = restrict.classification.to.authors) + + logging::logdebug("get.author.class.network.betweenness: finished.") + return(result) +} + +get.author.class.network.closeness = function(network, result.limit = NULL, + restrict.classification.to.authors = NULL) { + logging::logdebug("get.author.class.network.closeness: starting.") + + result = get.author.class.by.type(network = network, type = "network.closeness", result.limit = result.limit, + restrict.classification.to.authors = restrict.classification.to.authors) + + logging::logdebug("get.author.class.network.closeness: finished.") + return(result) +} + +get.author.class.network.pagerank = function(network, result.limit = NULL, + restrict.classification.to.authors = NULL) { + logging::logdebug("get.author.class.network.pagerank: starting.") + + result = get.author.class.by.type(network = network, type = "network.pagerank", result.limit = result.limit, + restrict.classification.to.authors = restrict.classification.to.authors) + + logging::logdebug("get.author.class.network.pagerank: finished.") + return(result) +} + +get.author.class.network.eccentricity = function(network, result.limit = NULL, + restrict.classification.to.authors = NULL) { + logging::logdebug("get.author.class.network.eccentricity: starting.") + + result = get.author.class.by.type(network = network, type = "network.eccentricity", result.limit = result.limit, + restrict.classification.to.authors = restrict.classification.to.authors) + + logging::logdebug("get.author.class.network.eccentricity: finished.") + return(result) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Count-based classification --------------------------------------------- From 5f14ac26d8dbda4061ef682b5325a1d6dc386a43 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 11 Dec 2024 14:41:16 +0100 Subject: [PATCH 112/130] Add tests for new classification methods Tests use already existing network, this test 
cases are quite small. Additional research into potential rounding errors may be required. Signed-off-by: Leo Sendelbach --- tests/test-core-peripheral.R | 69 ++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tests/test-core-peripheral.R b/tests/test-core-peripheral.R index e719d651..4d8359bd 100644 --- a/tests/test-core-peripheral.R +++ b/tests/test-core-peripheral.R @@ -18,6 +18,7 @@ ## Copyright 2019 by Christian Hechtl ## Copyright 2021 by Christian Hechtl ## Copyright 2023-2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. @@ -105,6 +106,74 @@ test_that("Eigenvector classification", { expect_equal(expected, result, tolerance = 0.0001) }) +test_that("Betweenness classification", { + + ## Act + result = get.author.class.network.betweenness(network) + + ## Assert + expected.core = data.frame(author.name = c("Olaf"), + betweenness.centrality = c(1)) + expected.peripheral = data.frame(author.name = c("Björn", "udo", "Thomas", "Fritz fritz@example.org", + "georg", "Hans"), + betweenness.centrality = c(0, 0, 0, 0, 0, 0)) + expected = list(core = expected.core, peripheral = expected.peripheral) + row.names(result[["core"]]) = NULL + row.names(result[["peripheral"]]) = NULL + expect_equal(expected, result) +}) + +test_that("Closeness classification", { + + ## Act + result = get.author.class.network.closeness(network) + + ## Assert + expected.core = data.frame(author.name = c("Olaf"), + closeness.centrality = c(0.5)) + expected.peripheral = data.frame(author.name = c("Björn", "Thomas", "udo", "Fritz fritz@example.org", + "georg", "Hans"), + closeness.centrality = c(0.33333, 0.33333, 0.0, 0.0, 0.0, 0.0)) + expected = list(core = expected.core, peripheral = expected.peripheral) + row.names(result[["core"]]) = NULL + row.names(result[["peripheral"]]) = NULL + expect_equal(expected, result, tolerance = 0.0001) +}) + +test_that("Pagerank classification", { + + ## Act + result = 
get.author.class.network.pagerank(network) + + ## Assert + expected.core = data.frame(author.name = c("Olaf"), + pagerank.centrality = c(0.40541)) + expected.peripheral = data.frame(author.name = c("Björn", "Thomas", "udo", "Fritz fritz@example.org", + "georg", "Hans"), + pagerank.centrality = c(0.21396, 0.21396, 0.041667, 0.041667, 0.041667, 0.041667)) + expected = list(core = expected.core, peripheral = expected.peripheral) + row.names(result[["core"]]) = NULL + row.names(result[["peripheral"]]) = NULL + expect_equal(expected, result, tolerance = 0.0001) +}) + +test_that("Eccentricity classification", { + + ## Act + result = get.author.class.network.eccentricity(network) + + ## Assert + expected.core = data.frame(author.name = c("Olaf"), + eccentricity = c(1)) + expected.peripheral = data.frame(author.name = c("Björn", "udo", "Thomas", "Fritz fritz@example.org", + "georg", "Hans"), + eccentricity = c(0, 0, 0, 0, 0, 0)) + expected = list(core = expected.core, peripheral = expected.peripheral) + row.names(result[["core"]]) = NULL + row.names(result[["peripheral"]]) = NULL + expect_equal(expected, result) +}) + # TODO: Add a test for hierarchy classification test_that("Commit-count classification using 'result.limit'" , { From 217880809a07f970b055bd3b84c05cb6e61ac6af Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 11 Dec 2024 14:49:52 +0100 Subject: [PATCH 113/130] Add documentation to new classification methods Add default documentation, same as for already existing classification methods Signed-off-by: Leo Sendelbach --- util-core-peripheral.R | 96 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 3ca97030..a6eda367 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -703,6 +703,30 @@ get.author.class.network.hierarchy = function(network, result.limit = NULL, return(result) } +## * Betweenness-based classification 
-------------------------------------- + +#' Classify authors into "core" and "peripheral" based on the betweenness-centrality of author vertices in the network +#' and return the classification result. +#' +#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. +#' +#' @param network the network containing the authors to classify +#' @param result.limit the maximum number of authors contained in the classification result. Only the top +#' \code{result.limit} authors of the classification stack will be contained within the returned +#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] +#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this +#' vector are to be classified. Authors that appear in the vector but are not +#' part of the classification result (i.e., they are not present in the +#' underlying data) will be added to it afterwards (with a centrality value +#' of \code{NA}). \code{NULL} means that no restriction is made. +#' [default: NULL] +#' +#' @return the classification result, that is, a list containing two named list members \code{core} and +#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both +#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the +#' first column and their centrality values in the second column. 
+#' +#' @seealso get.author.class.by.type get.author.class.network.betweenness = function(network, result.limit = NULL, restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.betweenness: starting.") @@ -714,6 +738,30 @@ get.author.class.network.betweenness = function(network, result.limit = NULL, return(result) } +## * Closeness-based classification -------------------------------------- + +#' Classify authors into "core" and "peripheral" based on the closeness-centrality of author vertices in the network +#' and return the classification result. +#' +#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. +#' +#' @param network the network containing the authors to classify +#' @param result.limit the maximum number of authors contained in the classification result. Only the top +#' \code{result.limit} authors of the classification stack will be contained within the returned +#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] +#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this +#' vector are to be classified. Authors that appear in the vector but are not +#' part of the classification result (i.e., they are not present in the +#' underlying data) will be added to it afterwards (with a centrality value +#' of \code{NA}). \code{NULL} means that no restriction is made. +#' [default: NULL] +#' +#' @return the classification result, that is, a list containing two named list members \code{core} and +#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both +#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the +#' first column and their centrality values in the second column. 
+#' +#' @seealso get.author.class.by.type get.author.class.network.closeness = function(network, result.limit = NULL, restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.closeness: starting.") @@ -725,6 +773,30 @@ get.author.class.network.closeness = function(network, result.limit = NULL, return(result) } +## * Pagerank-based classification -------------------------------------- + +#' Classify authors into "core" and "peripheral" based on the pagerank-centrality of author vertices in the network +#' and return the classification result. +#' +#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. +#' +#' @param network the network containing the authors to classify +#' @param result.limit the maximum number of authors contained in the classification result. Only the top +#' \code{result.limit} authors of the classification stack will be contained within the returned +#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] +#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this +#' vector are to be classified. Authors that appear in the vector but are not +#' part of the classification result (i.e., they are not present in the +#' underlying data) will be added to it afterwards (with a centrality value +#' of \code{NA}). \code{NULL} means that no restriction is made. +#' [default: NULL] +#' +#' @return the classification result, that is, a list containing two named list members \code{core} and +#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both +#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the +#' first column and their centrality values in the second column. 
+#' +#' @seealso get.author.class.by.type get.author.class.network.pagerank = function(network, result.limit = NULL, restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.pagerank: starting.") @@ -736,6 +808,30 @@ get.author.class.network.pagerank = function(network, result.limit = NULL, return(result) } +## * Eccentricity-based classification -------------------------------------- + +#' Classify authors into "core" and "peripheral" based on the eccentricity of author vertices in the network +#' and return the classification result. +#' +#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. +#' +#' @param network the network containing the authors to classify +#' @param result.limit the maximum number of authors contained in the classification result. Only the top +#' \code{result.limit} authors of the classification stack will be contained within the returned +#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] +#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this +#' vector are to be classified. Authors that appear in the vector but are not +#' part of the classification result (i.e., they are not present in the +#' underlying data) will be added to it afterwards (with a centrality value +#' of \code{NA}). \code{NULL} means that no restriction is made. +#' [default: NULL] +#' +#' @return the classification result, that is, a list containing two named list members \code{core} and +#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both +#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the +#' first column and their eccentricity values in the second column. 
+#' +#' @seealso get.author.class.by.type get.author.class.network.eccentricity = function(network, result.limit = NULL, restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.eccentricity: starting.") From d32a7d40e52640fa61273f13f3877df0d59839c6 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 11 Dec 2024 15:09:17 +0100 Subject: [PATCH 114/130] Update NEWS.md add new entry under 'unversioned" Signed-off-by: Leo Sendelbach --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index f04de00a..0bba34b2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,7 @@ - Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) - Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) - Add function `get.last.activity.data` to compute developers' last activities in a project, as well as function `add.vertex.attribute.author.last.activity` to add a developer's date of last activity as vertex attribute to a network, as well as helper functions `get.aggregated.activity.data` and `add.vertex.attribute.author.aggregated.activity` to allow for other activity aggregations than first and last activity (PR #275, 9f231612fcd33a283362c79b35a94295ff3d4ef9, 8660ed763ba4b69e909e7fbb01e27e1999522047) +- Add four new metric which can be used for the classification of authors into core and peripheral: Betweenness, Closeness, Pagerank and Eccentricity (PR #276, 65d5c9cc86708777ef458b0c2e744ab4b846bdd1, b392d1a125d0f306b4bce8d95032162a328a3ce2, c5d37d40024e32ad5778fa5971a45bc08f7631e0) ### Changed/Improved From 6101e11f5c4ac1b5883e85cebd01a3cd7c76e056 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: 
Wed, 8 Jan 2025 15:51:06 +0100 Subject: [PATCH 115/130] Add core/peripheral classification to README Also minor fixes as requested in PR Signed-off-by: Leo Sendelbach --- README.md | 44 ++++++++++++++++++++++++++++++++++++++++++ util-core-peripheral.R | 11 ++++++----- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cd06a817..2bd913b0 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ If you wonder: The name `coronet` derives as an acronym from the words "configur - [Splitting data and networks based on defined time windows](#splitting-data-and-networks-based-on-defined-time-windows) - [Cutting data to unified date ranges](#cutting-data-to-unified-date-ranges) - [Handling data independently](#handling-data-independently) + - [Core/Peripheral Classification](#coreperipheral-classification) - [How-to](#how-to) - [File/Module overview](#filemodule-overview) - [Configuration classes](#configuration-classes) @@ -375,6 +376,49 @@ Analogously, the `NetworkConf` parameter `unify.date.ranges` enables this very f In some cases, it is not necessary to build a network to get the information you need. Therefore, please remember that we offer the possibility to get the raw data or mappings between, e.g., authors and the files they edited. The data inside an instance of `ProjectData` can be accessed independently. Examples can be found in the file `showcase.R`. +#### Core/Peripheral Classification + +Core/Peripheral classification descibes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute 80% of all work in a given project. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired 80:20 split is achieved. 
+ +In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors. +- `network.degree` + * calculates scores for authors based on the vertex degrees in an author network + * the degree of a vertex is the number of adjacent edges +- `network.eigen` + * calculates scores for authors based on the eigenvector centralities in an author network + * eigenvector centrality measures the importance of vertices within a graph by granting a higher score for outgoing edges proportional to the score of the target vertex +- `network.hierarchy` + * calculates scores for authors based on the hierarchy found within an author network + * hierarchical scores are calculated by dividing the vertex degree by the clustering coefficient of each vertex +- `network.betweenness` + * calculates scores for authors based on the betweenness of vertices in an author network + * betweenness measures the number of shortest paths that go through each vertex +- `network.closeness` + * calculates scores for authors based on the closeness of vertices in an author network + * closeness measures how close vertices are to each other by taking the inverse if the sum of all their shortest paths +- `network.pagerank` + * calculates scores for authors based on the pagerank of vertices in an author network + * pagerank refers to the pagerank algorithm employed by google, which is closely related to eigenvector centrality +- `network.eccentricity` + * calculates scores for authors based on the eccentricity of vertices in an author network + * eccentricity measures the distance to each vertices' furthest reachablke vertex +- `commit.count` + * calculates scores based on the number of commits per author +- `loc.count` + * calculates scores based on the number of lines of code changed by each author +- `mail.count` + * calculates scores based on the number of mails sent per author +- `mail.thread.count` + * calculates scores based on the 
number of mail threads each author participated in +- `issue.count` + * calculates scores based on the number of issues each author participated in +- `issue.comment.count` + * calculates scores based on the number of comments each author made in issues +- `issue.commented.in.count` + * calculates scores based on the number of issues each author commented in +- `issue.created.count` + * calculates scores based on the number of issues each created + ### How-to In this section, we give a short example on how to initialize all needed objects and build a bipartite network. diff --git a/util-core-peripheral.R b/util-core-peripheral.R index a6eda367..3cda2fe9 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -101,7 +101,7 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( #' Network-based options/metrics (parameter \code{network} has to be specified): #' - "network.degree" #' - "network.eigen" -#' - "network.hierarchy" ###TODO check all documentation +#' - "network.hierarchy" #' Count-based options/metrics (parameter \code{proj.data} has to be specified): #' - "commit.count" #' - "loc.count" @@ -247,7 +247,7 @@ get.author.class.by.type = function(network = NULL, centrality.dataframe = data.frame(author.name = names(betweenness.centrality.vec), centrality = as.vector(betweenness.centrality.vec)) } else if (type == "network.closeness") { - closeness.centrality.vec = igraph::closeness(network) + closeness.centrality.vec = igraph::closeness(network, mode = "all") ## Construct centrality dataframe centrality.dataframe = data.frame(author.name = names(closeness.centrality.vec), centrality = as.vector(closeness.centrality.vec)) @@ -260,6 +260,7 @@ get.author.class.by.type = function(network = NULL, eccentricity.vec = igraph::eccentricity(network) ## since core developers are expected to have a lower eccentricity, ## we need to invert all non-zero values + ## all entries with value zero are isolated vertices, so the value is already correct indices = 
which(eccentricity.vec > 0) eccentricity.vec[indices] = max(eccentricity.vec) - eccentricity.vec[indices] ## Construct centrality dataframe @@ -728,7 +729,7 @@ get.author.class.network.hierarchy = function(network, result.limit = NULL, #' #' @seealso get.author.class.by.type get.author.class.network.betweenness = function(network, result.limit = NULL, - restrict.classification.to.authors = NULL) { + restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.betweenness: starting.") result = get.author.class.by.type(network = network, type = "network.betweenness", result.limit = result.limit, @@ -798,7 +799,7 @@ get.author.class.network.closeness = function(network, result.limit = NULL, #' #' @seealso get.author.class.by.type get.author.class.network.pagerank = function(network, result.limit = NULL, - restrict.classification.to.authors = NULL) { + restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.pagerank: starting.") result = get.author.class.by.type(network = network, type = "network.pagerank", result.limit = result.limit, @@ -833,7 +834,7 @@ get.author.class.network.pagerank = function(network, result.limit = NULL, #' #' @seealso get.author.class.by.type get.author.class.network.eccentricity = function(network, result.limit = NULL, - restrict.classification.to.authors = NULL) { + restrict.classification.to.authors = NULL) { logging::logdebug("get.author.class.network.eccentricity: starting.") result = get.author.class.by.type(network = network, type = "network.eccentricity", result.limit = result.limit, From 1549a4f73d7444699e9fe88ea31d431fb027b5b1 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 14 Jan 2025 13:17:04 +0100 Subject: [PATCH 116/130] Implement feedback on PR Changed ordering in readme Signed-off-by: Leo Sendelbach --- README.md | 56 ++++++++++++++++++++++++------------------ util-core-peripheral.R | 5 ++-- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git 
a/README.md b/README.md index 2bd913b0..b9df1e26 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,9 @@ If you wonder: The name `coronet` derives as an acronym from the words "configur - [Splitting data and networks based on defined time windows](#splitting-data-and-networks-based-on-defined-time-windows) - [Cutting data to unified date ranges](#cutting-data-to-unified-date-ranges) - [Handling data independently](#handling-data-independently) - - [Core/Peripheral Classification](#coreperipheral-classification) + - [Core/Peripheral classification](#coreperipheral-classification) + - [Count-based metrics](#count-based-metrics) + - [Network-based metrics](#network-based-metrics) - [How-to](#how-to) - [File/Module overview](#filemodule-overview) - [Configuration classes](#configuration-classes) @@ -376,48 +378,54 @@ Analogously, the `NetworkConf` parameter `unify.date.ranges` enables this very f In some cases, it is not necessary to build a network to get the information you need. Therefore, please remember that we offer the possibility to get the raw data or mappings between, e.g., authors and the files they edited. The data inside an instance of `ProjectData` can be accessed independently. Examples can be found in the file `showcase.R`. -#### Core/Peripheral Classification +#### Core/Peripheral classification -Core/Peripheral classification descibes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute 80% of all work in a given project. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired 80:20 split is achieved. 
+Core/Peripheral classification descibes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute most of the work in a given project. The concrete threshold can be configured in `CORE.THRESHOLD` and is per default set to 80%, a value commonly used in literature. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired split is achieved. -In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors. +##### Count-based metrics + +In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors using count-based metrics. +- `commit.count` + * calculates scores based on the number of commits per author +- `loc.count` + * calculates scores based on the number of lines of code changed by each author +- `mail.count` + * calculates scores based on the number of mails sent per author +- `mail.thread.count` + * calculates scores based on the number of mail threads each author participated in +- `issue.count` + * calculates scores based on the number of issues each author participated in +- `issue.comment.count` + * calculates scores based on the number of comments each author made in issues +- `issue.commented.in.count` + * calculates scores based on the number of issues each author commented in +- `issue.created.count` + * calculates scores based on the number of issues each author created + +##### Network-based metrics + +In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors using metrics that are used on author networks. 
- `network.degree` * calculates scores for authors based on the vertex degrees in an author network * the degree of a vertex is the number of adjacent edges - `network.eigen` * calculates scores for authors based on the eigenvector centralities in an author network - * eigenvector centrality measures the importance of vertices within a graph by granting a higher score for outgoing edges proportional to the score of the target vertex + * eigenvector centrality measures the importance of vertices within a network by awarding scores for adjacent edges proportional to the score of the connected vertex - `network.hierarchy` * calculates scores for authors based on the hierarchy found within an author network * hierarchical scores are calculated by dividing the vertex degree by the clustering coefficient of each vertex - `network.betweenness` * calculates scores for authors based on the betweenness of vertices in an author network - * betweenness measures the number of shortest paths that go through each vertex + * betweenness measures the number of shortest paths between any two vertices that go through each vertex - `network.closeness` * calculates scores for authors based on the closeness of vertices in an author network - * closeness measures how close vertices are to each other by taking the inverse if the sum of all their shortest paths + * closeness measures how close vertices are to each other by calculating the sum of their shortest paths to all other vertices - `network.pagerank` * calculates scores for authors based on the pagerank of vertices in an author network * pagerank refers to the pagerank algorithm employed by google, which is closely related to eigenvector centrality - `network.eccentricity` * calculates scores for authors based on the eccentricity of vertices in an author network - * eccentricity measures the distance to each vertices' furthest reachablke vertex -- `commit.count` - * calculates scores based on the number of commits per author -- 
`loc.count` - * calculates scores based on the number of lines of code changed by each author -- `mail.count` - * calculates scores based on the number of mails sent per author -- `mail.thread.count` - * calculates scores based on the number of mail threads each author participated in -- `issue.count` - * calculates scores based on the number of issues each author participated in -- `issue.comment.count` - * calculates scores based on the number of comments each author made in issues -- `issue.commented.in.count` - * calculates scores based on the number of issues each author commented in -- `issue.created.count` - * calculates scores based on the number of issues each created + * eccentricity measures the length of the shortest path to each vertex's furthest reachable vertex ### How-to diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 3cda2fe9..d82439c0 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -259,8 +259,9 @@ get.author.class.by.type = function(network = NULL, } else if (type == "network.eccentricity") { eccentricity.vec = igraph::eccentricity(network) ## since core developers are expected to have a lower eccentricity, - ## we need to invert all non-zero values - ## all entries with value zero are isolated vertices, so the value is already correct + ## we need to invert all non-zero values. + ## all entries with value zero are isolated vertices, so the expected value is also zero. + ## thus, they should not be inverted. 
indices = which(eccentricity.vec > 0) eccentricity.vec[indices] = max(eccentricity.vec) - eccentricity.vec[indices] ## Construct centrality dataframe From b54cc709cf3a2786d31f8f09aedef1ad49ab3223 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 14 Jan 2025 13:29:11 +0100 Subject: [PATCH 117/130] Add test for hierarchy classification tests hierarchy with network previously used for another unit test Signed-off-by: Leo Sendelbach --- tests/test-core-peripheral.R | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/test-core-peripheral.R b/tests/test-core-peripheral.R index 4d8359bd..0f7e7fdf 100644 --- a/tests/test-core-peripheral.R +++ b/tests/test-core-peripheral.R @@ -106,6 +106,49 @@ test_that("Eigenvector classification", { expect_equal(expected, result, tolerance = 0.0001) }) +test_that("Hierarchy classification", { + + vertices = data.frame( + name = c("Olaf", "Thomas", "Karl"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR + ) + edges = data.frame( + from = c("Olaf", "Thomas", "Karl", "Thomas"), + to = c("Thomas", "Karl", "Olaf", "Thomas"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "1143db502761379c2bfcecc2007fc34282e7ee61", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + artifact.type = c("CommitInteraction", "CommitInteraction", "CommitInteraction", "CommitInteraction"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), 
+ relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") + ) + test.network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + + ## Act + result = get.author.class.network.hierarchy(test.network) + ## Assert + expected.core = data.frame(author.name = c("Thomas"), + hierarchy = c(4)) + expected.peripheral = data.frame(author.name = c("Olaf", "Karl"), + hierarchy = c(2, 2)) + expected = list(core = expected.core, peripheral = expected.peripheral) + row.names(result[["core"]]) = NULL + row.names(result[["peripheral"]]) = NULL + expect_equal(expected, result) +}) + test_that("Betweenness classification", { ## Act From c6744c00d3dc0d4e45a96c2d80ae78727e22cce2 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 14 Jan 2025 15:48:42 +0100 Subject: [PATCH 118/130] Change formulation in 'README.md' formulation change as requested Signed-off-by: Leo Sendelbach --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b9df1e26..3f17bb87 100644 --- a/README.md +++ b/README.md @@ -380,7 +380,7 @@ In some cases, it is not necessary to build a network to get the information you #### Core/Peripheral classification -Core/Peripheral classification descibes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute most of the work in a given project. The concrete threshold can be configured in `CORE.THRESHOLD` and is per default set to 80%, a value commonly used in literature. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired split is achieved. 
+Core/Peripheral classification describes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute most of the work in a given project. The concrete threshold can be configured in `CORE.THRESHOLD` and is set to 80% per default, a value commonly used in literature. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired split is achieved. ##### Count-based metrics From 53013248863ea9411c2d0ce4b566034e3e8fa3bf Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 Jan 2025 11:24:17 +0100 Subject: [PATCH 119/130] Update NEWS.md NEWS.md now includes update to README Signed-off-by: Leo Sendelbach --- NEWS.md | 1 + util-core-peripheral.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0bba34b2..8bdb9654 100644 --- a/NEWS.md +++ b/NEWS.md @@ -31,6 +31,7 @@ - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) - Update necessary `igraph` version to 2.1.0 in `README.md` (PR #274, 6c3bcd1a2366d0d3a176d9fde95b8356b0158da3) +- Include core/peripheral classification in the `README.md` (PR #276, ) ### Fixed diff --git a/util-core-peripheral.R b/util-core-peripheral.R index d82439c0..c38340ad 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -22,7 +22,7 @@ ## Copyright 2019 by Thomas Bock ## Copyright 2019 by Jakob Kronawitter ## Copyright 2021 by Johannes Hostert -## Copyright 2024 by Leo Sendelbach +## Copyright 2024-2025 by Leo Sendelbach ## All Rights Reserved. 
## ## This file is derived from following Codeface script: From 5fc2da5ece6604a6a87d8dd5f79237a82fb2b5ca Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 Jan 2025 11:53:31 +0100 Subject: [PATCH 120/130] Change README.md expanded description of network-based metrics to make clear that they can be used on any type of network Signed-off-by: Leo Sendelbach --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3f17bb87..2ada38f9 100644 --- a/README.md +++ b/README.md @@ -404,27 +404,27 @@ In this section, we provide descriptions of the different algorithms we provide ##### Network-based metrics -In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors using metrics that are used on author networks. +In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors using metrics that are used on author networks. Note that the provided methods can be used for any network and not just author networks. The classification would then occur regarding the type of the vertices, i.e. an artifact network would result in a classification of the artifacts based on their importance in the network. 
- `network.degree` - * calculates scores for authors based on the vertex degrees in an author network + * calculates scores based on the vertex degrees in a network * the degree of a vertex is the number of adjacent edges - `network.eigen` - * calculates scores for authors based on the eigenvector centralities in an author network + * calculates scores based on the eigenvector centralities in a network * eigenvector centrality measures the importance of vertices within a network by awarding scores for adjacent edges proportional to the score of the connected vertex - `network.hierarchy` - * calculates scores for authors based on the hierarchy found within an author network + * calculates scores based on the hierarchy found within a network * hierarchical scores are calculated by dividing the vertex degree by the clustering coefficient of each vertex - `network.betweenness` - * calculates scores for authors based on the betweenness of vertices in an author network + * calculates scores based on the betweenness of vertices in a network * betweenness measures the number of shortest paths between any two vertices that go through each vertex - `network.closeness` - * calculates scores for authors based on the closeness of vertices in an author network + * calculates scores based on the closeness of vertices in a network * closeness measures how close vertices are to each other by calculating the sum of their shortest paths to all other vertices - `network.pagerank` - * calculates scores for authors based on the pagerank of vertices in an author network + * calculates scores based on the pagerank of vertices in a network * pagerank refers to the pagerank algorithm employed by google, which is closely related to eigenvector centrality - `network.eccentricity` - * calculates scores for authors based on the eccentricity of vertices in an author network + * calculates scores based on the eccentricity of vertices in a network * eccentricity measures the length of the shortest 
path to each vertex's furthest reachable vertex ### How-to From edb495506396094d9d03eb1e15459adbed053fae Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 Jan 2025 16:20:43 +0100 Subject: [PATCH 121/130] Add non-trivial test for hierarchy also changed some wording issues in NEWS and README Signed-off-by: Leo Sendelbach --- NEWS.md | 2 +- README.md | 4 ++-- tests/test-core-peripheral.R | 29 ++++++++++++++++++++++++++--- util-core-peripheral.R | 4 ++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8bdb9654..a4b0fe79 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,7 +20,7 @@ - Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) - Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) - Add function `get.last.activity.data` to compute developers' last activities in a project, as well as function `add.vertex.attribute.author.last.activity` to add a developer's date of last activity as vertex attribute to a network, as well as helper functions `get.aggregated.activity.data` and `add.vertex.attribute.author.aggregated.activity` to allow for other activity aggregations than first and last activity (PR #275, 9f231612fcd33a283362c79b35a94295ff3d4ef9, 8660ed763ba4b69e909e7fbb01e27e1999522047) -- Add four new metric which can be used for the classification of authors into core and peripheral: Betweenness, Closeness, Pagerank and Eccentricity (PR #276, 65d5c9cc86708777ef458b0c2e744ab4b846bdd1, b392d1a125d0f306b4bce8d95032162a328a3ce2, c5d37d40024e32ad5778fa5971a45bc08f7631e0) +- Add four new metric that can be used for the classification of authors into core and peripheral: betweenness, 
closeness, pagerank, and eccentricity (PR #276, 65d5c9cc86708777ef458b0c2e744ab4b846bdd1, b392d1a125d0f306b4bce8d95032162a328a3ce2, c5d37d40024e32ad5778fa5971a45bc08f7631e0) ### Changed/Improved diff --git a/README.md b/README.md index 2ada38f9..f5f4c383 100644 --- a/README.md +++ b/README.md @@ -404,7 +404,7 @@ In this section, we provide descriptions of the different algorithms we provide ##### Network-based metrics -In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors using metrics that are used on author networks. Note that the provided methods can be used for any network and not just author networks. The classification would then occur regarding the type of the vertices, i.e. an artifact network would result in a classification of the artifacts based on their importance in the network. +In this section, we provide descriptions of the different algorithms we provide for classifying authors into core or peripheral authors using metrics that are used on author networks. Note that the provided methods can be used for any network and not just author networks. The classification would then occur regarding the type of the vertices, e.g. an artifact network would result in a classification of the artifacts based on their centrality in the network. 
- `network.degree` * calculates scores based on the vertex degrees in a network * the degree of a vertex is the number of adjacent edges @@ -422,7 +422,7 @@ In this section, we provide descriptions of the different algorithms we provide * closeness measures how close vertices are to each other by calculating the sum of their shortest paths to all other vertices - `network.pagerank` * calculates scores based on the pagerank of vertices in a network - * pagerank refers to the pagerank algorithm employed by google, which is closely related to eigenvector centrality + * pagerank refers to the pagerank algorithm, which is closely related to eigenvector centrality - `network.eccentricity` * calculates scores based on the eccentricity of vertices in a network * eccentricity measures the length of the shortest path to each vertex's furthest reachable vertex diff --git a/tests/test-core-peripheral.R b/tests/test-core-peripheral.R index 0f7e7fdf..d8c536a5 100644 --- a/tests/test-core-peripheral.R +++ b/tests/test-core-peripheral.R @@ -106,7 +106,7 @@ test_that("Eigenvector classification", { expect_equal(expected, result, tolerance = 0.0001) }) -test_that("Hierarchy classification", { +test_that("Trivial hierarchy classification", { vertices = data.frame( name = c("Olaf", "Thomas", "Karl"), @@ -149,6 +149,31 @@ test_that("Hierarchy classification", { expect_equal(expected, result) }) +test_that("Non-trivial hierarchy classification", { + + vertices = data.frame( + name = c("Heinz", "Olaf", "Thomas", "Karl", "Christian", "Maxi", "Leo") + ) + edges = data.frame( + from = c("Heinz", "Heinz", "Olaf", "Karl", "Karl", "Karl", "Christian", "Christian", "Maxi", "Christian", "Leo", "Christian"), + to = c("Karl", "Olaf", "Karl", "Olaf", "Karl", "Thomas", "Thomas", "Maxi", "Leo", "Leo", "Maxi", "Karl") + ) + test.network = igraph::graph_from_data_frame(edges, directed = FALSE, vertices = vertices) + + ## Act + result = get.author.class.network.hierarchy(test.network) + ## Assert + 
expected.core = data.frame(author.name = c("Karl", "Christian"), + hierarchy = c(21, 12)) + expected.peripheral = data.frame(author.name = c("Olaf", "Maxi", "Leo", "Heinz", "Thomas"), + hierarchy = c(3, 3, 3, 2, 2)) + + expected = list(core = expected.core, peripheral = expected.peripheral) + row.names(result[["core"]]) = NULL + row.names(result[["peripheral"]]) = NULL + expect_equal(expected, result) +}) + test_that("Betweenness classification", { ## Act @@ -217,8 +242,6 @@ test_that("Eccentricity classification", { expect_equal(expected, result) }) -# TODO: Add a test for hierarchy classification - test_that("Commit-count classification using 'result.limit'" , { ## Act diff --git a/util-core-peripheral.R b/util-core-peripheral.R index c38340ad..607ef300 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -236,6 +236,10 @@ get.author.class.by.type = function(network = NULL, ## is 'Inf'. We do not get any complications here because there are no authors with ## degree == 0 and a CC > 0 (i.e., the hierarchy value would really be 0). Authors with ## a CC == NaN (degree < 2) will stay with their hierarchy value of NaN, accordingly. + ## Note that this could lead to authors who should be high up in the hierarchy + ## being set to hierarchy 0 because they interact with clusters that + ## have no connection between them. We accept this because the case should + ## almost never occur in all but the smallest networks. 
hierarchy.calculated[is.infinite(hierarchy.calculated)] = 0 ## Construct centrality dataframe From 728414b7f9a285c807d047304894a97e113d3bd7 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 21 Jan 2025 16:28:45 +0100 Subject: [PATCH 122/130] Change commit hashes in NEWS.md also corrected copyright headers Signed-off-by: Leo Sendelbach --- NEWS.md | 4 ++-- tests/test-core-peripheral.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index a4b0fe79..89d8c0c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,7 +20,7 @@ - Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) - Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) - Add function `get.last.activity.data` to compute developers' last activities in a project, as well as function `add.vertex.attribute.author.last.activity` to add a developer's date of last activity as vertex attribute to a network, as well as helper functions `get.aggregated.activity.data` and `add.vertex.attribute.author.aggregated.activity` to allow for other activity aggregations than first and last activity (PR #275, 9f231612fcd33a283362c79b35a94295ff3d4ef9, 8660ed763ba4b69e909e7fbb01e27e1999522047) -- Add four new metric that can be used for the classification of authors into core and peripheral: betweenness, closeness, pagerank, and eccentricity (PR #276, 65d5c9cc86708777ef458b0c2e744ab4b846bdd1, b392d1a125d0f306b4bce8d95032162a328a3ce2, c5d37d40024e32ad5778fa5971a45bc08f7631e0) +- Add four new metric that can be used for the classification of authors into core and peripheral: betweenness, closeness, pagerank, and eccentricity (PR #276, 
e27acb5ad7b4c4532e787d793301d06f13e8bbba, 217880809a07f970b055bd3b84c05cb6e61ac6af) ### Changed/Improved @@ -31,7 +31,7 @@ - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) - Update necessary `igraph` version to 2.1.0 in `README.md` (PR #274, 6c3bcd1a2366d0d3a176d9fde95b8356b0158da3) -- Include core/peripheral classification in the `README.md` (PR #276, ) +- Include core/peripheral classification in the `README.md` (PR #276, 6101e11f5c4ac1b5883e85cebd01a3cd7c76e056, c6744c00d3dc0d4e45a96c2d80ae78727e22cce2, 5fc2da5ece6604a6a87d8dd5f79237a82fb2b5ca) ### Fixed diff --git a/tests/test-core-peripheral.R b/tests/test-core-peripheral.R index d8c536a5..a027c356 100644 --- a/tests/test-core-peripheral.R +++ b/tests/test-core-peripheral.R @@ -18,7 +18,7 @@ ## Copyright 2019 by Christian Hechtl ## Copyright 2021 by Christian Hechtl ## Copyright 2023-2024 by Maximilian Löffler -## Copyright 2024 by Leo Sendelbach +## Copyright 2024-2025 by Leo Sendelbach ## All Rights Reserved. From a5b5df73b7cb202e9506cf342796ac8e83f0b41a Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 24 Jan 2025 16:37:53 +0100 Subject: [PATCH 123/130] Change formulation Changed comment explaining potential issues with hierarchy classification Signed-off-by: Leo Sendelbach --- util-core-peripheral.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 607ef300..1d9cb533 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -236,10 +236,12 @@ get.author.class.by.type = function(network = NULL, ## is 'Inf'. We do not get any complications here because there are no authors with ## degree == 0 and a CC > 0 (i.e., the hierarchy value would really be 0). 
Authors with ## a CC == NaN (degree < 2) will stay with their hierarchy value of NaN, accordingly. - ## Note that this could lead to authors who should be high up in the hierarchy - ## being set to hierarchy 0 because they interact with clusters that - ## have no connection between them. We accept this because the case should - ## almost never occur in all but the smallest networks. + ## Note that this could lead to authors who should have a high hierarchy value + ## being set to 0 because they interact with clusters that have no connection + ## between them. In this case, we have no way of determining what the hierarchy + ## value should be without looking further into the connected clusters. We + ## assume that the case where hierarchy is set to 0 even though it should + ## be a high value rarely occurs. hierarchy.calculated[is.infinite(hierarchy.calculated)] = 0 ## Construct centrality dataframe From b7152ffcf9df23b310134f70b25083001e89727d Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 24 Jan 2025 18:28:09 +0100 Subject: [PATCH 124/130] Change formulation again Putting the verb before the adverb Signed-off-by: Leo Sendelbach --- util-core-peripheral.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 1d9cb533..3306a3a3 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -241,7 +241,7 @@ get.author.class.by.type = function(network = NULL, ## between them. In this case, we have no way of determining what the hierarchy ## value should be without looking further into the connected clusters. We ## assume that the case where hierarchy is set to 0 even though it should - ## be a high value rarely occurs. + ## be a high value occurs rarely. 
hierarchy.calculated[is.infinite(hierarchy.calculated)] = 0 ## Construct centrality dataframe From c70a0bb4587132055199d0feb67ee2c7885f3ef4 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 28 Jan 2025 17:04:37 +0100 Subject: [PATCH 125/130] Change formulation implemented feedback on PR Signed-off-by: Leo Sendelbach --- NEWS.md | 2 +- README.md | 2 +- util-core-peripheral.R | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 89d8c0c4..160c3c65 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,7 +20,7 @@ - Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) - Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) - Add function `get.last.activity.data` to compute developers' last activities in a project, as well as function `add.vertex.attribute.author.last.activity` to add a developer's date of last activity as vertex attribute to a network, as well as helper functions `get.aggregated.activity.data` and `add.vertex.attribute.author.aggregated.activity` to allow for other activity aggregations than first and last activity (PR #275, 9f231612fcd33a283362c79b35a94295ff3d4ef9, 8660ed763ba4b69e909e7fbb01e27e1999522047) -- Add four new metric that can be used for the classification of authors into core and peripheral: betweenness, closeness, pagerank, and eccentricity (PR #276, e27acb5ad7b4c4532e787d793301d06f13e8bbba, 217880809a07f970b055bd3b84c05cb6e61ac6af) +- Add four new metrics that can be used for the classification of authors into core and peripheral: betweenness, closeness, pagerank, and eccentricity (PR #276, e27acb5ad7b4c4532e787d793301d06f13e8bbba, 
217880809a07f970b055bd3b84c05cb6e61ac6af) ### Changed/Improved diff --git a/README.md b/README.md index f5f4c383..6fd57df4 100644 --- a/README.md +++ b/README.md @@ -380,7 +380,7 @@ In some cases, it is not necessary to build a network to get the information you #### Core/Peripheral classification -Core/Peripheral classification descibes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute most of the work in a given project. The concrete threshold can be configured in `CORE.THRESHOLD` and is set to 80% per default, a value commonly used in literature. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired split is achieved. +Core/Peripheral classification describes the process of dividing the authors of a project into either `core` or `peripheral` developers based on the principle that the core developers contribute most of the work in a given project. The concrete threshold can be configured in `CORE.THRESHOLD` and is set to 80% per default, a value commonly used in literature. In practice, this is done by assigning scores to developers to approximate their importance in a project and then dividing the authors into `core` or `peripheral` based on these scores such that the desired split is achieved. 
##### Count-based metrics diff --git a/util-core-peripheral.R b/util-core-peripheral.R index 3306a3a3..81c688f4 100644 --- a/util-core-peripheral.R +++ b/util-core-peripheral.R @@ -264,10 +264,10 @@ get.author.class.by.type = function(network = NULL, centrality = as.vector(pagerank.centrality.vec)) } else if (type == "network.eccentricity") { eccentricity.vec = igraph::eccentricity(network) - ## since core developers are expected to have a lower eccentricity, + ## Since core developers are expected to have a lower eccentricity, ## we need to invert all non-zero values. - ## all entries with value zero are isolated vertices, so the expected value is also zero. - ## thus, they should not be inverted. + ## All entries with value zero are isolated vertices, so the expected value is also zero. + ## Thus, they should not be inverted. indices = which(eccentricity.vec > 0) eccentricity.vec[indices] = max(eccentricity.vec) - eccentricity.vec[indices] ## Construct centrality dataframe @@ -716,7 +716,7 @@ get.author.class.network.hierarchy = function(network, result.limit = NULL, #' Classify authors into "core" and "peripheral" based on the betweenness-centrality of author vertices in the network #' and return the classification result. #' -#' The details of the classification algorithm is explained in the documentation of \code{get.author.class.by.type}. +#' The details of the classification algorithms are explained in the documentation of \code{get.author.class.by.type}. #' #' @param network the network containing the authors to classify #' @param result.limit the maximum number of authors contained in the classification result. Only the top @@ -751,7 +751,7 @@ get.author.class.network.betweenness = function(network, result.limit = NULL, #' Classify authors into "core" and "peripheral" based on the closeness-centrality of author vertices in the network #' and return the classification result. 
#' -#' The details of the classification algorithm is explained in the documentation of \code{get.author.class.by.type}. +#' The details of the classification algorithms are explained in the documentation of \code{get.author.class.by.type}. #' #' @param network the network containing the authors to classify #' @param result.limit the maximum number of authors contained in the classification result. Only the top @@ -786,7 +786,7 @@ get.author.class.network.closeness = function(network, result.limit = NULL, #' Classify authors into "core" and "peripheral" based on the pagerank-centrality of author vertices in the network #' and return the classification result. #' -#' The details of the classification algorithm is explained in the documentation of \code{get.author.class.by.type}. +#' The details of the classification algorithms are explained in the documentation of \code{get.author.class.by.type}. #' #' @param network the network containing the authors to classify #' @param result.limit the maximum number of authors contained in the classification result. Only the top @@ -821,7 +821,7 @@ get.author.class.network.pagerank = function(network, result.limit = NULL, #' Classify authors into "core" and "peripheral" based on the eccentricity of author vertices in the network #' and return the classification result. #' -#' The details of the classification algorithm is explained in the documentation of \code{get.author.class.by.type}. +#' The details of the classification algorithms are explained in the documentation of \code{get.author.class.by.type}. #' #' @param network the network containing the authors to classify #' @param result.limit the maximum number of authors contained in the classification result. Only the top From f29662b2c11768cc01eb0f86d32e039099c618ae Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Sun, 2 Feb 2025 12:48:30 +0100 Subject: [PATCH 126/130] Replace deprecated igraph functions in 'util-networks-metrics.R' This commit works towards #260. 
Signed-off-by: Thomas Bock --- util-networks-metrics.R | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/util-networks-metrics.R b/util-networks-metrics.R index bf391bcf..3c100599 100644 --- a/util-networks-metrics.R +++ b/util-networks-metrics.R @@ -12,7 +12,7 @@ ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## ## Copyright 2015, 2019 by Thomas Bock -## Copyright 2021, 2023-2024 by Thomas Bock +## Copyright 2021, 2023-2025 by Thomas Bock ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2019 by Claus Hunsen ## Copyright 2017-2018 by Christian Hechtl @@ -94,7 +94,7 @@ metrics.vertex.degrees = function(network, sort = TRUE, sort.decreasing = TRUE, #' #' @return The density of the network. metrics.density = function(network) { - density = igraph::graph.density(network) + density = igraph::edge_density(network) return(c(density = density)) } @@ -110,7 +110,7 @@ metrics.density = function(network) { #' #' @return The average path length of the given network. metrics.avg.pathlength = function(network, directed = TRUE, unconnected = TRUE) { - avg.pathlength = igraph::average.path.length(network, directed = directed, unconnected = unconnected) + avg.pathlength = igraph::mean_distance(network, directed = directed, unconnected = unconnected) return(c(avg.pathlength = avg.pathlength)) } @@ -164,7 +164,7 @@ metrics.modularity = function(network, community.detection.algorithm = igraph::c #' @return The smallworldness value of the network. metrics.smallworldness = function(network) { ## first check whether the network is simplified - if (!is.simple(network)) { + if (!is_simple(network)) { ## if this is not the case, raise an error and stop the execution error.message = "The input network has too many edges. Try again with a simplified network." 
logging::logerror(error.message) @@ -173,17 +173,16 @@ metrics.smallworldness = function(network) { ## else construct Erdös-Renyi network 'h' with same number of vertices and edges as the given network 'network', ## as the requirement of the function is fulfilled - h = igraph::erdos.renyi.game(n = igraph::vcount(network), - p.or.m = igraph::ecount(network), - type = "gnm", - directed = FALSE) + h = igraph::sample_gnm(n = igraph::vcount(network), + m = igraph::ecount(network), + directed = FALSE) ## compute clustering coefficients g.cc = igraph::transitivity(network, type = "global") h.cc = igraph::transitivity(h, type = "global") ## compute average shortest-path length - g.l = igraph::average.path.length(network, unconnected = TRUE) - h.l = igraph::average.path.length(h, unconnected = TRUE) + g.l = igraph::mean_distance(network, unconnected = TRUE) + h.l = igraph::mean_distance(h, unconnected = TRUE) ## binary decision ## intermediate variables From 027ce79cdc85a81a5b386bf925f3545632426433 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Sun, 2 Feb 2025 17:34:52 +0100 Subject: [PATCH 127/130] Remove deprecated `scale_name` parameter from plotting module As of `ggplot2` version 3.5.0, the function `discrete_scale`, which is used for `plot.network` in coronet, does not require a `scale_name` parameter anymore. Even more, this parameter has been deprecated. Therefore, we also don't use it any more in coronet. However, as a consequence of this, the plotting module of coronet requires `ggplot2` version 3.5.0 or higher, as of now. Signed-off-by: Thomas Bock --- README.md | 4 ++-- util-plot.R | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6fd57df4..90cc0821 100644 --- a/README.md +++ b/README.md @@ -137,14 +137,14 @@ Alternatively, you can run `Rscript install.R` to install the packages. 
- `reshape2`: For reshaping of data - `testthat`: For the test suite - `patrick`: For the test suite -- `ggplot2`: For plotting of data +- `ggplot2`: For plotting of data (package version `3.5.0` or higher is required) - `ggraph`: For plotting of networks (needs `udunits2` system library, e.g., `libudunits2-dev` on Ubuntu!) - `markovchain`: For core/peripheral transition probabilities - `lubridate`: For convenient date conversion and parsing - `viridis`: For plotting of networks with nice colors - `jsonlite`: For parsing the issue data - `rTensor`: For calculating EDCPTD centrality -- `Matrix`: For sparse matrix representation of large adjacency matrices (package version `1.3.0` or higher is mandatory) +- `Matrix`: For sparse matrix representation of large adjacency matrices (package version `1.3.0` or higher is required) - `fastmap`: For fast implementation of a map - `purrr`: For fast implementation of a mapping function diff --git a/util-plot.R b/util-plot.R index 92e16e21..c1381d2f 100644 --- a/util-plot.R +++ b/util-plot.R @@ -14,7 +14,7 @@ ## Copyright 2017-2018, 2020 by Claus Hunsen ## Copyright 2018 by Barbara Eckl ## Copyright 2018 by Thomas Bock -## Copyright 2020-2021 by Thomas Bock +## Copyright 2020-2021, 2025 by Thomas Bock ## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -168,8 +168,8 @@ plot.get.plot.for.network = function(network, labels = TRUE) { ## scale edges (colors and styles) ggraph::scale_edge_linetype(name = "Relation Types") + - ggplot2::discrete_scale(name = "Relations", "edge_colour", "viridis", - viridis::viridis_pal(option = "viridis", end = 0.8, begin = 0.25)) + + ggplot2::discrete_scale(name = "Relations", aesthetics = "edge_colour", + palette = viridis::viridis_pal(option = "viridis", end = 0.8, begin = 0.25)) + ## BROKEN RIGHT NOW due to bug in scale_edge_colour_viridis(): # ggraph::scale_edge_colour_viridis(name = "Relations", option = "magma", discrete = TRUE, # end = 0.85, begin = 0, direction = 1) + From e5dd429749a0c3895737424a861917bb4f53b817 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Sun, 2 Feb 2025 12:54:49 +0100 Subject: [PATCH 128/130] Update changelog Signed-off-by: Thomas Bock --- NEWS.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 160c3c65..ac8d9548 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,8 @@ ### Announcement -- Coronet is not compatible with `igraph` versions below 2.1.0 anymore. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in igraph version 2.1.0. +- coronet is not compatible with `igraph` versions below 2.1.0 anymore. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in `igraph` version 2.1.0. +- The plotting module of coronet is not compatible with `ggplot2` versions below 3.5.0 anymore. This is due to the simultaneous deprecation of the `scale_name` parameter of `discrete_scale`, which is used within the function `plot.network` of coronet. ### Added @@ -26,11 +27,13 @@ - **Breaking Change**: Change the default representation of edge attributes from vectors to lists. 
This change is necessary for the interplay of coronet networks with certain `igraph` functionality since igraph version 2.1.0 (PR #274, 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7, eda30b838369ec46376812298a3ea8159eec5789, 0c6b2eba79b37f8ef2af7ffc41d86f1f307581bf, 44c7b72e3234cb332bb2713fb408c124e67255d9, 7303eabef6a78198575fe5bdfc02813fde3d3974, 0c27012641d24e19e5fa037406b480034c93f1aa) - Change the default value for the `issues.from.source` configuration parameter. Instead of reading JIRA and GitHub issues together, which was the previous default, the new default value causes only GitHub issue data to be read. To restore the previous default behavior and read data from both issue sources, this now needs to be manually configured when needed. (PR #264, 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d, 8c8080cb9caf115f19d9f145ad6e6c108b131a67, 8bcbc81db521877908d2e5c2989082ed672f2a3b) -- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, PR #274, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5, e3617b8c6b21fb4242c1d392124813501069ca84, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) +- Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, PR #274, PR #279, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5, e3617b8c6b21fb4242c1d392124813501069ca84, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61, f29662b2c11768cc01eb0f86d32e039099c618ae) +- Remove deprecated parameter of `ggplot2::discrete_scale` (PR #279, 027ce79cdc85a81a5b386bf925f3545632426433) - Deprecate support for R version 3.6 (PR #264, c8e6f45111e487fadbe7f0a13c7595eb23f3af6e, fb3f5474259d4a88f4ff545691cca9d1ccde90e3) - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) - Update necessary `igraph` version to 2.1.0 in 
`README.md` (PR #274, 6c3bcd1a2366d0d3a176d9fde95b8356b0158da3) +- Update necessary `ggplot2` version to 3.5.0 in `README.md' (PR #279, 027ce79cdc85a81a5b386bf925f3545632426433) - Include core/peripheral classification in the `README.md` (PR #276, 6101e11f5c4ac1b5883e85cebd01a3cd7c76e056, c6744c00d3dc0d4e45a96c2d80ae78727e22cce2, 5fc2da5ece6604a6a87d8dd5f79237a82fb2b5ca) ### Fixed From 4c4b6545ba03280f08dc6d1ccfc2610d6670df96 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Mon, 3 Feb 2025 00:45:22 +0100 Subject: [PATCH 129/130] Add commit-interactions to data sources in `README.md` Signed-off-by: Thomas Bock --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 90cc0821..4552725c 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,8 @@ There are two distinguishable types of data sources that are both handled by the * Patch-stack analysis to link patches sent to mailing lists and upstream commits * Synchronicity information on commits (see also the parameter `synchronicity` in the [`ProjectConf`](#configurable-data-retrieval-related-parameters) class) * Synchronous commits are commits that change a source-code artifact that has also been changed by another author within a reasonable time-window. + * Commit-interaction data (see also the parameter `commit.interactions` in the [`ProjectConf`](#configurable-data-retrieval-related-parameters) class) + * Commit interactions represent data-flow interactions between two commits (i.e., there is a data flow between the changed source-code parts of the two commits). * Custom event timestamps, which have to be specified manually (see also the parameter `custom.event.timestamps.file` in the [`ProjectConf`](#configurable-data-retrieval-related-parameters) class) @@ -240,7 +242,7 @@ There are four types of networks that can be built using this library: author ne - Commit networks * The vertices in a commit network denote any commits in the data. 
All vertices are uniquely identifyable by the hash of the commit. There are only unipartite edges among commits in this type of network. - * The relations (i.e., the edges' meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`. The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit interaction data). + * The relations (i.e., the edges' meaning and source) can be configured using the [`networkConf`](#networkconf) attribute `commit.relation`. The relation also describes the type of data used for network construction (`cochange` uses commit data, `commit.interaction` uses commit-interaction data). - Bipartite networks * The vertices in a bipartite network denote both authors and artifacts. There are only bipartite edges from authors to artifacts in this type of network. @@ -277,7 +279,7 @@ Relations determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. - * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. + * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that change these artifacts. * For commit networks (configured via `commit.relation` in the [`NetworkConf`](#networkconf)), commits are connected when they interact in the commit-interaction data. * This relation does not apply for bipartite networks. @@ -667,7 +669,7 @@ There is no way to update the entries, except for the revision-based parameters. 
* Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] - `commit.interactions.filter.global`: - * Filter out entries from commit interaction data that are not matched to a specific function or file + * Filter out entries from commit-interaction data that are not matched to a specific function or file * [*`TRUE`*, `FALSE`] ### NetworkConf From 80b7a994ccfa5e0cebe1ebae77a4186c5190d9b0 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Sun, 2 Feb 2025 23:59:42 +0100 Subject: [PATCH 130/130] Version 5.0 Signed-off-by: Thomas Bock --- NEWS.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index ac8d9548..ef3ad4e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,30 +2,30 @@ # coronet – Changelog -## unversioned +## 5.0 ### Announcement -- coronet is not compatible with `igraph` versions below 2.1.0 anymore. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in `igraph` version 2.1.0. -- The plotting module of coronet is not compatible with `ggplot2` versions below 3.5.0 anymore. This is due to the simultaneous deprecation of the `scale_name` parameter of `discrete_scale`, which is used within the function `plot.network` of coronet. +- `coronet` is not compatible with `igraph` versions below 2.1.0 anymore. This is due to the simultaneous deprecation of `subgraph.edges` and the introduction of the replacement for it, `subgraph_from_edges`, in `igraph` version 2.1.0. +- The plotting module of `coronet` is not compatible with `ggplot2` versions below 3.5.0 anymore. This is due to the simultaneous deprecation of the `scale_name` parameter of `discrete_scale` (which is used within the function `plot.network` of `coronet`) in `ggplot2` version 3.5.0. 
### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e, PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367, 3fb7437b68950303916b62984fa449732c70353e, 170bc66eb779d7cf2ab504db7c3f4ec483103838) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0,) -- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) -- Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) -- Add line-based code coverage reports into CI pipeline. 
Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions`, and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e, PR #263, 849123a8b7d898fbb1343745ecffc1f6000c9367, 3fb7437b68950303916b62984fa449732c70353e, 170bc66eb779d7cf2ab504db7c3f4ec483103838, f591528a0f1f11b1a4390949ab770f3f74a766f9, 4c4b6545ba03280f08dc6d1ccfc2610d6670df96) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction networks that can be created with `create.author.network` or `create.artifact.network` if the `artifact.relation` or `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) +- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. 
This includes adding new configuration parameters and the function `add.vertex.attribute.commit.network` for adding vertex attributes to a commit network (PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) - Add the possibility to split data time-based by multiple data sources (PR #261, 1088395f46b84028c8d7c463ca86b5dc38500c26, e1f79fc9e40cd6f41c946be42db364b2101cfe10, 0bb187fec0fd801d7634bf8d5180525770f6ab0b, 371a97ac6ebf3de4fe9360dea79d62e2ed3ef585) -- Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) -- Add commit network as a new type of network. It uses commits as vertices and connects them either via cochange or commit interactions. This includes adding new config parameters and the function `add.vertex.attribute.commit.network` for adding vertex attributes to a commit network (PR #263, ab73271781e8e9a0715f784936df4b371d64c338, ab73271781e8e9a0715f784936df4b371d64c338, cd9a930fcb54ff465c2a5a7c43cfe82ac15c134d) - Add `remove.duplicate.edges` function that takes a network as input and conflates identical edges (PR #268, d9a4be417b340812b744f59398ba6460ba527e1c, 0c2f47c4fea6f5f2f582c0259f8cf23af985058a, c6e90dd9cb462232563f753f414da14a24b392a3) - Add `cumulative` as an argument to `construct.ranges` which enables the creation of cumulative ranges from given revisions (PR #268, a135f6bb6f83ccb03ae27c735c2700fccc1ee0c8, 8ec207f1e306ef6a641fb0205a9982fa89c7e0d9) - Add function `get.last.activity.data` to compute developers' last activities in a project, as well as function `add.vertex.attribute.author.last.activity` to add a developer's date of last activity as vertex attribute to a network, as well as helper functions `get.aggregated.activity.data` and `add.vertex.attribute.author.aggregated.activity` to allow for other activity aggregations than first and last 
activity (PR #275, 9f231612fcd33a283362c79b35a94295ff3d4ef9, 8660ed763ba4b69e909e7fbb01e27e1999522047) - Add four new metrics that can be used for the classification of authors into core and peripheral: betweenness, closeness, pagerank, and eccentricity (PR #276, e27acb5ad7b4c4532e787d793301d06f13e8bbba, 217880809a07f970b055bd3b84c05cb6e61ac6af) +- Add helper function for prefixing function names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) +- Add line-based code coverage reports into CI pipeline. Coverage reports are generated by `coverage.R` (PR #262, 10cac49d005e87c3964cc61711e7f5acef749626, b3b9f4ac7a9911bd00293c68fac88e0f9033bdfb, c815d18dc6266d620a7a145493417b87ac08679e, e8093525fdaf46e54f2f7fcc6358ca7892e795e5, 32d04823e2007c63d2a43ce59bea3057327c19a7) +- Add tests for uncovered functionality in `util-misc.R` and `util-networks.R` (PR #264, ff30f3238b1bf2539280d0d055a5d925c197c271, af80551d0615a49b86e45ff596bd75941ee88f91) ### Changed/Improved -- **Breaking Change**: Change the default representation of edge attributes from vectors to lists. This change is necessary for the interplay of coronet networks with certain `igraph` functionality since igraph version 2.1.0 (PR #274, 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7, eda30b838369ec46376812298a3ea8159eec5789, 0c6b2eba79b37f8ef2af7ffc41d86f1f307581bf, 44c7b72e3234cb332bb2713fb408c124e67255d9, 7303eabef6a78198575fe5bdfc02813fde3d3974, 0c27012641d24e19e5fa037406b480034c93f1aa) +- **Breaking Change**: Change the default representation of edge attributes from vectors to lists. 
This change is necessary for the interplay of `coronet` networks with certain `igraph` functionality since igraph version 2.1.0 (PR #274, 1c35d1fa2548deb297dbfa5e2b07fce31962c5b7, eda30b838369ec46376812298a3ea8159eec5789, 0c6b2eba79b37f8ef2af7ffc41d86f1f307581bf, 44c7b72e3234cb332bb2713fb408c124e67255d9, 7303eabef6a78198575fe5bdfc02813fde3d3974, 0c27012641d24e19e5fa037406b480034c93f1aa) - Change the default value for the `issues.from.source` configuration parameter. Instead of reading JIRA and GitHub issues together, which was the previous default, the new default value causes only GitHub issue data to be read. To restore the previous default behavior and read data from both issue sources, this now needs to be manually configured when needed. (PR #264, 5ff83c364f6bfc1e6ff95e9c5f1087e031c48a5d, 8c8080cb9caf115f19d9f145ad6e6c108b131a67, 8bcbc81db521877908d2e5c2989082ed672f2a3b) - Replace deprecated `igraph` functions by their preferred alternatives (PR #264, PR #268, PR #274, PR #279, 0df9d5bf6bafbb5d440f4c47db4ec901cf11f037, 7ac840d287a862eff61b1a84e194a4cba399f9e5, e3617b8c6b21fb4242c1d392124813501069ca84, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61, f29662b2c11768cc01eb0f86d32e039099c618ae) - Remove deprecated parameter of `ggplot2::discrete_scale` (PR #279, 027ce79cdc85a81a5b386bf925f3545632426433) @@ -33,17 +33,17 @@ - Explicitly add R version 4.4 to the CI test pipeline (c8e6f45111e487fadbe7f0a13c7595eb23f3af6e) - Refactor function `construct.edge.list.from.key.value.list` to be more readable (PR #263, 05c3bc09cb1d396fd59c34a88030cdca58fd04dd) - Update necessary `igraph` version to 2.1.0 in `README.md` (PR #274, 6c3bcd1a2366d0d3a176d9fde95b8356b0158da3) -- Update necessary `ggplot2` version to 3.5.0 in `README.md' (PR #279, 027ce79cdc85a81a5b386bf925f3545632426433) -- Include core/peripheral classification in the `README.md` (PR #276, 6101e11f5c4ac1b5883e85cebd01a3cd7c76e056, c6744c00d3dc0d4e45a96c2d80ae78727e22cce2, 5fc2da5ece6604a6a87d8dd5f79237a82fb2b5ca) +- 
Update necessary `ggplot2` version to 3.5.0 in `README.md` (PR #279, 027ce79cdc85a81a5b386bf925f3545632426433) +- Include core/peripheral classification in `README.md` (PR #276, 6101e11f5c4ac1b5883e85cebd01a3cd7c76e056, c6744c00d3dc0d4e45a96c2d80ae78727e22cce2, 5fc2da5ece6604a6a87d8dd5f79237a82fb2b5ca) ### Fixed - Fix the creation of edgelists for issue-based artifact-networks by correctly iterating over the issue data (PR #264, 321d85043112971c04998249c14a0677a32c9004) -- Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data and that leads to a return value of type numeric which should be POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd) -- Fix `read.commit.interactions` by explicitly considering non-existent commit interactions (PR #274, f591528a0f1f11b1a4390949ab770f3f74a766f9) - Add range information to network-splits when splitting a network using `split.network.time.based.by.ranges`. This effect also propagates into `split.networks.time.based` (PR #274, 87911ade231c44b93be194a1d6734f7de043a4af) +- Fix a bug in `extract.timestamps` that occurs when the first `data.source` contains empty data and that leads to a return value of type numeric which should be POSIXct (PR #270, 10696e4cf4ae92371917ed8ccaec2b0183da145c, 646c01a42ad8decfbc9040030e790e51cb65cffd) - Adjust `metrics.scale.freeness` and `metrics.is.scale.free` functions to be compatible with both older and newer igraph versions (PR #274, 4b0d5221dd56bb3c9ddf196f67719d4f503d9b61) + ## 4.4 ### Announcement