Skip to content

Commit

Permalink
Add functions to merge gender data to author data
Browse files Browse the repository at this point in the history
Add the base functions 'get.data.path.gender', 'get.gender', 'set.gender',
and 'update.gender.data' to manage gender data

Signed-off-by: Mirabdulla Yusifli <s8miyusi@stud.uni-saarland.de>
  • Loading branch information
Mirabdulla Yusifli authored and Mirabdulla Yusifli committed Oct 26, 2021
1 parent 27bf762 commit 20a4e90
Showing 1 changed file with 103 additions and 2 deletions.
105 changes: 103 additions & 2 deletions util-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
## Copyright 2019-2020 by Anselm Fehnker <anselm@muenster.de>
## Copyright 2020-2021 by Niklas Schneider <s8nlschn@stud.uni-saarland.de>
## Copyright 2021 by Johannes Hostert <s8johost@stud.uni-saarland.de>
## Copyright 2021 by Mirabdulla Yusifli <s8miyusi@stud.uni-saarland.de>
## All Rights Reserved.


Expand Down Expand Up @@ -167,6 +168,7 @@ ProjectData = R6::R6Class("ProjectData",
pasta = create.empty.pasta.list(), # data.frame
pasta.mails = create.empty.pasta.list(), # data.frame
pasta.commits = create.empty.pasta.list(), # data.frame
gender = create.empty.gender.list(), # data.frame
## timestamps of mail, issue and commit data
data.timestamps = data.frame(start = numeric(0), end = numeric(0)), # data.frame

Expand Down Expand Up @@ -633,6 +635,28 @@ ProjectData = R6::R6Class("ProjectData",
logging::logdebug("update.pasta.data: finished.")
},

#' Update the gender-related fields of: \code{authors}
#'
#' Attaches the data stored in the field \code{gender} to the author data by joining both on the
#' column \code{author.name}. Authors without a matching entry in the gender data are kept and
#' obtain \code{NA} values in the attached columns. The row order and the column order of the
#' author data are preserved; columns coming from the gender data are appended at the end.
#'
#' This method should be called whenever the field \code{gender} is changed.
update.gender.data = function() {
    logging::logdebug("update.gender.data: starting.")

    ## update author data by attaching gender data
    if (!is.null(private$authors)) {

        ## remove previous gender data
        private$authors["gender"] = NULL

        ## remember the current layout of the author data: 'merge' moves the key column to the
        ## front and, with 'sort = FALSE', returns the rows in an unspecified order
        authors = private$authors
        author.columns = colnames(authors)

        ## temporary helper column (with a name that is guaranteed to be unused) holding the row positions
        used.names = c(author.columns, colnames(private$gender), "row.position")
        position.column = tail(make.unique(used.names), 1)
        authors[[position.column]] = seq_len(nrow(authors))

        ## merge gender data
        merged = merge(authors, private$gender,
                       by = "author.name", all.x = TRUE, sort = FALSE)

        ## restore the original row order
        merged = merged[order(merged[[position.column]]), , drop = FALSE]
        rownames(merged) = NULL

        ## restore the original column order and append the newly attached columns
        ## ('intersect' keeps this robust in case 'merge' had to suffix clashing column names)
        kept.columns = intersect(author.columns, colnames(merged))
        attached.columns = setdiff(colnames(merged), c(kept.columns, position.column))
        private$authors = merged[, c(kept.columns, attached.columns), drop = FALSE]

    }

    logging::logdebug("update.gender.data: finished.")
},


## * * synchronicity data ------------------------------------------

#' Update the column \code{synchronicity} that is appended to commits using the currently available
Expand Down Expand Up @@ -790,6 +814,7 @@ ProjectData = R6::R6Class("ProjectData",
private$pasta.mails = create.empty.pasta.list()
private$pasta.commits = create.empty.pasta.list()
private$synchronicity = create.empty.synchronicity.list()
private$gender = create.empty.gender.list()
},

## * * configuration -----------------------------------------------
Expand Down Expand Up @@ -923,6 +948,14 @@ ProjectData = R6::R6Class("ProjectData",
return(data.path)
},

#' Retrieve the absolute path of the result folder that holds the gender data.
#' The path is looked up in the project configuration (parameter \code{datapath.gender}).
#'
#' @return the path to the gender data
get.data.path.gender = function() {
    return(private$project.conf$get.value("datapath.gender"))
},

#' Get the absolute path to the range's result folder for synchronicity data.
#'
#' @return the path to the synchronicity files
Expand Down Expand Up @@ -1249,6 +1282,33 @@ ProjectData = R6::R6Class("ProjectData",
return(private$pasta)
},

#' Get the gender data. If it is not already stored in the ProjectData, this function triggers a read
#' from disk.
#'
#' If the ProjectConf parameter \code{gender} is not set to \code{TRUE}, a warning is logged and
#' the gender data are reset to the empty gender list.
#'
#' @return the gender data
get.gender = function() {
    logging::loginfo("Getting gender data.")

    ## if gender data are to be read, do this
    if (private$project.conf$get.value("gender")) {

        ## if data are not read already, read them
        ## (the field 'gender' is initialized with the empty gender list and 'set.gender' replaces
        ## NULL by the empty gender list, so the field is never NULL; "not read yet" is therefore
        ## detected by the absence of any entries. 'NROW' also yields 0 for NULL.)
        if (NROW(private$gender) == 0) {

            ## read gender data from disk
            gender.data = read.gender(self$get.data.path.gender())
            self$set.gender(gender.data)
        }
    } else {
        logging::logwarn("You have not set the ProjectConf parameter 'gender' to 'TRUE'! Ignoring...")

        ## mark gender data as empty
        self$set.gender(NULL)
    }

    return(private$gender)
},

#' Set the PaStA data to the given new data and,
#' if configured in the field \code{project.conf},
#' also update it for the mail and commit data.
Expand Down Expand Up @@ -1279,6 +1339,30 @@ ProjectData = R6::R6Class("ProjectData",
}
},


#' Store the given gender data and, if the ProjectConf parameter \code{gender}
#' is enabled in the field \code{project.conf}, attach them to the author data.
#'
#' Passing \code{NULL} resets the gender data to the empty gender list.
#'
#' @param data the new gender data
set.gender = function(data) {
    logging::loginfo("Setting gender data.")

    ## store the data, falling back to the empty gender list if nothing is given
    private$gender = if (is.null(data)) create.empty.gender.list() else data

    ## propagate the new gender data to the author data if configured
    gender.enabled = private$project.conf$get.value("gender")
    if (gender.enabled) {
        private$update.gender.data()
    }
},
#' Remove lines in the PaStA data that contain message ids or commit hashes
#' that don't appear in the commit or mail data.
cleanup.pasta.data = function() {
Expand Down Expand Up @@ -1394,18 +1478,35 @@ ProjectData = R6::R6Class("ProjectData",

## if authors are not read already, do this
if (!self$is.data.source.cached("authors")) {
private$authors = read.authors(self$get.data.path())

# read author data
author.data = read.authors(self$get.data.path());

# set author data and add gender data (if configured in the 'project.conf')
self$set.authors(author.data)
}

return(private$authors)
},

#' Set the author data to the given new data and,
#' if configured in the field \code{project.conf},
#' also attach the gender data to it.
#'
#' @param data the new author data
set.authors = function(data) {
    logging::loginfo("Setting author data.")
    private$authors = data

    ## add gender data if wanted
    if (private$project.conf$get.value("gender")) {
        ## the field 'gender' is never NULL (it is initialized with the empty gender list and
        ## 'set.gender' replaces NULL by the empty gender list), so missing gender data are
        ## detected by the absence of any entries ('NROW' also yields 0 for NULL)
        if (NROW(private$gender) == 0) {
            ## no gender data available yet: read them from disk;
            ## 'set.gender' stores them and updates all gender-related data
            self$set.gender(read.gender(self$get.data.path.gender()))
        } else {
            ## update all gender-related data
            private$update.gender.data()
        }
    }

},

#' Filter bots from given data.
Expand Down

0 comments on commit 20a4e90

Please sign in to comment.