Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality to read and merge gender data to author data #216

Merged
merged 29 commits into from
Dec 21, 2021
Merged
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
8868ff4
Add function 'read.gender'
Oct 26, 2021
bfbe4de
Add functions to merge gender data to author data
Oct 26, 2021
0a23862
Add gender related configuration attributes
Oct 26, 2021
a7744b5
Add a test and sample data for the 'read.gender'
Oct 26, 2021
1eeca29
Add new changes regarding gender data to NEWS.md
Oct 26, 2021
6a50fd1
Add a folder 'test_empty_gender' in the test data
Nov 5, 2021
53ef8cd
Place gender section below author section
Nov 24, 2021
d795dac
Remove unnecessary comment regarding gender file
Nov 24, 2021
faa5b34
Reorder functions in util-read.R
Nov 24, 2021
15edca6
Reorder attributes in the util-conf.R
Nov 24, 2021
2f8480b
Reorder data paths in util-conf.R
Nov 24, 2021
c332c91
Refactor read.gender function to reduce complexity
Nov 24, 2021
5c50742
Refactor functions related to gender in util-data
Nov 24, 2021
cf2cce8
Replace is.null with empty for gender data
Dec 7, 2021
cd99e5c
Add ".list" ending to the gender file
Dec 7, 2021
413e24c
Add "cleanup.gender.data" function
Dec 7, 2021
cbcd552
Reorder tests in "test-read.R" and fix typo
Dec 8, 2021
39db315
Add gender data to ProjectData comparison tests
Dec 9, 2021
85c3056
Add info about fixed errors to NEWS.md
Dec 9, 2021
bc30c40
Add gender data to the necessary section of README
Dec 9, 2021
25fb862
Fix failing test due to updated igraph calculation
hechtlC Nov 30, 2021
1b4072c
Fix filtering of the deleted user
hechtlC Nov 30, 2021
c3ada92
Add gender to necessary additional resource lists
Dec 13, 2021
1e4026d
Restrict gender labels by predefined lables
Dec 14, 2021
be56183
Update gender test because of predefined lables
Dec 14, 2021
4d631bd
Reorder functions in util-data.r
Dec 15, 2021
8769ccf
Remove rownames while reading gender data
Dec 19, 2021
50292cb
Edit information about gender data in README.md
Dec 19, 2021
17811be
Update broken commit hashes in NEWS.md
Dec 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 103 additions & 2 deletions util-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
## Copyright 2019-2020 by Anselm Fehnker <anselm@muenster.de>
## Copyright 2020-2021 by Niklas Schneider <s8nlschn@stud.uni-saarland.de>
## Copyright 2021 by Johannes Hostert <s8johost@stud.uni-saarland.de>
## Copyright 2021 by Mirabdulla Yusifli <s8miyusi@stud.uni-saarland.de>
## All Rights Reserved.


Expand Down Expand Up @@ -167,6 +168,7 @@ ProjectData = R6::R6Class("ProjectData",
pasta = create.empty.pasta.list(), # data.frame
pasta.mails = create.empty.pasta.list(), # data.frame
pasta.commits = create.empty.pasta.list(), # data.frame
gender = create.empty.gender.list(), # data.frame
miriyusifli marked this conversation as resolved.
Show resolved Hide resolved
## timestamps of mail, issue and commit data
data.timestamps = data.frame(start = numeric(0), end = numeric(0)), # data.frame

Expand Down Expand Up @@ -633,6 +635,28 @@ ProjectData = R6::R6Class("ProjectData",
logging::logdebug("update.pasta.data: finished.")
},

#' Update the gender-related fields of: \code{authors}
#'
#' This method should be called whenever the field \code{gender} is changed.
#' An already existing column \code{gender} is dropped from the author data and
#' the gender data are then merged onto the author data via the column
#' \code{author.name}.
update.gender.data = function() {
    logging::logdebug("update.gender.data: starting.")

    ## update author data by attaching gender data
    if (!is.null(private$authors)) {

        ## remove previous gender data, so that a stale 'gender' column is not carried
        ## along into the merge below
        private$authors["gender"] = NULL

        ## merge gender data:
        ## - 'all.x = TRUE' keeps every author row (authors without a match get NA)
        ## - 'sort = FALSE' prevents sorting the result by the key column
        private$authors = merge(private$authors, private$gender,
                                by = "author.name", all.x = TRUE, sort = FALSE)
    }

    logging::logdebug("update.gender.data: finished.")
},


## * * synchronicity data ------------------------------------------

#' Update the column \code{synchronicity} that is appended to commits using the currently available
Expand Down Expand Up @@ -790,6 +814,7 @@ ProjectData = R6::R6Class("ProjectData",
private$pasta.mails = create.empty.pasta.list()
private$pasta.commits = create.empty.pasta.list()
private$synchronicity = create.empty.synchronicity.list()
private$gender = create.empty.gender.list()
miriyusifli marked this conversation as resolved.
Show resolved Hide resolved
},

## * * configuration -----------------------------------------------
Expand Down Expand Up @@ -923,6 +948,14 @@ ProjectData = R6::R6Class("ProjectData",
return(data.path)
},

#' Get the path to the gender data, i.e., the value that is stored under
#' the key 'datapath.gender' in the project configuration.
#'
#' @return the path to the gender data
get.data.path.gender = function() {
    return(private$project.conf$get.value("datapath.gender"))
},

#' Get the absolute path to the range's result folder for synchronicity data.
#'
#' @return the path to the synchronicity files
Expand Down Expand Up @@ -1249,6 +1282,33 @@ ProjectData = R6::R6Class("ProjectData",
return(private$pasta)
},

#' Get the gender data. If no gender data are stored in the ProjectData yet, this function triggers a read
#' from disk.
#'
#' If the ProjectConf parameter 'gender' is not enabled, a warning is logged and the stored gender data
#' are reset to the empty gender data.
#'
#' @return the gender data
get.gender = function() {
    logging::loginfo("Getting gender data.")

    ## if gender data are to be read, do this
    if (private$project.conf$get.value("gender")) {

        ## The field 'gender' never holds NULL: it is initialized with the empty gender data
        ## and 'set.gender' replaces NULL by the empty gender data. Hence, "not read yet" has
        ## to be detected via emptiness; a plain NULL check would never trigger the read.
        ## (Consequence: an actually empty gender file is re-read on every call.)
        gender.not.available = is.null(private$gender) || nrow(private$gender) == 0

        ## if data are not read already, read them
        if (gender.not.available) {

            ## read gender data from disk
            gender.data = read.gender(self$get.data.path.gender())
            self$set.gender(gender.data)
        }
    } else {
        logging::logwarn("You have not set the ProjectConf parameter 'gender' to 'TRUE'! Ignoring...")

        ## mark gender data as empty
        self$set.gender(NULL)
    }

    return(private$gender)
},

miriyusifli marked this conversation as resolved.
Show resolved Hide resolved
#' Set the PaStA data to the given new data and,
#' if configured in the field \code{project.conf},
#' also update it for the mail and commit data.
Expand Down Expand Up @@ -1279,6 +1339,30 @@ ProjectData = R6::R6Class("ProjectData",
}
},


#' Store the given gender data in this object and,
#' if the parameter 'gender' is enabled in the field \code{project.conf},
#' propagate them to the author data.
#'
#' @param data the new gender data (\code{NULL} is replaced by the empty gender data)
set.gender = function(data) {
    logging::loginfo("Setting gender data.")

    ## store the actual data, falling back to the empty gender data for NULL
    private$gender = if (is.null(data)) create.empty.gender.list() else data

    ## attach the gender data to the author data only if configured
    gender.configured = private$project.conf$get.value("gender")
    if (gender.configured) {
        ## update all gender-related data
        private$update.gender.data()
    }
},
miriyusifli marked this conversation as resolved.
Show resolved Hide resolved
#' Remove lines in the PaStA data that contain message ids or commit hashes
#' that don't appear in the commit or mail data.
cleanup.pasta.data = function() {
Expand Down Expand Up @@ -1394,18 +1478,35 @@ ProjectData = R6::R6Class("ProjectData",

## if authors are not read already, do this
if (!self$is.data.source.cached("authors")) {
private$authors = read.authors(self$get.data.path())

# read author data
author.data = read.authors(self$get.data.path());
miriyusifli marked this conversation as resolved.
Show resolved Hide resolved

# set author data and add gender data (if configured in the 'project.conf')
self$set.authors(author.data)
}

return(private$authors)
},

#' Set the author data to the given new data and,
#' if the parameter 'gender' is enabled in the field \code{project.conf},
#' attach the gender data to the author data.
#'
#' @param data the new author data
set.authors = function(data) {
    logging::loginfo("Setting author data.")
    private$authors = data

    ## add gender data if wanted
    if (private$project.conf$get.value("gender")) {

        ## The field 'gender' never holds NULL (it is initialized with the empty gender data
        ## and 'set.gender' replaces NULL by the empty gender data), so "not read yet" has to
        ## be detected via emptiness; a plain NULL check would make the read unreachable.
        gender.not.available = is.null(private$gender) || nrow(private$gender) == 0

        if (gender.not.available) {
            ## read the gender data from disk; 'set.gender' stores them and, as the
            ## parameter 'gender' is enabled here, also merges them into the author data
            self$set.gender(read.gender(self$get.data.path.gender()))
        } else {
            ## update all gender-related data
            private$update.gender.data()
        }
    }

},

#' Filter bots from given data.
Expand Down