Skip to content

Commit

Permalink
Add dataframe structure verification
Browse files Browse the repository at this point in the history
Add call to 'verify.data.frame.columns' to relevant setters and read functions

This works towards fixing #208.

Signed-off-by: Maximilian Löffler <s8maloef@stud.uni-saarland.de>
  • Loading branch information
maxloeffler committed Dec 24, 2022
1 parent d1d9a03 commit b7a9588
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 1 deletion.
29 changes: 29 additions & 0 deletions util-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(commit.data)) {
commit.data = create.empty.commits.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(commit.data, COMMITS.LIST.COLUMNS, COMMITS.LIST.DATA.TYPES)
}

## store commit data
Expand Down Expand Up @@ -1145,6 +1148,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(data)) {
data = create.empty.commit.message.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(data, COMMIT.MESSAGE.LIST.COLUMNS, COMMIT.MESSAGE.LIST.DATA.TYPES)
}

## set the actual data
Expand Down Expand Up @@ -1214,6 +1220,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(data)) {
data = create.empty.synchronicity.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(data, SYNCHRONICITY.LIST.COLUMNS, SYNCHRONICITY.LIST.DATA.TYPES)
}

## set the actual data
Expand Down Expand Up @@ -1287,6 +1296,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(data)) {
data = create.empty.pasta.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(data, PASTA.LIST.COLUMNS, PASTA.LIST.DATA.TYPES)
}

## set the actual data
Expand Down Expand Up @@ -1368,6 +1380,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(data)) {
data = create.empty.gender.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(data, GENDER.LIST.COLUMNS, GENDER.LIST.DATA.TYPES)
}

## set the actual data
Expand Down Expand Up @@ -1444,6 +1459,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(mail.data)) {
mail.data = create.empty.mails.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(mail.data, MAILS.LIST.COLUMNS, MAILS.LIST.DATA.TYPES)
}

## store mail data
Expand Down Expand Up @@ -1502,6 +1520,14 @@ ProjectData = R6::R6Class("ProjectData",
set.authors = function(data) {
logging::loginfo("Setting author data.")
private$authors = data

if (is.null(data)) {
data = create.empty.authors.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(data, AUTHORS.LIST.COLUMNS, AUTHORS.LIST.DATA.TYPES)
}

## add gender data if wanted
if (private$project.conf$get.value("gender")) {

Expand Down Expand Up @@ -1606,6 +1632,9 @@ ProjectData = R6::R6Class("ProjectData",

if (is.null(data)) {
data = create.empty.issues.list()
} else {
## check that dataframe is of correct shape
verify.data.frame.columns(data, ISSUES.LIST.COLUMNS, ISSUES.LIST.DATA.TYPES)
}

private$issues.unfiltered = data
Expand Down
30 changes: 29 additions & 1 deletion util-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ read.commits = function(data.path, artifact) {
commit.data[["commit.id"]] = format.commit.ids(commit.data[["commit.id"]])
row.names(commit.data) = seq_len(nrow(commit.data))

## check that dataframe is of correct shape
verify.data.frame.columns(commit.data, COMMITS.LIST.COLUMNS, COMMITS.LIST.DATA.TYPES)

## store the commit data
logging::logdebug("read.commits: finished.")
return(commit.data)
Expand Down Expand Up @@ -264,6 +267,9 @@ read.mails = function(data.path) {
}
mail.data = remove.deleted.and.empty.user(mail.data) # filter deleted user

## check that dataframe is of correct shape
verify.data.frame.columns(mail.data, MAILS.LIST.COLUMNS, MAILS.LIST.DATA.TYPES)

## store the mail data
logging::logdebug("read.mails: finished.")
return(mail.data)
Expand Down Expand Up @@ -384,6 +390,9 @@ read.issues = function(data.path, issues.sources = c("jira", "github")) {
function(event) { digest::digest(event, algo="sha1", serialize = FALSE) }
)

## check that dataframe is of correct shape
verify.data.frame.columns(issue.data, ISSUES.LIST.COLUMNS, ISSUES.LIST.DATA.TYPES)

logging::logdebug("read.issues: finished.")
return(issue.data)
}
Expand Down Expand Up @@ -438,6 +447,10 @@ read.bot.info = function(data.path) {
## set column names for new data frame
colnames(bot.data) = BOT.LIST.COLUMNS
bot.data["is.bot"] = sapply(bot.data[["is.bot"]], function(x) switch(x, Bot = TRUE, Human = FALSE, NA))

## check that dataframe is of correct shape
verify.data.frame.columns(bot.data, BOT.LIST.COLUMNS)

logging::logdebug("read.bot.info: finished.")
return(bot.data)
}
Expand Down Expand Up @@ -499,6 +512,9 @@ read.authors = function(data.path) {
authors.df = authors.df[, AUTHORS.LIST.COLUMNS]
authors.df = remove.deleted.and.empty.user(authors.df)

## check that dataframe is of correct shape
verify.data.frame.columns(authors.df, AUTHORS.LIST.COLUMNS, AUTHORS.LIST.DATA.TYPES)

## store the ID--author mapping
logging::logdebug("read.authors: finished.")
return(authors.df)
Expand Down Expand Up @@ -583,6 +599,9 @@ read.gender = function(data.path) {
## remove rownames
rownames(gender.data) = NULL

## check that dataframe is of correct shape
verify.data.frame.columns(gender.data, GENDER.LIST.COLUMNS, GENDER.LIST.DATA.TYPES)

logging::logdebug("read.gender: finished.")
return(gender.data)

Expand Down Expand Up @@ -691,8 +710,10 @@ read.commit.messages = function(data.path) {
commit.message.data[["commit.id"]] = format.commit.ids(commit.message.data[["commit.id"]])
row.names(commit.message.data) = seq_len(nrow(commit.message.data))

logging::logdebug("read.commit.messages: finished.")
## check that dataframe is of correct shape
verify.data.frame.columns(commit.message.data, COMMIT.MESSAGE.LIST.COLUMNS, COMMIT.MESSAGE.LIST.DATA.TYPES)

logging::logdebug("read.commit.messages: finished.")
return(commit.message.data)
}

Expand Down Expand Up @@ -775,6 +796,10 @@ read.pasta = function(data.path) {
return(df)
})
result.df = plyr::rbind.fill(result.list)

## check that dataframe is of correct shape
verify.data.frame.columns(result.df, PASTA.LIST.COLUMNS, PASTA.LIST.DATA.TYPES)

logging::logdebug("read.pasta: finished.")
return(result.df)
}
Expand Down Expand Up @@ -838,6 +863,9 @@ read.synchronicity = function(data.path, artifact, time.window) {
## ensure proper column names
colnames(synchronicity) = SYNCHRONICITY.LIST.COLUMNS

## check that dataframe is of correct shape
verify.data.frame.columns(synchronicity, SYNCHRONICITY.LIST.COLUMNS, SYNCHRONICITY.LIST.DATA.TYPES)

## store the synchronicity data
logging::logdebug("read.synchronicity: finished.")
return(synchronicity)
Expand Down

0 comments on commit b7a9588

Please sign in to comment.