Skip to content

Commit

Permalink
clean up and extra unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jessekps committed Jul 3, 2024
1 parent 5a784f6 commit 616cb99
Show file tree
Hide file tree
Showing 25 changed files with 266 additions and 189 deletions.
1 change: 0 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,3 @@
^LICENSE$
^CRAN-SUBMISSION$
^smoke_testing
aaa.R$
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
*.o
*.so
*.dll
*.gcda
*.gcno
/revdep/.cache.rds
tmp
dexter.Rproj
docs
smoke_testing
6 changes: 4 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# dexter 1.5.0

* bugfix: previous dexter versions could place the wrong domain name with the profile categories if not all booklets had the same domains
* bugfix in `profiles()`: correct domain names when different booklets don't have the same domains

* lower autocorrelation for `fit_enorm(..., method='Bayes')`
* improvements leading to lower autocorrelation for `fit_enorm(..., method='Bayes')`

* `plausible_scores` can now return scores per item to generate complete partially imputed datasets


# dexter 1.4.2
Expand Down
3 changes: 0 additions & 3 deletions R/data_selection.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# TO DO: rlang has an is_reference method, don't have to use home baked one in cpp
# test and use


#' Selecting data
#'
Expand Down
55 changes: 30 additions & 25 deletions R/dif.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@
PairDIF_ = function(beta1, beta2, acov.beta1, acov.beta2)
{
labs = rownames(beta1)
DR = kronecker(beta2,t(beta2),FUN="-")-kronecker(beta1,t(beta1),FUN="-")
beta1=drop(beta1);beta2=drop(beta2)
DR = outer(beta2,beta2,FUN='-') - outer(beta1,beta1,FUN='-')
var1 = diag(acov.beta1)
var2 = diag(acov.beta2)
S = (kronecker(var1,t(var1),FUN="+")-2*acov.beta1)+(kronecker(var2,t(var2),FUN="+")-2*acov.beta2)
S = outer(var1,var1,FUN='+') - 2*acov.beta1 + outer(var2,var2,FUN='+') -2*acov.beta2
diag(S) = 1
D = DR/sqrt(S)
colnames(D) = labs; rownames(D)=labs
colnames(DR) = labs; rownames(DR)=labs
colnames(D) = rownames(D) = labs
colnames(DR) = rownames(DR) = labs
return(list(D=D, Delta_R=DR))
}


## produces a statistics for overall-DIF
# beta1 and beta1 are both mean centered and do not contain the zero category.
# In general, this works if both sets of parameters have the same normalization.
Expand Down Expand Up @@ -177,8 +179,12 @@ print.DIF_stats <- function(x, ...)
#' The statistics are standard normal deviates and colored to distinguish significant from non-significant values.
#' If there is no DIF, a proportion alpha off the cells will be colored significant by chance alone.
#'
# experimental, currenlty testing, can use some more finetuning
plot.DIF_stats = function(x, items = NULL, itemsX = items, itemsY = items, alpha =.05,...)
{
oldpar = par(no.readonly = TRUE)
on.exit({par(oldpar)},add=TRUE)

if(is.null(itemsX)) itemsX = sort(unique(x$items$item_id))
if(is.null(itemsY)) itemsY = sort(unique(x$items$item_id))

Expand Down Expand Up @@ -215,14 +221,7 @@ plot.DIF_stats = function(x, items = NULL, itemsX = items, itemsY = items, alpha
}

max_ = max(x$DIF_pair)
default.args = list(main = paste(x$group_labels[1],'vs.',x$group_labels[2]),
axes=FALSE, zlim=c(0,max_),xlab='',ylab='',useRaster=TRUE)

oldpar = par(no.readonly = TRUE)
on.exit({par(oldpar)},add=TRUE)

graphics::layout(matrix(c(1,1,2,0),2,2), widths=c(7,1))


qn = qnorm(1-alpha/2)

breaks = seq(0, qn, .05)
Expand All @@ -237,11 +236,15 @@ plot.DIF_stats = function(x, items = NULL, itemsX = items, itemsY = items, alpha

# Reverse Y axis
# yLabels <- rev(yLabels)
xLabels <- rev(xLabels)
DIF_pair <- DIF_pair[nrow(DIF_pair) : 1,]
xLabels = rev(xLabels)
DIF_pair = DIF_pair[nrow(DIF_pair) : 1,]

mgp = par('mgp')
mgp[2] = .6 * mgp[2]
par(plt = oldpar$plt-c(0,.08,0,0), mgp=mgp, tck=coalesce(par('tck'),-.01))

# Data Map
par(mar = c(6,8,2.5,2))
default.args = list(main = paste(x$group_labels[1],'vs.',x$group_labels[2]),
axes=FALSE, zlim=c(0,max_),xlab='',ylab='',useRaster=TRUE)

user.args = list(...)
do.call(image,
Expand All @@ -250,23 +253,25 @@ plot.DIF_stats = function(x, items = NULL, itemsX = items, itemsY = items, alpha
col=col,breaks=breaks),
default = default.args))

cex.axis = coalesce(user.args$cex.axis, 0.6)
cex.axis = coalesce(user.args$cex.axis, 0.5)
axis(1, at=1:length(yLabels), labels=yLabels, las=3, cex.axis=cex.axis, hadj=1,padj=0.5)
axis(2, at=1:length(xLabels), labels=xLabels, las=1, cex.axis=cex.axis, hadj=1,padj=0.5)

#Color Scale
try({
par(mar=c(2,2,2.5,2))
image(1, seq(0, max(qn,min(20,max_)), by=.05),

par(new = TRUE, pty = "m", plt = c(.93,.97,.5,oldpar$plt[4]), err = -1,tck=NA)


image(1, seq(0, max(qn,min(20,max_)), by=.05),
matrix(seq(0, max(qn,min(20,max_)), by=.05),nrow=1),
col=col,
breaks=breaks,
xlab="",ylab="",
xaxt="n", axes=FALSE)
axis(2, at=0:min(20,max_),lwd=0,lwd.ticks=1,las=2,cex.axis=0.8)
}, silent=TRUE)
xaxt="n",yaxt="n", axes=FALSE)

axis(2, at=0:min(20,max_),lwd=0,lwd.ticks=1,las=2,cex.axis=coalesce(user.args$cex.axis, 0.5) * 1.4)


graphics::layout(1)
invisible(NULL)
}

2 changes: 0 additions & 2 deletions R/fit_enorm.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ fit_enorm_ = function(dataSrc, qtpredicate = NULL, fixed_params = NULL, method=c
} else
{
# transform the fixed params to the b parametrization dexter uses internally
# TO DO: check that it is ungrouped
fixed_params = transform.df.parms(fixed_params, out.format = 'b')
}

Expand Down Expand Up @@ -551,7 +550,6 @@ calibrate_Bayes = function(ss, nIter, fixed_params=NULL,
} else
{
# different but slightly underdispersed start locations
# to do: check this again, 0 madness
sample_beta = rmvnorm(nchains,cml$beta, cml$acov.beta/2)

b = apply(sample_beta,1,function(beta){ beta2b(ss$ssIS$item_score,beta,ss$ssI$first, ss$ssI$last)})
Expand Down
2 changes: 1 addition & 1 deletion R/individual_differences.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ theta_score_distribution = function(b,a,first,last,scoretab)
## Get the score distribution of a booklet from fit_enorm
# based on a polynomial smoothing of the log-lambda's
# Currently only implemented for CML
# TO DO: Implement for Bayes.
# to~do: not yet for Bayes
# Check e.g., plot(0:48,log(lambda),col="green"); lines(0:48,log_l_pr)
# beta = as.numeric(qr$coefficients)[-1]
# n.obs is the exact observed score distributions if CML
Expand Down
5 changes: 1 addition & 4 deletions R/latent_cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,7 @@ latent_cor = function(dataSrc, item_property, predicate=NULL, nDraws=500, hpd=0.
{
stop("a matrix datasrc is not yet implemented for this function")
}
# to do: this is a bit tricky, we will often need to merge over persons, e.g. if booklets are administered
# per subject. But if the same person makes multiple tests for the same subject, this is not good.
# Still have to decide on a solution
# why not check which is the case?
# merge_within_persons might also be user set. But TRUE will be the most useful in nearly all cases

respData = get_resp_data(dataSrc, qtpredicate, env=env, extra_columns=item_property,
merge_within_persons=TRUE)
Expand Down
2 changes: 1 addition & 1 deletion R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ check_dataSrc = function(x)
if(is_db(x))
{
if(dbIsValid(x)) return(NULL)
stop_('your database connection is no longer valid, you need to reconnect. see: ?open_project for details')
stop_('your database connection is no longer valid, you need to reconnect. See: ?open_project for details')
}


Expand Down
5 changes: 0 additions & 5 deletions R/misc_anon.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
# @param setA, setB: two mutually exclusive subsets of items as indexes in first/last
# @return a score-by-score matrix of probabilities:
# P(X^A_+=s_a, X^B_+=s_b|X_+=s) where s=s_a+s_b


#to do: this has probably changed, does it rely on this?
# @details NA's indicate that a total scores was not possible given the weights

# if cIM is not null, the interaction model will be used
# cIM should be per score
SSTable = function(b, a, first, last,setA, setB, cIM_score=NULL)
Expand Down
26 changes: 22 additions & 4 deletions R/plausible_scores.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' @param items vector of item_id's, this specifies the itemset to generate the testscores for. If \code{items} is \code{NULL}
#' all items occurring in \code{dataSrc} are used.
#' @param covariates name or a vector of names of the variables to group the population, used to update the prior.
#' A covariate must be a discrete person covariate (e.g. not a float) that indicates nominal categories, e.g. gender or school
#' A covariate must be a discrete person covariate that indicates nominal categories, e.g. gender or school
#' If dataSrc is a data.frame, it must contain the covariate.
#' @param keep.observed If responses to one or more of the items have been observed,
#' the user can choose to keep these observations or generate new ones.
Expand Down Expand Up @@ -104,9 +104,19 @@ plausible_scores = function(dataSrc, parms=NULL, predicate=NULL, items=NULL, par
bstep = 0L
}


if(by_item)
{
if(merge_within_persons)
{
pp = tibble(old_person_id=pv$person_id,booklet_id=pv$booklet_id, person_id=1:nrow(pv))
pv$person_id = pp$person_id

respData$x = respData$x |>
rename(old_person_id='person_id') |>
inner_join(pp,by=c('booklet_id','old_person_id')) |>
select(-'old_person_id')
}

pv = droplevels(pv)
if(is.unsorted(pv$person_id)) pv = arrange(pv,.data$person_id)

Expand Down Expand Up @@ -141,7 +151,7 @@ plausible_scores = function(dataSrc, parms=NULL, predicate=NULL, items=NULL, par
mutate(present=coalesce(.data$present,FALSE)) |>
arrange(.data$item_id) |>
mutate(cnt=cumsum(as.integer(!.data$present))*np) |>
filter(present)
filter(.data$present)

m = as.integer(m[unlist(w)])

Expand All @@ -153,11 +163,19 @@ plausible_scores = function(dataSrc, parms=NULL, predicate=NULL, items=NULL, par
x[[sprintf('PS%i',i)]][w] = m
}
}

if(!is.null(covariates))
pv = inner_join(select(pv,all_of(c('person_id',covariates))), x, by='person_id')
else
pv = x

if(merge_within_persons)
{
pv = inner_join(pv,pp,by='person_id') |>
select(-'person_id') |>
mutate(person_id='old_person_id')
}

} else
{
if(keep.observed && any(respData$design$item_id %in% items))
Expand Down Expand Up @@ -207,7 +225,7 @@ plausible_scores = function(dataSrc, parms=NULL, predicate=NULL, items=NULL, par
}
}
}

pv |>
select(-any_of('booklet_score')) |>
rename_with(gsub, pattern='^PV(?=\\d+$)',replacement='PS', perl=TRUE) |>
Expand Down
3 changes: 0 additions & 3 deletions R/plausible_values.R
Original file line number Diff line number Diff line change
Expand Up @@ -297,11 +297,8 @@ plausible_values_ = function(dataSrc, parms=NULL, qtpredicate=NULL, covariates=N

} else
{
# TO DO: ik vind dit onleesbaar: is de check nou dat alle parms er in moetne zitten of dat
# er geen scores zijn die neit door parms gedekt worden? Tweede toch?
if(inherits(parms,'data.frame'))
{
#parms = transform.df.parms(parms,'b')
pcheck = parms[,c('item_id','item_score')]
} else
{
Expand Down
1 change: 0 additions & 1 deletion R/resp_data.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@


# faster factor, does not handle or check for NA values
ffactor = function (x, levels=NULL, as_int=FALSE)
{
Expand Down
Loading

0 comments on commit 616cb99

Please sign in to comment.