From 0c93945d18b160779771ed5629c2e8948de927dc Mon Sep 17 00:00:00 2001 From: WD Date: Mon, 19 Aug 2024 14:25:51 +0200 Subject: [PATCH] preventing negative variance from R fast --- DESCRIPTION | 2 +- R/EM.R | 2 + R/computational_functions.R | 138 +++++++++++++++++++----------------- 3 files changed, 77 insertions(+), 65 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 408e188..4344c9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: fsusieR -Version: 0.2.77 +Version: 0.2.78 Date: 2024-07-11 Title: Sum of Single Functions Authors@R: person("William R. P.","Denault",role = c("aut","cre"), diff --git a/R/EM.R b/R/EM.R index 734c157..871fbc9 100644 --- a/R/EM.R +++ b/R/EM.R @@ -47,6 +47,8 @@ EM_pi <- function(G_prior,Bhat, Shat, indx_lst, #static parameters ## Deal with overfitted cases + + Shat[ is.na(Shat) ] <- 1e-32 #some rare case in overfitting and numerical limitation of Rfast Shat[ Shat<=0 ] <- 1e-32 lBF <- log_BF(G_prior, Bhat,Shat, diff --git a/R/computational_functions.R b/R/computational_functions.R index 25cfcd6..9f10ff3 100644 --- a/R/computational_functions.R +++ b/R/computational_functions.R @@ -74,73 +74,75 @@ cal_Bhat_Shat <- function(Y, lowc_wc=NULL, ind_analysis, ... ) { - - - - if(missing(ind_analysis)){ - - - d <- colSums(X^2) - Bhat <- (t(X)%*%Y )/d - + + + + if(missing(ind_analysis)){ + + + d <- colSums(X^2) + Bhat <- (t(X)%*%Y )/d + + Shat <- do.call( cbind, + lapply( 1:ncol(Bhat), + function(i) (Rfast::colVars( Y [,i] -sweep( X,2, Bhat[,i], "*"))) + ) + ) + + Shat<- sqrt( pmax(Shat, 1e-64)) + Shat <- Shat/sqrt(nrow(Y)) + + }else{ + if( is.list(ind_analysis) ){ #usefull for running multiple univariate regression with different problematic ind + + + Bhat <- do.call(cbind,lapply(1:length(ind_analysis), + function(l){ + d <- Rfast::colsums(X[ind_analysis[[l]], ]^2) + out <- (t(X[ind_analysis[[l]], ])%*%Y[ind_analysis[[l]], l])/d + return(out) + } + + + ) ) + + + Shat <- matrix(mapply(function(l,j) + (Rfast::cova(matrix(Y[ind_analysis[[l]],l] - X[ind_analysis[[l]], j] * Bhat[j,l])) /(length(ind_analysis[[l]])-1)), + l=rep(1:dim(Y)[2],each= ncol(X)), + j=rep(1:dim(X)[2], ncol(Y)) + ), + ncol=dim(Y)[2] + ) + Shat<- sqrt( pmax(Shat, 1e-64)) + + }else{ + d <- Rfast::colsums(X[ind_analysis , ]^2) + Bhat <- (t(X[ind_analysis , ])%*%Y[ind_analysis , ])/d + Shat <- do.call( cbind, lapply( 1:ncol(Bhat), - function(i) sqrt(Rfast::colVars( Y [,i] -sweep( X,2, Bhat[,i], "*"))) + function(i) (Rfast::colVars(Y [ind_analysis,i] -sweep( X[ind_analysis,],2, Bhat[ ,i], "*"))) ) ) - Shat <- Shat/sqrt(nrow(Y)) - - }else{ - if( is.list(ind_analysis) ){ #usefull for running multiple univariate regression with different problematic ind - - - Bhat <- do.call(cbind,lapply(1:length(ind_analysis), - function(l){ - d <- Rfast::colsums(X[ind_analysis[[l]], ]^2) - out <- (t(X[ind_analysis[[l]], ])%*%Y[ind_analysis[[l]], l])/d - return(out) - } - - - ) ) - - - Shat <- matrix(mapply(function(l,j) - sqrt(Rfast::cova(matrix(Y[ind_analysis[[l]],l] - X[ind_analysis[[l]], j] * Bhat[j,l])) /(length(ind_analysis[[l]])-1)), - l=rep(1:dim(Y)[2],each= ncol(X)), - j=rep(1:dim(X)[2], ncol(Y)) - ), - ncol=dim(Y)[2] - ) - - - }else{ - d <- Rfast::colsums(X[ind_analysis , ]^2) - Bhat <- (t(X[ind_analysis , ])%*%Y[ind_analysis , ])/d - - Shat <- do.call( cbind, - lapply( 1:ncol(Bhat), - function(i)sqrt(Rfast::colVars(Y [ind_analysis,i] -sweep( X[ind_analysis,],2, Bhat[ ,i], "*"))) - ) - ) - Shat <- Shat/sqrt(nrow(Y[ind_analysis,])) - - } - - } - - - - - if( !is.null(lowc_wc)){ - Bhat[,lowc_wc] <- 0 - Shat[,lowc_wc] <- 1 + Shat<- sqrt( pmax(Shat, 1e-64)) + Shat <- Shat/sqrt(nrow(Y[ind_analysis,])) + } - out <- list( Bhat = Bhat, - Shat = Shat) - - return(out) - + + } + + + + + if( !is.null(lowc_wc)){ + Bhat[,lowc_wc] <- 0 + Shat[,lowc_wc] <- 1 + } + out <- list( Bhat = Bhat, + Shat = Shat) + + return(out) } @@ -871,10 +873,18 @@ HMM_regression.susiF <- function( obj, } ) ) - - + + + + N <- nrow(X) sub_X <- data.frame (X[, idx]) + if(length(idx)> length(unique(idx))){ + + sub_X[,duplicated(idx)]<- 0* sub_X[,duplicated(idx)] + + } + fitted_trend <- list() fitted_lfsr <- list()