Skip to content

Commit

Permalink
added grid search across all relevant parameters that might be affect…
Browse files Browse the repository at this point in the history
…ed by beta constraints
  • Loading branch information
Amy Wang committed Feb 26, 2015
1 parent a245b31 commit c481f40
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 42 deletions.
55 changes: 22 additions & 33 deletions R/tests/Utils/glmR.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,28 @@ checkGLMModel <- function(myGLM.h2o, myGLM.r) {
checkEqualsNumeric(myGLM.h2o@model$null.deviance, myGLM.r$nulldev, tolerance = 1.5)
}

# Used to check glmnet models that have an extra intercept term
#
# Compares an H2O GLM with a glmnet fit: prints both models' null deviance
# and deviance, then checks deviance and sorted coefficients with
# checkEqualsNumeric.
#
# Args:
#   myGLM.h2o: H2O model object; read through @model$coefficients,
#              @model$deviance and @model$null.deviance.
#   myGLM.r:   glmnet fit; read through $beta, $a0, $nulldev and deviance().
checkGLMModel2 <- function(myGLM.h2o, myGLM.r) {
# Coerce glmnet's beta to a base matrix and work with its last column.
coeff.mat = as.matrix(myGLM.r$beta)
numcol = ncol(coeff.mat)
# Build the R-side coefficient vector: entries of the last beta column plus
# the matching a0 entry, named "Intercept".
# NOTE(review): `1:length(coeff.mat)-1` parses as `(1:length(coeff.mat)) - 1`,
# i.e. 0..n-1, and length() of a matrix counts every cell, not rows. With a
# single-column beta this silently drops the LAST coefficient (presumably the
# "extra intercept term"); with more columns it indexes past the column and
# yields NAs. Confirm this is the intended behaviour.
coeff.R = c(coeff.mat[,numcol][1:length(coeff.mat)-1], Intercept = as.numeric(myGLM.r$a0[numcol]))
# print("H2O Coefficients")
# print(myGLM.h2o@model$coefficients)
# print("R Coefficients")
# print(coeff.R)

# Log both models' null deviance and deviance for inspection.
print("H2O NULL DEVIANCE and DEVIANCE")
print(myGLM.h2o@model$null.deviance)
print(myGLM.h2o@model$deviance)
print("GLMNET NULL DEVIANCE and DEVIANCE")
print(myGLM.r$nulldev)
print(deviance(myGLM.r))

### Functions to compare model deviance and coefficients
# Compare the deviance of a glmnet fit with the deviance of an H2O GLM.
#
# Prints both deviances, then checks that they agree to within 2E-2.
# The check uses the ABSOLUTE difference: a signed difference
# (glmnet - H2O) would be hugely negative whenever H2O's deviance is much
# larger than glmnet's, and would therefore be reported as a pass.
#
# Args:
#   h2o_model:    H2O model object; its deviance is read from
#                 h2o_model@model$deviance.
#   glmnet_model: fitted model for which deviance() is defined.
#
# Returns:
#   "PASS" when the two deviances differ by less than 2E-2 in absolute
#   value, otherwise a mismatch message string.
compare_deviance <- function(h2o_model, glmnet_model) {
  glmnet_deviance <- deviance(glmnet_model)
  h2o_deviance <- h2o_model@model$deviance

  print(paste("Deviance in GLMnet Model : ", glmnet_deviance))
  print(paste("Deviance in H2O Model : ", h2o_deviance))

  if (abs(glmnet_deviance - h2o_deviance) < 2E-2) {
    return("PASS")
  }
  "Deviance in H2O model doesn't match up to GLMnet!"
}


# Compare the coefficients of an H2O GLM with those of a glmnet fit,
# matching them by coefficient name.
#
# Args:
#   h2o_model:    H2O model object; coefficients are read from
#                 h2o_model@model$coefficients and their names() drive the
#                 lookup on the glmnet side.
#   glmnet_model: glmnet fit; slopes come from $beta, the intercept from $a0.
#
# Returns:
#   "PASS" when every absolute difference is below 2E-2, otherwise a
#   mismatch message string.
compare_coeff <- function(h2o_model, glmnet_model){
# Index of the last a0 entry; the same index selects the beta column below.
# (This local is named like base::ncol().)
ncol = length(glmnet_model$a0)
h2o_coeff = h2o_model@model$coefficients
raw_glm_coeff = glmnet_model$beta[,ncol]
coeffNames = names(h2o_coeff)
# Look up glmnet's value for one H2O coefficient name: "Intercept" comes
# from a0, every other name is looked up in the selected beta column.
fun <- function(coeffName) {
if(!coeffName=="Intercept"){
raw_glm_coeff[coeffName]
} else {
glmnet_model$a0[[ncol]]
}
}
# Reorder the glmnet values to follow H2O's coefficient order.
# NOTE(review): a name absent from the beta column yields NA here, which can
# turn the all(...) condition below into NA and make `if` raise an error.
glmnet_coeff = sapply(coeffNames, fun)
diff = abs(glmnet_coeff - h2o_coeff)
# Show the two coefficient vectors as the rows of one matrix.
print(rbind(h2o_coeff, glmnet_coeff))
if(all(diff < 2E-2)) {
return("PASS")
} else {
return ("Coefficients in H2O model doesn't match up to GLMnet!")
}
# Print both coefficient vectors sorted by value for visual comparison.
print("SORTED COEFFS")
print("H2O Coefficients")
print(sort(myGLM.h2o@model$coefficients))
print("R Coefficients")
print(sort(coeff.R))
# Deviances must agree within tolerance = 0.5 (as interpreted by
# checkEqualsNumeric).
checkEqualsNumeric(myGLM.h2o@model$deviance, deviance(myGLM.r), tolerance = 0.5)
# NOTE(review): sort() orders by value, so this compares the sorted values
# position by position rather than matching coefficients by name, and
# tolerance = 1.5 is loose. Confirm both are intended.
checkEqualsNumeric(sort(myGLM.h2o@model$coefficients), sort(coeff.R), tolerance = 1.5)
}
25 changes: 16 additions & 9 deletions R/tests/testdir_jira/runit_hex_2022_GLM_beta_constraints.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,29 +38,36 @@ test.LR.betaConstraints <- function(conn) {
# Fit H2O's GLM under beta constraints, refit with glmnet using the lambda
# H2O reports and the same lower/upper limits, then compare the two fits.
#
# NOTE(review): this listing interleaves the removed and added lines of a
# diff, so several statements appear in both their old and new form (the
# parameter list, the "Run H2O's GLM" log line, the opening of the glmnet
# call, and the comparison calls). Keep one variant of each before running.
#
# Args:
#   family_type:     family passed to both h2o.glm and glmnet.
#   alpha:           alpha passed to both h2o.glm and glmnet.
#   standardization: passed as `standardize`.
#   upper_bound, lower_bound (old form) / bounds = c(lower, upper) (new form):
#                    limits written into the beta-constraints frame and
#                    passed to glmnet as upper.limits / lower.limits.
#
# Relies on conn, betaConstraints, myX, myY, prostate.hex, prostate.csv and
# xDataFrame, none of which are defined in this block.
run_glm <- function( family_type = "gaussian",
alpha = 0.5,
standardization = T,
upper_bound = 1,
lower_bound = -1) {
Log.info(paste("Set Beta Constraints :", "upper bound =", upper_bound, "and lower bound =", lower_bound, "..."))
# New form of the signature: a single (lower, upper) pair.
bounds = c(-1,1)
) {
# Unpack the pair: element 1 is the lower bound, element 2 the upper bound.
upper_bound = bounds[2]
lower_bound = bounds[1]
Log.info(paste("Set Beta Constraints :", "lower bound =", lower_bound,"and upper bound =", upper_bound, "..."))
# Upload the constraints frame to H2O and overwrite its bound columns with
# the requested limits.
betaConstraints.hex = as.h2o(conn, betaConstraints, key = "betaConstraints.hex")
betaConstraints.hex$upper_bounds = upper_bound
betaConstraints.hex$lower_bounds = lower_bound

# Old log line: prints alpha under the label "lower bound".
Log.info(paste("Run H2O's GLM with :", "family =", family_type, ", lower bound =", alpha, ", standardization =", standardization, "..."))
# New log line: alpha is labelled correctly.
Log.info(paste("Run H2O's GLM with :", "family =", family_type, ", alpha =", alpha, ", standardization =", standardization, "..."))
glm_constraints.h2o = h2o.glm(x = myX, y = myY, data = prostate.hex, standardize = standardization,
family = family_type, alpha = alpha , beta_constraints = betaConstraints.hex)
# Reuse the lambda reported by H2O so glmnet is fitted at the same lambda.
lambda = glm_constraints.h2o@model$lambda

Log.info(paste("Run GLMnet with the same parameters, using lambda =", lambda))
# Old call opening (no standardize argument), followed by the new opening
# that forwards `standardization` to glmnet as well.
glm_constraints.r = glmnet(x = as.matrix(xDataFrame), alpha = alpha, lambda = lambda,
glm_constraints.r = glmnet(x = as.matrix(xDataFrame), alpha = alpha, lambda = lambda, standardize = standardization,
y = prostate.csv[,myY], family = family_type, lower.limits = lower_bound, upper.limits = upper_bound)
# Old comparison helpers, followed by the new single check whose value is the
# function's result.
compare_deviance(glm_constraints.h2o, glm_constraints.r)
compare_coeff(glm_constraints.h2o, glm_constraints.r)
checkGLMModel2(glm_constraints.h2o, glm_constraints.r)
}

# GLM families exercised by the test.
families = c("gaussian", "binomial", "poisson")
# Old driver: one run per family with run_glm's default alpha,
# standardization and bounds.
familyTest <- sapply(families, function(family) run_glm(family_type = family))
print(familyTest)
# New driver: alpha values and standardization flags to cross with families.
alpha = c(0,0.5,1.0)
standard = c(T, F)

# Every combination of family x alpha x standardize, one per row.
grid = expand.grid(families, alpha, standard)
names(grid) = c("Family", "Alpha", "Standardize")

# Call run_glm once per grid row, passing family, alpha and standardization
# positionally. expand.grid converts character vectors to factors by default,
# hence the as.character() on the family column.
fullTest <- mapply(run_glm, as.character(grid[,1]), grid[,2], grid[,3])
# Attach each run's return value to its parameter combination.
testResults <- cbind(grid,Passed = fullTest)
print(testResults)
testEnd()
}

Expand Down

0 comments on commit c481f40

Please sign in to comment.