Skip to content

Commit

Permalink
fix bug in IV est when all exo vars are removed, fixes: #371
Browse files Browse the repository at this point in the history
  • Loading branch information
lrberge committed Feb 14, 2024
1 parent db63f13 commit 4e7d0b6
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 24 deletions.
8 changes: 5 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

- **the demeaning algorithm has been reworked!** The code has been condensed and four new parameters have been introduced to control the details of how the algorithm works. The new default values should lead to quicker convergence in general (for difficult cases). The new function `demeaning_algo` gives the user fine control over the internal parameters of the algorithm.

- all estimation functions gain the argument `data.save`. If `TRUE`, the data set used for the estimation is saved in the returned object. This ensures the consistency of post-processing (like fit statistics, predict, etc) even if the original data has been modified in the meantime. Suggestion by Vincent Arel-Bundock, #340
- all estimation functions gain the argument `data.save`. If `TRUE`, the data set used for the estimation is saved in the returned object. This ensures the consistency of post-processing (like fit statistics, predict, update, etc) even if the original data has been modified in the meantime. Suggestion by Vincent Arel-Bundock, #340

- new function `fixest_data` to access the original data set used at estimation-time. Suggestion by Kyle Butts, #465

- new method `df.residual.fixest`. Thanks to @rferrali, #455
- new method `df.residual.fixest`. Suggestion by @rferrali, #455

- now the `vcov` method inherits the small sample correction and type of VCOV used in the original estimation. Thanks to @mgoplerud, #356
- now the `vcov` method inherits the small sample correction and type of VCOV used in the original estimation. Thanks to @mgoplerud, #356

## Bugs

Expand All @@ -34,6 +34,8 @@
- fix bug in TSLS estimations when the formula contained multiple LHS to be expanded with the dot square bracket operator. Reported by @svraka, #395

- fix bug leading to the absence of warning when the convergence of the demeaning algorithm failed. Thanks to @ja-ortiz-uniandes, #323

- fix bug in IV estimation when all exogenous variables were removed because of collinearity with the fixed-effects. Thanks to @Oravishayrizi, #371

## Improvements

Expand Down
44 changes: 27 additions & 17 deletions R/estimation.R
Original file line number Diff line number Diff line change
Expand Up @@ -1075,15 +1075,16 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
if(do_iv){

if(isFixef){

iv_products = cpp_iv_products(X = X_demean, y = y_demean,
Z = iv.mat_demean, u = iv_lhs_demean,
w = weights, nthreads = nthreads)
Z = iv.mat_demean, u = iv_lhs_demean,
w = weights, nthreads = nthreads)
} else {
if(!is.matrix(X_all)){
X_all = as.matrix(X_all)
}
iv_products = cpp_iv_products(X = X_all, y = my_lhs, Z = iv.mat,
u = iv_lhs, w = weights, nthreads = nthreads)
u = iv_lhs, w = weights, nthreads = nthreads)
}

} else {
Expand Down Expand Up @@ -1132,26 +1133,28 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
}

my_iv_products = list(XtX = XtX,
Xty = Xty,
ZXtZX = iv_products$ZXtZX[qui_iv, qui_iv, drop = FALSE],
ZXtu = lapply(iv_products$ZXtu, function(x) x[qui_iv]))
Xty = Xty,
ZXtZX = iv_products$ZXtZX[qui_iv, qui_iv, drop = FALSE],
ZXtu = lapply(iv_products$ZXtu, function(x) x[qui_iv]))

if(isFixef){
my_res = feols(env = current_env, iv_products = my_iv_products,
X_demean = X_demean[ , qui_X, drop = FALSE],
y_demean = y_demean[[ii]],
iv.mat_demean = iv.mat_demean, iv_lhs_demean = iv_lhs_demean)
X_demean = X_demean[ , qui_X, drop = FALSE],
y_demean = y_demean[[ii]],
iv.mat_demean = iv.mat_demean, iv_lhs_demean = iv_lhs_demean)
} else {
my_res = feols(env = current_env, iv_products = my_iv_products)
}

} else {
if(isFixef){
my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE], xwy = xwy[[ii]][qui_X],
X_demean = X_demean[ , qui_X, drop = FALSE],
y_demean = y_demean[[ii]])
my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE],
xwy = xwy[[ii]][qui_X],
X_demean = X_demean[ , qui_X, drop = FALSE],
y_demean = y_demean[[ii]])
} else {
my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE], xwy = xwy[[ii]][qui_X])
my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE],
xwy = xwy[[ii]][qui_X])
}
}

Expand Down Expand Up @@ -1223,7 +1226,7 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
iv_lhs_demean = dots$iv_lhs_demean

iv_products = dots$iv_products

} else {
# fixef information
fixef_sizes = get("fixef_sizes", env)
Expand Down Expand Up @@ -1266,6 +1269,13 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
u = iv_lhs_demean, w = weights, nthreads = nthreads)

}


only_0 = cpppar_check_only_0(X_demean, nthreads)
no_X_Fstat = FALSE
if(all(only_0 == 1)){
no_X_Fstat = TRUE
}

if(n_vars_X == 0){
ZX_demean = iv.mat_demean
Expand All @@ -1284,13 +1294,13 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.

for(i in 1:n_endo){
current_env = reshape_env(env, lhs = iv_lhs[[i]], rhs = ZX, fml_iv_endo = iv_lhs_names[i])

my_res = feols(env = current_env, xwx = ZXtZX, xwy = ZXtu[[i]],
X_demean = ZX_demean, y_demean = iv_lhs_demean[[i]],
add_fitted_demean = TRUE, iv_call = TRUE, notes = FALSE)

# For the F-stats
if(n_vars_X == 0){
if(n_vars_X == 0 || no_X_Fstat){
my_res$ssr_no_inst = cpp_ssq(iv_lhs_demean[[i]], weights)
} else {
fit_no_inst = ols_fit(iv_lhs_demean[[i]], X_demean, w = weights, correct_0w = FALSE,
Expand Down Expand Up @@ -1353,7 +1363,7 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
resid_1st_stage = resid_s1, iv_call = TRUE, notes = FALSE)

# For the F-stats
if(n_vars_X == 0){
if(n_vars_X == 0 || no_X_Fstat){
res_second_stage$ssr_no_endo = cpp_ssq(y_demean, weights)
} else {
fit_no_endo = ols_fit(y_demean, X_demean, w = weights, correct_0w = FALSE,
Expand Down
16 changes: 12 additions & 4 deletions R/test_fun.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ test = function(x, y, type = "=", tol = 1e-6){

mc = match.call()
IS_Y = TRUE
if(missing(type) && length(y) == 1 && is.character(y) && y %in% c("err", "=", "~")){
if(missing(type) && length(y) == 1 && is.character(y) && y %in% c("err", "=", "~", "warn")){
IS_Y = FALSE
type = y
}

type = switch(type, "error" = "err", type)
type = switch(type, "error" = "err", "warning" = "warn", type)

plural_core = function(PLURAL, type, s, verb = FALSE, past = FALSE){

Expand Down Expand Up @@ -118,8 +118,16 @@ test = function(x, y, type = "=", tol = 1e-6){
type = "~"
if(missing(tol)) tol = 1e-12
}

if(type == "err"){

if(type == "warn"){
# we expect an warning
m = tryCatch(x, warning = function(w) structure(conditionMessage(w), class = "try-warning"))
if(!"try-warning" %in% class(m)){
stop("Expected a warning that did not occur.")
} else if(IS_Y && !grepl(tolower(y), tolower(m), fixed = TRUE)){
stop("This is a warning, but the messages don't match:\nEXPECTED: \n", y, "\n..ACTUAL: \n", x)
}
} else if(type == "err"){
# we expect an error
m = tryCatch(x, error = function(e) structure(conditionMessage(e), class = "try-error"))
if(!"try-error" %in% class(m)){
Expand Down
6 changes: 6 additions & 0 deletions tests/fixest_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,12 @@ if(Sys.info()["sysname"] == "Windows"){
test(se(sum_2nd), se(est_iv))
}

# check no bug when all exogenous vars are removed bc of collinearity
df = data.frame(x = rnorm(8), y = rnorm(8),
z = rnorm(8), fe = rep(0:1, each = 4))

est_iv = feols(y ~ fe | fe | x ~ z, df)
est_iv = feols(y ~ sw(fe, fe) | fe | x ~ z, df)

# check no bug
etable(summary(est_iv, stage = 1:2))
Expand Down

0 comments on commit 4e7d0b6

Please sign in to comment.