fix bug in IV est when all exo vars are removed, fixes: #371

lrberge · Feb 14, 2024 · 4e7d0b6 · 4e7d0b6
1 parent db63f13
commit 4e7d0b6
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 24 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -5,13 +5,13 @@
 
 - **the demeaning algorithm has been reworked!** The code has been condensed and four new parameters have been introduced to control the details of how the algorithm works. The new default values should lead to quicker convergence in general (for difficult cases). The new function `demeaning_algo` gives the user fine control over the internal parameters of the algorithm.
 
-- all estimation functions gain the argument `data.save`. If `TRUE`, the data set used for the estimation is saved in the returned object. This ensures the consistency of post-processing (like fit statistics, predict, etc) even if the original data has been modified in the meantime. Suggestion by Vincent Arel-Bundock, #340
+- all estimation functions gain the argument `data.save`. If `TRUE`, the data set used for the estimation is saved in the returned object. This ensures the consistency of post-processing (like fit statistics, predict, update, etc) even if the original data has been modified in the meantime. Suggestion by Vincent Arel-Bundock, #340
 
 - new function `fixest_data` to access the original data set used at estimation-time. Suggestion by Kyle Butts, #465
 
-- new method `df.residual.fixest`. Thanks to @rferrali, #455
+- new method `df.residual.fixest`. Suggestion by @rferrali, #455
 
-- now the `vcov` method inherits the small sample correction and type of VCOV used in the original estimation. Thanks to @mgoplerud, #356 
+- now the `vcov` method inherits the small sample correction and type of VCOV used in the original estimation. Thanks to @mgoplerud, #356
 
 ## Bugs
 
@@ -34,6 +34,8 @@
 - fix bug in TSLS estimations when the formula contained multiple LHS to be expanded with the dot square bracket operator. Reported by @svraka, #395
 
 - fix bug leading to the absence of warning when the convergence of the demeaning algorithm failed. Thanks to @ja-ortiz-uniandes, #323
+
+- fix bug in IV estimation when all exogenous variables were removed because of collinearity with the fixed-effects. Thanks to @Oravishayrizi, #371
 
 ## Improvements
 

diff --git a/R/estimation.R b/R/estimation.R
@@ -1075,15 +1075,16 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
         if(do_iv){
 
           if(isFixef){
+
             iv_products = cpp_iv_products(X = X_demean, y = y_demean,
-                            Z = iv.mat_demean, u = iv_lhs_demean,
-                            w = weights, nthreads = nthreads)
+                                          Z = iv.mat_demean, u = iv_lhs_demean,
+                                          w = weights, nthreads = nthreads)
           } else {
             if(!is.matrix(X_all)){
               X_all = as.matrix(X_all)
             }
             iv_products = cpp_iv_products(X = X_all, y = my_lhs, Z = iv.mat,
-                            u = iv_lhs, w = weights, nthreads = nthreads)
+                                          u = iv_lhs, w = weights, nthreads = nthreads)
           }
 
         } else {
@@ -1132,26 +1133,28 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
               }
 
               my_iv_products = list(XtX = XtX,
-                          Xty = Xty,
-                          ZXtZX = iv_products$ZXtZX[qui_iv, qui_iv, drop = FALSE],
-                          ZXtu = lapply(iv_products$ZXtu, function(x) x[qui_iv]))
+                                    Xty = Xty,
+                                    ZXtZX = iv_products$ZXtZX[qui_iv, qui_iv, drop = FALSE],
+                                    ZXtu = lapply(iv_products$ZXtu, function(x) x[qui_iv]))
 
               if(isFixef){
                 my_res = feols(env = current_env, iv_products = my_iv_products,
-                         X_demean = X_demean[ , qui_X, drop = FALSE],
-                         y_demean = y_demean[[ii]],
-                         iv.mat_demean = iv.mat_demean, iv_lhs_demean = iv_lhs_demean)
+                               X_demean = X_demean[ , qui_X, drop = FALSE],
+                               y_demean = y_demean[[ii]],
+                               iv.mat_demean = iv.mat_demean, iv_lhs_demean = iv_lhs_demean)
               } else {
                 my_res = feols(env = current_env, iv_products = my_iv_products)
               }
 
             } else {
               if(isFixef){
-                my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE], xwy = xwy[[ii]][qui_X],
-                         X_demean = X_demean[ , qui_X, drop = FALSE],
-                         y_demean = y_demean[[ii]])
+                my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE], 
+                               xwy = xwy[[ii]][qui_X],
+                               X_demean = X_demean[ , qui_X, drop = FALSE],
+                               y_demean = y_demean[[ii]])
               } else {
-                my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE], xwy = xwy[[ii]][qui_X])
+                my_res = feols(env = current_env, xwx = xwx[qui_X, qui_X, drop = FALSE], 
+                               xwy = xwy[[ii]][qui_X])
               }
             }
 
@@ -1223,7 +1226,7 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
         iv_lhs_demean = dots$iv_lhs_demean
 
         iv_products = dots$iv_products
-
+        
       } else {
         # fixef information
         fixef_sizes = get("fixef_sizes", env)
@@ -1266,6 +1269,13 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
                                       u = iv_lhs_demean, w = weights, nthreads = nthreads)
 
       }
+
+
+      only_0 = cpppar_check_only_0(X_demean, nthreads)
+      no_X_Fstat = FALSE
+      if(all(only_0 == 1)){
+        no_X_Fstat = TRUE
+      }
 
       if(n_vars_X == 0){
         ZX_demean = iv.mat_demean
@@ -1284,13 +1294,13 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
 
       for(i in 1:n_endo){
         current_env = reshape_env(env, lhs = iv_lhs[[i]], rhs = ZX, fml_iv_endo = iv_lhs_names[i])
-
+        
         my_res = feols(env = current_env, xwx = ZXtZX, xwy = ZXtu[[i]],
                        X_demean = ZX_demean, y_demean = iv_lhs_demean[[i]],
                        add_fitted_demean = TRUE, iv_call = TRUE, notes = FALSE)
 
         # For the F-stats
-        if(n_vars_X == 0){
+        if(n_vars_X == 0 || no_X_Fstat){
           my_res$ssr_no_inst = cpp_ssq(iv_lhs_demean[[i]], weights)
         } else {
           fit_no_inst = ols_fit(iv_lhs_demean[[i]], X_demean, w = weights, correct_0w = FALSE,
@@ -1353,7 +1363,7 @@ feols = function(fml, data, vcov, weights, offset, subset, split, fsplit, split.
                                resid_1st_stage = resid_s1, iv_call = TRUE, notes = FALSE)
 
       # For the F-stats
-      if(n_vars_X == 0){
+      if(n_vars_X == 0 || no_X_Fstat){
         res_second_stage$ssr_no_endo = cpp_ssq(y_demean, weights)
       } else {
         fit_no_endo = ols_fit(y_demean, X_demean, w = weights, correct_0w = FALSE,

diff --git a/R/test_fun.R b/R/test_fun.R
@@ -24,12 +24,12 @@ test = function(x, y, type = "=", tol = 1e-6){
 
   mc = match.call()
   IS_Y = TRUE
-  if(missing(type) && length(y) == 1 && is.character(y) && y %in% c("err", "=", "~")){
+  if(missing(type) && length(y) == 1 && is.character(y) && y %in% c("err", "=", "~", "warn")){
     IS_Y = FALSE
     type = y
   }
 
-  type = switch(type, "error" = "err", type)
+  type = switch(type, "error" = "err", "warning" = "warn", type)
 
   plural_core = function(PLURAL, type, s, verb = FALSE, past = FALSE){
 
@@ -118,8 +118,16 @@ test = function(x, y, type = "=", tol = 1e-6){
     type = "~"
     if(missing(tol)) tol = 1e-12
   }
-
-  if(type == "err"){
+
+  if(type  == "warn"){
+    # we expect an warning
+    m = tryCatch(x, warning = function(w) structure(conditionMessage(w), class = "try-warning"))
+    if(!"try-warning" %in% class(m)){
+      stop("Expected a warning that did not occur.")
+    } else if(IS_Y && !grepl(tolower(y), tolower(m), fixed = TRUE)){
+      stop("This is a warning, but the messages don't match:\nEXPECTED: \n", y, "\n..ACTUAL: \n", x)
+    }
+  } else if(type == "err"){
     # we expect an error
     m = tryCatch(x, error = function(e) structure(conditionMessage(e), class = "try-error"))
     if(!"try-error" %in% class(m)){

diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
@@ -892,6 +892,12 @@ if(Sys.info()["sysname"] == "Windows"){
   test(se(sum_2nd), se(est_iv))
 }
 
+# check no bug when all exogenous vars are removed bc of collinearity
+df = data.frame(x = rnorm(8), y = rnorm(8),
+                z = rnorm(8), fe = rep(0:1, each = 4))
+
+est_iv = feols(y ~ fe | fe | x ~ z, df)
+est_iv = feols(y ~ sw(fe, fe) | fe | x ~ z, df)
 
 # check no bug
 etable(summary(est_iv, stage = 1:2))