diff --git a/DESCRIPTION b/DESCRIPTION
index 2479f362..6155fe16 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -19,7 +19,7 @@ Authors@R:
              family = "Lenth",
              role = "ctb",
              email = "russell-lenth@uiowa.edu"))
-Imports: stats, graphics, grDevices, tools, utils, methods, numDeriv, nlme, sandwich, Rcpp(>= 1.0.5), dreamerr(>= 1.4.0), stringmagic(>= 1.1.2)
+Imports: stats, graphics, grDevices, tools, utils, methods, numDeriv, nlme, sandwich, Rcpp(>= 1.0.5), dreamerr(>= 1.4.0), stringmagic(>= 1.1.2), Matrix
 Suggests: knitr, rmarkdown, data.table, plm, MASS, pander, ggplot2, lfe, tinytex, pdftools, emmeans, estimability, AER
 LinkingTo: Rcpp
 Depends: R(>= 3.5.0)
diff --git a/NAMESPACE b/NAMESPACE
index 610057f2..baf1ff04 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -39,6 +39,9 @@ export(as.dict)
 # setters & getters
 exportPattern("^(s|g)etFixest")
 
+# sparse_model_matrix
+export(sparse_model_matrix)
+
 # coeftable and co
 export(coeftable, se, pvalue, tstat)
 S3method(coeftable, default)
diff --git a/NEWS.md b/NEWS.md
index 95003bcc..ba804165 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,38 @@
 
 # fixest 0.12.1
 
+## New Features
+
+- Add the ability to create sparse model matrices from regression objects and formula in a memory-efficient way.
+
+```R
+est = feols(mpg ~ drat | cyl, mtcars)
+
+sparse_model_matrix(est, type = "lhs")
+#> 32 x 1 sparse Matrix of class "dgCMatrix"
+#>        mpg
+#>  [1,] 21.0
+#>  [2,] 21.0
+#>  [3,] 22.8
+
+sparse_model_matrix(est, type = c("rhs", "fixef"))
+#> 32 x 4 sparse Matrix of class "dgCMatrix"
+#>      drat cyl::4 cyl::6 cyl::8
+#> [1,] 3.90      .      1      .
+#> [2,] 3.90      .      1      .
+#> [3,] 3.85      1      .      .
+
+# Or you can pass a (linear) formula
+sparse_model_matrix(mpg ~ i(vs) | gear^cyl, data = mtcars, type = c("rhs", "fixef"))
+#> 32 x 10 sparse Matrix of class "dgCMatrix"
+#>   [[ suppressing 10 column names ‘vs::0’, ‘vs::1’, ‘gear^cyl::4_6’ ... ]]
+#>                          
+#>  [1,] 1 . 1 . . . . . . .
+#>  [2,] 1 . 1 . . . . . . .
+#>  [3,] . 1 . 1 . . . . . .
+```
+
+
 ## Major bugs affecting R versions <= 4.1.2
 
 - require `stringmagic` version >= 1.1.2 to fix several major bugs affecting R versions <= 4.1.2
diff --git a/R/miscfuns.R b/R/miscfuns.R
index 70604c0e..0b92cefe 100644
--- a/R/miscfuns.R
+++ b/R/miscfuns.R
@@ -1450,7 +1450,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
     items_name = items
   }
 
-  if(FROM_FIXEST){
+  if(FROM_FIXEST || is_sparse){
     # Pour avoir des jolis noms c'est un vrai gloubiboulga,
     # mais j'ai pas trouve plus simple...
     if(IS_INTER_FACTOR){
@@ -1487,6 +1487,10 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
     fe_colid = to_integer(fe_num[valid_row], sorted = TRUE)
 
     values = if(length(var) == 1) rep(1, length(valid_row)) else var
+
+    # Clean names
+    col_names = sub("^.*__CLEAN__", "", col_names)
+
     res = list(rowid = which(valid_row), values = values,
            colid = fe_colid, col_names = col_names)
 
diff --git a/R/sparse_model_matrix.R b/R/sparse_model_matrix.R
new file mode 100644
index 00000000..8538998c
--- /dev/null
+++ b/R/sparse_model_matrix.R
@@ -0,0 +1,708 @@
+#' Design matrix of a `fixest` object returned in sparse format
+#'
+#' This function creates the left-hand-side or the right-hand-side(s) of a [`femlm`], [`feols`] or [`feglm`] estimation.
+#'
+#' @inheritParams nobs.fixest
+#'
+#' @param data If missing (default) then the original data is obtained by evaluating the `call`. Otherwise, it should be a `data.frame`.
+#' @param type Character vector or one sided formula, default is "rhs". Contains the type of matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" (exogenous vars), "iv.inst" (instruments).
+#' @param na.rm Default is `TRUE`. Should observations with NAs be removed from the matrix?
+#' @param collin.rm Logical scalar. Whether to remove variables that were found to be collinear during the estimation. Beware: it does not perform a collinearity check and bases on the `coef(object)`. Default is TRUE if object is a `fixest` object, or FALSE if object is a formula.
+#' @param combine Logical scalar, default is `TRUE`. Whether to combine each resulting sparse matrix 
+#' @param ... Not currently used.
+#'
+#' @return
+#' It returns either a single sparse matrix a list of matrices, depending whether `combine` is `TRUE` or `FALSE`. The sparse matrix is of class `dgCMatrix` from the `Matrix` package.
+#'
+#' @seealso
+#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. [`formula.fixest`], [`update.fixest`], [`summary.fixest`], [`vcov.fixest`].
+#'
+#'
+#' @author
+#' Laurent Berge, Kyle Butts
+#'
+#' @examples
+#'
+#' est = feols(wt ~ i(vs) + hp | cyl, mtcars)
+#' sparse_model_matrix(est)
+#' sparse_model_matrix(wt ~ i(vs) + hp | cyl, mtcars)
+#'
+#' @export
+sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin.rm = NULL, combine = TRUE, ...) {
+    # We evaluate the formula with the past call
+    # type: lhs, rhs, fixef, iv.endo, iv.inst, iv.rhs1, iv.rhs2
+    # if fixef => return a DF
+
+    # Checking the arguments
+    if (is_user_level_call()) {
+        validate_dots(suggest_args = c("data", "type"))
+    }
+
+    # We allow type to be used in the location of data if data is missing
+    if (!missing(data) && missing(type)) {
+        sc = sys.call()
+        if (!"data" %in% names(sc)) {
+            if (!is.null(data) && (is.character(data) || "formula" %in% class(data))) {
+                # data is in fact the type
+                type = data
+                data = NULL
+            }
+        }
+    }
+
+    type = check_set_types(type, c("lhs", "rhs", "fixef", "iv.endo", "iv.inst", "iv.exo", "iv.rhs1", "iv.rhs2"))
+
+    if (isTRUE(object$is_fit)) {
+        stop("sparse_model_matrix method not available for fixest estimations obtained from fit methods.")
+    }
+
+    if (any(grepl("^iv", type)) && !isTRUE(object$iv)) {
+        stop("The type", enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.")
+    }
+
+    check_arg(subset, "logical scalar | character vector no na")
+
+    if (missing(collin.rm)) {
+      collin.rm = if (inherits(object, "formula")) FALSE else TRUE
+    } else {
+      check_arg_plus(collin.rm, "logical scalar")
+    }
+
+    # Evaluation with the data
+    original_data = FALSE
+    if (missnull(data)) {
+        original_data = TRUE
+        data = fetch_data(object, "To apply 'sparse_model_matrix', ")
+    }
+
+    # control of the data
+    if (is.matrix(data)) {
+        if (is.null(colnames(data))) {
+            stop("If argument 'data' is to be a matrix, its columns must be named.")
+        }
+        data = as.data.frame(data)
+    }
+
+    if (!"data.frame" %in% class(data)) {
+        stop("The argument 'data' must be a data.frame or a matrix.")
+    }
+
+
+    # Allows passage of formula to sparse_model_matrix. A bit inefficient, but it works.
+    isFormula = FALSE
+    split_fml = NULL
+    if (inherits(object, "formula")) {
+      split_fml = fml_split_internal(object)
+      if (isTRUE(collin.rm)) {
+        message("Formula passed to sparse_model_matrix with collin.rm == TRUE. Estimating feols with formula.")
+        object = feols(object, data = data)
+      } else if (length(split_fml) == 3) {
+        message("Formula passed to sparse_model_matrix with an iv formula. Estimating feols with formula.")
+        object = feols(object, data = data)
+      } else {
+        isFormula = TRUE
+      }
+    }
+
+    # na.rm = FALSE doesn't work with type = "fixef" (which FE col gets NA?)
+    if (("fixef" %in% type & !na.rm)) {
+        # na.rm = TRUE
+        message("na.rm = FALSE doesn't work with type = 'fixef'. It has been set to TRUE.")
+    }
+
+
+    # Panel setup
+    if (!isFormula) {
+      panel__meta__info = set_panel_meta_info(object, data)
+    }
+
+    # The formulas
+    if (isFormula) {
+      fml_linear = split_fml[[1]]
+
+      fml_0 = attr(stats::terms(fml_linear), "intercept") == 0
+      fake_intercept = !is.null(split_fml[[2]]) | fml_0
+
+    } else {
+      fml_linear = formula(object, type = "linear")
+
+      # we kick out the intercept if there is presence of fixed-effects
+      fml_0 = attr(stats::terms(fml_linear), "intercept") == 0
+      fake_intercept = !is.null(object$fixef_vars) && !(!is.null(object$slope_flag) && all(object$slope_flag < 0)) 
+      fake_intercept = fake_intercept | fml_0
+    }
+
+    res = list()
+
+    if ("lhs" %in% type) {
+        lhs_text = deparse_long(fml_linear[[2]])
+        lhs = eval(fml_linear[[2]], data) 
+        lhs = Matrix::Matrix(lhs, sparse = TRUE, ncol = 1)
+
+        colnames(lhs) = lhs_text
+
+        res[["lhs"]] = lhs
+    }
+
+    if ("rhs" %in% type && !isTRUE(object$onlyFixef)) {
+
+        fml = fml_linear
+
+        if (isFormula && (length(split_fml) == 3)) {
+          fml_iv = split_fml[[3]]
+          fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
+        } else if (isTRUE(object$iv)) {
+          fml_iv = object$fml_all$iv
+          fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
+        }
+
+        vars = attr(stats::terms(fml), "term.labels")
+
+        linear.mat = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "rhs", add_intercept = !fake_intercept)
+
+        res[["rhs"]] = linear.mat
+    }
+
+    if ("fixef" %in% type) {
+
+        if (isFormula && (length(split_fml) < 2)) {
+          mat_FE = NULL
+        } else if (!isFormula & length(object$fixef_id) == 0) {
+          mat_FE = NULL
+        } else { 
+
+            if (isFormula) {
+              fixef_fml = .xpd(~ ..fe, ..fe = split_fml[[2]])
+            } else {
+              fixef_fml = object$fml_all$fixef
+            }
+
+            fixef_terms_full = fixef_terms(fixef_fml)
+            fe_vars = fixef_terms_full$fe_vars
+            slope_var_list = fixef_terms_full$slope_vars_list
+
+            fixef_df = prepare_df(fe_vars, data, fastCombine = FALSE)
+
+            # Check for slope vars
+            if (any(fixef_terms_full$slope_flag > 0)) {
+              slope_df = prepare_df(unlist(slope_var_list), data)
+            }
+
+            cols_lengths = c(0)
+            total_cols = 0
+            n_FE = 0
+            nrows = nrow(data)
+            for (i in seq_along(fe_vars)) {
+
+              fe_var = fe_vars[i]
+              slope_vars = slope_var_list[[i]]
+              n_slope_vars = if (is.null(slope_vars)) 0 else length(slope_vars)
+
+              fe = fixef_df[[fe_var]]
+              unique_fe = unique(fe)
+              n_cols = length(unique_fe[!is.na(unique_fe)])
+
+              total_cols = total_cols + n_cols * (1 + n_slope_vars)
+              cols_lengths = c(cols_lengths, rep(n_cols, 1 + n_slope_vars))
+              n_FE = n_FE + 1 + n_slope_vars
+            }
+            running_cols = cumsum(cols_lengths)
+
+            id_all = names_all = val_all = vector("list", n_FE)
+            rowid = 1:nrows
+            j = 1
+            for (i in seq_along(fe_vars)) {
+
+              fe_var = fe_vars[i]
+              xi = fixef_df[[fe_var]]
+
+              keep = which(!is.na(xi))
+              if (length(keep) == 0) stop("All values of the fixed-effect variable '", fe_var, "' are NA.")
+
+              xi = xi[keep]
+              xi_quf = quickUnclassFactor(xi, addItem = TRUE)
+
+              col_id = xi_quf$x
+              col_levels = as.character(xi_quf$items)
+
+              slope_vars = slope_var_list[[i]]
+              n_slope_vars = if (is.null(slope_vars)) 0 else length(slope_vars)
+
+              # Be careful with NAs
+              # First fixed-effect by itself
+              val_all[[j]] = c(rep(1, length(col_id)), rep(NA, nrows - length(keep)))
+              id_all[[j]] = cbind(
+                c(
+                  rowid[keep],
+                  rowid[-keep]
+                ), 
+                c(
+                  running_cols[j] + col_id, 
+                  rep(running_cols[j] + 1, nrows - length(keep))
+                )
+              )
+              names_all[[j]] = paste0(fe_var, "::", col_levels)
+              j = j + 1
+
+              for (k in seq_along(slope_vars)) {
+                slope_var = slope_vars[k]
+                slope = slope_df[[slope_var]]
+
+                val_all[[j]] = c(as.numeric(slope[keep]), rep(NA, nrows - length(keep)))
+                id_all[[j]] = cbind(
+                  c(
+                    rowid[keep],         
+                    rowid[-keep]
+                  ), 
+                  c(
+                    running_cols[j] + col_id, 
+                    rep(running_cols[j] + 1, nrows - length(keep))
+                  )
+                )
+                names_all[[j]] = paste0(fe_var, "[[", slope_var, "]]", "::", col_levels)
+                j = j + 1
+              }
+            }
+
+            id_mat = do.call(rbind, id_all)
+            val_vec = unlist(val_all)
+            names_vec = unlist(names_all)
+
+            mat_FE = Matrix::sparseMatrix(
+              i = id_mat[, 1],
+              j = id_mat[, 2],
+              x = val_vec,
+              dimnames = list(NULL, names_vec)
+            )
+
+
+            # Keep non-zero FEs
+            if (collin.rm == TRUE) {
+              fixefs = fixef(object, sorted = TRUE)
+
+              select =	lapply(
+                names(fixefs), 
+                function(var) {
+                  names = names(fixefs[[var]])
+                  names = names[fixefs[[var]] != 0]
+
+                  paste0(var, "::", names)
+                }
+              )
+              select = unlist(select)
+
+              # When original_data isn't used, some FEs may not be in the new dataset, add them in
+              missing_cols = setdiff(select, colnames(mat_FE))
+              mat_FE = cbind(
+                mat_FE, 
+                Matrix::Matrix(0, ncol = length(missing_cols), nrow = nrow(mat_FE), sparse = TRUE, dimnames = list(NULL, missing_cols))
+              )
+
+              # Subset fixef and order columns to match fixef(object)
+              idx = which(colnames(mat_FE) %in% select)
+              mat_FE = mat_FE[, idx, drop = FALSE]
+
+              # Reorder columns to match fixef(object)
+              reorder = match(unlist(select), colnames(mat_FE))
+              mat_FE = mat_FE[, reorder[!is.na(reorder)], drop = FALSE]
+            }
+
+        }
+
+        res[["fixef"]] = mat_FE
+    }
+
+    #
+    # IV
+    #
+    if ("iv.endo" %in% type) {
+        fml = object$iv_endo_fml
+        vars = attr(stats::terms(fml), "term.labels")
+        endo.mat = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm)
+
+        res[["iv.endo"]] = endo.mat
+    }
+
+    if ("iv.inst" %in% type) {
+        fml = object$fml_all$iv
+        vars = attr(stats::terms(fml), "term.labels")
+        inst.mat = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm)
+
+        res[["iv.inst"]] = inst.mat
+    }
+
+    if ("iv.exo" %in% type) {
+
+        fml = object$fml_all$linear
+        vars = attr(stats::terms(fml), "term.labels")
+        exo.mat = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.exo", add_intercept = !fake_intercept)
+
+        res[["iv.exo"]] = exo.mat
+    }
+
+    if ("iv.rhs1" %in% type) {
+        fml = object$fml
+        if (object$iv_stage == 2) {
+            fml_iv = object$fml_all$iv
+            fml = .xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]])
+        }
+        vars = attr(stats::terms(fml), "term.labels")
+        iv_rhs1 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs1", add_intercept = !fake_intercept)
+
+        res[["iv.rhs1"]] = iv_rhs1
+    }
+
+    if ("iv.rhs2" %in% type) {
+        # Second stage
+        if (!object$iv_stage == 2) {
+            stop("In model.matrix, the type 'iv.rhs2' is only valid for second stage IV models. This estimation is the first stage.")
+        }
+
+        # I) we get the fitted values
+        stage_1 = object$iv_first_stage
+
+        fit_vars = c()
+        for (i in seq_along(stage_1)) {
+            fit_vars[i] = v = paste0("fit_", names(stage_1)[i])
+            data[[v]] = predict(stage_1[[i]], newdata = data, sample = "original")
+        }
+
+        # II) we create the variables
+        fml = object$fml
+        fml = .xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]])
+        vars = attr(stats::terms(fml), "term.labels")
+        iv_rhs2 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs2", add_intercept = !fake_intercept)
+
+        res[["iv.rhs2"]] = iv_rhs2
+    }
+
+    if (na.rm) {
+      na_rows = lapply(res, function(mat) {
+        # Get rows with NA 
+        1 + mat@i[is.na(mat@x)]
+      })
+
+      na_rows = unlist(na_rows, use.names = FALSE)
+
+      if (original_data) {
+        na_rows = c(na_rows, -1 * object$obs_selection$obsRemoved)
+      }
+
+      na_rows = unique(na_rows)
+
+      if (length(na_rows) > 0) {
+        res = lapply(res, function(mat) { 
+          mat[-na_rows, , drop = FALSE]
+        })
+      }
+    } 
+
+    # Formatting res
+    if (length(res) == 0) {
+        return(NULL)
+    } else if (length(type) > 1) {
+        if (combine) {
+          res = res[type]
+          res = do.call(cbind, unname(res))
+        }
+    } else {
+        res = res[[1]]
+    }
+
+    res
+}
+
+
+
+# Internal: modifies the calls so that each variable/interaction is evaluated with mult_sparse
+mult_wrap = function(x) {
+	# x: character string of a variable to be evaluated
+	# ex: "x1" => mult_sparse(x1)
+	#     "x1:factor(x2):x3" => mult_sparse(x3, factor(x2), x1)
+	#
+	# We also add the argument sparse to i()
+	#     "x1:i(species, TRUE)" => mult_sparse(x1, i(species, TRUE, sparse = TRUE))
+
+	x_call = str2lang(x)
+
+	res = (~ mult_sparse())[[2]]
+
+	if (length(x_call) == 1 || x_call[[1]] != ":") {
+		res[[2]] = x_call
+
+	} else {
+		res[[2]] = x_call[[3]]
+		tmp = x_call[[2]]
+
+		while (length(tmp) == 3 && tmp[[1]] == ":") {
+			res[[length(res) + 1]] = tmp[[3]]
+			tmp = tmp[[2]]
+		}
+
+		res[[length(res) + 1]] = tmp
+	}
+
+	# We also add sparse to i() if found
+	for (i in 2:length(res)) {
+		ri = res[[i]]
+		if (length(ri) > 1 && ri[[1]] == "i") {
+			ri[["sparse"]] = TRUE
+			res[[i]] = ri
+		}
+	}
+
+	if (length(res) > 2) {
+		# we restore the original order
+		res[-1] = rev(res[-1])
+	}
+
+	return(res)
+}
+
+# Internal function to evaluate the variables (and interactions) in a sparse way
+mult_sparse = function(...) {
+	# Only sparsifies factor variables
+	# Takes care of interactions
+
+	dots = list(...)
+	n = length(dots)
+  mc = match.call()
+  var_call = sapply(seq_len(n), function(i) deparse_long(mc[[i+1]]))
+
+	num_var = NULL
+	factor_list = list()
+  factor_idx = c()
+  i_idx = c()
+	info_i = NULL
+	is_i = is_factor = FALSE
+	# You can't have interactions between i and factors, it's either
+
+	for (i in 1:n) {
+		xi = dots[[i]]
+    vari = mc[[i+1]]
+		if (is.numeric(xi)) {
+			# We stack the product
+			num_var = if (is.null(num_var)) xi else xi * num_var
+		} else if (inherits(xi, "i_sparse")) {
+			is_i = TRUE
+			info_i = xi
+      i_idx = c(i_idx, i)
+		} else {
+			is_factor = TRUE
+			factor_list[[length(factor_list) + 1]] = xi
+      factor_idx = c(factor_idx, i)
+		}
+	}
+
+  if (is_factor && is_i) {
+    stop("Unfortunately, can not interact factor with `i()` in sparse_model_matrix")
+  }
+
+	# numeric
+	if (!is_i && !is_factor) {
+		return(num_var)
+	}
+	# factor()
+	if (is_factor) {
+		factor_list$add_items = TRUE
+		factor_list$items.list = TRUE
+    factor_list$multi.join = " ; "
+		fact_as_int = do.call(to_integer, factor_list)
+
+		values = if (is.null(num_var)) rep(1, length(fact_as_int$x)) else num_var
+		rowid = seq_along(values)
+
+    # messy, but need this to get things like `factor(am)1:hp:factor(cyl)6`
+    items_mat = do.call(rbind, strsplit(fact_as_int$items, " ; "))
+
+    # intersplice var_call with items 
+    col_names = sapply(seq_len(nrow(items_mat)), function(i) {
+      pasteable = var_call 
+      pasteable[factor_idx] = paste0(var_call[factor_idx], items_mat[i, ])
+      paste(pasteable, collapse = ":")
+    }, USE.NAMES = FALSE)
+
+		res = list(rowid = rowid, colid = fact_as_int$x, values = values,
+				   col_names = col_names, n_cols = length(fact_as_int$items))
+	# i()
+	} else {
+
+		values = info_i$values
+		if (!is.null(num_var)) {
+			num_var = num_var[info_i$rowid]
+			values = values * num_var
+      col_names = sapply(info_i$col_names, function(item) {
+        pasteable = var_call
+        pasteable[i_idx] = item
+        paste(pasteable, collapse = ":")
+      }, USE.NAMES = FALSE)
+		} else {
+      col_names = info_i$col_names
+    }
+
+		res = list(rowid = info_i$rowid, colid = info_i$colid,
+				   values = values[info_i$rowid],
+				   col_names = col_names,
+				   n_cols = length(info_i$col_names))
+	}
+
+	class(res) = "sparse_var"
+
+	res
+}
+
+# Takes a vector of strings denoting the variables (including terms like `poly()`, `i()`, `I()`, `lag()`, etc.) and returns a sparse matrix of the variables extracted from `data`.
+vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type = NULL, add_intercept = FALSE) {
+
+    if (length(vars) == 0) {
+      # Case only FEs
+      mat = NULL
+    } else {
+
+      # Since we don't want to evaluate the factors,
+      # the code is a bit intricate because we have to catch them before
+      # any interaction takes place
+      #
+      # that's why I wrap interactions in a function (mult_sparse())
+      #
+
+      # Below, we evaluate all the variables in a "sparse" way
+
+      vars_calls = lapply(vars, mult_wrap)
+
+      n = length(vars)
+      variables_list = vector("list", n)
+      for (i in 1:n) {
+        variables_list[[i]] = eval(vars_calls[[i]], data)
+      }
+
+      # To create the sparse matrix, we need the column indexes
+      total_cols = 0
+      running_cols = c(0)
+      for (i in 1:n) {
+        xi = variables_list[[i]]
+        if (inherits(xi, "sparse_var")) {
+          total_cols = total_cols + xi$n_cols
+        } else {
+          total_cols = total_cols + NCOL(xi)
+        }
+        running_cols[i + 1] = total_cols
+      }
+
+      # We just create a sparse matrix and fill it
+
+      # 1) creating the indexes + names
+
+      # NOTA: I use lists to avoid creating copies
+      rowid = 1:nrow(data)
+      id_all = values_all = names_all = vector("list", n)
+      for (i in 1:n) {
+        xi = variables_list[[i]]
+
+        if (inherits(xi, "sparse_var")) {
+
+          id_all[[i]] = cbind(xi$rowid, running_cols[i] + xi$colid)
+          values_all[[i]] = xi$values
+          names_all[[i]] = xi$col_names
+
+        } else if (NCOL(xi) == 1) {
+
+          id_all[[i]] = cbind(rowid, running_cols[i] + 1)
+          values_all[[i]] = xi
+          names_all[[i]] = vars[[i]]
+
+        } else {
+
+          colid = rep(1:NCOL(xi), each = nrow(data))
+          id_all[[i]] = cbind(rep(rowid, NCOL(xi)), running_cols[i] + colid)
+          values_all[[i]] = as.vector(xi)
+          if (!is.null(colnames(xi))) {
+            names_all[[i]] = paste0(vars[[i]], colnames(xi))
+          } else {
+            names_all[[i]] = paste0(vars[[i]], 1:NCOL(xi))
+          }
+
+        }
+      }
+
+      id_mat = do.call(rbind, id_all)
+      values_vec = unlist(values_all)
+      names_vec = unlist(names_all)
+
+      # 2) filling the matrix: one shot, no copies
+
+      mat = Matrix::Matrix(0, nrow(data), total_cols, dimnames = list(NULL, names_vec))
+      mat[id_mat] = values_vec
+
+    }
+
+    if (collin.rm & is.null(object)) {
+      stop("You need to provide the 'object' argument to use 'collin.rm = TRUE'.")
+    }
+
+    if (collin.rm) {
+        qui = which(colnames(mat) %in% object$collin.var)
+        if (length(qui) == ncol(mat)) {
+            mat = NULL
+        } else if (length(qui) > 0) {
+            mat =  mat[, -qui, drop = FALSE]
+        }
+
+        coefs = names(object$coefficients)
+        if (isTRUE(object$iv)) {
+          fml_iv = object$fml_all$iv
+          endo = fml_iv[[2]]
+
+          # Trick to get the rhs variables as a character vector
+          endo = .xpd(~ ..endo, ..endo = endo)
+          endo = attr(stats::terms(endo), "term.labels")
+
+          exo = lapply(object$iv_first_stage, function(x) names(stats::coef(x)))
+          exo = unique(unlist(exo, use.names = FALSE))
+        }
+
+        # Custom subsetting for na.rm depending on `type`
+        if (!is.null(type)) {
+          if (type == "rhs") {
+            if (isTRUE(object$iv)) {
+              keep = c(endo, coefs)
+            } else {
+              keep = coefs
+            }
+          } else if (type == "iv.exo") {
+            keep = coefs
+          } else if (type == "iv.exo") {
+            keep = c(endo, coefs)
+          } else if (type == "iv.rhs1") {
+            keep = c(exo, coefs)
+          } else if (type == "iv.rhs2") {
+            keep = coefs
+          }
+
+          keep = keep[!keep %in% object$collin.var]
+          if (length(keep) == 0) {
+              mat = NULL
+          } else {
+              idx = which(colnames(mat) %in% keep)
+              mat =  mat[, idx, drop = FALSE]
+          }
+        }
+
+        if (length(coefs) == ncol(mat) && any(colnames(mat) != names(coefs))) {
+            # we reorder the matrix
+            # This can happen in multiple estimations, where we respect the
+            # order of the user
+
+            if (all(names(coefs) %in% colnames(mat))) {
+                mat = mat[, names(coefs), drop = FALSE]
+            }
+        }
+    }
+
+    if (add_intercept) {
+        mat = cbind(1, mat)
+        colnames(mat)[1] = "(Intercept)"
+    }
+
+    return(mat)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 1698743f..35a57f73 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -91,6 +91,7 @@ reference:
         - formula.fixest
         - terms.fixest
         - df.residual.fixest
+        - sparse_model_matrix
     - title: Formula tools
       desc: Tools to manipulate formulas
       contents:
diff --git a/man/sparse_model_matrix.Rd b/man/sparse_model_matrix.Rd
new file mode 100644
index 00000000..503dd2cd
--- /dev/null
+++ b/man/sparse_model_matrix.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sparse_model_matrix.R
+\name{sparse_model_matrix}
+\alias{sparse_model_matrix}
+\title{Design matrix of a \code{fixest} object returned in sparse format}
+\usage{
+sparse_model_matrix(
+  object,
+  data,
+  type = "rhs",
+  na.rm = TRUE,
+  collin.rm = NULL,
+  combine = TRUE,
+  ...
+)
+}
+\arguments{
+\item{object}{A \code{fixest} object. Obtained using the functions \code{\link{femlm}}, \code{\link{feols}} or \code{\link{feglm}}.}
+
+\item{data}{If missing (default) then the original data is obtained by evaluating the \code{call}. Otherwise, it should be a \code{data.frame}.}
+
+\item{type}{Character vector or one sided formula, default is "rhs". Contains the type of matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" (exogenous vars), "iv.inst" (instruments).}
+
+\item{na.rm}{Default is \code{TRUE}. Should observations with NAs be removed from the matrix?}
+
+\item{collin.rm}{Logical scalar. Whether to remove variables that were found to be collinear during the estimation. Beware: it does not perform a collinearity check and bases on the \code{coef(object)}. Default is TRUE if object is a \code{fixest} object, or FALSE if object is a formula.}
+
+\item{combine}{Logical scalar, default is \code{TRUE}. Whether to combine each resulting sparse matrix}
+
+\item{...}{Not currently used.}
+}
+\value{
+It returns either a single sparse matrix a list of matrices, depending whether \code{combine} is \code{TRUE} or \code{FALSE}. The sparse matrix is of class \code{dgCMatrix} from the \code{Matrix} package.
+}
+\description{
+This function creates the left-hand-side or the right-hand-side(s) of a \code{\link{femlm}}, \code{\link{feols}} or \code{\link{feglm}} estimation.
+}
+\examples{
+
+est = feols(wt ~ i(vs) + hp | cyl, mtcars)
+sparse_model_matrix(est)
+sparse_model_matrix(wt ~ i(vs) + hp | cyl, mtcars)
+
+}
+\seealso{
+See also the main estimation functions \code{\link{femlm}}, \code{\link{feols}} or \code{\link{feglm}}. \code{\link{formula.fixest}}, \code{\link{update.fixest}}, \code{\link{summary.fixest}}, \code{\link{vcov.fixest}}.
+}
+\author{
+Laurent Berge, Kyle Butts
+}
diff --git a/tests/fixest_tests.R b/tests/fixest_tests.R
index d68bea95..9ac5b62d 100644
--- a/tests/fixest_tests.R
+++ b/tests/fixest_tests.R
@@ -2203,6 +2203,142 @@ res_mult = feols(y1 ~ x1 | species | x2 ~ x3, base)
 m_lhs_rhs_fixef = model.matrix(res_mult, type = c("lhs", "iv.rhs2", "fixef"), na.rm = FALSE)
 test(names(m_lhs_rhs_fixef), c("y1", "fit_x2", "x1", "species"))
 
+####
+#### sparse_model_matrix ####
+####
+
+base = iris
+names(base) = c("y1", "x1", "x2", "x3", "species")
+base$y2 = 10 + rnorm(150) + 0.5 * base$x1
+base$x4 = rnorm(150) + 0.5 * base$y1
+base$fe2 = rep(letters[1:15], 10)
+base$fe2[50:51] = NA
+base$y2[base$fe2 == "a" & !is.na(base$fe2)] = 0
+base$x2[1:5] = NA
+base$x3[6] = NA
+base$fe3 = rep(letters[1:10], 15)
+base$id = rep(1:15, each = 10)
+base$time = rep(1:10, 15)
+
+base_bis = base[1:50, ]
+base_bis$id = rep(1:5, each = 10)
+base_bis$time = rep(1:10, 5)
+
+
+res = feols(y1 ~ x1 + x2 + x3, base)
+sm1 = sparse_model_matrix(
+  res, type = "lhs",
+  na.rm = TRUE
+)
+test(length(sm1), res$nobs)
+
+sm1_na = sparse_model_matrix(
+  res, data = base, 
+  type = "lhs",
+  na.rm = FALSE
+)
+test(length(sm1_na), res$nobs_origin)
+test(max(abs(sm1_na - base$y1), na.rm = TRUE), 0)
+
+
+sm2 = sparse_model_matrix(
+  res, type = c("lhs", "rhs"), data = base, 
+  combine = FALSE, na.rm = FALSE
+)
+y = sm2[["lhs"]]
+X = sm2[["rhs"]]
+obs_rm = res$obs_selection$obsRemoved
+res_bis = solve(
+  crossprod(as.matrix(X)[obs_rm, ]), 
+  crossprod(as.matrix(X)[obs_rm, ], as.matrix(y)[obs_rm, ])
+)
+test(as.numeric(res_bis), res$coefficients)
+
+# No constant
+res_nocons = feols(mpg ~ 0 + i(cyl), mtcars)
+sm_nocons = sparse_model_matrix(res_nocons, type = "rhs")
+test("(Intercept)" %in% colnames(sm_nocons), FALSE)
+
+# Lag 
+res_lag = feols(y1 ~ l(x1, 1:2) + x2 + x3, base, panel = ~id + time)
+sm_lag = sparse_model_matrix(res_lag, type = "rhs")
+test(nrow(sm_lag), nobs(res_lag))
+
+
+# TODO: Fix poly and newdata
+# With poly
+# res_poly = feols(y1 ~ poly(x1, 2), base)
+# works
+# res_poly = feols(y1 ~ x1, base)
+# sm_poly_old = sparse_model_matrix(res_poly)
+# sm_poly_new = sparse_model_matrix(res_poly, data = base_bis)
+# test(sm_poly_old[1:50, 3], sm_poly_new[, 3])
+
+
+# Interacted fixef
+res = feols(y1 ~ x1 + x2 + x3 | species^fe2, base)
+sm_ife = sparse_model_matrix(res, data = base, type = "fixef", collin.rm = FALSE)
+
+# fixef
+res = feols(y1 ~ x1 + x2 + x3 | species + fe2, base)
+sm_fe = sparse_model_matrix(res, data = base, type = "fixef")
+test(ncol(sm_fe), 17)
+
+sm_fe_no_collin_rm = sparse_model_matrix(res, data = base, type = "fixef", collin.rm = FALSE)
+test(ncol(sm_fe_no_collin_rm), 18)
+
+sm_fe_base_bis = sparse_model_matrix(res, data = base_bis, type = "fixef")
+
+
+# Time-varying slopes
+res_slopes = feols(y1 ~ x1 + x2 + x3 | fe2[I(x2+1)], data = base[7:48, ])
+sm_slopes = sparse_model_matrix(res_slopes, type = "fixef")
+
+# IV
+res_iv = feols(y1 ~ x1 | x2 ~ x3, base)
+
+sm_rhs1 = sparse_model_matrix(res_iv, type = "iv.rhs1")
+test(colnames(sm_rhs1)[-1], c("x3", "x1"))
+
+sm_rhs2 = sparse_model_matrix(res_iv, type = "iv.rhs2")
+test(colnames(sm_rhs2)[-1], c("fit_x2", "x1"))
+
+sm_endo = sparse_model_matrix(res_iv, type = "iv.endo")
+test(colnames(sm_endo), "x2")
+
+sm_exo  = sparse_model_matrix(res_iv, type = "iv.exo")
+test(colnames(sm_exo)[-1], "x1")
+
+sm_inst  = sparse_model_matrix(res_iv, type = "iv.inst")
+test(colnames(sm_inst), "x3")
+
+# several
+res_mult = feols(y1 ~ x1 | species | x2 ~ x3, base)
+
+sm_lhs_rhs_fixef = sparse_model_matrix(res_mult, type = c("lhs", "iv.rhs2", "fixef"))
+test(colnames(sm_lhs_rhs_fixef), c("y1", "fit_x2", "x1", "species::setosa", "species::versicolor", "species::virginica"))
+
+
+# non-linear model
+res_pois = fepois(Sepal.Length ~ Sepal.Width + Petal.Length | Species, iris)
+sp_pois = sparse_model_matrix(res_pois, data = iris)
+
+
+# make sure names are correct
+test_col_names <- function(est) {
+  m <- fixest::sparse_model_matrix(est, collin.rm = FALSE)
+  q <- test(all(names(coef(est)) %in% colnames(m)), TRUE)
+  return(invisible(NULL))
+}
+test_col_names(feols(mpg ~ i(am) + disp | vs, mtcars))
+test_col_names(feols(mpg ~ i(am, i.cyl) + disp | vs, mtcars))
+test_col_names(feols(mpg ~ i(am, hp) + disp | vs, mtcars))
+test_col_names(feols(mpg ~ i(am):hp + disp | vs, mtcars))
+test_col_names(feols(mpg ~ factor(am) + disp | vs, mtcars))
+test_col_names(feols(mpg ~ factor(am):factor(cyl) + disp | vs, mtcars, notes = FALSE))
+test_col_names(feols(mpg ~ factor(am):hp + disp | vs, mtcars))
+test_col_names(feols(mpg ~ poly(hp, degree = 2) + disp | vs, mtcars))
+
 
 ####
 #### update ####