Skip to content

Commit

Permalink
Handle two-sided formulas in covariance_matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
stanmart committed Aug 18, 2023
1 parent 29e4710 commit 20824c2
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/glum/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1572,14 +1572,18 @@ def covariance_matrix(
"matrix will be incorrect."
)

cannot_estimate_cov = X is None or (
y is None and not hasattr(self, "y_model_spec_")
)

if not skip_checks:
if (X is None or y is None) and self.covariance_matrix_ is None:
if cannot_estimate_cov and self.covariance_matrix_ is None:
raise ValueError(
"Either X and y must be provided or the covariance matrix "
"must have been previously computed."
)

if (X is None or y is None) and store_covariance_matrix:
if cannot_estimate_cov and store_covariance_matrix:
raise ValueError(
"X and y must be provided if 'store_covariance_matrix' is True."
)
Expand Down Expand Up @@ -1607,6 +1611,10 @@ def covariance_matrix(
)
return self.covariance_matrix_

if hasattr(self, "y_model_spec_"):
y = self.y_model_spec_.get_model_matrix(X).A.ravel()
# This has to go first because X is modified in the next line

if isinstance(X, pd.DataFrame):
X = self._convert_from_pandas(X)

Expand Down
44 changes: 44 additions & 0 deletions tests/glm/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2268,6 +2268,50 @@ def test_store_covariance_matrix(
)


@pytest.mark.parametrize(
    "formula",
    [
        pytest.param("y ~ col_1 + col_2", id="two-sided"),
        pytest.param("col_1 + col_2", id="one-sided"),
    ],
)
def test_store_covariance_matrix_formula(regression_data, formula):
    """Check stored covariance results against recomputed ones for formulas.

    The model is fitted with ``store_covariance_matrix=True``; afterwards
    ``covariance_matrix`` and ``std_errors`` are each called once with the
    training data and once without arguments, and the two results must agree.

    For the two-sided formula the response is added to the data frame as
    column ``"y"`` and ``None`` is passed as ``y`` everywhere.
    """
    X, y = regression_data
    column_names = [f"col_{i}" for i in range(X.shape[1])]
    df = pd.DataFrame(X, columns=column_names)

    # A "~" marks a two-sided formula: the response is read from the frame,
    # so it is moved into ``df`` and no separate ``y`` is handed over.
    if "~" in formula:
        df["y"] = y
        y = None

    regressor = GeneralizedLinearRegressor(
        formula=formula, family="gaussian", alpha=0
    )
    regressor.fit(df, y, store_covariance_matrix=True)

    recomputed_cov = regressor.covariance_matrix(df, y)
    stored_cov = regressor.covariance_matrix()
    np.testing.assert_array_almost_equal(recomputed_cov, stored_cov)

    recomputed_se = regressor.std_errors(df, y)
    stored_se = regressor.std_errors()
    np.testing.assert_array_almost_equal(recomputed_se, stored_se)


def test_store_covariance_matrix_formula_errors(regression_data):
    """Expect a ``ValueError`` when only ``X`` is passed for a one-sided formula.

    The model is fitted without requesting that the covariance matrix be
    stored, and ``covariance_matrix`` is then called with the data frame but
    no ``y``. The call must raise with the "Either X and y must be provided"
    message.
    """
    X, y = regression_data
    column_names = [f"col_{i}" for i in range(X.shape[1])]
    df = pd.DataFrame(X, columns=column_names)

    regressor = GeneralizedLinearRegressor(
        formula="col_1 + col_2", family="gaussian", alpha=0
    )
    regressor.fit(df, y)

    # The formula has no left-hand side and no ``y`` is supplied here.
    with pytest.raises(ValueError, match="Either X and y must be provided"):
        regressor.covariance_matrix(df)


def test_store_covariance_matrix_errors(regression_data):
X, y = regression_data

Expand Down

0 comments on commit 20824c2

Please sign in to comment.