Allow fixed effects in Gaussian feglm and pass demeaner_backend through

Dpananos · Dpananos · commit 7c79a3ecff29 · 2025-11-12T19:58:36.000-05:00
diff --git a/pyfixest/estimation/fegaussian_.py b/pyfixest/estimation/fegaussian_.py
@@ -6,6 +6,7 @@
 
 from pyfixest.estimation.feglm_ import Feglm
 from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.literals import DemeanerBackendOptions
 
 
 class Fegaussian(Feglm):
@@ -33,6 +34,7 @@ def __init__(
             "scipy.sparse.linalg.lsqr",
             "jax",
         ],
+        demeaner_backend: DemeanerBackendOptions = "numba",
         store_data: bool = True,
         copy_data: bool = True,
         lean: bool = False,
@@ -56,6 +58,7 @@ def __init__(
             tol=tol,
             maxiter=maxiter,
             solver=solver,
+            demeaner_backend=demeaner_backend,
             store_data=store_data,
             copy_data=copy_data,
             lean=lean,
diff --git a/pyfixest/estimation/feglm_.py b/pyfixest/estimation/feglm_.py
@@ -8,10 +8,10 @@
 from pyfixest.errors import (
     NonConvergenceError,
 )
-from pyfixest.estimation.demean_ import demean
 from pyfixest.estimation.feols_ import Feols, PredictionErrorOptions, PredictionType
 from pyfixest.estimation.fepois_ import _check_for_separation
 from pyfixest.estimation.FormulaParser import FixestFormula
+from pyfixest.estimation.literals import DemeanerBackendOptions
 from pyfixest.utils.dev_utils import DataFrameType
 
 
@@ -40,6 +40,7 @@ def __init__(
             "scipy.sparse.linalg.lsqr",
             "jax",
         ],
+        demeaner_backend: DemeanerBackendOptions = "numba",
         store_data: bool = True,
         copy_data: bool = True,
         lean: bool = False,
@@ -61,6 +62,7 @@ def __init__(
             fixef_maxiter=fixef_maxiter,
             lookup_demeaned_data=lookup_demeaned_data,
             solver=solver,
+            demeaner_backend=demeaner_backend,
             store_data=store_data,
             copy_data=copy_data,
             lean=lean,
@@ -96,7 +98,8 @@ def prepare_model_matrix(self):
         "Prepare model inputs for estimation."
         super().prepare_model_matrix()
 
-        if self._fe is not None:
+        # Fixed effects are currently supported only for the Gaussian family
+        if self._fe is not None and self._method != "feglm-gaussian":
             raise NotImplementedError("Fixed effects are not yet supported for GLMs.")
 
         # check for separation
@@ -314,7 +317,7 @@ def residualize(
         if flist is None:
             return v, X
         else:
-            vX_resid, success = demean(
+            vX_resid, success = self._demean_func(
                 x=np.c_[v, X], flist=flist, weights=weights, tol=tol, maxiter=maxiter
             )
             if success is False:
diff --git a/tests/test_errors.py b/tests/test_errors.py
@@ -813,6 +813,11 @@ def test_glm_errors():
         pf.feglm("Y ~ X1", data=data, family="logit")
 
     data["Y"] = np.where(data["Y"] > 0, 1, 0)
+
+    # Fixed effects are supported for the Gaussian family
+    pf.feglm("Y ~ X1 | f1", data=data, family="gaussian")
+
+    # But not for other families
     with pytest.raises(
         NotImplementedError, match=r"Fixed effects are not yet supported for GLMs."
     ):
diff --git a/tests/test_feols_feglm_internally.py b/tests/test_feols_feglm_internally.py
@@ -60,7 +60,6 @@ def test_ols_vs_gaussian_glm(fml, inference, dropna):
         check_absolute_diff(fit_ols._vcov, fit_gaussian._vcov, tol=1e-10)
 
 
-@pytest.mark.skip("Fixed effects are not yet supported.")
 @pytest.mark.parametrize("fml", fml_list)
 @pytest.mark.parametrize("family", ["gaussian"])
 def test_feols_feglm_internally(fml, family):
@@ -76,10 +75,15 @@ def test_feols_feglm_internally(fml, family):
         fml=fml2, data=data, family=family, ssc=pf.ssc(k_adj=False, G_adj=False)
     )
 
-    assert fit1.coef().xs("X1") == fit2.coef().xs("X1"), (
-        f"Test failed for fml = {fml} and family = gaussian"
-    )
-    assert fit1.se().xs("X1") == fit2.se().xs("X1"), (
-        f"Test failed for fml = {fml} and family = gaussian"
+    # Coefficients should match between C(f1) and | f1
+    check_absolute_diff(
+        fit1.coef().xs("X1"),
+        fit2.coef().xs("X1"),
+        tol=1e-10,
+        msg=f"Coefficients do not match for fml = {fml} and family = gaussian",
     )
-    assert fit1._u_hat[0:5]
+
+    # Note: Standard errors differ between C(f1) and | f1 due to different
+    # degrees of freedom adjustments. With C(f1), the fixed effects are
+    # estimated explicitly, while with | f1 they are absorbed.
+    # This is expected behavior, so we don't compare standard errors here.
diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py
@@ -125,6 +125,12 @@
     "Y ~ X1 + f1:X2",
 ]
 
+glm_fmls_with_fe = [  # formulas with fixed effects, used as the "fml" parametrization
+    "Y ~ X1 | f1",
+    "Y ~ X1 | f1 + f2",
+    "Y ~ X1 + X2 | f2",
+]
+
 
 @pytest.fixture(scope="module")
 def data_feols(N=1000, seed=76540251, beta_type="2", error_type="2"):
@@ -883,6 +889,71 @@ def test_glm_vs_fixest(N, seed, dropna, fml, inference, family):
     )
 
 
+@pytest.mark.against_r_core
+@pytest.mark.parametrize("N", [100])
+@pytest.mark.parametrize("seed", [172])
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize(
+    "fml",
+    glm_fmls_with_fe,
+)
+@pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}])
+def test_glm_with_fe_vs_fixest(N, seed, dropna, fml, inference):
+    """Test Gaussian GLM with fixed effects against R's fixest."""
+    data = pf.get_data(N=N, seed=seed)
+    if dropna:
+        data = data.dropna()
+
+    r_inference = _get_r_inference(inference)
+
+    # Fit the Gaussian GLM with pyfixest and with R's fixest for the same formula
+    fit_py = pf.feglm(fml=fml, data=data, family="gaussian", vcov=inference)
+    r_fml = _py_fml_to_r_fml(fml)
+    data_r = get_data_r(fml, data)
+
+    fit_r = fixest.feglm(
+        ro.Formula(r_fml), data=data_r, family=stats.gaussian(), vcov=r_inference
+    )
+
+    # Compare the full coefficient vectors
+    py_coefs = fit_py.coef()
+    r_coefs = stats.coef(fit_r)
+
+    check_absolute_diff(
+        py_coefs, r_coefs, 1e-05, "py_gaussian_coefs != r_gaussian_coefs"
+    )
+
+    # Compare standard errors (Python: X1 only; presumably _get_r_df is X1 too)
+    py_se = fit_py.se().xs("X1")
+    r_se = _get_r_df(fit_r)["std.error"]
+    check_absolute_diff(
+        py_se,
+        r_se,
+        1e-04,
+        f"py_gaussian_se != r_gaussian_se for inference {inference}",
+    )
+
+    # Compare only the [0, 0] entry of the variance-covariance matrices
+    py_vcov = fit_py._vcov[0, 0]
+    r_vcov = stats.vcov(fit_r)[0, 0]
+    check_absolute_diff(
+        py_vcov,
+        r_vcov,
+        1e-04,
+        f"py_gaussian_vcov != r_gaussian_vcov for inference {inference}",
+    )
+
+    # Compare the first five response residuals
+    py_resid_response = fit_py._u_hat_response
+    r_resid_response = stats.resid(fit_r, type="response")
+    check_absolute_diff(
+        py_resid_response[0:5],
+        r_resid_response[0:5],
+        1e-04,
+        f"py_gaussian_resid_response != r_gaussian_resid_response for inference {inference}",
+    )
+
+
 @pytest.mark.against_r_core
 @pytest.mark.parametrize("N", [100])
 @pytest.mark.parametrize("seed", [17021])