|
125 | 125 | "Y ~ X1 + f1:X2", |
126 | 126 | ] |
127 | 127 |
|
| 128 | +glm_fmls_with_fe = [ |
| 129 | + "Y ~ X1 | f1", |
| 130 | + "Y ~ X1 | f1 + f2", |
| 131 | + "Y ~ X1 + X2 | f2", |
| 132 | +] |
| 133 | + |
128 | 134 |
|
129 | 135 | @pytest.fixture(scope="module") |
130 | 136 | def data_feols(N=1000, seed=76540251, beta_type="2", error_type="2"): |
@@ -883,6 +889,71 @@ def test_glm_vs_fixest(N, seed, dropna, fml, inference, family): |
883 | 889 | ) |
884 | 890 |
|
885 | 891 |
|
| 892 | +@pytest.mark.against_r_core |
| 893 | +@pytest.mark.parametrize("N", [100]) |
| 894 | +@pytest.mark.parametrize("seed", [172]) |
| 895 | +@pytest.mark.parametrize("dropna", [True, False]) |
| 896 | +@pytest.mark.parametrize( |
| 897 | + "fml", |
| 898 | + glm_fmls_with_fe, |
| 899 | +) |
| 900 | +@pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}]) |
| 901 | +def test_glm_with_fe_vs_fixest(N, seed, dropna, fml, inference): |
| 902 | + """Test Gaussian GLM with fixed effects against R's fixest.""" |
| 903 | + data = pf.get_data(N=N, seed=seed) |
| 904 | + if dropna: |
| 905 | + data = data.dropna() |
| 906 | + |
| 907 | + r_inference = _get_r_inference(inference) |
| 908 | + |
| 909 | + # Fit models for Gaussian family |
| 910 | + fit_py = pf.feglm(fml=fml, data=data, family="gaussian", vcov=inference) |
| 911 | + r_fml = _py_fml_to_r_fml(fml) |
| 912 | + data_r = get_data_r(fml, data) |
| 913 | + |
| 914 | + fit_r = fixest.feglm( |
| 915 | + ro.Formula(r_fml), data=data_r, family=stats.gaussian(), vcov=r_inference |
| 916 | + ) |
| 917 | + |
| 918 | + # Compare coefficients |
| 919 | + py_coefs = fit_py.coef() |
| 920 | + r_coefs = stats.coef(fit_r) |
| 921 | + |
| 922 | + check_absolute_diff( |
| 923 | + py_coefs, r_coefs, 1e-05, "py_gaussian_coefs != r_gaussian_coefs" |
| 924 | + ) |
| 925 | + |
| 926 | + # Compare standard errors |
| 927 | + py_se = fit_py.se().xs("X1") |
| 928 | + r_se = _get_r_df(fit_r)["std.error"] |
| 929 | + check_absolute_diff( |
| 930 | + py_se, |
| 931 | + r_se, |
| 932 | + 1e-04, |
| 933 | + f"py_gaussian_se != r_gaussian_se for inference {inference}", |
| 934 | + ) |
| 935 | + |
| 936 | + # Compare variance-covariance matrices |
| 937 | + py_vcov = fit_py._vcov[0, 0] |
| 938 | + r_vcov = stats.vcov(fit_r)[0, 0] |
| 939 | + check_absolute_diff( |
| 940 | + py_vcov, |
| 941 | + r_vcov, |
| 942 | + 1e-04, |
| 943 | + f"py_gaussian_vcov != r_gaussian_vcov for inference {inference}", |
| 944 | + ) |
| 945 | + |
| 946 | + # Compare residuals - response |
| 947 | + py_resid_response = fit_py._u_hat_response |
| 948 | + r_resid_response = stats.resid(fit_r, type="response") |
| 949 | + check_absolute_diff( |
| 950 | + py_resid_response[0:5], |
| 951 | + r_resid_response[0:5], |
| 952 | + 1e-04, |
| 953 | + f"py_gaussian_resid_response != r_gaussian_resid_response for inference {inference}", |
| 954 | + ) |
| 955 | + |
| 956 | + |
886 | 957 | @pytest.mark.against_r_core |
887 | 958 | @pytest.mark.parametrize("N", [100]) |
888 | 959 | @pytest.mark.parametrize("seed", [17021]) |
|
0 commit comments