Skip to content

Commit 6777e3d

Browse files
Authored commit: removes aliasing (#1075)

* removes aliasing
* restart CI
* revert a change to get codecov to pass
1 parent 670c6d7 commit 6777e3d

File tree

3 files changed

+58
-102
lines changed

3 files changed

+58
-102
lines changed

pyfixest/estimation/FixestMulti_.py

Lines changed: 31 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -285,34 +285,13 @@ def _estimate_all_models(
285285
-------
286286
None
287287
"""
288-
_is_iv = self._is_iv
289-
_data = self._data
290-
_method = self._method
291-
_drop_singletons = self._drop_singletons
292-
_ssc_dict = self._ssc_dict
293-
_drop_intercept = self._drop_intercept
294-
_weights = self._weights
295-
_fixef_tol = self._fixef_tol
296-
_fixef_maxiter = self._fixef_maxiter
297-
_weights_type = self._weights_type
298-
_lean = self._lean
299-
_store_data = self._store_data
300-
_copy_data = self._copy_data
301-
_run_split = self._run_split
302-
_run_full = self._run_full
303-
_splitvar = self._splitvar
304-
_context = self._context
305-
_quantreg_method = self._quantreg_method
306-
_quantreg_multi_method = self._quantreg_multi_method
307-
_quantile = self._quantile
308-
_quantile_tol = self._quantile_tol
309-
_quantile_maxiter = self._quantile_maxiter
310-
311288
FixestFormulaDict = self.FixestFormulaDict
312289
_fixef_keys = list(FixestFormulaDict.keys())
313290

314-
all_splits = (["all"] if _run_full else []) + (
315-
_data[_splitvar].dropna().unique().tolist() if _run_split else []
291+
all_splits = (["all"] if self._run_full else []) + (
292+
self._data[self._splitvar].dropna().unique().tolist()
293+
if self._run_split
294+
else []
316295
)
317296

318297
for sample_split_value in all_splits:
@@ -341,33 +320,33 @@ def _estimate_all_models(
341320

342321
model_kwargs = {
343322
"FixestFormula": FixestFormula,
344-
"data": _data,
345-
"ssc_dict": _ssc_dict,
346-
"drop_singletons": _drop_singletons,
347-
"drop_intercept": _drop_intercept,
348-
"weights": _weights,
349-
"weights_type": _weights_type,
323+
"data": self._data,
324+
"ssc_dict": self._ssc_dict,
325+
"drop_singletons": self._drop_singletons,
326+
"drop_intercept": self._drop_intercept,
327+
"weights": self._weights,
328+
"weights_type": self._weights_type,
350329
"solver": solver,
351330
"collin_tol": collin_tol,
352-
"fixef_tol": _fixef_tol,
353-
"fixef_maxiter": _fixef_maxiter,
354-
"store_data": _store_data,
355-
"copy_data": _copy_data,
356-
"lean": _lean,
357-
"context": _context,
331+
"fixef_tol": self._fixef_tol,
332+
"fixef_maxiter": self._fixef_maxiter,
333+
"store_data": self._store_data,
334+
"copy_data": self._copy_data,
335+
"lean": self._lean,
336+
"context": self._context,
358337
"sample_split_value": sample_split_value,
359-
"sample_split_var": _splitvar,
338+
"sample_split_var": self._splitvar,
360339
"lookup_demeaned_data": lookup_demeaned_data,
361340
}
362341

363-
if _method in {"feols", "fepois"}:
342+
if self._method in {"feols", "fepois"}:
364343
model_kwargs.update(
365344
{
366345
"demeaner_backend": demeaner_backend,
367346
}
368347
)
369348

370-
if _method in {
349+
if self._method in {
371350
"fepois",
372351
"feglm-logit",
373352
"feglm-probit",
@@ -381,20 +360,20 @@ def _estimate_all_models(
381360
}
382361
)
383362

384-
if _method in ["quantreg", "quantreg_multi"]:
363+
if self._method in ["quantreg", "quantreg_multi"]:
385364
model_kwargs.update(
386365
{
387-
"quantile": _quantile,
388-
"method": _quantreg_method,
389-
"quantile_tol": _quantile_tol,
390-
"quantile_maxiter": _quantile_maxiter,
366+
"quantile": self._quantile,
367+
"method": self._quantreg_method,
368+
"quantile_tol": self._quantile_tol,
369+
"quantile_maxiter": self._quantile_maxiter,
391370
"seed": self._seed,
392371
}
393372
)
394-
if _method == "quantreg_multi":
373+
if self._method == "quantreg_multi":
395374
model_kwargs.update(
396375
{
397-
"multi_method": _quantreg_multi_method,
376+
"multi_method": self._quantreg_multi_method,
398377
}
399378
)
400379

@@ -410,7 +389,7 @@ def _estimate_all_models(
410389
("quantreg_multi", None): QuantregMulti,
411390
}
412391

413-
if _method == "compression":
392+
if self._method == "compression":
414393
model_kwargs.update(
415394
{
416395
"reps": self._reps,
@@ -419,7 +398,9 @@ def _estimate_all_models(
419398
)
420399

421400
model_key = (
422-
(_method, _is_iv) if _method == "feols" else (_method, None)
401+
(self._method, self._is_iv)
402+
if self._method == "feols"
403+
else (self._method, None)
423404
)
424405
ModelClass = model_map[model_key] # type: ignore
425406
FIT = ModelClass(**model_kwargs)
@@ -448,7 +429,7 @@ def _estimate_all_models(
448429
) # a little hacky, but works
449430

450431
FIT.get_inference()
451-
if _method == "feols" and not FIT._is_iv:
432+
if self._method == "feols" and not FIT._is_iv:
452433
FIT.get_performance()
453434
if isinstance(FIT, Feiv):
454435
FIT.first_stage()

pyfixest/estimation/feiv_.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -240,22 +240,16 @@ def drop_multicol_vars(self) -> None:
240240

241241
def get_fit(self) -> None:
242242
"""Fit a IV model using a 2SLS estimator."""
243-
_X = self._X
244-
_Z = self._Z
245-
_Y = self._Y
246-
247-
_solver = self._solver
248-
249243
# Start Second Stage
250-
self._tZX = _Z.T @ _X
251-
self._tXZ = _X.T @ _Z
252-
self._tZy = _Z.T @ _Y
253-
self._tZZinv = np.linalg.inv(_Z.T @ _Z)
244+
self._tZX = self._Z.T @ self._X
245+
self._tXZ = self._X.T @ self._Z
246+
self._tZy = self._Z.T @ self._Y
247+
self._tZZinv = np.linalg.inv(self._Z.T @ self._Z)
254248

255249
H = self._tXZ @ self._tZZinv
256250
A = H @ self._tZX
257251
B = H @ self._tZy
258-
self._beta_hat = solve_ols(A, B, _solver)
252+
self._beta_hat = solve_ols(A, B, self._solver)
259253

260254
# residuals
261255
self._u_hat = self._Y.flatten() - (self._X @ self._beta_hat).flatten()
@@ -452,11 +446,6 @@ def IV_weakness_test(self, iv_diag_statistics: Optional[list[str]] = None) -> No
452446

453447
# Create an identity matrix of size p_iv by p_iv
454448
# Pad the identity matrix with zeros to make it of size p_iv by k
455-
p_iv = self._p_iv # number of IVs
456-
k = (
457-
self._model_1st_stage._k
458-
) # number of estimated coefficients of 1st stage
459-
460449
# Extract all the IV indexes and its first index
461450
self._iv_loc = [
462451
self._coefnames_z.index(x)
@@ -470,8 +459,8 @@ def IV_weakness_test(self, iv_diag_statistics: Optional[list[str]] = None) -> No
470459
# H1 : H0 does not hold
471460

472461
# Pad identity matrix to implement wald-test
473-
R = np.zeros((p_iv, k))
474-
R[:, self._iv_loc] = np.eye(p_iv)
462+
R = np.zeros((self._p_iv, self._model_1st_stage._k))
463+
R[:, self._iv_loc] = np.eye(self._p_iv)
475464

476465
with warnings.catch_warnings():
477466
warnings.simplefilter("ignore")

pyfixest/estimation/feols_compressed_.py

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -262,53 +262,41 @@ def vcov(
262262
super().vcov(vcov=vcov, vcov_kwargs=vcov_kwargs, data=data)
263263

264264
def _vcov_iid(self):
265-
_N = self._N
266-
_bread = self._bread
267-
268265
weights = self._compression_count.to_numpy()
269-
Yprime = self._Yprime.to_numpy()
270-
Yprimeprime = self._Yprimeprime.to_numpy()
266+
yprime = self._Yprime.to_numpy()
267+
yprimeprime = self._Yprimeprime.to_numpy()
271268
X = self._X / np.sqrt(weights)
272-
beta_hat = self._beta_hat
273-
yhat = (X @ beta_hat).reshape(-1, 1)
274-
rss_g = (yhat**2) * weights - 2 * yhat * Yprime + Yprimeprime
275-
sigma2 = np.sum(rss_g) / (_N - 1)
276-
277-
_vcov = _bread * sigma2
269+
yhat = (X @ self._beta_hat).reshape(-1, 1)
270+
rss_g = (yhat**2) * weights - 2 * yhat * yprime + yprimeprime
271+
sigma2 = np.sum(rss_g) / (self._N - 1)
278272

279-
return _vcov
273+
return self._bread * sigma2
280274

281275
def _vcov_hetero(self):
282-
_vcov_type_detail = self._vcov_type_detail
283-
_bread = self._bread
284-
285-
if _vcov_type_detail in ["HC2", "HC3"]:
276+
if self._vcov_type_detail in ["HC2", "HC3"]:
286277
raise NotImplementedError(
287-
f"Only HC1 robust inference is supported, but {_vcov_type_detail} was specified."
278+
f"Only HC1 robust inference is supported, but {self._vcov_type_detail} was specified."
288279
)
289280

290281
yprime = self._Yprime.to_numpy()
291282
yprimeprime = self._Yprimeprime.to_numpy()
292283
weights = self._compression_count.to_numpy()
293284
X = self._X / np.sqrt(weights)
294-
beta_hat = self._beta_hat
295-
yhat = (X @ beta_hat).reshape(-1, 1)
285+
yhat = (X @ self._beta_hat).reshape(-1, 1)
296286
rss_g = (yhat**2) * weights - 2 * yhat * yprime + yprimeprime
297287

298288
_meat = (X * rss_g).T @ X
299289

300-
return _bread @ _meat @ _bread
290+
return self._bread @ _meat @ self._bread
301291

302292
def _vcov_crv1(self, clustid: np.ndarray, cluster_col: np.ndarray):
303-
_data_long_nw = self._data_long
304-
305-
X_long = _data_long_nw.select(self._coefnames).to_numpy()
306-
Y_long = _data_long_nw.select(self._depvar).to_numpy()
293+
X_long = self._data_long.select(self._coefnames).to_numpy()
294+
Y_long = self._data_long.select(self._depvar).to_numpy()
307295

308296
yhat = X_long @ self._beta_hat
309297
uhat = Y_long.flatten() - yhat
310298

311-
_data_long_nw = _data_long_nw.with_columns(
299+
data_long = self._data_long.with_columns(
312300
[
313301
nw.lit(yhat.tolist()).alias("yhat"),
314302
nw.lit(uhat.tolist()).alias("uhat"),
@@ -317,27 +305,25 @@ def _vcov_crv1(self, clustid: np.ndarray, cluster_col: np.ndarray):
317305
]
318306
)
319307

320-
boot_iter = self._reps
321308
rng = np.random.default_rng(self._seed)
322309

323-
assert boot_iter is not None, "boot_iter must not be None"
310+
assert self._reps is not None, "boot_iter must not be None"
324311
assert self._k is not None, "self._k must not be None"
325-
beta_boot = np.zeros((boot_iter, self._k))
312+
beta_boot = np.zeros((self._reps, self._k))
326313

327-
clustervar = self._clustervar
328-
cluster = _data_long_nw[clustervar]
314+
cluster = data_long[self._clustervar]
329315
cluster_ids = np.sort(np.unique(cluster).astype(np.int32))
330-
_data_long_nw = _data_long_nw.with_columns(nw.col(clustervar[0]).cast(nw.Int32))
316+
data_long = data_long.with_columns(nw.col(self._clustervar[0]).cast(nw.Int32))
331317

332-
for b in tqdm(range(boot_iter)):
318+
for b in tqdm(range(self._reps)):
333319
boot_df = nw.from_native(
334320
{
335321
"coin_flip": rng.integers(0, 2, size=len(cluster_ids)),
336-
f"{clustervar[0]}": cluster_ids,
322+
f"{self._clustervar[0]}": cluster_ids,
337323
}
338324
)
339325

340-
df_boot = _data_long_nw.join(boot_df, on=f"{clustervar[0]}", how="left")
326+
df_boot = data_long.join(boot_df, on=f"{self._clustervar[0]}", how="left")
341327
df_boot = df_boot.with_columns(
342328
[
343329
nw.when(nw.col("coin_flip") == 1)

0 commit comments

Comments
 (0)