Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ pip install -e ".[dev]"

Imported as `import scEcho`:

- `Echo_states` — per-modality density estimation and cross-modality density
- `echo_states` — per-modality density estimation and cross-modality density
comparison; writes per-cell direction labels into `.obs`.
- `Echo_features` — feature-level desynchronization pipeline (imputation,
- `echo_features` — feature-level desynchronization pipeline (imputation,
per-feature statistics, null-model significance testing).
- `plotting` — visualization (volcano plots, linked side-by-side embeddings,
per-group direction fractions).
Expand All @@ -68,8 +68,8 @@ Imported as `import scEcho`:
## Usage

See [`notebooks/example.ipynb`](notebooks/example.ipynb) for the canonical end-to-end pipeline. The basic
shape is `scEcho.Echo_states.dn_comp_obsm(adata, ...)` followed by
`scEcho.Echo_features.run_echo_features(adata, ...)`; see each function's
shape is `scEcho.echo_states.dn_comp_obsm(adata, ...)` followed by
`scEcho.echo_features.run_echo_features(adata, ...)`; see each function's
docstring for the required `.obsm` / `.obs` / `.layers` keys.

## License
Expand Down
6 changes: 3 additions & 3 deletions notebooks/example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@
}
],
"source": [
"scEcho.Echo_states.dn_comp_obsm(retina_ad,\n",
"scEcho.echo_states.dn_comp_obsm(retina_ad,\n",
" obsm_key1='DM_EigenVectors_RNA',\n",
" obsm_key2='DM_EigenVectors_ATAC', \n",
" pval_threshold=0.05,\n",
Expand Down Expand Up @@ -707,7 +707,7 @@
}
],
"source": [
"scEcho.Echo_features.run_echo_features(\n",
"scEcho.echo_features.run_echo_features(\n",
" retina_ad,\n",
" obs_col=\"combo_type\",\n",
" layers=[\"RNA_lognorm_counts\"],\n",
Expand Down Expand Up @@ -1331,7 +1331,7 @@
}
],
"source": [
"MPC_res = scEcho.Echo_features.get_reconstruction_results(retina_ad,\n",
"MPC_res = scEcho.echo_features.get_reconstruction_results(retina_ad,\n",
" \"RNA_lognorm_counts\",\n",
" grouping = \"combo_type\",\n",
" group = \"MPC\",\n",
Expand Down
6 changes: 3 additions & 3 deletions src/scEcho/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

__author__ = "Connor Finkbeiner"

from . import Echo_features, Echo_states, plotting, utils
from . import echo_features, echo_states, plotting, utils

__all__ = [
"Echo_states",
"Echo_features",
"echo_states",
"echo_features",
"plotting",
"utils",
"__version__",
Expand Down
139 changes: 73 additions & 66 deletions src/scEcho/Echo_features.py → src/scEcho/echo_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,55 @@ def embeddings_predict_layer(






def _compute_group_mhd_and_stats(
ad,
ind,
layer,
layer_for_lfc,
embedding1,
embedding2,
diagonal_variance,
):
"""Per-group Mahalanobis distance with graceful fallback when covariance keys are absent.

Pulled out of ``get_desynch_stats`` and ``run_null_desynch_test`` to remove
~80 LOC of duplication and the silent-divergence risk between the two
near-identical blocks.

``layer`` is the namespace of the predicted-space uncertainty keys in
``ad.obsp`` (e.g. ``layer`` for the observed pass, ``null_layer`` for the
null pass). ``layer_for_lfc`` is the namespace of the precomputed LFC layer
in ``ad.layers`` (always the *observed* layer in current call sites, since
the null path reuses the observed LFC values against the null covariance).

Returns either a ``float64`` ``(n_group,)`` array of Mahalanobis distances,
or ``np.nan`` (with a ``UserWarning``) when the predicted-covariance keys
for either embedding are not present in ``ad.obsp``.
"""
unc_key1 = f"predicted_{layer}_{embedding1}_space_uncertainty"
unc_key2 = f"predicted_{layer}_{embedding2}_space_uncertainty"
if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp):
ix = np.ix_(ind.values, ind.values)
unc1 = ad.obsp[unc_key1][ix]
unc2 = ad.obsp[unc_key2][ix]
# Kompot handles Cholesky stabilization internally (eps=1e-8 default).
return compute_mahalanobis_distances(
diff_values=ad[ind].layers[f"predicted_{layer_for_lfc}_LFC_{embedding1}_v_{embedding2}"].T,
covariance=unc1 + unc2,
diagonal_variance=diagonal_variance,
)
missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp]
warnings.warn(
f"Posterior covariance keys not found in ad.obsp — Mahalanobis "
f"distance skipped for this group. To enable, rerun "
f"embeddings_predict_layer with save_covariance=True.\n"
f"\tMissing: {missing_unc}"
)
return np.nan


def get_desynch_stats(
ad: anndata.AnnData,
obs_col: str,
Expand Down Expand Up @@ -310,37 +358,14 @@ def get_desynch_stats(
)
diagonal_variance = None

# Model uncertainty — guarded against missing keys (when
# save_covariance=False was passed to embeddings_predict_layer) and
# indexed via the underlying obsp ndarray rather than a boolean-
# masked AnnData view (avoids ImplicitModificationWarning on
# AnnData ≥0.8).
unc_key1 = f"predicted_{layer}_{embedding1}_space_uncertainty"
unc_key2 = f"predicted_{layer}_{embedding2}_space_uncertainty"

if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp):
ix = np.ix_(ind.values, ind.values)
unc1 = ad.obsp[unc_key1][ix]
unc2 = ad.obsp[unc_key2][ix]

# ── Mahalanobis distance ───────────────────────────────────────────
# Kompot handles Cholesky stabilization internally (eps=1e-8 default).
res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = compute_mahalanobis_distances(
diff_values=ad[ind].layers[f"predicted_{layer}_LFC_{embedding1}_v_{embedding2}"].T,
covariance=unc1 + unc2,
diagonal_variance=diagonal_variance,
)
else:
missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp]
warnings.warn(
f"Posterior covariance keys not found in ad.obsp — Mahalanobis "
f"distance skipped for group '{c}'. To enable, rerun "
f"embeddings_predict_layer with save_covariance=True.\n"
f"\tMissing: {missing_unc}"
)
res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = np.nan


# Model uncertainty + Mahalanobis distance (guarded against missing
# obsp keys when save_covariance=False was passed to
# embeddings_predict_layer).
res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = _compute_group_mhd_and_stats(
ad, ind, layer, layer, embedding1, embedding2, diagonal_variance,
)



# ── Additional per-group layer statistics ──────────────────────────────

Expand Down Expand Up @@ -464,6 +489,7 @@ def run_null_desynch_test(
eps: float = 1e-16,
save_predictions: bool = True,
save_covariance: bool = True,
direction_colors: Sequence[str] = ("#ff7f0e", "#1f77b4", "lightgrey"),
) -> None:
"""Run a null model test for desynchronization statistics.

Expand All @@ -488,6 +514,13 @@ def run_null_desynch_test(
Two-sided p-value threshold for significance.
random_state : int
Random seed for null layer shuffling.
direction_colors : sequence of str, optional
Three colors written to
``ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"]``,
in the order ``({modality2}-structure, {modality1}-structure,
not-significant)`` to match the ordered ``CategoricalDtype`` of the
per-feature direction column. Defaults to
``("#ff7f0e", "#1f77b4", "lightgrey")``.

Returns
-------
Expand Down Expand Up @@ -683,45 +716,19 @@ def run_null_desynch_test(
)
diagonal_variance = None

# Model uncertainty — guarded against missing keys (when
# save_covariance=False was passed to embeddings_predict_layer) and
# indexed via the underlying obsp ndarray rather than a boolean-
# masked AnnData view (avoids ImplicitModificationWarning on
# AnnData ≥0.8).
unc_key1 = f"predicted_{null_layer}_{embedding1}_space_uncertainty"
unc_key2 = f"predicted_{null_layer}_{embedding2}_space_uncertainty"

diff = ad[ind].layers[f"predicted_{null_layer}_{embedding1}_space_residuals"] - ad[ind].layers[f"predicted_{null_layer}_{embedding2}_space_residuals"]

if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp):
ix = np.ix_(ind.values, ind.values)
unc1 = ad.obsp[unc_key1][ix]
unc2 = ad.obsp[unc_key2][ix]

# ── Mahalanobis distance ───────────────────────────────────────────
# Kompot handles Cholesky stabilization internally (eps=1e-8 default).
res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = compute_mahalanobis_distances(
diff_values=ad[ind].layers[f"predicted_{layer}_LFC_{embedding1}_v_{embedding2}"].T,
covariance=unc1 + unc2,
diagonal_variance=diagonal_variance,
)
else:
missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp]
warnings.warn(
f"Posterior covariance keys not found in ad.obsp — null "
f"Mahalanobis distance skipped for group '{c}'. To enable, "
f"rerun embeddings_predict_layer with save_covariance=True.\n"
f"\tMissing: {missing_unc}"
)
res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = np.nan
# Model uncertainty + Mahalanobis distance against the null covariance.
# Uncertainty keys live in the null namespace; the LFC layer is the
# observed one (the null pass reuses observed LFC values against the
# null covariance).
res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = _compute_group_mhd_and_stats(
ad, ind, null_layer, layer, embedding1, embedding2, diagonal_variance,
)



# ── Store direction colors in uns ─────────────────────────────────────────

ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"] = [
"#ff7f0e", "#1f77b4", "lightgrey"
]
ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"] = list(direction_colors)



Expand Down
20 changes: 17 additions & 3 deletions src/scEcho/Echo_states.py → src/scEcho/echo_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import warnings
from typing import Optional
from typing import Optional, Sequence

import anndata
import kompot
Expand Down Expand Up @@ -31,6 +31,8 @@ def dn_comp_obsm(
optimizer: Optional[str] = None,
sample_grouping_col: Optional[str] = None,
sv_min_cells: int = 200,
sqrt_eps: float = 1e-16,
direction_colors: Sequence[str] = ("#ff7f0e", "#1f77b4", "lightgrey"),
) -> None:
"""Compare density between two embeddings in separate spaces.

Expand Down Expand Up @@ -65,6 +67,18 @@ def dn_comp_obsm(
datasets).
sample_grouping_col : str, optional
Column for sample groupings. If specified, includes sample variance.
sqrt_eps : float, optional
Small constant added inside ``np.sqrt(variance_model + sqrt_eps)`` when
computing the standard deviation used for z-scoring the density LFC.
Defaults to ``1e-16``; empirically the variance term is well above
zero on tested pipelines (min observed: 0.0225), so the default has
negligible numerical effect — exposed for users who want to disable
the floor (``sqrt_eps=0``) or raise it.
direction_colors : sequence of str, optional
Three colors written to ``ad.uns[f"{direction_key}_colors"]``, in the
order ``(modality2-higher, modality1-higher, neutral)`` to match the
ordered ``CategoricalDtype`` of the direction column. Defaults to
``("#ff7f0e", "#1f77b4", "lightgrey")``.

Returns
-------
Expand Down Expand Up @@ -173,7 +187,7 @@ def dn_comp_obsm(
)


ad.obs[sd_key] = np.sqrt(variance_model + 1e-16)
ad.obs[sd_key] = np.sqrt(variance_model + sqrt_eps)


# ── Compute Z-scores and p-values ─────────────────────────────────────────
Expand Down Expand Up @@ -212,4 +226,4 @@ def dn_comp_obsm(
ordered=True,
)
ad.obs[direction_key] = ad.obs[direction_key].astype(cat_type)
ad.uns[f"{direction_key}_colors"] = ["#ff7f0e", "#1f77b4", "lightgrey"]
ad.uns[f"{direction_key}_colors"] = list(direction_colors)
11 changes: 9 additions & 2 deletions src/scEcho/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@
]


# Defaults forwarded to ``adjustText.adjust_text`` from ``plot_scores``. Lifted
# out of the call site so callers can override any key (e.g. arrowprops style,
# additional repulsion knobs) via the ``**adjust_text_kwargs`` catch-all on
# ``plot_scores`` — keys passed by the caller override these defaults.
_DEFAULT_ADJUST_TEXT_KWARGS = {
"arrowprops": {"arrowstyle": "-", "color": "black", "lw": 0.5},
}


def plot_scores(
ad: anndata.AnnData,
Expand Down Expand Up @@ -213,13 +221,12 @@ def plot_scores(
x=plot_df[var_exp_col].values,
y=plot_df[MHD_col].values,
ax=ax,
arrowprops=dict(arrowstyle="-", color="black", lw=0.5),
expand=expand,
force_text=force_text,
force_points=force_points,
max_move=max_move,
iter_lim=iter_lim,
**adjust_text_kwargs,
**{**_DEFAULT_ADJUST_TEXT_KWARGS, **adjust_text_kwargs},
)

return ax
Expand Down
2 changes: 1 addition & 1 deletion src/scEcho/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from scipy.stats import spearmanr
from tqdm.auto import tqdm

from .Echo_features import embeddings_predict_layer
from .echo_features import embeddings_predict_layer

__all__ = [
# existing utils
Expand Down
4 changes: 2 additions & 2 deletions tests/test_determinism.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def test_dn_comp_obsm_is_deterministic():
a1 = _build_adata(seed=0)
a2 = _build_adata(seed=0)

scEcho.Echo_states.dn_comp_obsm(a1, ls_factor=2, log_fold_change_threshold=0.5)
scEcho.Echo_states.dn_comp_obsm(a2, ls_factor=2, log_fold_change_threshold=0.5)
scEcho.echo_states.dn_comp_obsm(a1, ls_factor=2, log_fold_change_threshold=0.5)
scEcho.echo_states.dn_comp_obsm(a2, ls_factor=2, log_fold_change_threshold=0.5)

numeric_cols = [
"log_density_RNA",
Expand Down
Loading