settylab · katosh · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/README.md b/README.md
@@ -56,9 +56,9 @@ pip install -e ".[dev]"
 
 Imported as `import scEcho`:
 
-- `Echo_states` — per-modality density estimation and cross-modality density
+- `echo_states` — per-modality density estimation and cross-modality density
   comparison; writes per-cell direction labels into `.obs`.
-- `Echo_features` — feature-level desynchronization pipeline (imputation,
+- `echo_features` — feature-level desynchronization pipeline (imputation,
   per-feature statistics, null-model significance testing).
 - `plotting` — visualization (volcano plots, linked side-by-side embeddings,
   per-group direction fractions).
@@ -68,8 +68,8 @@ Imported as `import scEcho`:
 ## Usage
 
 See [`notebooks/example.ipynb`](notebooks/example.ipynb) for the canonical end-to-end pipeline. The basic
-shape is `scEcho.Echo_states.dn_comp_obsm(adata, ...)` followed by
-`scEcho.Echo_features.run_echo_features(adata, ...)`; see each function's
+shape is `scEcho.echo_states.dn_comp_obsm(adata, ...)` followed by
+`scEcho.echo_features.run_echo_features(adata, ...)`; see each function's
 docstring for the required `.obsm` / `.obs` / `.layers` keys.
 
 ## License

diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb
@@ -208,7 +208,7 @@
     }
    ],
    "source": [
-    "scEcho.Echo_states.dn_comp_obsm(retina_ad,\n",
+    "scEcho.echo_states.dn_comp_obsm(retina_ad,\n",
     "    obsm_key1='DM_EigenVectors_RNA',\n",
     "    obsm_key2='DM_EigenVectors_ATAC',                     \n",
     "    pval_threshold=0.05,\n",
@@ -707,7 +707,7 @@
     }
    ],
    "source": [
-    "scEcho.Echo_features.run_echo_features(\n",
+    "scEcho.echo_features.run_echo_features(\n",
     "    retina_ad,\n",
     "    obs_col=\"combo_type\",\n",
     "    layers=[\"RNA_lognorm_counts\"],\n",
@@ -1331,7 +1331,7 @@
     }
    ],
    "source": [
-    "MPC_res = scEcho.Echo_features.get_reconstruction_results(retina_ad,\n",
+    "MPC_res = scEcho.echo_features.get_reconstruction_results(retina_ad,\n",
     "                                         \"RNA_lognorm_counts\",\n",
     "                                         grouping = \"combo_type\",\n",
     "                                         group = \"MPC\",\n",

diff --git a/src/scEcho/__init__.py b/src/scEcho/__init__.py
@@ -7,11 +7,11 @@
 
 __author__ = "Connor Finkbeiner"
 
-from . import Echo_features, Echo_states, plotting, utils
+from . import echo_features, echo_states, plotting, utils
 
 __all__ = [
-    "Echo_states",
-    "Echo_features",
+    "echo_states",
+    "echo_features",
     "plotting",
     "utils",
     "__version__",

diff --git a/src/scEcho/Echo_features.py → src/scEcho/echo_features.py b/src/scEcho/Echo_features.py → src/scEcho/echo_features.py
@@ -180,7 +180,55 @@ def embeddings_predict_layer(
 
 
 
-
+
+
+def _compute_group_mhd_and_stats(
+    ad,
+    ind,
+    layer,
+    layer_for_lfc,
+    embedding1,
+    embedding2,
+    diagonal_variance,
+):
+    """Per-group Mahalanobis distance with graceful fallback when covariance keys are absent.
+
+    Pulled out of ``get_desynch_stats`` and ``run_null_desynch_test`` to remove
+    ~80 LOC of duplication and the silent-divergence risk between the two
+    near-identical blocks.
+
+    ``layer`` is the namespace of the predicted-space uncertainty keys in
+    ``ad.obsp`` (e.g. ``layer`` for the observed pass, ``null_layer`` for the
+    null pass). ``layer_for_lfc`` is the namespace of the precomputed LFC layer
+    in ``ad.layers`` (always the *observed* layer in current call sites, since
+    the null path reuses the observed LFC values against the null covariance).
+
+    Returns either a ``float64`` ``(n_group,)`` array of Mahalanobis distances,
+    or ``np.nan`` (with a ``UserWarning``) when the predicted-covariance keys
+    for either embedding are not present in ``ad.obsp``.
+    """
+    unc_key1 = f"predicted_{layer}_{embedding1}_space_uncertainty"
+    unc_key2 = f"predicted_{layer}_{embedding2}_space_uncertainty"
+    if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp):
+        ix = np.ix_(ind.values, ind.values)
+        unc1 = ad.obsp[unc_key1][ix]
+        unc2 = ad.obsp[unc_key2][ix]
+        # Kompot handles Cholesky stabilization internally (eps=1e-8 default).
+        return compute_mahalanobis_distances(
+            diff_values=ad[ind].layers[f"predicted_{layer_for_lfc}_LFC_{embedding1}_v_{embedding2}"].T,
+            covariance=unc1 + unc2,
+            diagonal_variance=diagonal_variance,
+        )
+    missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp]
+    warnings.warn(
+        f"Posterior covariance keys not found in ad.obsp — Mahalanobis "
+        f"distance skipped for this group. To enable, rerun "
+        f"embeddings_predict_layer with save_covariance=True.\n"
+        f"\tMissing: {missing_unc}"
+    )
+    return np.nan
+
+
 def get_desynch_stats(
     ad: anndata.AnnData,
     obs_col: str,
@@ -310,37 +358,14 @@ def get_desynch_stats(
             )
             diagonal_variance = None
 
-        # Model uncertainty — guarded against missing keys (when
-        # save_covariance=False was passed to embeddings_predict_layer) and
-        # indexed via the underlying obsp ndarray rather than a boolean-
-        # masked AnnData view (avoids ImplicitModificationWarning on
-        # AnnData ≥0.8).
-        unc_key1 = f"predicted_{layer}_{embedding1}_space_uncertainty"
-        unc_key2 = f"predicted_{layer}_{embedding2}_space_uncertainty"
-
-        if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp):
-            ix = np.ix_(ind.values, ind.values)
-            unc1 = ad.obsp[unc_key1][ix]
-            unc2 = ad.obsp[unc_key2][ix]
-
-            # ── Mahalanobis distance ───────────────────────────────────────────
-            # Kompot handles Cholesky stabilization internally (eps=1e-8 default).
-            res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = compute_mahalanobis_distances(
-                diff_values=ad[ind].layers[f"predicted_{layer}_LFC_{embedding1}_v_{embedding2}"].T,
-                covariance=unc1 + unc2,
-                diagonal_variance=diagonal_variance,
-            )
-        else:
-            missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp]
-            warnings.warn(
-                f"Posterior covariance keys not found in ad.obsp — Mahalanobis "
-                f"distance skipped for group '{c}'. To enable, rerun "
-                f"embeddings_predict_layer with save_covariance=True.\n"
-                f"\tMissing: {missing_unc}"
-            )
-            res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = np.nan
-
-
+        # Model uncertainty + Mahalanobis distance (guarded against missing
+        # obsp keys when save_covariance=False was passed to
+        # embeddings_predict_layer).
+        res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = _compute_group_mhd_and_stats(
+            ad, ind, layer, layer, embedding1, embedding2, diagonal_variance,
+        )
+
+
 
         # ── Additional per-group layer statistics ──────────────────────────────
 
@@ -464,6 +489,7 @@ def run_null_desynch_test(
     eps: float = 1e-16,
     save_predictions: bool = True,
     save_covariance: bool = True,
+    direction_colors: Sequence[str] = ("#ff7f0e", "#1f77b4", "lightgrey"),
 ) -> None:
     """Run a null model test for desynchronization statistics.
 
@@ -488,6 +514,13 @@ def run_null_desynch_test(
         Two-sided p-value threshold for significance.
     random_state : int
         Random seed for null layer shuffling.
+    direction_colors : sequence of str, optional
+        Three colors written to
+        ``ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"]``,
+        in the order ``({modality2}-structure, {modality1}-structure,
+        not-significant)`` to match the ordered ``CategoricalDtype`` of the
+        per-feature direction column. Defaults to
+        ``("#ff7f0e", "#1f77b4", "lightgrey")``.
 
     Returns
     -------
@@ -683,45 +716,19 @@ def run_null_desynch_test(
             )
             diagonal_variance = None
 
-        # Model uncertainty — guarded against missing keys (when
-        # save_covariance=False was passed to embeddings_predict_layer) and
-        # indexed via the underlying obsp ndarray rather than a boolean-
-        # masked AnnData view (avoids ImplicitModificationWarning on
-        # AnnData ≥0.8).
-        unc_key1 = f"predicted_{null_layer}_{embedding1}_space_uncertainty"
-        unc_key2 = f"predicted_{null_layer}_{embedding2}_space_uncertainty"
-
-        diff = ad[ind].layers[f"predicted_{null_layer}_{embedding1}_space_residuals"] - ad[ind].layers[f"predicted_{null_layer}_{embedding2}_space_residuals"]
-
-        if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp):
-            ix = np.ix_(ind.values, ind.values)
-            unc1 = ad.obsp[unc_key1][ix]
-            unc2 = ad.obsp[unc_key2][ix]
-
-            # ── Mahalanobis distance ───────────────────────────────────────────
-            # Kompot handles Cholesky stabilization internally (eps=1e-8 default).
-            res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = compute_mahalanobis_distances(
-                diff_values=ad[ind].layers[f"predicted_{layer}_LFC_{embedding1}_v_{embedding2}"].T,
-                covariance=unc1 + unc2,
-                diagonal_variance=diagonal_variance,
-            )
-        else:
-            missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp]
-            warnings.warn(
-                f"Posterior covariance keys not found in ad.obsp — null "
-                f"Mahalanobis distance skipped for group '{c}'. To enable, "
-                f"rerun embeddings_predict_layer with save_covariance=True.\n"
-                f"\tMissing: {missing_unc}"
-            )
-            res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = np.nan
+        # Model uncertainty + Mahalanobis distance against the null covariance.
+        # Uncertainty keys live in the null namespace; the LFC layer is the
+        # observed one (the null pass reuses observed LFC values against the
+        # null covariance).
+        res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = _compute_group_mhd_and_stats(
+            ad, ind, null_layer, layer, embedding1, embedding2, diagonal_variance,
+        )
 
 
 
     # ── Store direction colors in uns ─────────────────────────────────────────
 
-    ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"] = [
-        "#ff7f0e", "#1f77b4", "lightgrey"
-    ]
+    ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"] = list(direction_colors)
 
 
 

diff --git a/src/scEcho/Echo_states.py → src/scEcho/echo_states.py b/src/scEcho/Echo_states.py → src/scEcho/echo_states.py
@@ -2,7 +2,7 @@
 
 import logging
 import warnings
-from typing import Optional
+from typing import Optional, Sequence
 
 import anndata
 import kompot
@@ -31,6 +31,8 @@ def dn_comp_obsm(
     optimizer: Optional[str] = None,
     sample_grouping_col: Optional[str] = None,
     sv_min_cells: int = 200,
+    sqrt_eps: float = 1e-16,
+    direction_colors: Sequence[str] = ("#ff7f0e", "#1f77b4", "lightgrey"),
 ) -> None:
     """Compare density between two embeddings in separate spaces.
 
@@ -65,6 +67,18 @@ def dn_comp_obsm(
         datasets).
     sample_grouping_col : str, optional
         Column for sample groupings. If specified, includes sample variance.
+    sqrt_eps : float, optional
+        Small constant added inside ``np.sqrt(variance_model + sqrt_eps)`` when
+        computing the standard deviation used for z-scoring the density LFC.
+        Defaults to ``1e-16``; empirically the variance term is well above
+        zero on tested pipelines (min observed: 0.0225), so the default has
+        negligible numerical effect — exposed for users who want to disable
+        the floor (``sqrt_eps=0``) or raise it.
+    direction_colors : sequence of str, optional
+        Three colors written to ``ad.uns[f"{direction_key}_colors"]``, in the
+        order ``(modality2-higher, modality1-higher, neutral)`` to match the
+        ordered ``CategoricalDtype`` of the direction column. Defaults to
+        ``("#ff7f0e", "#1f77b4", "lightgrey")``.
 
     Returns
     -------
@@ -173,7 +187,7 @@ def dn_comp_obsm(
         )
 
 
-    ad.obs[sd_key] = np.sqrt(variance_model + 1e-16)
+    ad.obs[sd_key] = np.sqrt(variance_model + sqrt_eps)
 
 
     # ── Compute Z-scores and p-values ─────────────────────────────────────────
@@ -212,4 +226,4 @@ def dn_comp_obsm(
         ordered=True,
     )
     ad.obs[direction_key] = ad.obs[direction_key].astype(cat_type)
-    ad.uns[f"{direction_key}_colors"] = ["#ff7f0e", "#1f77b4", "lightgrey"]
+    ad.uns[f"{direction_key}_colors"] = list(direction_colors)
diff --git a/src/scEcho/plotting.py b/src/scEcho/plotting.py
@@ -27,6 +27,14 @@
 ]
 
 
+# Defaults forwarded to ``adjustText.adjust_text`` from ``plot_scores``. Lifted
+# out of the call site so callers can override any key (e.g. arrowprops style,
+# additional repulsion knobs) via the ``**adjust_text_kwargs`` catch-all on
+# ``plot_scores`` — keys passed by the caller override these defaults.
+_DEFAULT_ADJUST_TEXT_KWARGS = {
+    "arrowprops": {"arrowstyle": "-", "color": "black", "lw": 0.5},
+}
+
 
 def plot_scores(
     ad: anndata.AnnData,
@@ -213,13 +221,12 @@ def plot_scores(
                 x=plot_df[var_exp_col].values,
                 y=plot_df[MHD_col].values,
                 ax=ax,
-                arrowprops=dict(arrowstyle="-", color="black", lw=0.5),
                 expand=expand,
                 force_text=force_text,
                 force_points=force_points,
                 max_move=max_move,
                 iter_lim=iter_lim,
-                **adjust_text_kwargs,
+                **{**_DEFAULT_ADJUST_TEXT_KWARGS, **adjust_text_kwargs},
             )
 
         return ax

diff --git a/src/scEcho/utils.py b/src/scEcho/utils.py
@@ -20,7 +20,7 @@
 from scipy.stats import spearmanr
 from tqdm.auto import tqdm
 
-from .Echo_features import embeddings_predict_layer
+from .echo_features import embeddings_predict_layer
 
 __all__ = [
     # existing utils

diff --git a/tests/test_determinism.py b/tests/test_determinism.py
@@ -27,8 +27,8 @@ def test_dn_comp_obsm_is_deterministic():
     a1 = _build_adata(seed=0)
     a2 = _build_adata(seed=0)
 
-    scEcho.Echo_states.dn_comp_obsm(a1, ls_factor=2, log_fold_change_threshold=0.5)
-    scEcho.Echo_states.dn_comp_obsm(a2, ls_factor=2, log_fold_change_threshold=0.5)
+    scEcho.echo_states.dn_comp_obsm(a1, ls_factor=2, log_fold_change_threshold=0.5)
+    scEcho.echo_states.dn_comp_obsm(a2, ls_factor=2, log_fold_change_threshold=0.5)
 
     numeric_cols = [
         "log_density_RNA",