From bbef2b7e480e4bfc7f7bd7a0cc593a3146d60e48 Mon Sep 17 00:00:00 2001 From: Dominik Date: Mon, 18 May 2026 20:42:31 -0700 Subject: [PATCH 1/5] feat(Echo_states): expose sqrt_eps kwarg on dn_comp_obsm (default 1e-16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ``+1e-16`` inside ``np.sqrt(variance_model + 1e-16)`` was a hardcoded floor; lift it to a public kwarg so callers can override (raise it for extra-noisy variance, set it to 0 to disable). Default unchanged at 1e-16 — empirical floor (min observed 0.0225 across the test suite) makes the default a no-op, but the guard stays for robustness per operator decision on PR #7. --- src/scEcho/Echo_states.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/scEcho/Echo_states.py b/src/scEcho/Echo_states.py index 049595d..6c2d549 100755 --- a/src/scEcho/Echo_states.py +++ b/src/scEcho/Echo_states.py @@ -31,6 +31,7 @@ def dn_comp_obsm( optimizer: Optional[str] = None, sample_grouping_col: Optional[str] = None, sv_min_cells: int = 200, + sqrt_eps: float = 1e-16, ) -> None: """Compare density between two embeddings in separate spaces. @@ -65,6 +66,13 @@ def dn_comp_obsm( datasets). sample_grouping_col : str, optional Column for sample groupings. If specified, includes sample variance. + sqrt_eps : float, optional + Small constant added inside ``np.sqrt(variance_model + sqrt_eps)`` when + computing the standard deviation used for z-scoring the density LFC. + Defaults to ``1e-16``; empirically the variance term is well above + zero on tested pipelines (min observed: 0.0225), so the default has + negligible numerical effect — exposed for users who want to disable + the floor (``sqrt_eps=0``) or raise it. 
Returns ------- @@ -173,7 +181,7 @@ def dn_comp_obsm( ) - ad.obs[sd_key] = np.sqrt(variance_model + 1e-16) + ad.obs[sd_key] = np.sqrt(variance_model + sqrt_eps) # ── Compute Z-scores and p-values ───────────────────────────────────────── From b8546296ad22e1b134fbd7da8134c7b5d90d9c77 Mon Sep 17 00:00:00 2001 From: Dominik Date: Mon, 18 May 2026 19:54:31 -0700 Subject: [PATCH 2/5] refactor(Echo_features): extract _compute_group_mhd_and_stats from get_desynch_stats and run_null_desynch_test Removes ~50 LOC of near-identical Mahalanobis-distance + obsp-key-fallback code between the two functions, plus the dead 'diff' assignment in the null path. The two call sites differ only in (a) which layer namespace the predicted-covariance keys live under (observed layer vs null layer) and (b) which layer namespace the precomputed LFC layer lives under (always the observed layer in current code paths). Helper signature: _compute_group_mhd_and_stats( ad, ind, layer, layer_for_lfc, embedding1, embedding2, diagonal_variance, ) - get_desynch_stats: layer=layer, layer_for_lfc=layer - run_null_desynch_test: layer=null_layer, layer_for_lfc=layer Determinism regression passes bit-identically. Future drift between the two paths now has only one site to update. --- src/scEcho/Echo_features.py | 127 ++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/src/scEcho/Echo_features.py b/src/scEcho/Echo_features.py index 9f11c10..98fc967 100755 --- a/src/scEcho/Echo_features.py +++ b/src/scEcho/Echo_features.py @@ -180,7 +180,55 @@ def embeddings_predict_layer( - + + +def _compute_group_mhd_and_stats( + ad, + ind, + layer, + layer_for_lfc, + embedding1, + embedding2, + diagonal_variance, +): + """Per-group Mahalanobis distance with graceful fallback when covariance keys are absent. 
+ + Pulled out of ``get_desynch_stats`` and ``run_null_desynch_test`` to remove + ~50 LOC of duplication and the silent-divergence risk between the two + near-identical blocks. + + ``layer`` is the namespace of the predicted-space uncertainty keys in + ``ad.obsp`` (e.g. ``layer`` for the observed pass, ``null_layer`` for the + null pass). ``layer_for_lfc`` is the namespace of the precomputed LFC layer + in ``ad.layers`` (always the *observed* layer in current call sites, since + the null path reuses the observed LFC values against the null covariance). + + Returns either a ``float64`` per-feature ``(n_vars,)`` array of Mahalanobis distances, + or ``np.nan`` (with a ``UserWarning``) when the predicted-covariance keys + for either embedding are not present in ``ad.obsp``. + """ + unc_key1 = f"predicted_{layer}_{embedding1}_space_uncertainty" + unc_key2 = f"predicted_{layer}_{embedding2}_space_uncertainty" + if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp): + ix = np.ix_(ind.values, ind.values) + unc1 = ad.obsp[unc_key1][ix] + unc2 = ad.obsp[unc_key2][ix] + # Kompot handles Cholesky stabilization internally (eps=1e-8 default). + return compute_mahalanobis_distances( + diff_values=ad[ind].layers[f"predicted_{layer_for_lfc}_LFC_{embedding1}_v_{embedding2}"].T, + covariance=unc1 + unc2, + diagonal_variance=diagonal_variance, + ) + missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp] + warnings.warn( + f"Posterior covariance keys not found in ad.obsp — Mahalanobis " + f"distance skipped for this group. 
To enable, rerun " + f"embeddings_predict_layer with save_covariance=True.\n" + f"\tMissing: {missing_unc}" + ) + return np.nan + + def get_desynch_stats( ad: anndata.AnnData, obs_col: str, @@ -310,37 +358,14 @@ def get_desynch_stats( ) diagonal_variance = None - # Model uncertainty — guarded against missing keys (when - # save_covariance=False was passed to embeddings_predict_layer) and - # indexed via the underlying obsp ndarray rather than a boolean- - # masked AnnData view (avoids ImplicitModificationWarning on - # AnnData ≥0.8). - unc_key1 = f"predicted_{layer}_{embedding1}_space_uncertainty" - unc_key2 = f"predicted_{layer}_{embedding2}_space_uncertainty" - - if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp): - ix = np.ix_(ind.values, ind.values) - unc1 = ad.obsp[unc_key1][ix] - unc2 = ad.obsp[unc_key2][ix] - - # ── Mahalanobis distance ─────────────────────────────────────────── - # Kompot handles Cholesky stabilization internally (eps=1e-8 default). - res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = compute_mahalanobis_distances( - diff_values=ad[ind].layers[f"predicted_{layer}_LFC_{embedding1}_v_{embedding2}"].T, - covariance=unc1 + unc2, - diagonal_variance=diagonal_variance, - ) - else: - missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp] - warnings.warn( - f"Posterior covariance keys not found in ad.obsp — Mahalanobis " - f"distance skipped for group '{c}'. To enable, rerun " - f"embeddings_predict_layer with save_covariance=True.\n" - f"\tMissing: {missing_unc}" - ) - res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = np.nan - - + # Model uncertainty + Mahalanobis distance (guarded against missing + # obsp keys when save_covariance=False was passed to + # embeddings_predict_layer). 
+ res[f"MHD_{obs_col}_{c}_{modality1}_vs_{modality2}"] = _compute_group_mhd_and_stats( + ad, ind, layer, layer, embedding1, embedding2, diagonal_variance, + ) + + # ── Additional per-group layer statistics ────────────────────────────── @@ -683,37 +708,13 @@ def run_null_desynch_test( ) diagonal_variance = None - # Model uncertainty — guarded against missing keys (when - # save_covariance=False was passed to embeddings_predict_layer) and - # indexed via the underlying obsp ndarray rather than a boolean- - # masked AnnData view (avoids ImplicitModificationWarning on - # AnnData ≥0.8). - unc_key1 = f"predicted_{null_layer}_{embedding1}_space_uncertainty" - unc_key2 = f"predicted_{null_layer}_{embedding2}_space_uncertainty" - - diff = ad[ind].layers[f"predicted_{null_layer}_{embedding1}_space_residuals"] - ad[ind].layers[f"predicted_{null_layer}_{embedding2}_space_residuals"] - - if (unc_key1 in ad.obsp) and (unc_key2 in ad.obsp): - ix = np.ix_(ind.values, ind.values) - unc1 = ad.obsp[unc_key1][ix] - unc2 = ad.obsp[unc_key2][ix] - - # ── Mahalanobis distance ─────────────────────────────────────────── - # Kompot handles Cholesky stabilization internally (eps=1e-8 default). - res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = compute_mahalanobis_distances( - diff_values=ad[ind].layers[f"predicted_{layer}_LFC_{embedding1}_v_{embedding2}"].T, - covariance=unc1 + unc2, - diagonal_variance=diagonal_variance, - ) - else: - missing_unc = [k for k in [unc_key1, unc_key2] if k not in ad.obsp] - warnings.warn( - f"Posterior covariance keys not found in ad.obsp — null " - f"Mahalanobis distance skipped for group '{c}'. To enable, " - f"rerun embeddings_predict_layer with save_covariance=True.\n" - f"\tMissing: {missing_unc}" - ) - res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = np.nan + # Model uncertainty + Mahalanobis distance against the null covariance. 
+ # Uncertainty keys live in the null namespace; the LFC layer is the + # observed one (the null pass reuses observed LFC values against the + # null covariance). + res[f"MHD_null_{obs_col}_{c}_{modality1}_vs_{modality2}"] = _compute_group_mhd_and_stats( + ad, ind, null_layer, layer, embedding1, embedding2, diagonal_variance, + ) From c855be6b80a8a0ae3d0675d4cebad627cf2e6a6c Mon Sep 17 00:00:00 2001 From: Dominik Date: Mon, 18 May 2026 19:56:50 -0700 Subject: [PATCH 3/5] feat: lift direction_colors and adjust_text arrowprops to public surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three previously hardcoded knobs lifted so downstream callers can customize without monkey-patching: - ``dn_comp_obsm(..., direction_colors=("#ff7f0e", "#1f77b4", "lightgrey"))`` — sequence written to ``ad.uns[f"{direction_key}_colors"]``, in the order ``(modality2-higher, modality1-higher, neutral)`` matching the CategoricalDtype. - ``run_null_desynch_test(..., direction_colors=(...))`` — sibling lift, same default, same order matching its own CategoricalDtype ``(modality2-structure, modality1-structure, not-significant)``. - ``plot_scores`` — module-level ``_DEFAULT_ADJUST_TEXT_KWARGS`` with the previously hardcoded ``arrowprops`` style; callers can override any key (or add new ones) via the existing ``**adjust_text_kwargs`` catch-all. The repulsion knobs (``expand``, ``force_text``, ``force_points``, ``max_move_frac``, ``iter_lim``) stay as explicit kwargs — already lifted in wave-1. 
--- src/scEcho/Echo_features.py | 12 +++++++++--- src/scEcho/Echo_states.py | 10 ++++++++-- src/scEcho/plotting.py | 11 +++++++++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/scEcho/Echo_features.py b/src/scEcho/Echo_features.py index 98fc967..6304e8c 100755 --- a/src/scEcho/Echo_features.py +++ b/src/scEcho/Echo_features.py @@ -489,6 +489,7 @@ def run_null_desynch_test( eps: float = 1e-16, save_predictions: bool = True, save_covariance: bool = True, + direction_colors: Sequence[str] = ("#ff7f0e", "#1f77b4", "lightgrey"), ) -> None: """Run a null model test for desynchronization statistics. @@ -513,6 +514,13 @@ def run_null_desynch_test( Two-sided p-value threshold for significance. random_state : int Random seed for null layer shuffling. + direction_colors : sequence of str, optional + Three colors written to + ``ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"]``, + in the order ``({modality2}-structure, {modality1}-structure, + not-significant)`` to match the ordered ``CategoricalDtype`` of the + per-feature direction column. Defaults to + ``("#ff7f0e", "#1f77b4", "lightgrey")``. 
Returns ------- @@ -720,9 +728,7 @@ def run_null_desynch_test( # ── Store direction colors in uns ───────────────────────────────────────── - ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"] = [ - "#ff7f0e", "#1f77b4", "lightgrey" - ] + ad.uns[f"desynch_direction_{layer}_{modality1}_v_{modality2}_colors"] = list(direction_colors) diff --git a/src/scEcho/Echo_states.py b/src/scEcho/Echo_states.py index 6c2d549..3f05f9f 100755 --- a/src/scEcho/Echo_states.py +++ b/src/scEcho/Echo_states.py @@ -2,7 +2,7 @@ import logging import warnings -from typing import Optional +from typing import Optional, Sequence import anndata import kompot @@ -32,6 +32,7 @@ def dn_comp_obsm( sample_grouping_col: Optional[str] = None, sv_min_cells: int = 200, sqrt_eps: float = 1e-16, + direction_colors: Sequence[str] = ("#ff7f0e", "#1f77b4", "lightgrey"), ) -> None: """Compare density between two embeddings in separate spaces. @@ -73,6 +74,11 @@ def dn_comp_obsm( zero on tested pipelines (min observed: 0.0225), so the default has negligible numerical effect — exposed for users who want to disable the floor (``sqrt_eps=0``) or raise it. + direction_colors : sequence of str, optional + Three colors written to ``ad.uns[f"{direction_key}_colors"]``, in the + order ``(modality2-higher, modality1-higher, neutral)`` to match the + ordered ``CategoricalDtype`` of the direction column. Defaults to + ``("#ff7f0e", "#1f77b4", "lightgrey")``. Returns ------- @@ -220,4 +226,4 @@ def dn_comp_obsm( ordered=True, ) ad.obs[direction_key] = ad.obs[direction_key].astype(cat_type) - ad.uns[f"{direction_key}_colors"] = ["#ff7f0e", "#1f77b4", "lightgrey"] + ad.uns[f"{direction_key}_colors"] = list(direction_colors) diff --git a/src/scEcho/plotting.py b/src/scEcho/plotting.py index 2755858..8948e5b 100755 --- a/src/scEcho/plotting.py +++ b/src/scEcho/plotting.py @@ -27,6 +27,14 @@ ] +# Defaults forwarded to ``adjustText.adjust_text`` from ``plot_scores``. 
Lifted +# out of the call site so callers can override any key (e.g. arrowprops style, +# additional repulsion knobs) via the ``**adjust_text_kwargs`` catch-all on +# ``plot_scores`` — keys passed by the caller override these defaults. +_DEFAULT_ADJUST_TEXT_KWARGS = { + "arrowprops": {"arrowstyle": "-", "color": "black", "lw": 0.5}, +} + def plot_scores( ad: anndata.AnnData, @@ -213,13 +221,12 @@ def plot_scores( x=plot_df[var_exp_col].values, y=plot_df[MHD_col].values, ax=ax, - arrowprops=dict(arrowstyle="-", color="black", lw=0.5), expand=expand, force_text=force_text, force_points=force_points, max_move=max_move, iter_lim=iter_lim, - **adjust_text_kwargs, + **{**_DEFAULT_ADJUST_TEXT_KWARGS, **adjust_text_kwargs}, ) return ax From 330faad88b900585ad68a718153b2846da96aafb Mon Sep 17 00:00:00 2001 From: Dominik Date: Mon, 18 May 2026 19:59:17 -0700 Subject: [PATCH 4/5] refactor: rename Echo_states.py to echo_states.py (PEP 8 snake_case) API-breaking. No backward-compat shim (operator-approved). Users importing the module qualifier or referencing it as ``scEcho.Echo_states.`` must update their import paths: from scEcho import Echo_states -> from scEcho import echo_states scEcho.Echo_states.dn_comp_obsm(...) -> scEcho.echo_states.dn_comp_obsm(...) Symbols inside the module (``dn_comp_obsm``) are unchanged. Notebook source cells updated in-place (no re-run; cached D59-retina outputs preserved); cached warning paths still reference the old filename and that is fine. Echo_features renamed in the next commit; bundling both into one commit would obscure git's rename detection. 
--- README.md | 4 ++-- notebooks/example.ipynb | 2 +- src/scEcho/__init__.py | 4 ++-- src/scEcho/{Echo_states.py => echo_states.py} | 0 tests/test_determinism.py | 4 ++-- tests/test_echo_states.py | 6 +++--- tests/test_plotting.py | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) rename src/scEcho/{Echo_states.py => echo_states.py} (100%) diff --git a/README.md b/README.md index 914ee6c..a206965 100755 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ pip install -e ".[dev]" Imported as `import scEcho`: -- `Echo_states` — per-modality density estimation and cross-modality density +- `echo_states` — per-modality density estimation and cross-modality density comparison; writes per-cell direction labels into `.obs`. - `Echo_features` — feature-level desynchronization pipeline (imputation, per-feature statistics, null-model significance testing). @@ -68,7 +68,7 @@ Imported as `import scEcho`: ## Usage See [`notebooks/example.ipynb`](notebooks/example.ipynb) for the canonical end-to-end pipeline. The basic -shape is `scEcho.Echo_states.dn_comp_obsm(adata, ...)` followed by +shape is `scEcho.echo_states.dn_comp_obsm(adata, ...)` followed by `scEcho.Echo_features.run_echo_features(adata, ...)`; see each function's docstring for the required `.obsm` / `.obs` / `.layers` keys. diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index 35b392e..9da0f3d 100755 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -208,7 +208,7 @@ } ], "source": [ - "scEcho.Echo_states.dn_comp_obsm(retina_ad,\n", + "scEcho.echo_states.dn_comp_obsm(retina_ad,\n", " obsm_key1='DM_EigenVectors_RNA',\n", " obsm_key2='DM_EigenVectors_ATAC', \n", " pval_threshold=0.05,\n", diff --git a/src/scEcho/__init__.py b/src/scEcho/__init__.py index ab8c10a..98c923b 100755 --- a/src/scEcho/__init__.py +++ b/src/scEcho/__init__.py @@ -7,10 +7,10 @@ __author__ = "Connor Finkbeiner" -from . import Echo_features, Echo_states, plotting, utils +from . 
import Echo_features, echo_states, plotting, utils __all__ = [ - "Echo_states", + "echo_states", "Echo_features", "plotting", "utils", diff --git a/src/scEcho/Echo_states.py b/src/scEcho/echo_states.py similarity index 100% rename from src/scEcho/Echo_states.py rename to src/scEcho/echo_states.py diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 7195877..2fcba08 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -27,8 +27,8 @@ def test_dn_comp_obsm_is_deterministic(): a1 = _build_adata(seed=0) a2 = _build_adata(seed=0) - scEcho.Echo_states.dn_comp_obsm(a1, ls_factor=2, log_fold_change_threshold=0.5) - scEcho.Echo_states.dn_comp_obsm(a2, ls_factor=2, log_fold_change_threshold=0.5) + scEcho.echo_states.dn_comp_obsm(a1, ls_factor=2, log_fold_change_threshold=0.5) + scEcho.echo_states.dn_comp_obsm(a2, ls_factor=2, log_fold_change_threshold=0.5) numeric_cols = [ "log_density_RNA", diff --git a/tests/test_echo_states.py b/tests/test_echo_states.py index 76405da..15986ba 100644 --- a/tests/test_echo_states.py +++ b/tests/test_echo_states.py @@ -1,4 +1,4 @@ -"""Tests for `scEcho.Echo_states` — currently `dn_comp_obsm` only. +"""Tests for `scEcho.echo_states` — currently `dn_comp_obsm` only. Two-tier: smoke (function runs, expected columns added) + correctness (numeric output matches hardcoded baseline captured on first green run). @@ -20,7 +20,7 @@ def test_smoke_dn_comp_obsm_writes_expected_obs_columns(synthetic_adata): - scEcho.Echo_states.dn_comp_obsm( + scEcho.echo_states.dn_comp_obsm( synthetic_adata, ls_factor=2, log_fold_change_threshold=0.5, @@ -53,7 +53,7 @@ def test_correctness_dn_comp_obsm_values(synthetic_adata): and `optimizer="L-BFGS-B"`. Tolerance: rtol=1e-4 (Mellon L-BFGS has minor numerical jitter across jax/jaxopt patch versions). 
""" - scEcho.Echo_states.dn_comp_obsm( + scEcho.echo_states.dn_comp_obsm( synthetic_adata, ls_factor=2, log_fold_change_threshold=0.5, diff --git a/tests/test_plotting.py b/tests/test_plotting.py index a16acc9..7fccb9b 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -27,7 +27,7 @@ def adata_with_desynch_features(synthetic_adata): def adata_with_dn_comp(synthetic_adata): """Fixture with dn_comp_obsm results in obs (used by plot_direction_fractions and plot_desynchronized_state_volcano).""" - scEcho.Echo_states.dn_comp_obsm( + scEcho.echo_states.dn_comp_obsm( synthetic_adata, ls_factor=2, log_fold_change_threshold=0.5, optimizer="L-BFGS-B", ) From 70debbb1d3f27c940f4fdc2c6ebb79d21a5b7033 Mon Sep 17 00:00:00 2001 From: Dominik Date: Mon, 18 May 2026 20:02:27 -0700 Subject: [PATCH 5/5] refactor: rename Echo_features.py to echo_features.py (PEP 8 snake_case) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit API-breaking. No backward-compat shim (operator-approved). Users importing the module qualifier or referencing it as ``scEcho.Echo_features.`` must update their import paths: from scEcho import Echo_features -> from scEcho import echo_features scEcho.Echo_features.run_echo_features(...) -> scEcho.echo_features.run_echo_features(...) from scEcho.Echo_features import compute_ncells -> from scEcho.echo_features import compute_ncells Symbols inside the module (``embeddings_predict_layer``, ``get_desynch_stats``, ``run_null_desynch_test``, ``run_echo_features``, ``make_null_layer``, ``get_reconstruction_results``, ``compute_ncells``) are unchanged. Notebook source cells updated in-place; cached RuntimeWarning paths in outputs still reference the old filename and that is harmless. Internal cross-module import in ``utils.py`` updated as part of this commit (``from .Echo_features import embeddings_predict_layer`` → ``from .echo_features import ...``). 
Combined with the Echo_states rename in the previous commit and the try_models / test_components absorption from PR #4's structure stream, this completes the v0.2.0 API boundary. --- README.md | 4 +- notebooks/example.ipynb | 4 +- src/scEcho/__init__.py | 4 +- .../{Echo_features.py => echo_features.py} | 0 src/scEcho/utils.py | 2 +- tests/test_echo_features.py | 56 +++++++++---------- tests/test_plotting.py | 4 +- 7 files changed, 37 insertions(+), 37 deletions(-) rename src/scEcho/{Echo_features.py => echo_features.py} (100%) diff --git a/README.md b/README.md index a206965..782391e 100755 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ Imported as `import scEcho`: - `echo_states` — per-modality density estimation and cross-modality density comparison; writes per-cell direction labels into `.obs`. -- `Echo_features` — feature-level desynchronization pipeline (imputation, +- `echo_features` — feature-level desynchronization pipeline (imputation, per-feature statistics, null-model significance testing). - `plotting` — visualization (volcano plots, linked side-by-side embeddings, per-group direction fractions). @@ -69,7 +69,7 @@ Imported as `import scEcho`: See [`notebooks/example.ipynb`](notebooks/example.ipynb) for the canonical end-to-end pipeline. The basic shape is `scEcho.echo_states.dn_comp_obsm(adata, ...)` followed by -`scEcho.Echo_features.run_echo_features(adata, ...)`; see each function's +`scEcho.echo_features.run_echo_features(adata, ...)`; see each function's docstring for the required `.obsm` / `.obs` / `.layers` keys. 
## License diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index 9da0f3d..0d90384 100755 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -707,7 +707,7 @@ } ], "source": [ - "scEcho.Echo_features.run_echo_features(\n", + "scEcho.echo_features.run_echo_features(\n", " retina_ad,\n", " obs_col=\"combo_type\",\n", " layers=[\"RNA_lognorm_counts\"],\n", @@ -1331,7 +1331,7 @@ } ], "source": [ - "MPC_res = scEcho.Echo_features.get_reconstruction_results(retina_ad,\n", + "MPC_res = scEcho.echo_features.get_reconstruction_results(retina_ad,\n", " \"RNA_lognorm_counts\",\n", " grouping = \"combo_type\",\n", " group = \"MPC\",\n", diff --git a/src/scEcho/__init__.py b/src/scEcho/__init__.py index 98c923b..632b3f3 100755 --- a/src/scEcho/__init__.py +++ b/src/scEcho/__init__.py @@ -7,11 +7,11 @@ __author__ = "Connor Finkbeiner" -from . import Echo_features, echo_states, plotting, utils +from . import echo_features, echo_states, plotting, utils __all__ = [ "echo_states", - "Echo_features", + "echo_features", "plotting", "utils", "__version__", diff --git a/src/scEcho/Echo_features.py b/src/scEcho/echo_features.py similarity index 100% rename from src/scEcho/Echo_features.py rename to src/scEcho/echo_features.py diff --git a/src/scEcho/utils.py b/src/scEcho/utils.py index f9abdab..30cfb9e 100755 --- a/src/scEcho/utils.py +++ b/src/scEcho/utils.py @@ -20,7 +20,7 @@ from scipy.stats import spearmanr from tqdm.auto import tqdm -from .Echo_features import embeddings_predict_layer +from .echo_features import embeddings_predict_layer __all__ = [ # existing utils diff --git a/tests/test_echo_features.py b/tests/test_echo_features.py index 57114cc..a567eed 100644 --- a/tests/test_echo_features.py +++ b/tests/test_echo_features.py @@ -1,5 +1,5 @@ -"""Tests for `scEcho.Echo_features` — covers the six entries in -`Echo_features.__all__` plus the un-exposed `compute_ncells` helper. 
+"""Tests for `scEcho.echo_features` — covers the six entries in +`echo_features.__all__` plus the un-exposed `compute_ncells` helper. Smoke tests assert function-exit and expected output keys; correctness tests assert hardcoded baseline values captured on the first green run @@ -10,13 +10,13 @@ import pytest import scEcho -from scEcho.Echo_features import compute_ncells +from scEcho.echo_features import compute_ncells # ── embeddings_predict_layer ───────────────────────────────────────────────── def test_smoke_embeddings_predict_layer(synthetic_adata): - scEcho.Echo_features.embeddings_predict_layer( + scEcho.echo_features.embeddings_predict_layer( synthetic_adata, ls=1.0, sigma=0.1, layer="L", ) assert "predicted_L_DM_EigenVectors_RNA_space" in synthetic_adata.layers @@ -26,7 +26,7 @@ def test_smoke_embeddings_predict_layer(synthetic_adata): def test_correctness_embeddings_predict_layer_values(synthetic_adata): - scEcho.Echo_features.embeddings_predict_layer( + scEcho.echo_features.embeddings_predict_layer( synthetic_adata, ls=1.0, sigma=0.1, layer="L", ) pred = np.asarray(synthetic_adata.layers["predicted_L_DM_EigenVectors_RNA_space"]) @@ -47,10 +47,10 @@ def test_correctness_embeddings_predict_layer_values(synthetic_adata): # ── get_desynch_stats ──────────────────────────────────────────────────────── def test_smoke_get_desynch_stats(synthetic_adata): - scEcho.Echo_features.embeddings_predict_layer( + scEcho.echo_features.embeddings_predict_layer( synthetic_adata, ls=1.0, sigma=0.1, layer="L", ) - scEcho.Echo_features.get_desynch_stats( + scEcho.echo_features.get_desynch_stats( synthetic_adata, obs_col="combo_type", layer="L", ) res = synthetic_adata.varm["reconstruction_results_L"] @@ -62,10 +62,10 @@ def test_smoke_get_desynch_stats(synthetic_adata): def test_correctness_get_desynch_stats_mse_values(synthetic_adata): - scEcho.Echo_features.embeddings_predict_layer( + scEcho.echo_features.embeddings_predict_layer( synthetic_adata, ls=1.0, sigma=0.1, 
layer="L", ) - scEcho.Echo_features.get_desynch_stats( + scEcho.echo_features.get_desynch_stats( synthetic_adata, obs_col="combo_type", layer="L", ) res = synthetic_adata.varm["reconstruction_results_L"] @@ -92,7 +92,7 @@ def test_correctness_get_desynch_stats_mse_values(synthetic_adata): # ── make_null_layer ────────────────────────────────────────────────────────── def test_smoke_make_null_layer(synthetic_adata): - scEcho.Echo_features.make_null_layer(synthetic_adata, layer="L", random_state=0) + scEcho.echo_features.make_null_layer(synthetic_adata, layer="L", random_state=0) assert "L_null" in synthetic_adata.layers assert synthetic_adata.layers["L_null"].shape == synthetic_adata.layers["L"].shape @@ -103,7 +103,7 @@ def test_correctness_make_null_layer_preserves_per_feature_distribution(syntheti make_null_layer (breaks cell-cell correlations, keeps marginals). """ original = synthetic_adata.layers["L"].copy() - scEcho.Echo_features.make_null_layer(synthetic_adata, layer="L", random_state=0) + scEcho.echo_features.make_null_layer(synthetic_adata, layer="L", random_state=0) null = np.asarray(synthetic_adata.layers["L_null"]) # the function uses one shuffle index applied to all features (audit #25) @@ -118,13 +118,13 @@ def test_correctness_make_null_layer_preserves_per_feature_distribution(syntheti # ── run_null_desynch_test ──────────────────────────────────────────────────── def test_smoke_run_null_desynch_test(synthetic_adata): - scEcho.Echo_features.embeddings_predict_layer( + scEcho.echo_features.embeddings_predict_layer( synthetic_adata, ls=1.0, sigma=0.1, layer="L", ) - scEcho.Echo_features.get_desynch_stats( + scEcho.echo_features.get_desynch_stats( synthetic_adata, obs_col="combo_type", layer="L", ) - scEcho.Echo_features.run_null_desynch_test( + scEcho.echo_features.run_null_desynch_test( synthetic_adata, obs_col="combo_type", layer="L", ls=1.0, sigma=0.1, min_cells=10, ) @@ -139,13 +139,13 @@ def test_smoke_run_null_desynch_test(synthetic_adata): def 
test_correctness_run_null_desynch_test_values(synthetic_adata): - scEcho.Echo_features.embeddings_predict_layer( + scEcho.echo_features.embeddings_predict_layer( synthetic_adata, ls=1.0, sigma=0.1, layer="L", ) - scEcho.Echo_features.get_desynch_stats( + scEcho.echo_features.get_desynch_stats( synthetic_adata, obs_col="combo_type", layer="L", ) - scEcho.Echo_features.run_null_desynch_test( + scEcho.echo_features.run_null_desynch_test( synthetic_adata, obs_col="combo_type", layer="L", ls=1.0, sigma=0.1, min_cells=10, ) @@ -176,7 +176,7 @@ def test_correctness_run_null_desynch_test_values(synthetic_adata): # ── run_echo_features (orchestrator) ───────────────────────────────────────── def test_smoke_run_echo_features(synthetic_adata): - scEcho.Echo_features.run_echo_features( + scEcho.echo_features.run_echo_features( synthetic_adata, obs_col="combo_type", layers=["L"], sigma=0.1, ls=1.0, min_cells=10, verbose=False, ) @@ -198,14 +198,14 @@ def test_correctness_run_echo_features_matches_pipeline_components(synthetic_ada the three component functions individually with the same parameters. 
""" a2 = synthetic_adata.copy() - scEcho.Echo_features.run_echo_features( + scEcho.echo_features.run_echo_features( synthetic_adata, obs_col="combo_type", layers=["L"], sigma=0.1, ls=1.0, min_cells=10, verbose=False, ) - scEcho.Echo_features.embeddings_predict_layer(a2, ls=1.0, sigma=0.1, layer="L") - scEcho.Echo_features.get_desynch_stats(a2, obs_col="combo_type", layer="L") - scEcho.Echo_features.run_null_desynch_test( + scEcho.echo_features.embeddings_predict_layer(a2, ls=1.0, sigma=0.1, layer="L") + scEcho.echo_features.get_desynch_stats(a2, obs_col="combo_type", layer="L") + scEcho.echo_features.run_null_desynch_test( a2, obs_col="combo_type", layer="L", ls=1.0, sigma=0.1, min_cells=10, ) @@ -222,11 +222,11 @@ def test_correctness_run_echo_features_matches_pipeline_components(synthetic_ada # ── get_reconstruction_results ─────────────────────────────────────────────── def test_get_reconstruction_results_filters_to_group(synthetic_adata): - scEcho.Echo_features.run_echo_features( + scEcho.echo_features.run_echo_features( synthetic_adata, obs_col="combo_type", layers=["L"], sigma=0.1, ls=1.0, min_cells=10, verbose=False, ) - sub = scEcho.Echo_features.get_reconstruction_results( + sub = scEcho.echo_features.get_reconstruction_results( synthetic_adata, layer="L", grouping="combo_type", group="A", ) # all returned columns must reference group "A" @@ -234,20 +234,20 @@ def test_get_reconstruction_results_filters_to_group(synthetic_adata): assert "_combo_type_A" in col, f"unexpected column for group A: {col}" # asking for a group that doesn't exist raises with pytest.raises(KeyError): - scEcho.Echo_features.get_reconstruction_results( + scEcho.echo_features.get_reconstruction_results( synthetic_adata, layer="L", grouping="combo_type", group="Z", ) def test_get_reconstruction_results_min_cells_filter(synthetic_adata): - scEcho.Echo_features.run_echo_features( + scEcho.echo_features.run_echo_features( synthetic_adata, obs_col="combo_type", layers=["L"], sigma=0.1, 
ls=1.0, min_cells=10, verbose=False, ) - full = scEcho.Echo_features.get_reconstruction_results( + full = scEcho.echo_features.get_reconstruction_results( synthetic_adata, layer="L", grouping="combo_type", group="A", ) - filtered = scEcho.Echo_features.get_reconstruction_results( + filtered = scEcho.echo_features.get_reconstruction_results( synthetic_adata, layer="L", grouping="combo_type", group="A", min_cells=10**6, # absurd cutoff ) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 7fccb9b..4ae4993 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -14,9 +14,9 @@ @pytest.fixture def adata_with_desynch_features(synthetic_adata): - """Fixture whose AnnData has already been through the full Echo_features + """Fixture whose AnnData has already been through the full echo_features pipeline so plotting functions can read their expected obs/varm keys.""" - scEcho.Echo_features.run_echo_features( + scEcho.echo_features.run_echo_features( synthetic_adata, obs_col="combo_type", layers=["L"], sigma=0.1, ls=1.0, min_cells=10, verbose=False, )