From 096fa2a5b452d1fa1f8e874fc4ad2e4435e8fbff Mon Sep 17 00:00:00 2001
From: Dominik <dotto@fredhutch.org>
Date: Thu, 28 May 2026 18:09:19 -0700
Subject: [PATCH] align Mahalanobis and DA PTP with manuscript; bump 0.8.0

Three statistical corrections + one default-value harmonization;
all called out in the manuscript-vs-implementation review.

* Mahalanobis denominator sums the two posterior covariances
  (cov1 + cov2) instead of averaging them. The manuscript defines
  D(a,b) = sqrt((mu_a - mu_b)^T (Sigma_a + Sigma_b)^(-1) (mu_a - mu_b)),
  i.e. the variance of the difference of two independent posterior
  estimators. The GP-only branch in differential_expression.py was
  computing (cov1 + cov2) / 2 even though the sample-variance branch
  at the same call site was already summing.
  Effect: D shrinks by sqrt(2) in the GP-only regime. Relative
  rankings unchanged; FDR re-calibrates against the null. The
  resource-estimation comment is brought in line too.

* Differential-abundance posterior tail probability is one-sided:
  PTP = Phi(-|z|), matching the manuscript prose ("probability that
  the true density difference has the opposite sign to the estimated
  change") and the one-sided formula. The implementation was
  emitting 2 * Phi(-|z|) via a spurious + np.log(2).
  Effect: PTPs are halved; neg_log10_fold_change_ptp increases by
  log10(2). A previously-published cutoff of PTP < 1e-3 corresponded
  to |z| >= 3.29 (two-sided) and now corresponds to |z| >= 3.09
  (one-sided). Re-tune any ptp_threshold chosen against 0.7.0.

* use_empirical_variance defaults to False everywhere. The
  recommended kompot.de() path already defaulted to False via
  GPSettings; the deprecated wrappers (compute_differential_expression,
  compute_smoothed_expression), DifferentialExpression.__init__,
  ExpressionModel.__init__, smooth_expression(gp=None), and
  smooth_config_template.yaml all defaulted to True. Now consistent
  with the manuscript's "empirical variance is disabled by default"
  statement. Opt in via use_empirical_variance=True / GPSettings.

Regression coverage in tests/test_audit_fixes.py:
* monkeypatches compute_mahalanobis_distances to capture the
  combined-covariance kwarg from a controlled-kernel fit and
  asserts equality with cov1 + cov2 (not the average);
* replicates the ln_ptp formula and asserts equality with the
  closed-form Phi(-|z|) both directly and end-to-end through
  DifferentialAbundance.fit().predict();
* introspects every public entry point's signature and asserts
  the default is False; also parses both CLI YAML templates.

tests/test_empirical_variance.py::test_default_is_on was the only
pre-existing test pinning the old default; it has been flipped and
renamed test_default_is_off.

pyproject.toml and kompot/version.py bumped to 0.8.0; CHANGELOG
entry under [0.8.0] - 2026-05-28 documents the migration notes
above. Release tag is not cut by this commit.
---
 CHANGELOG.md                                  |  10 +
 kompot/anndata/differential_expression.py     |   2 +-
 kompot/anndata/smooth.py                      |   4 +-
 .../cli/templates/smooth_config_template.yaml |   2 +-
 kompot/differential/differential_abundance.py |   5 +-
 .../differential/differential_expression.py   |   8 +-
 kompot/differential/expression_model.py       |   3 +-
 kompot/resource_estimation.py                 |   4 +-
 kompot/version.py                             |   2 +-
 pyproject.toml                                |   2 +-
 tests/test_audit_fixes.py                     | 275 ++++++++++++++++++
 tests/test_empirical_variance.py              |  15 +-
 12 files changed, 314 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_audit_fixes.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a01d1f..6f6a739 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
+## [0.8.0] - 2026-05-28
+
+### Behavior changes
+
+These three changes correct discrepancies between the implementation and the manuscript that describes Kompot's statistics. Two are numerical scale shifts that preserve relative rankings; the third is a default-value harmonization. Re-tune any absolute thresholds calibrated against 0.7.0.
+
+ - **Mahalanobis denominator now sums covariances**: the gene-wise Mahalanobis distance used by `DifferentialExpression.predict(compute_mahalanobis=True)` now computes the posterior combined covariance as `Σ_a + Σ_b` (the variance of the difference of two independent posterior estimators) instead of `(Σ_a + Σ_b) / 2`. Matches the manuscript definition `D(a,b) = sqrt((μ_a − μ_b)^T (Σ_a + Σ_b)^(-1) (μ_a − μ_b))`. **Effect**: absolute Mahalanobis distances in the GP-only regime contract by a factor of `√2` (and `D²` by 2). Relative rankings of genes are unchanged because the same scale factor applies everywhere, and FDR thresholds re-calibrate against the null. The sample-variance branch was already correctly summed.
+ - **Differential-abundance posterior tail probability is now one-sided**: `DifferentialAbundance.predict()` returns `PTP = Φ(−|z|)` (one-sided), matching the manuscript definition `PTP(x_i) = Φ(−|Δ(x_i)|/√(σ_a² + σ_b²))`. Previous releases returned `2·Φ(−|z|)` (two-sided). **Effect**: numeric PTP values are halved relative to 0.7.0; equivalently, `neg_log10_fold_change_ptp` increases by `log10(2) ≈ 0.301`. The threshold `PTP < 1e-3` previously corresponded to `|z| ≥ 3.29` and now corresponds to `|z| ≥ 3.09`. Re-tune any hard-coded `ptp_threshold` chosen against 0.7.0 if you want to preserve the old call-rate.
+ - **`use_empirical_variance` default is now `False` everywhere**: harmonized across `kompot.de()` (already False), `DifferentialExpression.__init__`, `ExpressionModel.__init__`, `kompot.smooth_expression()`, the deprecated `compute_differential_expression()` and `compute_smoothed_expression()` wrappers, and the CLI `smooth_config_template.yaml`. Previously these four entry points defaulted to `True`, inconsistent with both the recommended `kompot.de()` path and the manuscript's "empirical variance is disabled by default" statement. Code that relies on empirical variance must now pass `use_empirical_variance=True` explicitly.
+
 ### New features
 
  - **`--dry-run` flag for `kompot de` CLI**: estimates memory, disk, and output field requirements without running the analysis. Outputs machine-parseable JSON to stdout and a human-readable report to stderr. Exit code reflects feasibility.
diff --git a/kompot/anndata/differential_expression.py b/kompot/anndata/differential_expression.py
index 50d6fc8..09afd3a 100644
--- a/kompot/anndata/differential_expression.py
+++ b/kompot/anndata/differential_expression.py
@@ -751,7 +751,7 @@ def compute_differential_expression(
     return_full_results: bool = False,
     store_posterior_covariance: bool = False,
     allow_single_condition_variance: bool = False,
-    use_empirical_variance: bool = True,
+    use_empirical_variance: bool = False,
     progress: bool = True,
     null_genes="auto",
     null_seed=42,
diff --git a/kompot/anndata/smooth.py b/kompot/anndata/smooth.py
index 4e52ae3..8e07946 100644
--- a/kompot/anndata/smooth.py
+++ b/kompot/anndata/smooth.py
@@ -109,7 +109,7 @@ def smooth_expression(
     ls = gp.ls if gp is not None else None
     ls_factor = gp.ls_factor if gp is not None else 10.0
     n_landmarks = gp.n_landmarks if gp is not None else 5000
-    use_empirical_variance = gp.use_empirical_variance if gp is not None else True
+    use_empirical_variance = gp.use_empirical_variance if gp is not None else False
     eps = gp.eps if gp is not None else 1e-8
     random_state = gp.random_state if gp is not None else None
     batch_size = gp.batch_size if gp is not None else 500
@@ -393,7 +393,7 @@ def compute_smoothed_expression(
     sigma: float = 1.0,
     ls: Optional[float] = None,
     ls_factor: float = 10.0,
-    use_empirical_variance: bool = True,
+    use_empirical_variance: bool = False,
     eps: float = 1e-8,
     random_state: Optional[int] = None,
     batch_size: int = 500,
diff --git a/kompot/cli/templates/smooth_config_template.yaml b/kompot/cli/templates/smooth_config_template.yaml
index 7561499..a9cfd53 100644
--- a/kompot/cli/templates/smooth_config_template.yaml
+++ b/kompot/cli/templates/smooth_config_template.yaml
@@ -26,7 +26,7 @@ n_landmarks: 5000              # Number of landmarks for Nystrom approximation
 sample_col: null               # Column in adata.obs with sample IDs
 
 # Empirical variance (heteroscedastic noise):
-use_empirical_variance: true   # Estimate per-gene noise from GP residuals
+use_empirical_variance: false  # Estimate per-gene noise from GP residuals
 
 # GP kernel parameters:
 sigma: 1.0                     # Noise level for function estimator
diff --git a/kompot/differential/differential_abundance.py b/kompot/differential/differential_abundance.py
index bd9e0fe..17ab1ec 100644
--- a/kompot/differential/differential_abundance.py
+++ b/kompot/differential/differential_abundance.py
@@ -603,11 +603,12 @@ def compute_sample_variance2(X_batch):
         sd = np.sqrt(log_fold_change_uncertainty + self.eps)
         log_fold_change_zscore = log_fold_change / sd
 
-        # Compute PTP (Posterior Tail Probability) in natural log (base e)
+        # Compute PTP (Posterior Tail Probability) in natural log (base e).
+        # One-sided per manuscript: PTP = Φ(−|z|) = min(Φ(z), Φ(−z)) for real z.
         ln_ptp = np.minimum(
             normal.logcdf(log_fold_change_zscore),
             normal.logcdf(-log_fold_change_zscore),
-        ) + np.log(2)
+        )
 
         # Convert from natural log to negative log10 (for better volcano plot visualization)
         # ln_ptp is a log of a small value (typically < 1), so it's negative
diff --git a/kompot/differential/differential_expression.py b/kompot/differential/differential_expression.py
index 6e20b21..c04a4e5 100644
--- a/kompot/differential/differential_expression.py
+++ b/kompot/differential/differential_expression.py
@@ -42,7 +42,7 @@ def __init__(
         self,
         n_landmarks: Optional[int] = None,
         use_sample_variance: Optional[bool] = None,
-        use_empirical_variance: bool = True,
+        use_empirical_variance: bool = False,
         eps: float = 1e-8,  # Increased default epsilon for better numerical stability
         jit_compile: bool = False,
         function_predictor1: Optional[Any] = None,
@@ -625,8 +625,10 @@ def compute_mahalanobis_distances(
             # Points for sample variance computation
             variance_points = X
 
-        # Average the covariance matrices
-        combined_cov = (cov1 + cov2) / 2
+        # Sum the covariance matrices: Σ_a + Σ_b is the variance of the
+        # difference of independent posterior estimators, matching the
+        # Mahalanobis denominator defined in the manuscript.
+        combined_cov = cov1 + cov2
         del cov1, cov2
 
         # For sample variance, use diag=False to get full covariance matrices
diff --git a/kompot/differential/expression_model.py b/kompot/differential/expression_model.py
index af8e061..b25d16d 100644
--- a/kompot/differential/expression_model.py
+++ b/kompot/differential/expression_model.py
@@ -112,6 +112,7 @@ class ExpressionModel:
         Number of landmarks for Nystrom approximation.
     use_empirical_variance : bool
         Whether to estimate per-gene empirical variance from GP residuals.
+        By default False.
     eps : float
         Small constant for numerical stability.
     random_state : int, optional
@@ -135,7 +136,7 @@ class ExpressionModel:
     def __init__(
         self,
         n_landmarks: Optional[int] = None,
-        use_empirical_variance: bool = True,
+        use_empirical_variance: bool = False,
         eps: float = 1e-8,
         random_state: Optional[int] = None,
         batch_size: int = 500,
diff --git a/kompot/resource_estimation.py b/kompot/resource_estimation.py
index f4a36fa..e955010 100644
--- a/kompot/resource_estimation.py
+++ b/kompot/resource_estimation.py
@@ -898,10 +898,10 @@ def estimate_differential_expression_resources(
         shape=cov_matrix_shape,
     )
 
-    # Combined covariance matrix (averaged)
+    # Combined covariance matrix (sum: Σ_a + Σ_b)
     plan.add_requirement(
         "Combined covariance matrix",
-        cov_size,  # (cov1 + cov2) / 2
+        cov_size,  # cov1 + cov2
         "memory",
         shape=cov_matrix_shape,
     )
diff --git a/kompot/version.py b/kompot/version.py
index 26a803c..028b8b0 100644
--- a/kompot/version.py
+++ b/kompot/version.py
@@ -1,3 +1,3 @@
 """Version information."""
 
-__version__ = "0.7.0"
+__version__ = "0.8.0"
diff --git a/pyproject.toml b/pyproject.toml
index fc94426..fc8d333 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,7 +49,7 @@ ignore = ["E203", "W503"]
 
 [project]
 name = "kompot"
-version = "0.7.0"
+version = "0.8.0"
 description = "Differential abundance and gene expression analysis using Mahalanobis distance with JAX backend"
 readme = "README.md"
 authors = [
diff --git a/tests/test_audit_fixes.py b/tests/test_audit_fixes.py
new file mode 100644
index 0000000..eb74a25
--- /dev/null
+++ b/tests/test_audit_fixes.py
@@ -0,0 +1,275 @@
+"""Regression tests pinning manuscript-aligned statistical behavior.
+
+These tests guard the v0.8.0 corrections so the implementation cannot
+silently drift away from the manuscript's definitions:
+
+* Mahalanobis denominator must SUM covariances (not average) so that
+  ``D(a,b) = sqrt((mu_a - mu_b)^T (Sigma_a + Sigma_b)^(-1) (mu_a - mu_b))``.
+* DA posterior tail probability must be ONE-sided: ``Phi(-|z|)``.
+* ``use_empirical_variance`` must default to ``False`` at every
+  publicly-exposed entry point (the manuscript states that empirical
+  variance is disabled by default).
+"""
+
+import inspect
+
+import numpy as np
+import pytest
+
+
+# -----------------------------------------------------------------------------
+# Mahalanobis denominator is Σ_a + Σ_b (sum), not the average
+# -----------------------------------------------------------------------------
+
+
+class TestMahalanobisDenominatorIsSum:
+    """The covariance denominator in the gene-wise Mahalanobis distance
+    is the *sum* of the two posterior covariance matrices.
+    """
+
+    def test_combined_cov_equals_sum_via_compute_mahalanobis_distances(
+        self, monkeypatch
+    ):
+        """Capture the ``combined_cov`` argument that
+        ``DifferentialExpression.compute_mahalanobis_distances`` passes
+        into the underlying ``compute_mahalanobis_distances`` utility
+        and assert it equals ``cov1 + cov2`` (not ``(cov1+cov2)/2``).
+        """
+        from kompot.differential import DifferentialExpression
+        from kompot.differential import differential_expression as de_module
+
+        captured = {}
+
+        def fake_compute(
+            diff_values,
+            covariance=None,
+            batch_size=500,
+            jit_compile=False,
+            progress=False,
+            eps=1e-10,
+            diagonal_variance=None,
+            **_kwargs,
+        ):
+            captured["combined_cov"] = np.asarray(covariance)
+            n_genes = np.asarray(diff_values).shape[0]
+            return np.zeros(n_genes, dtype=float)
+
+        monkeypatch.setattr(
+            de_module, "compute_mahalanobis_distances", fake_compute
+        )
+
+        # Synthetic predictors with controllable covariance kernels:
+        # cov1 returns 2*I, cov2 returns 3*I, so cov1+cov2 = 5*I and the
+        # (buggy) average would be 2.5*I.
+        class _Pred:
+            def __init__(self, scale):
+                self.scale = scale
+
+            def covariance(self, X, diag=False):
+                k = X.shape[0]
+                return self.scale * np.eye(k)
+
+            def __call__(self, X):
+                # Return an (n_cells, n_genes) zero-mean expression so
+                # downstream `fold_change_subset` is well-defined.
+                return np.zeros((X.shape[0], 3), dtype=float)
+
+        de = DifferentialExpression(
+            n_landmarks=None,
+            use_sample_variance=False,
+            use_empirical_variance=False,
+            function_predictor1=_Pred(2.0),
+            function_predictor2=_Pred(3.0),
+        )
+
+        X_new = np.random.RandomState(0).randn(8, 4)
+        de.compute_mahalanobis_distances(X_new, use_landmarks=False, progress=False)
+
+        combined_cov = captured["combined_cov"]
+        expected_sum = 5.0 * np.eye(X_new.shape[0])
+        np.testing.assert_allclose(
+            combined_cov,
+            expected_sum,
+            rtol=1e-12,
+            atol=0,
+            err_msg=(
+                "Regression: combined posterior covariance should be "
+                "cov1 + cov2 (= 5*I here), got something else. The pre-"
+                "0.8.0 (buggy) value would have been 2.5*I (= "
+                "(cov1 + cov2) / 2)."
+            ),
+        )
+
+
+# -----------------------------------------------------------------------------
+# DA PTP is one-sided: Phi(-|z|), not 2*Phi(-|z|)
+# -----------------------------------------------------------------------------
+
+
+class TestDifferentialAbundancePTPOneSided:
+    """The differential-abundance posterior tail probability matches
+    the one-sided manuscript definition ``PTP = Phi(-|z|)``.
+    """
+
+    def test_ptp_one_sided_synthetic_z(self):
+        from scipy.stats import norm
+
+        # Replicate the exact ln_ptp computation from
+        # kompot.differential.differential_abundance, fed with controlled
+        # z-scores so we can compare against the closed-form one-sided
+        # tail probability.
+        import jax.scipy.stats.norm as normal
+
+        z = np.array([-3.0, -1.5, -0.5, 0.0, 0.5, 1.5, 3.0])
+
+        ln_ptp = np.minimum(
+            np.asarray(normal.logcdf(z)),
+            np.asarray(normal.logcdf(-z)),
+        )
+        ptp = np.exp(ln_ptp)
+
+        expected_one_sided = norm.cdf(-np.abs(z))
+        np.testing.assert_allclose(
+            ptp,
+            expected_one_sided,
+            rtol=1e-10,
+            atol=1e-12,
+            err_msg=(
+                "Regression: PTP should be the one-sided tail Phi(-|z|). "
+                "Pre-0.8.0 code emitted 2*Phi(-|z|) (two-sided)."
+            ),
+        )
+
+        # And explicitly that it is NOT the two-sided variant
+        two_sided = 2.0 * norm.cdf(-np.abs(z))
+        # Allow the symmetric `z == 0` boundary case (where both sides
+        # collapse to 0.5 and 1.0 respectively) by checking the strict
+        # off-axis values.
+        nonzero = z != 0
+        assert np.all(
+            np.abs(ptp[nonzero] - two_sided[nonzero]) > 1e-3
+        ), "PTP unexpectedly equals 2*Phi(-|z|) (two-sided)."
+
+    def test_da_predict_emits_one_sided_ptp(self):
+        """End-to-end: fit DA on a clearly-separated synthetic pair and
+        verify the recovered PTP at each evaluation point equals
+        ``Phi(-|z|)`` computed from the same fit's z-score, not twice
+        that value.
+        """
+        from scipy.stats import norm
+        from kompot.differential import DifferentialAbundance
+
+        rng = np.random.RandomState(42)
+        X1 = rng.randn(80, 3)
+        X2 = rng.randn(80, 3) + 0.4
+
+        da = DifferentialAbundance()
+        da.fit(X1, X2)
+
+        X_eval = np.vstack([X1[:20], X2[:20]])
+        out = da.predict(X_eval, progress=False)
+
+        z = np.asarray(out["log_fold_change_zscore"])
+        neg_log10_ptp = np.asarray(out["neg_log10_fold_change_ptp"])
+        ptp = 10.0 ** (-neg_log10_ptp)
+
+        expected = norm.cdf(-np.abs(z))
+        np.testing.assert_allclose(
+            ptp,
+            expected,
+            rtol=1e-4,
+            atol=1e-6,
+            err_msg=(
+                "Regression: PTP returned by DifferentialAbundance."
+                "predict() does not match the one-sided Phi(-|z|)."
+            ),
+        )
+
+
+# -----------------------------------------------------------------------------
+# use_empirical_variance defaults to False at every public entry point
+# -----------------------------------------------------------------------------
+
+
+class TestUseEmpiricalVarianceDefaultIsFalse:
+    """Every publicly-exposed entry point that accepts
+    ``use_empirical_variance`` must default to ``False`` (matching the
+    manuscript's "empirical variance is disabled by default" statement).
+    """
+
+    def _default_for(self, callable_obj, param_name="use_empirical_variance"):
+        sig = inspect.signature(callable_obj)
+        assert param_name in sig.parameters, (
+            f"{callable_obj.__qualname__} does not expose {param_name}"
+        )
+        param = sig.parameters[param_name]
+        assert param.default is not inspect.Parameter.empty, (
+            f"{callable_obj.__qualname__} parameter {param_name} has "
+            f"no default value"
+        )
+        return param.default
+
+    def test_gpsettings_default_is_false(self):
+        from kompot.settings import GPSettings
+
+        assert GPSettings().use_empirical_variance is False
+
+    def test_differential_expression_init_default_is_false(self):
+        from kompot.differential import DifferentialExpression
+
+        assert (
+            self._default_for(DifferentialExpression.__init__) is False
+        )
+
+    def test_expression_model_init_default_is_false(self):
+        from kompot.differential.expression_model import ExpressionModel
+
+        assert self._default_for(ExpressionModel.__init__) is False
+
+    def test_deprecated_compute_differential_expression_default_is_false(self):
+        from kompot.anndata.differential_expression import (
+            compute_differential_expression,
+        )
+
+        assert self._default_for(compute_differential_expression) is False
+
+    def test_deprecated_compute_smoothed_expression_default_is_false(self):
+        from kompot.anndata.smooth import compute_smoothed_expression
+
+        assert self._default_for(compute_smoothed_expression) is False
+
+    def test_smooth_config_template_default_is_false(self):
+        import pathlib
+
+        import yaml
+
+        import kompot
+
+        template = (
+            pathlib.Path(kompot.__file__).parent
+            / "cli"
+            / "templates"
+            / "smooth_config_template.yaml"
+        )
+        cfg = yaml.safe_load(template.read_text())
+        assert cfg["use_empirical_variance"] is False
+
+    def test_de_config_template_default_is_false(self):
+        import pathlib
+
+        import yaml
+
+        import kompot
+
+        template = (
+            pathlib.Path(kompot.__file__).parent
+            / "cli"
+            / "templates"
+            / "de_config_template.yaml"
+        )
+        cfg = yaml.safe_load(template.read_text())
+        assert cfg["use_empirical_variance"] is False
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_empirical_variance.py b/tests/test_empirical_variance.py
index f8738ab..903e131 100644
--- a/tests/test_empirical_variance.py
+++ b/tests/test_empirical_variance.py
@@ -589,8 +589,15 @@ def test_results_with_empirical_variance(self, small_adata, fast_de_params):
         assert model.empirical_variance_predictor1 is not None
         assert model.empirical_variance_predictor2 is not None
 
-    def test_default_is_on(self, tiny_adata, fast_de_params):
-        """Default should be use_empirical_variance=True."""
+    def test_default_is_off(self, tiny_adata, fast_de_params):
+        """Default should be ``use_empirical_variance=False`` everywhere.
+
+        The deprecated ``compute_differential_expression`` wrapper
+        previously defaulted to ``True``, disagreeing with the
+        manuscript ("empirical variance is disabled by default") and
+        with the recommended ``kompot.de()`` path. v0.8.0 harmonizes
+        all public entry points to ``False``.
+        """
         from kompot.anndata.differential_expression import (
             compute_differential_expression,
         )
@@ -606,8 +613,8 @@ def test_default_is_on(self, tiny_adata, fast_de_params):
         )
 
         model = result["model"]
-        assert model.use_empirical_variance is True
-        assert model.empirical_variance_predictor1 is not None
+        assert model.use_empirical_variance is False
+        assert model.empirical_variance_predictor1 is None
 
 
 # ===== Leverage correction =====