Skip to content

Commit 635c217

Browse files
Refactoring and RT fix
1 parent eda8878 commit 635c217

5 files changed

Lines changed: 126 additions & 124 deletions

File tree

feature_generators/features_retention_time.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
def add_retention_time_features(
99
df_psms: pl.DataFrame,
10-
predictions_deeplc: pl.DataFrame,
10+
predictions_deeplc: pl.DataFrame = None,
1111
filter_rel_rt_error: float = 0.2,
1212
rt_prediction_error_abs: bool = True,
1313
rt_prediction_error_abs_relative: bool = True,
@@ -27,8 +27,10 @@ def add_retention_time_features(
2727
when falling back to absolute error.
2828
2929
Args:
30-
df_psms: PSM DataFrame with 'peptide' and 'rt' columns
31-
predictions_deeplc: DataFrame with 'peptide' and 'rt_predictions' columns
30+
df_psms: PSM DataFrame with 'peptide' and 'rt' columns.
31+
predictions_deeplc: DataFrame with 'peptide' and 'rt_predictions' columns.
32+
If None, df_psms must already contain 'rt_predictions' (e.g. from
33+
predict_deeplc_pl in Stage 1).
3234
filter_rel_rt_error: Maximum relative RT error threshold for filtering (default: 0.2)
3335
rt_prediction_error_abs: Whether to calculate absolute RT error (default: True)
3436
rt_prediction_error_abs_relative: Whether to calculate relative RT error (default: True)
@@ -38,7 +40,8 @@ def add_retention_time_features(
3840
Added columns: rt_predictions, rt_prediction_error_abs, rt_prediction_error_abs_relative
3941
"""
4042

41-
df_psms = df_psms.join(predictions_deeplc, on=["peptide"], how="left")
43+
if predictions_deeplc is not None:
44+
df_psms = df_psms.join(predictions_deeplc, on=["peptide"], how="left")
4245
# max_rt is the latest observed retention time in the experiment. Dividing
4346
# by max_rt converts absolute RT error into a fraction of the experiment's
4447
# total RT range, so the filter_rel_rt_error threshold (e.g. 0.2 = 20%)

mumdia.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,10 +1290,7 @@ def calculate_features(
12901290
)
12911291

12921292
# Step 2: Compute RT error features and filter out poor predictions.
1293-
# BUG: add_retention_time_features() requires (df_psms, predictions_deeplc, ...)
1294-
# but predictions_deeplc is not passed here. This call is missing the 2nd positional arg.
1295-
# It works only if df_psms already has 'rt_predictions' column (joined in Step 1)
1296-
# AND the function signature is updated to make predictions_deeplc optional.
1293+
# predictions_deeplc=None because rt_predictions is already in df_psms from Step 1.
12971294
log_info("Obtaining features retention time...")
12981295
df_psms = add_retention_time_features(df_psms, filter_rel_rt_error=0.15)
12991296

tests/test_config_compatibility.py

Lines changed: 63 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,69 @@
1-
#!/usr/bin/env python3
2-
"""
3-
Test script to verify that both old and new config formats work with the new system.
1+
"""Tests for configuration backwards compatibility between old nested and new flat formats."""
42

5-
This demonstrates that the new system is fully backwards compatible.
6-
"""
3+
import os
4+
5+
import pytest
76

87
from config import load_config_from_json
98

10-
def test_config_format(config_path, format_name):
11-
"""Test loading and using a specific config format."""
12-
print(f"\n{'='*60}")
13-
print(f"🧪 Testing {format_name} Config Format")
14-
print(f"📁 File: {config_path}")
15-
print(f"{'='*60}")
16-
17-
try:
18-
# Load config
19-
config = load_config_from_json(config_path)
20-
print(f"✅ Config loaded successfully!")
21-
22-
# Show basic parameters
23-
print(f"\n📋 Basic Parameters:")
24-
print(f" mzML file: {config.mzml_file}")
25-
print(f" FASTA file: {config.fasta_file}")
26-
print(f" Result dir: {config.result_dir}")
27-
28-
# Show search parameter differences
29-
print(f"\n🔬 Search Parameters:")
30-
initial_config = config.get_initial_search_config()
31-
full_config = config.get_full_search_config()
32-
33-
print(f" Parameter Initial Search Full Search")
34-
print(f" cleave_at {initial_config['database']['enzyme']['cleave_at']:15} {full_config['database']['enzyme']['cleave_at']}")
35-
print(f" deisotope {str(initial_config['deisotope']):15} {str(full_config['deisotope'])}")
36-
print(f" report_psms {str(initial_config['report_psms']):15} {str(full_config['report_psms'])}")
37-
print(f" max_variable_mods {str(initial_config['database']['max_variable_mods']):15} {str(full_config['database']['max_variable_mods'])}")
38-
39-
# Show MuMDIA settings
40-
mumdia_config = config.get_mumdia_config()
41-
print(f"\n📊 MuMDIA Settings:")
42-
print(f" FDR initial search: {config.fdr_init_search}")
43-
print(f" Read initial pickle: {mumdia_config['read_initial_search_pickle']}")
44-
print(f" Write initial pickle: {mumdia_config['write_initial_search_pickle']}")
45-
46-
print(f"\n{format_name} format works perfectly!")
47-
return True
48-
49-
except Exception as e:
50-
print(f"❌ Error testing {format_name} format: {e}")
51-
return False
529

53-
def main():
54-
print("🔧 MuMDIA Config Backwards Compatibility Test")
55-
print("Testing both old (nested) and new (flat) config formats...")
56-
57-
# Test old nested format
58-
old_works = test_config_format("configs/config.json", "Legacy/Old Nested")
59-
60-
# Test new flat format
61-
new_works = test_config_format("configs/config_simple.json", "New Simplified Flat")
62-
63-
print(f"\n{'='*60}")
64-
print("🎯 Test Summary")
65-
print(f"{'='*60}")
66-
print(f"Legacy config (nested): {'✅ PASS' if old_works else '❌ FAIL'}")
67-
print(f"New config (flat): {'✅ PASS' if new_works else '❌ FAIL'}")
68-
print(f"Backwards compatibility: {'✅ MAINTAINED' if old_works and new_works else '❌ BROKEN'}")
69-
70-
if old_works and new_works:
71-
print(f"\n🎉 SUCCESS: Both config formats work!")
72-
print(f" • Users can keep using their existing config.json files")
73-
print(f" • Users can also switch to the new simplified format")
74-
print(f" • The new system automatically detects and converts formats")
75-
print(f"\nTo run MuMDIA:")
76-
print(f" python run.py configs/config.json # Old format")
77-
print(f" python run.py configs/config_simple.json # New format")
78-
else:
79-
print(f"\n❌ FAILURE: Config compatibility is broken!")
10+
@pytest.mark.unit
11+
class TestConfigCompatibility:
12+
"""Verify that both old and new config formats load correctly."""
13+
14+
@pytest.fixture
15+
def legacy_config_path(self):
16+
path = "configs/config.json"
17+
if not os.path.exists(path):
18+
pytest.skip("configs/config.json not found")
19+
return path
20+
21+
@pytest.fixture
22+
def flat_config_path(self):
23+
path = "configs/config_simple.json"
24+
if not os.path.exists(path):
25+
pytest.skip("configs/config_simple.json not found")
26+
return path
27+
28+
def _check_config(self, config):
29+
"""Common assertions for any loaded config."""
30+
assert hasattr(config, "mzml_file")
31+
assert hasattr(config, "fasta_file")
32+
assert hasattr(config, "result_dir")
33+
34+
initial = config.get_initial_search_config()
35+
full = config.get_full_search_config()
36+
37+
assert "database" in initial
38+
assert "database" in full
39+
assert "enzyme" in initial["database"]
40+
assert "report_psms" in initial
41+
42+
mumdia = config.get_mumdia_config()
43+
assert "read_initial_search_pickle" in mumdia
44+
assert "write_deeplc_pickle" in mumdia
45+
46+
def test_legacy_nested_format(self, legacy_config_path):
47+
"""Load the old nested (sage_basic / sage / mumdia) format."""
48+
config = load_config_from_json(legacy_config_path)
49+
self._check_config(config)
50+
51+
def test_flat_format(self, flat_config_path):
52+
"""Load the new flat format with _initial_search / _full_search overrides."""
53+
config = load_config_from_json(flat_config_path)
54+
self._check_config(config)
55+
56+
def test_override_mechanism(self, flat_config_path):
57+
"""Verify that search-stage overrides produce different configs."""
58+
config = load_config_from_json(flat_config_path)
59+
initial = config.get_initial_search_config()
60+
full = config.get_full_search_config()
8061

81-
if __name__ == "__main__":
82-
main()
62+
# At least one parameter should differ between stages
63+
differs = (
64+
initial["report_psms"] != full["report_psms"]
65+
or initial["deisotope"] != full["deisotope"]
66+
or initial["database"]["enzyme"]["cleave_at"]
67+
!= full["database"]["enzyme"]["cleave_at"]
68+
)
69+
assert differs, "Initial and full search configs should have at least one difference"

tests/test_diann_features.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def load_test_data():
296296
return None
297297

298298

299-
def test_single_precursor(data, precursor_id="GYINSLGALTGGQALQQAK/2"):
299+
def check_single_precursor(data, precursor_id="GYINSLGALTGGQALQQAK/2"):
300300
"""Test feature generation for a single precursor."""
301301

302302
logger.info(f"Testing feature generation for precursor: {precursor_id}")
@@ -379,7 +379,7 @@ def print_feature_summary(features):
379379
print("\n" + "=" * 50)
380380

381381

382-
def test_multiple_precursors(data, max_precursors=5):
382+
def check_multiple_precursors(data, max_precursors=5):
383383
"""Test feature generation for multiple precursors."""
384384

385385
logger.info(f"Testing feature generation for up to {max_precursors} precursors")
@@ -392,7 +392,7 @@ def test_multiple_precursors(data, max_precursors=5):
392392
for precursor_id in unique_precursors:
393393
try:
394394
logger.info(f"Processing {precursor_id}")
395-
features = test_single_precursor(data, precursor_id)
395+
features = check_single_precursor(data, precursor_id)
396396
if features:
397397
results[precursor_id] = features
398398
else:
@@ -436,7 +436,7 @@ def benchmark_performance(data, num_tests=10):
436436
start_time = time.time()
437437

438438
try:
439-
features = test_single_precursor(data, precursor_id)
439+
features = check_single_precursor(data, precursor_id)
440440
if features:
441441
elapsed = time.time() - start_time
442442
times.append(elapsed)
@@ -1023,7 +1023,7 @@ def main_sequential():
10231023

10241024
# Test single precursor
10251025
print("\n1. Testing single precursor...")
1026-
features = test_single_precursor(data)
1026+
features = check_single_precursor(data)
10271027

10281028
if features:
10291029
print_feature_summary(features)

tests/test_workflow_integration.py

Lines changed: 50 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -41,53 +41,68 @@ class TestWorkflowIntegration:
4141

4242
@pytest.mark.skipif(not RUN_MODULE_AVAILABLE, reason="run module not available")
4343
@pytest.mark.integration
44-
def test_argument_parsing_integration(self):
45-
"""Test command line argument parsing and validation."""
46-
with patch(
47-
"sys.argv",
48-
["run.py", "--mzml_file", "test.mzML", "--result_dir", "test_results"],
49-
):
50-
parser, args = run.parse_arguments()
51-
52-
assert hasattr(args, "mzml_file")
53-
assert hasattr(args, "result_dir")
54-
assert args.mzml_file == "test.mzML"
55-
assert args.result_dir == "test_results"
44+
def test_config_loading_integration(self):
45+
"""Test loading config from JSON and generating search configs."""
46+
from config import MuMDIAConfig, load_config_from_json
5647

57-
@pytest.mark.skipif(not RUN_MODULE_AVAILABLE, reason="run module not available")
58-
@pytest.mark.integration
59-
def test_config_modification_workflow(self):
60-
"""Test configuration modification and validation workflow."""
61-
# Create a temporary config file
48+
# Create a temporary config file in the new flat format
6249
config_data = {
63-
"mumdia": {"fdr_init_search": 0.01, "min_occurrences": 1},
64-
"sage": {"database": {"fasta": "test.fasta"}},
50+
"mzml_file": "test.mzML",
51+
"fasta_file": "test.fasta",
52+
"result_dir": "test_results",
53+
"fdr_init_search": 0.01,
6554
}
6655

67-
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
56+
with tempfile.NamedTemporaryFile(
57+
mode="w", suffix=".json", delete=False
58+
) as f:
6859
json.dump(config_data, f)
6960
config_path = f.name
7061

7162
try:
72-
with tempfile.TemporaryDirectory() as temp_dir:
73-
# Mock parser and args with proper defaults
74-
parser = Mock()
75-
parser._actions = []
63+
config = load_config_from_json(config_path)
64+
assert config.mzml_file == "test.mzML"
65+
assert config.result_dir == "test_results"
66+
assert config.fdr_init_search == 0.01
67+
68+
initial = config.get_initial_search_config()
69+
full = config.get_full_search_config()
70+
assert "database" in initial
71+
assert "database" in full
72+
finally:
73+
os.unlink(config_path)
7674

77-
# Create a proper args namespace without sentinel objects
78-
args = argparse.Namespace(
79-
fdr_init_search=0.05, min_occurrences=1, database=None, fasta=None
80-
)
75+
@pytest.mark.skipif(not RUN_MODULE_AVAILABLE, reason="run module not available")
76+
@pytest.mark.integration
77+
def test_legacy_config_conversion(self):
78+
"""Test that legacy nested configs are auto-converted."""
79+
from config import load_config_from_json
8180

82-
# Test config modification
83-
new_config_path = run.modify_config(config_path, temp_dir, parser, args)
81+
config_data = {
82+
"sage_basic": {
83+
"database": {"fasta": "test.fasta", "enzyme": {"cleave_at": "KR"}},
84+
"deisotope": False,
85+
"report_psms": 5,
86+
},
87+
"sage": {
88+
"database": {"fasta": "test.fasta", "enzyme": {"cleave_at": "$"}},
89+
"deisotope": True,
90+
"report_psms": 12,
91+
},
92+
"mumdia": {"fdr_init_search": 0.01},
93+
}
8494

85-
# Verify new config was created
86-
assert os.path.exists(new_config_path)
95+
with tempfile.NamedTemporaryFile(
96+
mode="w", suffix=".json", delete=False
97+
) as f:
98+
json.dump(config_data, f)
99+
config_path = f.name
87100

88-
with open(new_config_path) as f:
89-
updated_config = json.load(f)
90-
assert "mumdia" in updated_config
101+
try:
102+
config = load_config_from_json(config_path)
103+
mumdia_config = config.get_mumdia_config()
104+
assert "fdr_init_search" in mumdia_config
105+
assert mumdia_config["fdr_init_search"] == 0.01
91106
finally:
92107
os.unlink(config_path)
93108

0 commit comments

Comments
 (0)