Skip to content

Commit 5191751

Browse files
authored
Merge pull request #19 from CompOmics/fix-calibration-logic
Fix calibration logic by removing an error, and add multiconformer output to the CSV
2 parents 51cb69b + 3e98325 commit 5191751

File tree

3 files changed

+37
-15
lines changed

3 files changed

+37
-15
lines changed

im2deep/_io_helpers.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,6 @@ def _parse_legacy_format(input_file: str | Path) -> PSMList:
210210
if has_ccs:
211211
metadata = {"CCS": _normalize_ccs_metadata_value(row["CCS"])}
212212

213-
LOGGER.debug(f"Parsed PSM: {peptidoform} with metadata: {metadata}")
214213
precursor = PSM(peptidoform=peptidoform, metadata=metadata, spectrum_id=idx)
215214
list_of_precursors.append(precursor)
216215
except Exception as e:
@@ -402,20 +401,40 @@ def write_output(
402401
ion_mobility : bool, optional
403402
Whether to include ion mobility in the output. Default is False.
404403
"""
404+
is_multi = len(predictions) > 0 and isinstance(predictions[0], np.ndarray)
405405
output_data = []
406406
for idx, psm in enumerate(psm_list):
407-
entry = {
408-
"index": psm.spectrum_id,
409-
"peptidoform": str(psm.peptidoform),
410-
"predicted_CCS": predictions[idx],
411-
}
412-
if ion_mobility:
413-
im_value = ccs2im(
414-
predictions[idx],
415-
psm.peptidoform.theoretical_mz, # type: ignore - already checked charge present
416-
psm.peptidoform.precursor_charge, # type: ignore - already checked charge present
417-
)
418-
entry["predicted_ion_mobility"] = im_value
407+
if is_multi:
408+
ccs_low, ccs_high = sorted(predictions[idx])
409+
entry = {
410+
"index": psm.spectrum_id,
411+
"peptidoform": str(psm.peptidoform),
412+
"predicted_ccs_low": ccs_low,
413+
"predicted_ccs_high": ccs_high,
414+
}
415+
if ion_mobility:
416+
entry["predicted_ion_mobility_low"] = ccs2im(
417+
ccs_low,
418+
psm.peptidoform.theoretical_mz, # type: ignore - already checked charge present
419+
psm.peptidoform.precursor_charge, # type: ignore - already checked charge present
420+
)
421+
entry["predicted_ion_mobility_high"] = ccs2im(
422+
ccs_high,
423+
psm.peptidoform.theoretical_mz, # type: ignore - already checked charge present
424+
psm.peptidoform.precursor_charge, # type: ignore - already checked charge present
425+
)
426+
else:
427+
entry = {
428+
"index": psm.spectrum_id,
429+
"peptidoform": str(psm.peptidoform),
430+
"predicted_CCS": predictions[idx],
431+
}
432+
if ion_mobility:
433+
entry["predicted_ion_mobility"] = ccs2im(
434+
predictions[idx],
435+
psm.peptidoform.theoretical_mz, # type: ignore - already checked charge present
436+
psm.peptidoform.precursor_charge, # type: ignore - already checked charge present
437+
)
419438
output_data.append(entry)
420439

421440
output_df = pd.DataFrame(output_data)

im2deep/calibration.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,10 @@ def get_charge(pf):
440440
target_work["CCS"] = target_work["metadata"].apply(
441441
lambda x: float(x["CCS"]) if isinstance(x, dict) and "CCS" in x else np.nan
442442
)
443+
if "CCS" not in source_work.columns and "metadata" in source_work.columns:
444+
source_work["CCS"] = source_work["metadata"].apply(
445+
lambda x: float(x["CCS"]) if isinstance(x, dict) and "CCS" in x else np.nan
446+
)
443447

444448
source_work["peptide_key"] = source_work["peptidoform"].apply(get_peptide_key)
445449
source_work["charge"] = source_work["peptidoform"].apply(get_charge)

im2deep/core.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ def predict_and_calibrate(
128128
)
129129

130130
if not calibration.is_fitted:
131-
calibration_reference = psm_df_reference if psm_df_reference is not None else psm_df_cal
132131
LOGGER.info("Fitting calibration...")
133132
if any(psm_list_cal["is_decoy"]):
134133
LOGGER.warning(
@@ -137,7 +136,7 @@ def predict_and_calibrate(
137136
)
138137
calibration.fit(
139138
psm_df_cal,
140-
calibration_reference,
139+
psm_df_reference, # None is fine; fit() loads the default reference if needed
141140
multi=multi,
142141
)
143142
else:

0 commit comments

Comments
 (0)