Skip to content

Commit 103bc9b

Browse files
committed
Handle mod on ambiguous AA range + small fixes
1 parent a70bb61 commit 103bc9b

File tree

5 files changed

+75
-25
lines changed

5 files changed

+75
-25
lines changed

mzcore/src/ontology/ontologies.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::sync::LazyLock;
44

55
use context_error::*;
6-
use mzcv::{CVIndex, CVStructure, CVVersion, ControlledVocabulary};
6+
use mzcv::{AccessionCode, CVIndex, CVStructure, CVVersion, ControlledVocabulary};
77
use serde::{Deserialize, Serialize};
88

99
use crate::{
@@ -30,7 +30,7 @@ pub static STATIC_ONTOLOGIES: LazyLock<Ontologies> = LazyLock::new(Ontologies::i
3030
/// let modification = ontologies.get_by_name(&[], "Oxidation").unwrap();
3131
/// assert_eq!(modification.formula(), molecular_formula!(O 1));
3232
/// // or by index from a particular ontology
33-
/// let modification2 = ontologies.unimod().get_by_index(&35).unwrap();
33+
/// let modification2 = ontologies.unimod().get_by_index(&mzcv::AccessionCode::Numeric(35)).unwrap();
3434
/// assert_eq!(modification, modification2);
3535
/// // or search all (or a subset) for fuzzy matches
3636
/// let search = ontologies.search(&[], "Oxidated");
@@ -336,6 +336,48 @@ impl Ontologies {
336336
None
337337
}
338338

339+
/// Find the given index in this ontology.
340+
pub fn get_by_index(
341+
&self,
342+
ontology: Ontology,
343+
id: &AccessionCode,
344+
) -> Option<SimpleModification> {
345+
match ontology {
346+
Ontology::Unimod => {
347+
if let Some(m) = self.unimod.get_by_index(id) {
348+
return Some(m);
349+
}
350+
}
351+
Ontology::Psimod => {
352+
if let Some(m) = self.psimod.get_by_index(id) {
353+
return Some(m);
354+
}
355+
}
356+
Ontology::Xlmod => {
357+
if let Some(m) = self.xlmod.get_by_index(id) {
358+
return Some(m);
359+
}
360+
}
361+
Ontology::Gnome => {
362+
if let Some(m) = self.gnome.get_by_index(id) {
363+
return Some(m);
364+
}
365+
}
366+
Ontology::Resid => {
367+
if let Some(m) = self.resid.get_by_index(id) {
368+
return Some(m);
369+
}
370+
}
371+
Ontology::Custom => {
372+
if let Some(m) = self.custom.get_by_index(id) {
373+
return Some(m);
374+
}
375+
}
376+
}
377+
378+
None
379+
}
380+
339381
/// Get all modifications in the selected ontologies (or in all if the list is empty).
340382
pub fn data(&self, ontologies: &[Ontology]) -> impl Iterator<Item = SimpleModification> {
341383
let ontologies = if ontologies.is_empty() {

mzcore/src/sequence/peptidoform/parse.rs

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -632,70 +632,71 @@ impl PeptidoformIonSet {
632632
while index < chars.len() && index < range.end {
633633
match (c_term, chars[index]) {
634634
(false, b'(') if chars.get(index + 1) == Some(&b'?') => {
635-
if braces_start.is_some() {
635+
if ambiguous_aa.is_some() {
636636
combine_error(
637637
&mut errors,
638638
BoxedError::new(
639639
BasicKind::Error,
640640
"Invalid ambiguous amino acid set",
641-
"Ambiguous amino acid sets cannot be nested within ranged ambiguous modifications",
641+
"Ambiguous amino acid sets cannot be nested within ambiguous amino acid sets",
642642
base_context.clone().add_highlight((0, index, 1)),
643643
),
644644
);
645645
return Err(errors);
646-
}
647-
if ambiguous_aa.is_some() {
646+
} else if braces_start.is_some() {
648647
combine_error(
649648
&mut errors,
650649
BoxedError::new(
651650
BasicKind::Error,
652651
"Invalid ambiguous amino acid set",
653-
"Ambiguous amino acid sets cannot be nested within ambiguous amino acid sets",
652+
"Ambiguous amino acid sets cannot be nested within ranged ambiguous modifications",
654653
base_context.clone().add_highlight((0, index, 1)),
655654
),
656655
);
657656
return Err(errors);
658657
}
658+
659659
ambiguous_aa = Some(ambiguous_aa_counter);
660+
braces_start = Some(peptide.len());
660661
ambiguous_aa_counter = handle!(single errors, ambiguous_aa_counter.checked_add(1).ok_or_else(|| {BoxedError::new(BasicKind::Error,
661662
"Invalid ambiguous amino acid set",
662-
format!("There are too many ambiguous amino acid sets, there can only be {} in one linear peptide", std::num::NonZeroU32::MAX),
663+
format!("There are too many ambiguous amino acid sets, there can only be {} in one linear peptidoform", std::num::NonZeroU32::MAX),
663664
base_context.clone().add_highlight((0, index, 1)))}));
664665
index += 2;
665666
}
666-
(false, b')') if ambiguous_aa.is_some() => {
667-
ambiguous_aa = None;
668-
index += 1;
669-
}
670667
(false, b'(') => {
671-
if braces_start.is_some() {
668+
if ambiguous_aa.is_some() {
672669
combine_error(
673670
&mut errors,
674671
BoxedError::new(
675672
BasicKind::Error,
676673
"Invalid ranged ambiguous modification",
677-
"Ranged ambiguous modifications cannot be nested within ranged ambiguous modifications",
674+
"Ranged ambiguous modifications cannot be nested within ambiguous amino acid sets",
678675
base_context.clone().add_highlight((0, index, 1)),
679676
),
680677
);
681678
return Err(errors);
682-
}
683-
if ambiguous_aa.is_some() {
679+
} else if braces_start.is_some() {
684680
combine_error(
685681
&mut errors,
686682
BoxedError::new(
687683
BasicKind::Error,
688684
"Invalid ranged ambiguous modification",
689-
"Ranged ambiguous modifications cannot be nested within ambiguous amino acid sets",
685+
"Ranged ambiguous modifications cannot be nested within ranged ambiguous modifications",
690686
base_context.clone().add_highlight((0, index, 1)),
691687
),
692688
);
693689
return Err(errors);
694690
}
691+
695692
braces_start = Some(peptide.len());
696693
index += 1;
697694
}
698695
(false, b')') if braces_start.is_some() => {
696+
if ambiguous_aa.is_some() {
697+
ambiguous_aa = None;
698+
}
699+
699700
let start = braces_start.unwrap();
700701
if start == peptide.len() {
701702
combine_error(
@@ -981,6 +982,7 @@ impl PeptidoformIonSet {
981982
);
982983
}
983984

985+
dbg!(&ranged_unknown_position_modifications);
984986
if let Err(errs) = peptide.apply_ranged_unknown_position_modification(
985987
&ranged_unknown_position_modifications,
986988
&ambiguous_lookup,

mzcore/src/sequence/peptidoform/tests/positive.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Id,Example,Source,Key,Notes
1313
11,"EM[Oxidation]EVE[Cation:Mg[II]]ES[Phospho]PEK",spec2_0,"",""
1414
12,"EM[MOD:00719]EVEES[MOD:00046]PEK",spec2_0,"",""
1515
13,"EM[UNIMOD:35]EVEES[UNIMOD:56]PEK",spec2_0,"",""
16-
14,"EM[RESID:AA0581]EVEES[RESID:AA0037]PEK",spec2_0,"",""
16+
14,"EM[RESID:AA0581]EVEES[RESID:AA0037]PEK",spec2_0,"casing_specific",""
1717
15,"EMEVTK[XLMOD:02001#XL1]SESPEK[#XL1]",spec2_0,"",""
1818
16,"EMK[XLMOD:02000#XL1]EVTKSE[XLMOD:02010#XL2]SK[#XL1]PEK[#XL2]AR",spec2_0,"",""
1919
17,"EMEVTK[XLMOD:02001#XL1]SESPEK",spec2_0,"",""
@@ -137,7 +137,7 @@ Id,Example,Source,Key,Notes
137137
135,"PEPM[U:Oxidation]AS[M:O-phospho-L-serine]",spec2_1,"",""
138138
136,"{Phospho#g1}EM[U:Oxidation]EVS[#g1]",spec2_1,"ignore",""
139139
137,"{Sulfo#s}HT[#s]T[#s]S[#s]FRGC[#s]",spec2_1,"ignore",""
140-
138,"EM[R:L-methionine sulfone]EM[RESID:AA0581]",spec2_1,"",""
140+
138,"EM[R:L-methionine sulfone]EM[RESID:AA0581]",spec2_1,"casing_specific",""
141141
139,"(>Heavy chain)EVQLVESG",spec2_1,"casing_specific",""
142142
140,"EVTK[X:formaldehyde]LEK[XLMOD:02041]SEF",spec2_1,"",""
143143
141,"EVTK[X:formaldehyde#XL1]LEK[#XL1]SEF",spec2_1,"",""
@@ -246,7 +246,7 @@ Id,Example,Source,Key,Notes
246246
244,"{HexNAc#g1}PEPN[#g1]ITE",spec2_1,"ignore",""
247247
245,"{Hex#g1}{Sulfo#s}[#g1]-PN[#g1]KS[#s]C[#s]M",spec2_1,"ignore",""
248248
246,"{Glycan:Hex5HexNAc5#g1}PEPSTAT[#g1]IS[#g1]T[#g1]ICS[#g1]S[#g1]T[#g1]RIKES[#g1]IT[#g1]ES[#g1]",spec2_1,"ignore",""
249-
247,"EM[RESID:AA0581]EVEES[RESID:AA0037]PEK",spec2_1,"",""
249+
247,"EM[RESID:AA0581]EVEES[RESID:AA0037]PEK",spec2_1,"casing_specific",""
250250
248,"(>My (very) nice peptide)PEPTIDE", spec2_1, "casing_specific", ""
251251
249,"(>Interesting ]][ yes | [valid] #description_not_tag)PEPTIDE", spec2_1, "casing_specific", ""
252252
250,"(>Trypsin)AANSIPYQVSLNS+(>Keratin)AKEQFERQTA",spec2_1,"casing_specific",""
@@ -368,7 +368,7 @@ Id,Example,Source,Key,Notes
368368
366,"(PEP)[Oxidation#1(0.95)]M[#1(0.05)]AT",example,"just_parse",""
369369
367,"(?VCH)AT",example,"",""
370370
368,"PEPM[U:Oxidation]AS[M:O-phospho-L-serine]",example,"",""
371-
369,"EM[R:L-methionine sulfone]EM[RESID:AA0581]",example,"",""
371+
369,"EM[R:L-methionine sulfone]EM[RESID:AA0581]",example,"casing_specific",""
372372
370,"(>Heavy chain)EVQLVESG",example,"casing_specific",""
373373
371,"ED[MOD:00093#BRANCH]//D[#BRANCH]ATR",example,"casing_specific",""
374374
372,"NEEYN[Glycan:Hex5HexNAc4NeuAc1]K",example,"",""

mzcv/src/curie.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,6 +1286,12 @@ pub enum AccessionCode {
12861286
//Big(ThinStr),
12871287
}
12881288

1289+
impl From<u32> for AccessionCode {
1290+
fn from(value: u32) -> Self {
1291+
Self::Numeric(value)
1292+
}
1293+
}
1294+
12891295
impl Default for AccessionCode {
12901296
fn default() -> Self {
12911297
Self::Numeric(0)

mzcv/src/cv_source.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ use crate::{CVError, Curie, hash_buf_reader::HashBufReader};
2020
/// }
2121
/// ```
2222
pub trait CVSource {
23-
/// Set this constant to true to enable automatic writing of the CV when the cahe is updated
23+
/// Set this constant to true to enable automatic writing of the CV when the cache is updated
2424
const AUTOMATICALLY_WRITE_UNCOMPRESSED: bool = false;
2525
/// The data item that is stored in the CV
2626
type Data: CVData + 'static;
27-
/// The type of the main datastructure to keep all data items (used to build any kind of hierarchy necessary)
27+
/// The type of the main data structure to keep all data items (used to build any kind of hierarchy necessary)
2828
type Structure: CVStructure<Self::Data> + Encode + Decode<()>;
2929
/// The name of the CV, used to create the paths to store intermediate files and caches so has to be valid in that context
3030
fn cv_name() -> &'static str;
@@ -48,7 +48,7 @@ pub trait CVSource {
4848
folder.join(Self::cv_name())
4949
}
5050
/// Parse the textual representation of this CV. Return the version, the data, and possibly a
51-
/// list of warnings or non critical errors encountered while parsing.
51+
/// list of warnings or noncritical errors encountered while parsing.
5252
/// # Errors
5353
/// If the parsing failed.
5454
fn parse(
@@ -183,7 +183,7 @@ pub trait CVStructure<Data>: Default {
183183
Self: 'a;
184184
/// Iterate over all data items
185185
fn iter_data(&self) -> Self::IterData<'_>;
186-
/// Add a sinlge data item to the structure
186+
/// Add a single data item to the structure
187187
fn add(&mut self, data: std::sync::Arc<Data>);
188188
/// The indexing type
189189
type Index: Clone;

0 commit comments

Comments
 (0)