Skip to content

Commit 800ef04

Browse files
committed
Merge branch 'main' of https://github.com/rusteomics/mzcore
2 parents 66aad5f + 14a59e6 commit 800ef04

File tree

8 files changed

+66
-32
lines changed

8 files changed

+66
-32
lines changed

imgt/src/species.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,23 +118,28 @@ macro_rules! species {
118118
fn from_str(s: &str) -> Result<Self, Self::Err> {
119119
let s = s.trim().to_lowercase();
120120
for (name, species) in SPECIES_PARSE_LIST {
121-
if name.to_lowercase() == s {
121+
if name.eq_ignore_ascii_case(&s) {
122122
return Ok(*species);
123123
}
124124
}
125125

126-
// let options: Vec<String> = SPECIES_PARSE_LIST.iter()
127-
// .map(|option| option.0.to_lowercase())
128-
// .collect();
129-
// let options: Vec<&str> = options.iter()
130-
// .map(|option| option.as_str())
131-
// .collect();
126+
let mut options: Vec<(usize, Self)> = Vec::with_capacity(5);
127+
for (name, species) in SPECIES_PARSE_LIST {
128+
let d = mzcv::text::levenshtein_distance(&name.to_lowercase(), &s);
129+
let index = options.binary_search_by(|a| a.0.cmp(&d)).unwrap_or_else(|i| i);
130+
if index < 5 {
131+
if options.len() == 5 {
132+
options.remove(4);
133+
}
134+
options.insert(index, (d, *species));
135+
}
136+
}
132137

133138
Err(BoxedError::new(BasicKind::Error,
134139
"Unknown species name",
135140
"The provided name could not be recognised as a species name.",
136-
Context::show(s.as_str())
137-
).to_owned() // TODO: maybe figure out a way to use mzcv to generate fuzzy matches here
141+
Context::show(s.as_str()).to_owned()
142+
).suggestions(options.into_iter().map(|(_, s)| s.to_string()))
138143
)
139144
}
140145
}

mzannotate/src/fragment/mzpaf/write.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,6 @@ impl ToMzPAF for Fragment {
202202
| FragmentType::Y(_)
203203
| FragmentType::YComposition(_, _) => {
204204
if let Some(formula) = &self.formula {
205-
// TODO: better way of storing?
206205
write!(w, "f{{{}}}", formula.hill_notation_core())?;
207206
if formula.additional_mass() != 0.0 {
208207
write!(w, "{:+}", formula.additional_mass())?;

mzcore/src/space.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,13 @@ impl Add for UsedSpace {
118118
}
119119
}
120120

121+
impl Add<()> for UsedSpace {
122+
type Output = Self;
123+
fn add(self, rhs: ()) -> Self {
124+
self
125+
}
126+
}
127+
121128
impl<T: Space> Space for &[T] {
122129
fn space(&self) -> UsedSpace {
123130
let mut total = UsedSpace::default();

mzcv/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ mod hash_buf_reader;
88
mod load;
99
mod lzw;
1010
mod obo;
11-
mod text;
11+
pub mod text;
1212

1313
pub use curie::*;
1414
pub use cv_error::*;

mzcv/src/load.rs

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -625,16 +625,38 @@ impl<CV: CVSource> CVIndex<CV> {
625625
flate2::Compression::fast(),
626626
);
627627
std::io::copy(&mut encoder, &mut writer)
628-
629-
// response
630-
// .copy_to(&mut writer)
631-
// // .map(|_| ())
632-
// .map_err(|e| e.to_string())
633-
634-
// Ok(0)
635628
}
636629
crate::CVCompression::Lzw => {
637-
todo!()
630+
use crate::lzw::ZArchive;
631+
632+
let content = ZArchive::new(response.as_ref())
633+
.and_then(|mut a| a.read())
634+
.map_err(|e| {
635+
use crate::lzw::{self, ArchiveError};
636+
637+
BoxedError::new(
638+
CVError::FileCouldNotBeParsed,
639+
"Could not decompress Lzw compression",
640+
match e {
641+
ArchiveError::DecompressionFailed { entry, reason } => {
642+
format!("{entry} {reason}")
643+
}
644+
ArchiveError::InvalidHeader => format!(
645+
"Invalid header, the first bytes have to be {:x}{:x}",
646+
lzw::ID[0],
647+
lzw::ID[1]
648+
),
649+
ArchiveError::IO(io) => io.to_string(),
650+
},
651+
Context::none().source(url.to_string()).to_owned(),
652+
)
653+
})?;
654+
655+
let mut encoder = flate2::bufread::GzEncoder::new(
656+
BufReader::new(content.as_slice()),
657+
flate2::Compression::fast(),
658+
);
659+
std::io::copy(&mut encoder, &mut writer)
638660
}
639661
}
640662
.map_err(|e| {

mzcv/src/text.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ const fn simplify_char(c: char) -> u8 {
1616
if c.is_ascii() { c as u32 as u8 } else { 0 }
1717
}
1818

19-
pub(crate) fn levenshtein_distance(word1: &str, word2: &str) -> usize {
19+
/// Get the Levenshtein distance between two words
20+
pub fn levenshtein_distance(word1: &str, word2: &str) -> usize {
2021
let word2_len = word2.chars().count();
2122
let mut row1 = (0..).take(word2_len + 1).collect::<Vec<_>>();
2223
let mut row2 = vec![0; word2_len + 1];

mzident/src/common_parser.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ macro_rules! format_family {
9191
fn space(&self) -> mzcore::space::UsedSpace {
9292
( mzcore::space::UsedSpace::default()
9393
$(+ self.$prname.space())*
94-
$(+ self.$poname.space())*)
94+
$(+ {$(#[cfg(feature = $pocfg)])? self.$poname.space()})*)
9595
.set_total::<Self>()
9696
}
9797
}
@@ -226,7 +226,7 @@ macro_rules! format_family {
226226
fn space(&self) -> mzcore::space::UsedSpace {
227227
( mzcore::space::UsedSpace::default()
228228
$(+ self.$rname.space())*
229-
$(+ self.$oname.space())*)
229+
$(+ {$(#[cfg(feature = $ocfg)])? self.$oname.space()})*)
230230
.set_total::<Self>()
231231
}
232232
}

mzident/src/formats/fasta.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,23 +1367,23 @@ impl ProteinMetaData for FastaData {
13671367
}
13681368

13691369
fn species(&self) -> Option<mzcv::Curie> {
1370-
todo!()
1370+
None // TODO: Maybe look at the header to see if this can be filled
13711371
}
13721372

13731373
fn species_name(&self) -> Option<&str> {
1374-
todo!()
1374+
None
13751375
}
13761376

13771377
fn search_engine(&self) -> &[(super::CVTerm, Option<(f64, super::CVTerm)>)] {
1378-
todo!()
1378+
&[]
13791379
}
13801380

13811381
fn ambiguity_members(&self) -> &[String] {
1382-
todo!()
1382+
&[]
13831383
}
13841384

13851385
fn database(&self) -> Option<(&str, Option<&str>)> {
1386-
todo!()
1386+
None
13871387
}
13881388

13891389
fn modifications(
@@ -1392,23 +1392,23 @@ impl ProteinMetaData for FastaData {
13921392
Vec<(mzcore::prelude::SequencePosition, Option<f64>)>,
13931393
mzcore::sequence::SimpleModification,
13941394
)] {
1395-
todo!()
1395+
&[]
13961396
}
13971397

13981398
fn coverage(&self) -> Option<f64> {
1399-
todo!()
1399+
None
14001400
}
14011401

14021402
fn gene_ontology(&self) -> &[mzcv::Curie] {
1403-
todo!()
1403+
&[]
14041404
}
14051405

14061406
fn reliability(&self) -> Option<super::Reliability> {
1407-
todo!()
1407+
None
14081408
}
14091409

14101410
fn uri(&self) -> Option<&str> {
1411-
todo!()
1411+
None
14121412
}
14131413
}
14141414

0 commit comments

Comments
 (0)