Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 0 additions & 93 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,6 @@ pub enum Error {
#[error("Fastx encoding error: {0}")]
FastxEncodingError(#[from] FastxEncodingError),
}
impl Error {
    /// Reports whether this error wraps an index error that signals an index/file mismatch.
    ///
    /// Only the `IndexError` variant is inspected, and the decision is delegated to
    /// `IndexError::is_mismatch`. Callers typically use the answer to decide whether an
    /// index is out of sync with its file and needs to be rebuilt.
    ///
    /// # Returns
    ///
    /// * whatever `IndexError::is_mismatch` reports, when `self` is `Error::IndexError`
    /// * `false` for every other variant
    #[must_use]
    pub fn is_index_mismatch(&self) -> bool {
        if let Self::IndexError(index_err) = self {
            index_err.is_mismatch()
        } else {
            false
        }
    }
}

/// Errors specific to processing and validating binary sequence headers
#[derive(thiserror::Error, Debug)]
Expand Down Expand Up @@ -284,36 +266,10 @@ pub enum IndexError {
#[error("Invalid magic number: {0}")]
InvalidMagicNumber(u64),

/// When the index references a file that doesn't exist
///
/// The parameter is the missing file path
#[error("Index missing upstream file path: {0}")]
MissingUpstreamFile(String),

/// When the size of the file doesn't match what the index expects
///
/// The first parameter is the actual file size, the second is the expected size
#[error("Mismatch in size between upstream size: {0} and expected index size {1}")]
ByteSizeMismatch(u64, u64),

/// Invalid reserved bytes in the index header
#[error("Invalid reserved bytes in index header")]
InvalidReservedBytes,
}
impl IndexError {
    /// Reports whether this error should be treated as an index/file mismatch.
    ///
    /// Callers use this to decide whether the index needs to be rebuilt.
    ///
    /// # Returns
    ///
    /// Always `true`. The intent appears to be "`true` only for `ByteSizeMismatch`", but the
    /// check has always carried a catch-all alternative, so every variant is accepted. The
    /// unit tests in this file assert that always-true result, so narrowing it must be done
    /// together with those tests.
    #[must_use]
    pub fn is_mismatch(&self) -> bool {
        // NOTE(review): both arms intentionally yield `true` to preserve the long-standing
        // behaviour; the second arm is what makes non-mismatch variants report `true`.
        match self {
            Self::ByteSizeMismatch(..) => true,
            _ => true,
        }
    }
}

#[derive(thiserror::Error, Debug)]
pub enum CbqError {
Expand Down Expand Up @@ -411,55 +367,6 @@ mod testing {
assert!(matches!(binseq_error, Error::GenericError(_)));
}

// ==================== Error::is_index_mismatch Tests ====================

#[test]
fn test_is_index_mismatch_with_byte_size_mismatch() {
    // A size mismatch wrapped in the top-level error must be reported as an index mismatch.
    let mismatch = IndexError::ByteSizeMismatch(100, 200);
    assert!(Error::IndexError(mismatch).is_index_mismatch());
}

#[test]
fn test_is_index_mismatch_with_invalid_magic() {
    // Pins the current (buggy) behaviour: `IndexError::is_mismatch` accepts every variant,
    // so even an invalid magic number is reported as a mismatch.
    let bad_magic = IndexError::InvalidMagicNumber(0x1234);
    assert!(Error::IndexError(bad_magic).is_index_mismatch());
}

#[test]
fn test_is_index_mismatch_with_non_index_error() {
    // Errors that do not wrap an `IndexError` are never index mismatches.
    let not_an_index_error = Error::WriteError(WriteError::MissingHeader);
    let reported = not_an_index_error.is_index_mismatch();
    assert!(!reported);
}

// ==================== IndexError Tests ====================

#[test]
fn test_index_error_is_mismatch() {
    // The one variant that is meant to count as a mismatch.
    assert!(IndexError::ByteSizeMismatch(100, 200).is_mismatch());
}

#[test]
fn test_index_error_invalid_magic() {
    // Pins the current (buggy) behaviour: `is_mismatch` returns `true` for every variant.
    let bad_magic = IndexError::InvalidMagicNumber(0x1234);
    let reported = bad_magic.is_mismatch();
    assert!(reported);
}

#[test]
fn test_index_error_missing_upstream_file() {
    let missing = IndexError::MissingUpstreamFile("test.vbq".to_string());
    // Pins the current always-true result of `is_mismatch`.
    assert!(missing.is_mismatch());
    // The rendered message must include the offending path.
    let rendered = missing.to_string();
    assert!(rendered.contains("test.vbq"));
}

#[test]
fn test_index_error_invalid_reserved_bytes() {
    // Pins the current always-true result of `is_mismatch` for a non-mismatch variant.
    assert!(IndexError::InvalidReservedBytes.is_mismatch());
}

// ==================== HeaderError Tests ====================

#[test]
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
//! The VBQ format has undergone significant improvements:
//!
//! - **Embedded Index**: VBQ files now contain their index data embedded at the end of the file,
//! eliminating separate `.vqi` index files and improving portability.
//! improving portability.
//! - **Headers Support**: Optional sequence identifiers/headers can be stored with each record.
//! - **Extended Capacity**: u64 indexing supports files with more than 4 billion records.
//! - **Multi-bit Encoding**: Support for both 2-bit and 4-bit nucleotide encodings.
Expand Down
118 changes: 18 additions & 100 deletions src/vbq/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
//!
//! ## Format Changes (v0.7.0+)
//!
//! **BREAKING CHANGE**: The VBQ index is now embedded at the end of VBQ files instead of
//! being stored in separate `.vqi` files. This improves portability and eliminates the
//! need to manage auxiliary files.
//! **BREAKING CHANGE**: The VBQ index is now embedded at the end of VBQ files,
//! improving portability and eliminating the need to manage auxiliary files.
//!
//! ## Embedded Index Structure
//!
Expand All @@ -29,13 +28,13 @@
//!
//! ## Key Changes from v0.6.x
//!
//! - Index moved from separate `.vqi` files into VBQ files
//! - Index is now embedded in VBQ files
//! - Cumulative record counts changed from `u32` to `u64`
//! - Support for files with more than 4 billion records

use std::{
fs::File,
io::{BufReader, BufWriter, Cursor, Read, Write},
io::{Cursor, Read, Write},
path::Path,
};

Expand Down Expand Up @@ -374,9 +373,10 @@ impl IndexHeader {
/// `IndexHeader` and a collection of `BlockRange` entries, one for each block in
/// the file.
///
/// The index can be created by scanning a VBQ file or loaded from a previously
/// created index file. Once loaded, it provides information about block locations,
/// sizes, and record counts.
/// The index is embedded at the end of VBQ files and can be loaded using
/// `MmapReader::load_index()` or created by scanning a VBQ file using
/// `BlockIndex::from_vbq()`. Once loaded, it provides information about block
/// locations, sizes, and record counts.
///
/// # Examples
///
Expand All @@ -388,10 +388,6 @@ impl IndexHeader {
/// let vbq_path = Path::new("example.vbq");
/// let index = BlockIndex::from_vbq(vbq_path).unwrap();
///
/// // Save the index for future use
/// let index_path = Path::new("example.vbq.vqi");
/// index.save_to_path(index_path).unwrap();
///
/// // Use the index with a reader for parallel processing
/// let reader = MmapReader::new(vbq_path).unwrap();
/// println!("File contains {} blocks", index.n_blocks());
Expand Down Expand Up @@ -430,54 +426,18 @@ impl BlockIndex {
/// # Examples
///
/// ```rust,no_run
/// use binseq::vbq::BlockIndex;
/// use binseq::vbq::{BlockIndex, MmapReader};
/// use std::path::Path;
///
/// let index = BlockIndex::from_path(Path::new("example.vbq.vqi")).unwrap();
/// let reader = MmapReader::new(Path::new("example.vbq")).unwrap();
/// let index = reader.load_index().unwrap();
/// println!("The file contains {} blocks", index.n_blocks());
/// ```
#[must_use]
pub fn n_blocks(&self) -> usize {
self.ranges.len()
}

/// Saves the index (header plus all `BlockRange` entries) to a file
///
/// The index header is written first, uncompressed; the block ranges follow as a
/// compressed stream (compression level 3). The resulting file can be loaded later to
/// avoid rescanning the VBQ file.
///
/// # Parameters
///
/// * `path` - The path where the index file should be saved
///
/// # Returns
///
/// * `Ok(())` - If the index was successfully saved
/// * `Err(_)` - If the file could not be created, or if writing, finalizing the
///   compressed stream, or flushing the file failed
///
/// # Examples
///
/// ```rust,no_run
/// use binseq::vbq::BlockIndex;
/// use std::path::Path;
///
/// // Create an index from a VBQ file
/// let index = BlockIndex::from_vbq(Path::new("example.vbq")).unwrap();
///
/// // Save it for future use
/// index.save_to_path(Path::new("example.vbq.vqi")).unwrap();
/// ```
pub fn save_to_path<P: AsRef<Path>>(&self, path: P) -> Result<()> {
    let mut file_writer = File::create(path).map(BufWriter::new)?;
    self.header.write_bytes(&mut file_writer)?;

    // Finish the compressed stream explicitly instead of relying on `auto_finish()`:
    // finalizing on drop cannot report a failure to write the end of the stream, and
    // neither can the `BufWriter`'s implicit flush on drop.
    let mut encoder = Encoder::new(file_writer, 3)?;
    self.write_range(&mut encoder)?;
    let mut file_writer = encoder.finish()?;
    file_writer.flush()?;
    Ok(())
}

/// Write the index to an output buffer
pub fn write_bytes<W: Write>(&self, writer: &mut W) -> Result<()> {
self.header.write_bytes(writer)?;
Expand All @@ -490,9 +450,8 @@ impl BlockIndex {
/// Write the collection of `BlockRange` to an output handle
/// Writes all block ranges to the provided writer
///
/// This method is used internally by `save_to_path` to write the block ranges
/// to an index file. It can also be used to serialize an index to any destination
/// that implements `Write`.
/// This method is used internally to write the block ranges to the embedded index.
/// It can also be used to serialize an index to any destination that implements `Write`.
///
/// # Parameters
///
Expand Down Expand Up @@ -524,8 +483,8 @@ impl BlockIndex {
/// Creates a new index by scanning a VBQ file
///
/// This method memory-maps the specified VBQ file and scans it block by block
/// to create an index. The index can then be saved to a file for future use, enabling
/// efficient random access without rescanning the file.
/// to create an index. This is primarily used internally when embedding the index
/// into VBQ files during the write process.
///
/// # Parameters
///
Expand All @@ -545,9 +504,6 @@ impl BlockIndex {
/// // Create an index from a VBQ file
/// let index = BlockIndex::from_vbq(Path::new("example.vbq")).unwrap();
///
/// // Save the index for future use
/// index.save_to_path(Path::new("example.vbq.vqi")).unwrap();
///
/// // Get statistics about the file
/// println!("File contains {} blocks", index.n_blocks());
///
Expand Down Expand Up @@ -603,45 +559,6 @@ impl BlockIndex {
Ok(index)
}

/// Reads a standalone index file and validates it against its upstream VBQ file
///
/// The upstream file path is derived by stripping the `.vqi` suffix from `path`. The
/// byte count recorded in the index header must equal the size of that upstream file;
/// the remaining bytes of the index file are decompressed and parsed as consecutive
/// `BlockRange` records of `SIZE_BLOCK_RANGE` bytes each.
///
/// # Errors
///
/// * `IndexError::MissingUpstreamFile` if `path` is not valid UTF-8 or does not end
///   in `.vqi`
/// * `IndexError::ByteSizeMismatch` if the header's byte count differs from the
///   upstream file size
/// * An I/O error if either file cannot be opened or read, if decompression fails, or
///   if the decompressed payload is not a whole number of block-range records
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
    let index_path = path.as_ref();

    // A non-UTF-8 path cannot be checked for the `.vqi` suffix, so it is reported the
    // same way as a path without the suffix rather than panicking.
    let Some(upstream_file) = index_path
        .to_str()
        .and_then(|p| p.strip_suffix(".vqi"))
    else {
        return Err(
            IndexError::MissingUpstreamFile(index_path.to_string_lossy().to_string()).into(),
        );
    };

    // Only the length of the upstream file is needed, so ask the filesystem for it
    // instead of memory-mapping the whole file.
    let file_size = File::open(upstream_file)?.metadata()?.len();

    let mut file_handle = File::open(index_path).map(BufReader::new)?;
    let index_header = IndexHeader::from_reader(&mut file_handle)?;
    if index_header.bytes != file_size {
        return Err(IndexError::ByteSizeMismatch(file_size, index_header.bytes).into());
    }

    // Everything after the header is the compressed block-range table.
    let mut buffer = Vec::new();
    Decoder::new(file_handle)?.read_to_end(&mut buffer)?;

    // A truncated or corrupt table would otherwise cause an out-of-bounds slice panic.
    if buffer.len() % SIZE_BLOCK_RANGE != 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "index payload is not a whole number of block ranges",
        )
        .into());
    }

    let mut index = Self::new(index_header);
    for record in buffer.chunks_exact(SIZE_BLOCK_RANGE) {
        index.add_range(BlockRange::from_bytes(record));
    }

    Ok(index)
}

pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
let index_header = IndexHeader::from_bytes(bytes)?;
let buffer = {
Expand Down Expand Up @@ -676,10 +593,11 @@ impl BlockIndex {
/// # Examples
///
/// ```rust,no_run
/// use binseq::vbq::BlockIndex;
/// use binseq::vbq::MmapReader;
/// use std::path::Path;
///
/// let index = BlockIndex::from_path(Path::new("example.vbq.vqi")).unwrap();
/// let reader = MmapReader::new(Path::new("example.vbq")).unwrap();
/// let index = reader.load_index().unwrap();
///
/// // Examine the ranges to determine which blocks to process
/// for (i, range) in index.ranges().iter().enumerate() {
Expand Down
2 changes: 1 addition & 1 deletion src/vbq/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
//! ## Recent Format Changes (v0.7.0+)
//!
//! * **Embedded Index**: Index data is now stored within the VBQ file itself, eliminating
//! separate `.vqi` files and improving portability.
//! the need for separate index files and improving portability.
//! * **Headers Support**: Optional sequence identifiers can be stored with each record.
//! * **Extended Capacity**: u64 indexing supports files with more than 4 billion records.
//! * **Multi-bit Encoding**: Support for both 2-bit and 4-bit nucleotide encodings.
Expand Down
Loading