
Commit b4532db

Add LZ4HC support
1 parent b911b71 commit b4532db

File tree

4 files changed: +524 additions, -30 deletions


Cargo.toml

Lines changed: 2 additions & 1 deletion

@@ -38,10 +38,11 @@ git = "https://github.com/main--/rust-lz-fear"
 #features = ["std", "safe-encode", "safe-decode", "frame"]

 [features]
-default = ["std", "safe-encode", "safe-decode", "frame", "checked-decode"]
+default = ["std", "hc", "safe-encode", "safe-decode", "frame", "checked-decode"]
 safe-decode = []
 safe-encode = []
 checked-decode = [] # Adds important checks while decoding. Only remove on trusted input!
+hc = []
 frame = ["std", "dep:twox-hash"]
 std = []
 # use nightly compiler features
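Note: with `hc` added to the default feature set, the feature is active unless a consumer opts out of default features. A downstream crate that does opt out would re-enable it along these lines (a minimal sketch; the crate name `lz4_flex` is inferred from the feature list and is not stated in this commit, and the version is a placeholder):

[dependencies]
# Hypothetical consumer Cargo.toml; crate name and version are assumptions.
lz4_flex = { version = "*", default-features = false, features = ["std", "hc", "frame"] }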

src/block/compress.rs

Lines changed: 33 additions & 29 deletions

@@ -265,16 +265,16 @@ fn write_integer(output: &mut impl Sink, mut n: usize) {

 /// Handle the last bytes from the input as literals
 #[cold]
-fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
-    let lit_len = input.len() - start;
+pub(crate) fn handle_last_literals(output: &mut impl Sink, input: &[u8]) {
+    let lit_len = input.len();

     let token = token_from_literal(lit_len);
     push_byte(output, token);
     if lit_len >= 0xF {
         write_integer(output, lit_len - 0xF);
     }
     // Now, write the actual literals.
-    output.extend_from_slice(&input[start..]);
+    output.extend_from_slice(input);
 }

 /// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
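Note: the hunk above changes handle_last_literals to take the trailing literal slice directly instead of the full input plus a start index, and makes it pub(crate), presumably so the new HC path can reuse it. What it emits is an LZ4 literal-only sequence: a token whose high nibble is the literal length (saturating at 0xF, with the remainder spilled into 0xFF extension bytes), followed by the raw literals. A standalone sketch of that layout, written against Vec<u8> rather than the crate's Sink trait (the function name is illustrative):

// Encode a trailing run of literals, as handle_last_literals does.
fn encode_last_literals(out: &mut Vec<u8>, literals: &[u8]) {
    let lit_len = literals.len();
    // The literal length lives in the token's high nibble, capped at 0xF.
    out.push((lit_len.min(0xF) as u8) << 4);
    if lit_len >= 0xF {
        // LZ4 length extension: 0xFF bytes until the remainder fits in one byte.
        let mut rest = lit_len - 0xF;
        while rest >= 0xFF {
            out.push(0xFF);
            rest -= 0xFF;
        }
        out.push(rest as u8);
    }
    // The literals themselves follow verbatim.
    out.extend_from_slice(literals);
}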
@@ -372,7 +372,7 @@ pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool, S: Sink>(

     let output_start_pos = output.pos();
     if input.len() - input_pos < LZ4_MIN_LENGTH {
-        handle_last_literals(output, input, input_pos);
+        handle_last_literals(output, &input[input_pos..]);
         return Ok(output.pos() - output_start_pos);
     }

@@ -410,7 +410,7 @@ pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool, S: Sink>(

     // Same as cur + MFLIMIT > input.len()
     if cur > end_pos_check {
-        handle_last_literals(output, input, literal_start);
+        handle_last_literals(output, &input[literal_start..]);
         return Ok(output.pos() - output_start_pos);
     }
     // Find a candidate in the dictionary with the hash of the current four bytes.
@@ -491,34 +491,39 @@ pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool, S: Sink>(
         let hash = T::get_hash_at(input, cur - 2);
         dict.put_at(hash, cur - 2 + input_stream_offset);

-        let token = token_from_literal_and_match_length(lit_len, duplicate_length);
+        encode_sequence(&input[literal_start..literal_start + lit_len], output, offset, duplicate_length);

-        // Push the token to the output stream.
-        push_byte(output, token);
-        // If we were unable to fit the literals length into the token, write the extensional
-        // part.
-        if lit_len >= 0xF {
-            write_integer(output, lit_len - 0xF);
-        }
-
-        // Now, write the actual literals.
-        //
-        // The unsafe version copies blocks of 8 bytes, and therefore may copy up to 7 bytes more than
-        // needed. This is safe, because the last 12 bytes (MF_LIMIT) are handled in
-        // handle_last_literals.
-        copy_literals_wild(output, input, literal_start, lit_len);
-        // Write the offset in little endian.
-        push_u16(output, offset);
-
-        // If we were unable to fit the duplicates length into the token, write the
-        // extensional part.
-        if duplicate_length >= 0xF {
-            write_integer(output, duplicate_length - 0xF);
-        }
         literal_start = cur;
     }
 }

+pub(crate) fn encode_sequence<S: Sink>(literal: &[u8], output: &mut S, offset: u16, match_len: usize) {
+    let token = token_from_literal_and_match_length(literal.len(), match_len);
+    // Push the token to the output stream.
+    push_byte(output, token);
+    // If we were unable to fit the literals length into the token, write the extensional
+    // part.
+    if literal.len() >= 0xF {
+        write_integer(output, literal.len() - 0xF);
+    }
+
+    // Now, write the actual literals.
+    //
+    // The unsafe version copies blocks of 8 bytes, and therefore may copy up to 7 bytes more than
+    // needed. This is safe, because the last 12 bytes (MF_LIMIT) are handled in
+    // handle_last_literals.
+    copy_literals_wild(output, literal, 0, literal.len());
+    // Write the offset in little endian.
+    push_u16(output, offset);
+
+    // If we were unable to fit the duplicates length into the token, write the
+    // extensional part.
+    if match_len >= 0xF {
+        write_integer(output, match_len - 0xF);
+    }
+}
+
 #[inline]
 #[cfg(feature = "safe-encode")]
 fn push_byte(output: &mut impl Sink, el: u8) {
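Note: the hunk above factors sequence emission out of the main compression loop into pub(crate) encode_sequence, so the loop body shrinks to a single call and, presumably, the new HC match finder can emit sequences through the same routine. The wire layout it produces is the standard LZ4 sequence: a token (literal length in the high nibble, match length in the low nibble, both saturating at 0xF), optional literal-length extension bytes, the literals, a little-endian u16 offset, and optional match-length extension bytes. A standalone sketch against Vec<u8> (the Sink trait is replaced here, and match_len is assumed to already have MINMATCH subtracted, matching how the on-wire format stores it):

// Emit one LZ4 sequence: token, literal run, offset, length extensions.
fn encode_sequence_sketch(out: &mut Vec<u8>, literals: &[u8], offset: u16, match_len: usize) {
    let lit_len = literals.len();
    // Both lengths saturate at 0xF inside the token.
    out.push(((lit_len.min(0xF) as u8) << 4) | (match_len.min(0xF) as u8));
    if lit_len >= 0xF {
        write_length_extension(out, lit_len - 0xF);
    }
    out.extend_from_slice(literals);              // the literals themselves
    out.extend_from_slice(&offset.to_le_bytes()); // offset, little endian
    if match_len >= 0xF {
        write_length_extension(out, match_len - 0xF);
    }
}

// LZ4 length extension: 0xFF bytes accumulate until a final byte < 0xFF.
fn write_length_extension(out: &mut Vec<u8>, mut n: usize) {
    while n >= 0xFF {
        out.push(0xFF);
        n -= 0xFF;
    }
    out.push(n as u8);
}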
@@ -567,7 +572,6 @@ fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize,
 #[inline]
 #[cfg(not(feature = "safe-encode"))]
 fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
-    debug_assert!(input_start + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= input.len());
     debug_assert!(output.pos() + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= output.capacity());
     unsafe {
         // Note: This used to be a wild copy loop of 8 bytes, but the compiler consistently
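Note: the removed debug_assert checked that the rounded-up copy stays within the source slice. After the refactor, encode_sequence passes the literal sub-slice itself, whose length is usually not a multiple of 8, so the input-side check would fail even though the underlying buffer still has the required slack; the output-side check remains. For illustration, a standalone sketch of the wild-copy idea (the context comment notes the crate has since replaced this exact loop, so this is not the current implementation):

// "Wild copy": copy in fixed 8-byte blocks, rounding len up, so up to 7
// bytes past `len` are read and written. Both buffers need that slack; the
// crate guarantees it via MF_LIMIT, this demo via oversized arrays.
unsafe fn wild_copy_8(src: *const u8, dst: *mut u8, len: usize) {
    let mut n = 0;
    while n < len {
        core::ptr::copy_nonoverlapping(src.add(n), dst.add(n), 8);
        n += 8;
    }
}

fn main() {
    let src = [1u8; 24];     // 17 payload bytes plus 7 bytes of slack
    let mut dst = [0u8; 24]; // destination with matching slack
    unsafe { wild_copy_8(src.as_ptr(), dst.as_mut_ptr(), 17) };
    assert_eq!(&dst[..17], &src[..17]);
}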
