From d9c5b1cefb8c2b78ff88b95825f74124b4332607 Mon Sep 17 00:00:00 2001 From: hubcycle Date: Fri, 18 Jul 2025 15:08:19 +0530 Subject: [PATCH] feat: add infallible first-last accessor methods - provide `first()`, `last()`, `split_first()`, `split_last()` methods. - use unsafe blocks for performance, and thoroughly document the safety. - add tests and miri workflow to ci. --- .github/workflows/miri.yml | 27 ++++++++ Cargo.toml | 5 +- README.md | 10 +++ src/lib.rs | 125 +++++++++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/miri.yml diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml new file mode 100644 index 0000000..10e74f8 --- /dev/null +++ b/.github/workflows/miri.yml @@ -0,0 +1,27 @@ +name: miri tests + +on: + pull_request: + branches: [ main ] + push: + branches: [ main ] + +jobs: + miri: + name: "miri" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@nightly + with: + components: miri + + - name: Setup Miri + run: cargo miri setup + + - name: Run Miri Tests + run: cargo miri test + env: + MIRIFLAGS: -Zmiri-strict-provenance diff --git a/Cargo.toml b/Cargo.toml index 41e6ba1..7d910f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nebz" -version = "0.1.1" +version = "0.1.2" edition = "2024" description = "an immutable non-empty bytes container" readme = "README.md" @@ -18,3 +18,6 @@ bytes = ["dep:bytes"] [dependencies] bytes = { version = "1", optional = true } + +[dev-dependencies] +rstest = { version = "0.25", default-features = false } diff --git a/README.md b/README.md index fb5aed5..9f19ebe 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,13 @@ An immutable non-empty bytes container. Dual-licensed under [Apache 2.0](LICENSE-APACHE) or [MIT](LICENSE-MIT). For more details, see [docs.rs/nebz](https://docs.rs/nebz). 
+ +## Safety Notice + +This crate uses **unsafe code** for performance optimization in methods like `first()`, `last()`, `split_first()`, and `split_last()`. All unsafe blocks are: + +- Thoroughly documented with safety justifications +- Protected by `NonEmptyBz`'s non-empty guarantee +- **Verified with Miri** - passes all undefined behavior detection tests + +Run `cargo +nightly miri test` to verify the safety. diff --git a/src/lib.rs b/src/lib.rs index 0dcc3f0..ef721ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,68 @@ impl NonEmptyBz { &self.0 } + /// Returns the first byte of the contained value. + pub fn first(&self) -> u8 + where + T: AsRef<[u8]>, + { + // SAFETY: `NonEmptyBz` guarantees that the contained value is non-empty i.e. + // there is always at least one element at index 0. + unsafe { *self.0.as_ref().get_unchecked(0) } + } + + /// Returns the last byte of the contained value. + pub fn last(&self) -> u8 + where + T: AsRef<[u8]>, + { + let slice = self.0.as_ref(); + + // SAFETY: `NonEmptyBz` guarantees that the contained value is non-empty, + // so `len() >= 1` i.e. `len() - 1` is always a valid index: + // - For len() == 1: len() - 1 == 0 (valid) + // - For len() > 1: len() - 1 < len() (valid) + // Therefore, `len() - 1` is always within bounds for a non-empty slice. + unsafe { *slice.get_unchecked(slice.len() - 1) } + } + + /// Returns the first byte and the remaining slice. + /// + /// The remaining slice will be empty if the contained value has only one byte. + pub fn split_first(&self) -> (u8, &[u8]) + where + T: AsRef<[u8]>, + { + let first = self.first(); + + // SAFETY: Since len() >= 1, the range [1..] is always valid. + // When len() == 1, this creates an empty slice [1..1]. + // When len() > 1, this creates a valid slice [1..len()]. + let rest = unsafe { self.0.as_ref().get_unchecked(1..) }; + + (first, rest) + } + + /// Returns the last byte and the remaining slice. 
+ /// + /// The remaining slice will be empty if the contained value has only one byte. + pub fn split_last(&self) -> (u8, &[u8]) + where + T: AsRef<[u8]>, + { + let slice = self.0.as_ref(); + let len = slice.len(); + + let last = self.last(); + + // SAFETY: Since len() >= 1, the range [..len() - 1] is always valid. + // When len() == 1, this creates an empty slice [..0]. + // When len() > 1, this creates a valid slice [..len() - 1]. + let rest = unsafe { slice.get_unchecked(..len - 1) }; + + (last, rest) + } + /// Converts from `&NonEmptyBz` to `NonEmptyBz<&T>`. pub const fn as_ref(&self) -> NonEmptyBz<&T> { NonEmptyBz(&self.0) @@ -96,3 +158,66 @@ impl From> for NonEmptyBz { Self(bytes::Bytes::copy_from_slice(nebz_slice.get())) } } + +#[cfg(test)] +mod tests { + use rstest::rstest; + + use super::*; + + #[rstest] + // single byte + #[case::single_byte_owned_array(NonEmptyBz::from_owned_array([42]), 42, 42, &[], &[])] + #[case::single_byte_borrowed_array(NonEmptyBz::from_borrowed_array(b"a"), b'a', b'a', &[], &[])] + #[case::single_byte_vec(NonEmptyBz::new(vec![0]).unwrap(), 0, 0, &[], &[])] + #[case::single_byte_str(NonEmptyBz::new("z").unwrap(), b'z', b'z', &[], &[])] + // multiple bytes + #[case::multiple_bytes_owned_array( + NonEmptyBz::from_owned_array([1, 2, 3, 4]), + 1, + 4, + &[2, 3, 4], + &[1, 2, 3], + )] + #[case::multiple_bytes_borrowed_array( + NonEmptyBz::from_borrowed_array(b"nebz"), + b'n', + b'z', + b"ebz", + b"neb" + )] + #[case::multiple_bytes_vec( + NonEmptyBz::new(vec![255, 0, 128]).unwrap(), + 255, + 128, + &[0, 128], + &[255, 0], + )] + #[case::multiple_bytes_str( + NonEmptyBz::new("hello").unwrap(), + b'h', + b'o', + b"ello", + b"hell", + )] + fn first_last_works( + #[case] nebz: NonEmptyBz, + #[case] expected_first: u8, + #[case] expected_last: u8, + #[case] expected_split_first_rest: &[u8], + #[case] expected_split_last_rest: &[u8], + ) where + T: AsRef<[u8]>, + { + assert_eq!(nebz.first(), expected_first); + assert_eq!(nebz.last(), 
expected_last); + + let (first, rest) = nebz.split_first(); + assert_eq!(first, expected_first); + assert_eq!(rest, expected_split_first_rest); + + let (last, rest) = nebz.split_last(); + assert_eq!(last, expected_last); + assert_eq!(rest, expected_split_last_rest); + } +}