diff --git a/Cargo.lock b/Cargo.lock index 130c6c3..372eaac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,6 +56,16 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "borsh" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" +dependencies = [ + "bytes", + "cfg_aliases", +] + [[package]] name = "buffa" version = "0.6.0" @@ -63,10 +73,13 @@ dependencies = [ "arbitrary", "base64", "bytes", + "compact_str", + "ecow", "hashbrown 0.15.5", "once_cell", "serde", "serde_json", + "smol_str", "thiserror", ] @@ -148,12 +161,27 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "ciborium" version = "0.2.2" @@ -206,6 +234,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "arbitrary", + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "criterion" version = "0.5.1" @@ -284,6 +328,15 @@ dependencies = [ "syn", ] +[[package]] +name = "ecow" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78e4f79b296fbaab6ce2e22d52cb4c7f010fe0ebe7a32e34fa25885fd797bd02" +dependencies = [ + "serde", +] + [[package]] name = "either" version = "1.15.0" @@ -621,6 +674,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "same-file" version = "1.0.6" @@ -679,6 +738,23 @@ dependencies = [ "zmij", ] +[[package]] +name = "smol_str" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4aaa7368fcf4852a4c2dd92df0cace6a71f2091ca0a23391ce7f3a31833f1523" +dependencies = [ + "arbitrary", + "borsh", + "serde_core", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "syn" version = "2.0.114" diff --git a/Cargo.toml b/Cargo.toml index 91783dc..d908ac7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,14 @@ serde = { version = "1", default-features = false } serde_json = { version = "1", default-features = false, features = ["alloc"] } arbitrary = { version = "1", default-features = false, features = ["derive"] } thiserror = { version = "2", default-features = false } +# Optional `string` field representations that will be selectable via +# `buffa_build`'s forthcoming `string_type` knob. Declared +# `default-features = false` so they stay `no_std`; the relevant +# `serde`/`arbitrary`/`std` sub-features are turned on by the matching `buffa` +# feature (see buffa/Cargo.toml). +smol_str = { version = "0.3", default-features = false } +ecow = { version = "0.2", default-features = false } +compact_str = { version = "0.9", default-features = false } prettyplease = "0.2" proc-macro2 = "1" quote = "1" diff --git a/buffa/Cargo.toml b/buffa/Cargo.toml index ffd83da..65161f8 100644 --- a/buffa/Cargo.toml +++ b/buffa/Cargo.toml @@ -16,14 +16,42 @@ rustdoc-args = ["--cfg", "docsrs"] [features] default = ["std"] -std = ["bytes/std", "thiserror/std", "serde?/std", "serde_json?/std"] +std = [ + "bytes/std", + "thiserror/std", + "serde?/std", + "serde_json?/std", + "smol_str?/std", + "ecow?/std", + "compact_str?/std", +] # `json` and `text` are independently enableable. Both share the # `type_registry` module (gated `any(json, text)`) but carry per-format # entry types (`JsonAnyEntry` vs `TextAnyEntry`) — no struct-literal `#[cfg]` # needed, no `Option` placeholder fields. -json = ["dep:serde", "dep:base64", "dep:serde_json", "serde/alloc", "serde/derive", "hashbrown/serde"] -arbitrary = ["dep:arbitrary"] +json = [ + "dep:serde", + "dep:base64", + "dep:serde_json", + "serde/alloc", + "serde/derive", + "hashbrown/serde", + # Pull in serde support for whichever configurable string types are enabled. + "smol_str?/serde", + "ecow?/serde", + "compact_str?/serde", +] +arbitrary = ["dep:arbitrary", "smol_str?/arbitrary", "compact_str?/arbitrary"] text = [] +# Configurable `string` field representations. Enabling one of these will let +# `buffa_build`'s forthcoming `string_type` knob map proto `string` to that +# type; buffa re-exports the crate so generated code references +# `::buffa::::` and downstream crates do not declare the dependency +# themselves. `ecow` has no native `arbitrary` impl, so buffa supplies a shim +# under the `arbitrary` feature. +smol_str = ["dep:smol_str"] +ecow = ["dep:ecow"] +compact_str = ["dep:compact_str"] [dependencies] arbitrary = { workspace = true, optional = true } @@ -34,3 +62,6 @@ once_cell = { workspace = true, features = ["alloc"] } serde = { workspace = true, optional = true } serde_json = { workspace = true, optional = true } thiserror = { workspace = true } +smol_str = { workspace = true, optional = true } +ecow = { workspace = true, optional = true } +compact_str = { workspace = true, optional = true } diff --git a/buffa/src/json_helpers.rs b/buffa/src/json_helpers.rs index 46a8e54..2d422aa 100644 --- a/buffa/src/json_helpers.rs +++ b/buffa/src/json_helpers.rs @@ -334,31 +334,56 @@ pub mod proto_bool { /// /// Use with `#[serde(with = "::buffa::json_helpers::proto_string")]`. pub mod proto_string { - use alloc::string::ToString; use serde::{Deserializer, Serializer}; - pub fn serialize(value: &str, s: S) -> Result { - s.serialize_str(value) + /// Serialize a `string` field. + /// + /// Generic over `T: AsRef` so configurable string types + /// (`smol_str::SmolStr`, `ecow::EcoString`, ...) serialize without relying + /// on `Deref` coercion at the `#[serde(with = ...)]` call + /// site. `String` and `&str` both satisfy the bound. + pub fn serialize + ?Sized, S: Serializer>( + value: &T, + s: S, + ) -> Result { + s.serialize_str(value.as_ref()) } - pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result { - struct V; - impl<'de> serde::de::Visitor<'de> for V { - type Value = alloc::string::String; + /// Deserialize a `string` field (or JSON `null` → `""`). + /// + /// Generic over the return type so that codegen's `string_type` knob (which + /// can map the field to `smol_str::SmolStr`, `ecow::EcoString`, etc.) works + /// without a per-type shim. The visitor constructs the target type directly: + /// `visit_str` goes through `From<&str>`, so a short string is inlined by an + /// SSO type without first allocating an intermediate `String`. `String` + /// itself satisfies both `From<&str>` and `From`, keeping the + /// default path zero-extra-cost. Type inference picks `T` from the field + /// type at the serde call site. + pub fn deserialize<'de, T, D>(d: D) -> Result + where + T: for<'a> From<&'a str> + From, + D: Deserializer<'de>, + { + struct V(core::marker::PhantomData); + impl<'de, T> serde::de::Visitor<'de> for V + where + T: for<'a> From<&'a str> + From, + { + type Value = T; fn expecting(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("a string or null") } - fn visit_unit(self) -> Result { - Ok(alloc::string::String::new()) + fn visit_unit(self) -> Result { + Ok(T::from("")) } - fn visit_str(self, v: &str) -> Result { - Ok(v.to_string()) + fn visit_str(self, v: &str) -> Result { + Ok(T::from(v)) } - fn visit_string(self, v: alloc::string::String) -> Result { - Ok(v) + fn visit_string(self, v: alloc::string::String) -> Result { + Ok(T::from(v)) } } - d.deserialize_any(V) + d.deserialize_any(V::(core::marker::PhantomData)) } } @@ -1132,6 +1157,17 @@ proto_elem_json_delegate!(bool, proto_bool); proto_elem_json_delegate!(alloc::string::String, proto_string); proto_elem_json_delegate!(alloc::vec::Vec, bytes); +// Configurable `string` field representations (codegen's `string_type()`), for +// `repeated string` / `map<_, string>`. The `proto_string` with-module is +// generic — `serialize` over `AsRef`, `deserialize` over `From` — +// so each delegate is a one-liner with no per-type shim. +#[cfg(feature = "smol_str")] +proto_elem_json_delegate!(::smol_str::SmolStr, proto_string); +#[cfg(feature = "ecow")] +proto_elem_json_delegate!(::ecow::EcoString, proto_string); +#[cfg(feature = "compact_str")] +proto_elem_json_delegate!(::compact_str::CompactString, proto_string); + // bytes::Bytes — for codegen's `use_bytes_type()` with `repeated bytes`. // Serialize: `Bytes: Deref` → `bytes::serialize(&[u8], s)`. // Deserialize: `bytes::deserialize` is generic over `T: From>`; diff --git a/buffa/src/json_helpers/tests.rs b/buffa/src/json_helpers/tests.rs index 3889ad1..b6775d2 100644 --- a/buffa/src/json_helpers/tests.rs +++ b/buffa/src/json_helpers/tests.rs @@ -1227,6 +1227,100 @@ fn proto_string_null_is_empty() { assert_eq!(v.0, ""); } +/// A stand-in for a configurable string type (`SmolStr`/`EcoString`/...): it +/// implements just the `From`/`From<&str>`/`AsRef` surface the +/// generic `proto_string` path relies on, proving the with-module deserializes +/// into an arbitrary target type without a per-type shim. +#[derive(Clone, PartialEq, Debug, Default)] +struct MyStr(alloc::string::String); +impl From for MyStr { + fn from(s: alloc::string::String) -> Self { + MyStr(s) + } +} +impl From<&str> for MyStr { + fn from(s: &str) -> Self { + MyStr(s.into()) + } +} +impl AsRef for MyStr { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[derive(serde::Serialize, serde::Deserialize)] +struct SerdeCustomStr(#[serde(with = "proto_string")] MyStr); + +#[test] +fn proto_string_deserializes_into_custom_type() { + let recovered: SerdeCustomStr = serde_json::from_str(r#""hello""#).unwrap(); + assert_eq!(recovered.0, MyStr("hello".into())); + // null still maps to the empty value via the `From` conversion. + let empty: SerdeCustomStr = serde_json::from_str("null").unwrap(); + assert_eq!(empty.0, MyStr::default()); + // Round-trips back out, serialized via `AsRef`. + let json = serde_json::to_string(&SerdeCustomStr(MyStr("hi".into()))).unwrap(); + assert_eq!(json, r#""hi""#); +} + +// Configurable string types: the singular `proto_string` with-module and the +// `ProtoElemJson` (repeated) path must both round-trip through proto3 JSON. +#[cfg(feature = "smol_str")] +#[test] +fn proto_string_smol_str_roundtrip() { + #[derive(serde::Serialize, serde::Deserialize)] + struct W(#[serde(with = "proto_string")] smol_str::SmolStr); + let json = serde_json::to_string(&W("hi".into())).unwrap(); + assert_eq!(json, r#""hi""#); + assert_eq!(serde_json::from_str::(&json).unwrap().0, "hi"); + assert_eq!(serde_json::from_str::("null").unwrap().0, ""); + + // repeated string via ProtoElemJson + #[derive(serde::Serialize, serde::Deserialize)] + struct R(#[serde(with = "proto_seq")] alloc::vec::Vec); + let v = R(vec!["a".into(), "b".into()]); + let json = serde_json::to_string(&v).unwrap(); + assert_eq!(json, r#"["a","b"]"#); + assert_eq!(serde_json::from_str::(&json).unwrap().0, v.0); +} + +#[cfg(feature = "ecow")] +#[test] +fn proto_string_ecow_roundtrip() { + #[derive(serde::Serialize, serde::Deserialize)] + struct W(#[serde(with = "proto_string")] ecow::EcoString); + let json = serde_json::to_string(&W("hi".into())).unwrap(); + assert_eq!(json, r#""hi""#); + assert_eq!(serde_json::from_str::(&json).unwrap().0, "hi"); + assert_eq!(serde_json::from_str::("null").unwrap().0, ""); + + #[derive(serde::Serialize, serde::Deserialize)] + struct R(#[serde(with = "proto_seq")] alloc::vec::Vec); + let v = R(vec!["a".into(), "b".into()]); + let json = serde_json::to_string(&v).unwrap(); + assert_eq!(json, r#"["a","b"]"#); + assert_eq!(serde_json::from_str::(&json).unwrap().0, v.0); +} + +#[cfg(feature = "compact_str")] +#[test] +fn proto_string_compact_str_roundtrip() { + #[derive(serde::Serialize, serde::Deserialize)] + struct W(#[serde(with = "proto_string")] compact_str::CompactString); + let json = serde_json::to_string(&W("hi".into())).unwrap(); + assert_eq!(json, r#""hi""#); + assert_eq!(serde_json::from_str::(&json).unwrap().0, "hi"); + assert_eq!(serde_json::from_str::("null").unwrap().0, ""); + + #[derive(serde::Serialize, serde::Deserialize)] + struct R(#[serde(with = "proto_seq")] alloc::vec::Vec); + let v = R(vec!["a".into(), "b".into()]); + let json = serde_json::to_string(&v).unwrap(); + assert_eq!(json, r#"["a","b"]"#); + assert_eq!(serde_json::from_str::(&json).unwrap().0, v.0); +} + // ── closed_enum tests ───────────────────────────────────────────────── #[derive(serde::Serialize, serde::Deserialize)] diff --git a/buffa/src/lib.rs b/buffa/src/lib.rs index 4241434..0b9137e 100644 --- a/buffa/src/lib.rs +++ b/buffa/src/lib.rs @@ -73,6 +73,16 @@ //! | `json` | | Proto3 JSON via `serde` (the `json_helpers` and `any_registry` modules) | //! | `text` | | Textproto (human-readable) encoding and decoding | //! | `arbitrary` | | `arbitrary::Arbitrary` impls for fuzzing | +//! | `smol_str` | | Allow generated `string` fields to use `smol_str::SmolStr` (see [`ProtoString`]) | +//! | `ecow` | | Allow generated `string` fields to use `ecow::EcoString` (see [`ProtoString`]) | +//! | `compact_str` | | Allow generated `string` fields to use `compact_str::CompactString` (see [`ProtoString`]) | +//! +//! The three string-type flags compose with `json` and `arbitrary`: enabling, +//! for example, both `smol_str` and `json` turns on `smol_str/serde`, and +//! `smol_str` + `arbitrary` turns on `smol_str/arbitrary`. `ecow` has no native +//! `Arbitrary` impl, so `ecow` + `arbitrary` is served by an in-crate shim +//! instead. None of the three is selected by generated code until a build is +//! configured to use it (see [`ProtoString`]). //! //! With `default-features = false` the crate is `#![no_std]` (requires //! `alloc`). Proto3 JSON serialization still works without `std` via @@ -134,6 +144,21 @@ pub use ::bytes; #[doc(hidden)] pub use ::serde_json; +// Configurable `string` field representations. Re-exported so that code +// generated with `buffa_build`'s `string_type` knob can name +// `::buffa::smol_str::SmolStr` (etc.) without the consumer crate declaring the +// dependency — the same arrangement as `bytes` above. Each is gated on the +// matching `buffa` feature. +#[cfg(feature = "compact_str")] +#[doc(hidden)] +pub use ::compact_str; +#[cfg(feature = "ecow")] +#[doc(hidden)] +pub use ::ecow; +#[cfg(feature = "smol_str")] +#[doc(hidden)] +pub use ::smol_str; + /// Include the generated stitcher for a proto **package** from `OUT_DIR`. /// /// Codegen emits one `.mod.rs` per package which `include!`s the @@ -218,6 +243,7 @@ pub use message::{DecodeOptions, Message, MessageName, RECURSION_LIMIT}; pub use message_field::{DefaultInstance, MessageField}; pub use oneof::Oneof; pub use size_cache::SizeCache; +pub use types::ProtoString; pub use unknown_fields::{UnknownField, UnknownFieldData, UnknownFields}; #[cfg(feature = "text")] @@ -287,6 +313,43 @@ pub mod __private { let vv: ::alloc::vec::Vec<::alloc::vec::Vec> = ::arbitrary::Arbitrary::arbitrary(u)?; Ok(vv.into_iter().map(::bytes::Bytes::from).collect()) } + + /// `arbitrary` helpers for `ecow::EcoString` string fields. + /// + /// Unlike `smol_str::SmolStr` and `compact_str::CompactString`, `EcoString` + /// ships no `Arbitrary` impl, so codegen attaches + /// `#[arbitrary(with = ::buffa::__private::arbitrary_ecow*)]` to + /// `EcoString`-typed string fields (the same pattern used for + /// `bytes::Bytes`). The three variants cover singular, optional, and + /// repeated fields; oneof variant inner fields use `arbitrary_ecow`. + #[cfg(all(feature = "ecow", feature = "arbitrary"))] + pub fn arbitrary_ecow( + u: &mut ::arbitrary::Unstructured<'_>, + ) -> ::arbitrary::Result<::ecow::EcoString> { + let s: ::alloc::string::String = ::arbitrary::Arbitrary::arbitrary(u)?; + Ok(::ecow::EcoString::from(s)) + } + + #[cfg(all(feature = "ecow", feature = "arbitrary"))] + pub fn arbitrary_ecow_opt( + u: &mut ::arbitrary::Unstructured<'_>, + ) -> ::arbitrary::Result<::core::option::Option<::ecow::EcoString>> { + let opt: ::core::option::Option<::alloc::string::String> = + ::arbitrary::Arbitrary::arbitrary(u)?; + Ok(opt.map(::ecow::EcoString::from)) + } + + #[cfg(all(feature = "ecow", feature = "arbitrary"))] + pub fn arbitrary_ecow_vec( + u: &mut ::arbitrary::Unstructured<'_>, + ) -> ::arbitrary::Result<::alloc::vec::Vec<::ecow::EcoString>> { + // Materializing a `Vec` first (rather than building `EcoString`s + // element-by-element) is deliberate: it makes the byte-consumption order + // identical to the underlying `Vec` impl, which is what the + // parity test asserts. Do not "optimize" the intermediate `Vec` away. + let vv: ::alloc::vec::Vec<::alloc::string::String> = ::arbitrary::Arbitrary::arbitrary(u)?; + Ok(vv.into_iter().map(::ecow::EcoString::from).collect()) + } } #[cfg(all(test, feature = "arbitrary"))] @@ -342,6 +405,32 @@ mod arbitrary_tests { assert_eq!(b.slice(..).as_ref(), v.as_slice()); } } + + // The `EcoString` arbitrary shims must mirror the underlying `String` / + // `Option` / `Vec` impls, since `ecow` ships no native + // `Arbitrary`. The other two configurable string types use their own + // native impls and need no shim. + #[cfg(feature = "ecow")] + #[test] + fn arbitrary_ecow_matches_string() { + use super::__private::{arbitrary_ecow, arbitrary_ecow_opt, arbitrary_ecow_vec}; + use alloc::string::String; + + let s = arbitrary_ecow(&mut Unstructured::new(&SEED)).unwrap(); + let v: String = Arbitrary::arbitrary(&mut Unstructured::new(&SEED)).unwrap(); + assert_eq!(s.as_str(), v.as_str()); + + let so = arbitrary_ecow_opt(&mut Unstructured::new(&SEED)).unwrap(); + let vo: Option = Arbitrary::arbitrary(&mut Unstructured::new(&SEED)).unwrap(); + assert_eq!(so.as_ref().map(|x| x.as_str()), vo.as_deref()); + + let sv = arbitrary_ecow_vec(&mut Unstructured::new(&SEED)).unwrap(); + let vv: Vec = Arbitrary::arbitrary(&mut Unstructured::new(&SEED)).unwrap(); + assert_eq!(sv.len(), vv.len()); + for (a, b) in sv.iter().zip(&vv) { + assert_eq!(a.as_str(), b.as_str()); + } + } } /// Minimal fixture types for compile-checking doc examples. diff --git a/buffa/src/types.rs b/buffa/src/types.rs index c3fd30c..a4af12e 100644 --- a/buffa/src/types.rs +++ b/buffa/src/types.rs @@ -447,6 +447,87 @@ pub fn string_encoded_len(value: &str) -> usize { varint_len(len as u64) + len } +/// The bound generated code places on the Rust type used for a proto `string` +/// field. You neither implement nor name this trait by hand — a blanket impl +/// covers every conforming type, and a forthcoming `string_type` knob in +/// `buffa_build` selects the concrete type at code-generation time. +/// +/// buffa generates [`String`] by default. The knob will be able to substitute a +/// small-string-optimized type — such as `smol_str::SmolStr`, +/// `ecow::EcoString`, or `compact_str::CompactString` (each behind the matching +/// `buffa` feature) — for read-mostly schemas where `String`'s growable buffer +/// is unnecessary. +/// +/// The bounds are exactly what generated code requires of a string field: +/// +/// - `Clone + PartialEq + Default + Debug` — for the `#[derive(...)]` and the +/// hand-written `Debug` impl on message structs, and for `clear()` (which +/// resets the field to [`Default`] rather than relying on a `String`-specific +/// `clear`, since the small-string types may be immutable). +/// - `Send + Sync` — so a message owning such a field stays `Send + Sync`; +/// without this bound an exotic string type could silently make every +/// containing message thread-unsafe. +/// - [`AsRef`] — the encoder borrows the field as `&str`; +/// [`encode_string`] and [`string_encoded_len`] take `&str`. +/// - `From` and `From<&str>` — used by the decode path +/// ([`decode_string_to`]) and (once the knob lands) the view→owned conversion +/// to construct the field from freshly decoded text. +/// +/// For the default `String` representation every conversion is the identity, so +/// the generic path costs nothing relative to the specialized one. +pub trait ProtoString: + Clone + + PartialEq + + Default + + core::fmt::Debug + + Send + + Sync + + AsRef + + From + + for<'a> From<&'a str> +{ +} + +impl ProtoString for T where + T: Clone + + PartialEq + + Default + + core::fmt::Debug + + Send + + Sync + + AsRef + + From + + for<'a> From<&'a str> +{ +} + +// The default representation must always satisfy the bound; freeze that +// invariant against future changes to the trait's supertraits. +const _: fn() = || { + fn assert_proto_string() {} + assert_proto_string::(); +}; + +/// Decode a length-delimited `string` into a configurable [`ProtoString`] type. +/// +/// This is the generic counterpart to [`decode_string`]: it reads the varint +/// length prefix, copies that many bytes, and validates UTF-8 identically, then +/// constructs the target representation via `From`. Generated code uses +/// the in-place [`merge_string`] for `String` fields (which reuses the existing +/// allocation) and this helper for every other [`ProtoString`] type. +/// +/// # Errors +/// +/// Propagates the same errors as [`decode_string`]: +/// - [`DecodeError::UnexpectedEof`] if the buffer is shorter than the declared +/// length. +/// - [`DecodeError::MessageTooLarge`] if the declared length overflows `usize`. +/// - [`DecodeError::InvalidUtf8`] if the bytes are not valid UTF-8. +#[inline] +pub fn decode_string_to(buf: &mut impl Buf) -> Result { + decode_string(buf).map(S::from) +} + /// Encode a `bytes` value as a varint length prefix followed by raw bytes /// (wire type 2). #[inline] @@ -1259,6 +1340,84 @@ mod tests { assert_eq!(decoded, ""); } + #[test] + fn test_decode_string_to_roundtrip() { + // `decode_string_to::` must decode identically to `decode_string` + // for any `ProtoString` type; `String` exercises the identity path. + for s in ["", "hello", "héllo", "世界", "a".repeat(128).as_str()] { + let mut buf = Vec::new(); + encode_string(s, &mut buf); + let decoded: String = decode_string_to(&mut buf.as_slice()).unwrap(); + assert_eq!(s, decoded); + } + } + + #[test] + fn test_decode_string_to_propagates_errors() { + // Invalid UTF-8 surfaces before the `From` conversion. + let bad: &[u8] = &[0x02, 0xFF, 0xFE]; + assert_eq!( + decode_string_to::(&mut &bad[..]), + Err(DecodeError::InvalidUtf8) + ); + // Truncated payload is reported as EOF. + let short: &[u8] = &[0x04, 0x61, 0x62]; + assert_eq!( + decode_string_to::(&mut &short[..]), + Err(DecodeError::UnexpectedEof) + ); + } + + /// Every configurable string representation must decode identically to + /// `String` via the shared `decode_string_to` / `ProtoString` path. The + /// inputs straddle the small-string-optimization inline boundary so both + /// the inline and heap representations are exercised. + #[cfg(feature = "smol_str")] + #[test] + fn test_decode_string_to_smol_str() { + // Lengths straddle the inline caps of all three SSO types (ecow ~15, + // smol_str 23, compact_str 24 bytes on 64-bit) plus a clearly-heap case. + for n in [0usize, 13, 14, 15, 22, 23, 24, 25, 64] { + let owned = "a".repeat(n); + let s = owned.as_str(); + let mut buf = Vec::new(); + encode_string(s, &mut buf); + let decoded: smol_str::SmolStr = decode_string_to(&mut buf.as_slice()).unwrap(); + assert_eq!(decoded.as_str(), s); + } + } + + #[cfg(feature = "ecow")] + #[test] + fn test_decode_string_to_ecow() { + // Lengths straddle the inline caps of all three SSO types (ecow ~15, + // smol_str 23, compact_str 24 bytes on 64-bit) plus a clearly-heap case. + for n in [0usize, 13, 14, 15, 22, 23, 24, 25, 64] { + let owned = "a".repeat(n); + let s = owned.as_str(); + let mut buf = Vec::new(); + encode_string(s, &mut buf); + let decoded: ecow::EcoString = decode_string_to(&mut buf.as_slice()).unwrap(); + assert_eq!(decoded.as_str(), s); + } + } + + #[cfg(feature = "compact_str")] + #[test] + fn test_decode_string_to_compact_str() { + // Lengths straddle the inline caps of all three SSO types (ecow ~15, + // smol_str 23, compact_str 24 bytes on 64-bit) plus a clearly-heap case. + for n in [0usize, 13, 14, 15, 22, 23, 24, 25, 64] { + let owned = "a".repeat(n); + let s = owned.as_str(); + let mut buf = Vec::new(); + encode_string(s, &mut buf); + let decoded: compact_str::CompactString = + decode_string_to(&mut buf.as_slice()).unwrap(); + assert_eq!(decoded.as_str(), s); + } + } + #[test] fn test_string_invalid_utf8() { // Length prefix = 2, payload = two invalid UTF-8 bytes.