From 89b14437dcc5d1f1f7c0b6ee68b03460e3928971 Mon Sep 17 00:00:00 2001 From: Takeru Ohta Date: Sat, 6 Sep 2025 16:10:39 +0900 Subject: [PATCH 1/4] feat: optimize atom encoding by using SMALL_ATOM_UTF8_EXT for short atoms --- src/codec.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/codec.rs b/src/codec.rs index f06dd3c..bdb62b3 100644 --- a/src/codec.rs +++ b/src/codec.rs @@ -628,13 +628,13 @@ impl Encoder { return Err(EncodeError::TooLongAtomName(x.clone())); } - let is_ascii = x.name.as_bytes().iter().all(|&c| c < 0x80); - if is_ascii { - self.writer.write_u8(ATOM_EXT)?; + if let Ok(len) = u8::try_from(x.name.len()) { + self.writer.write_u8(SMALL_ATOM_UTF8_EXT)?; + self.writer.write_u8(len)?; } else { self.writer.write_u8(ATOM_UTF8_EXT)?; + self.writer.write_u16(x.name.len() as u16)?; } - self.writer.write_u16(x.name.len() as u16)?; self.writer.write_all(x.name.as_bytes())?; Ok(()) } From 231da4c06aae4ffc748a1c2c3b9c35646981b8e3 Mon Sep 17 00:00:00 2001 From: Takeru Ohta Date: Sat, 6 Sep 2025 16:22:02 +0900 Subject: [PATCH 2/4] test: update expected bytes for SMALL_ATOM_UTF8_EXT encoding format --- tests/lib.rs | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/tests/lib.rs b/tests/lib.rs index f31a4d1..e433128 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -28,7 +28,7 @@ fn atom_test() { // Encode assert_eq!( - vec![131, 100, 0, 3, 102, 111, 111], + vec![131, 119, 3, 102, 111, 111], encode(Term::from(Atom::from("foo"))) ); } @@ -170,8 +170,8 @@ fn pid_test() { // Encode assert_eq!( vec![ - 131, 88, 100, 0, 13, 110, 111, 110, 111, 100, 101, 64, 110, 111, 104, 111, 115, 116, 0, - 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0 + 131, 88, 119, 13, 110, 111, 110, 111, 100, 101, 64, 110, 111, 104, 111, 115, 116, 0, 0, + 0, 49, 0, 0, 0, 0, 0, 0, 0, 0 ], encode(Term::from(Pid::from(("nonode@nohost", 49, 0)))) ); @@ -198,8 +198,8 @@ fn port_test() { // Encode assert_eq!( vec![ - 131, 89, 100, 0, 13, 110, 111, 110, 111, 100, 101, 64, 110, 111, 104, 111, 115, 116, 0, - 0, 1, 110, 0, 0, 0, 0 + 131, 89, 119, 13, 110, 111, 110, 111, 100, 101, 64, 110, 111, 104, 111, 115, 116, 0, 0, + 1, 110, 0, 0, 0, 0 ], encode(Term::from(Port::from(("nonode@nohost", 366)))) ); @@ -231,7 +231,7 @@ fn reference_test() { // Encode assert_eq!( vec![ - 131, 90, 0, 1, 100, 0, 3, 102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 123 + 131, 90, 0, 1, 119, 3, 102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 123 ], encode(Term::from(Reference::from(("foo", 123)))) ); @@ -256,9 +256,7 @@ fn external_fun_test() { // Encode assert_eq!( - vec![ - 131, 113, 100, 0, 3, 102, 111, 111, 100, 0, 3, 98, 97, 114, 97, 3 - ], + vec![131, 113, 119, 3, 102, 111, 111, 119, 3, 98, 97, 114, 97, 3], encode(Term::from(ExternalFun::from(("foo", "bar", 3)))) ); } @@ -278,10 +276,10 @@ fn internal_fun_test() { free_vars: vec![Term::from(FixInteger::from(10))], }; let bytes = [ - 131, 112, 0, 0, 0, 71, 1, 115, 60, 203, 97, 151, 228, 98, 75, 71, 169, 49, 166, 34, 126, - 65, 11, 0, 0, 0, 0, 0, 0, 0, 1, 100, 0, 1, 97, 97, 0, 98, 3, 153, 230, 91, 88, 100, 0, 13, - 110, 111, 110, 111, 100, 101, 64, 110, 111, 104, 111, 115, 116, 0, 0, 0, 36, 0, 0, 0, 0, 0, - 0, 0, 0, 97, 10, + 131, 112, 0, 0, 0, 69, 1, 115, 60, 203, 97, 151, 228, 98, 75, 71, 169, 49, 166, 34, 126, + 65, 11, 0, 0, 0, 0, 0, 0, 0, 1, 119, 1, 97, 97, 0, 98, 3, 153, 230, 91, 88, 119, 13, 110, + 111, 110, 111, 100, 101, 64, 110, 111, 104, 111, 115, 116, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 97, 10, ]; // Decode assert_eq!(Ok(term.clone()), decode(&bytes).try_into()); @@ -381,7 +379,7 @@ fn list_test() { // Encode assert_eq!(vec![131, 106], encode(Term::from(List::nil()))); assert_eq!( - vec![131, 108, 0, 0, 0, 1, 100, 0, 1, 97, 106], + vec![131, 108, 0, 0, 0, 1, 119, 1, 97, 106], encode(Term::from(List::from(vec![Term::from(Atom::from("a"))]))) ); } @@ -409,7 +407,7 @@ fn improper_list_test() { // Encode assert_eq!( - vec![131, 108, 0, 0, 0, 1, 100, 0, 1, 97, 97, 1], + vec![131, 108, 0, 0, 0, 1, 119, 1, 97, 97, 1], encode(Term::from(ImproperList::from(( vec![Term::from(Atom::from("a"))], Term::from(FixInteger::from(1)) @@ -441,7 +439,7 @@ fn tuple_test() { // Encode assert_eq!( - vec![131, 104, 2, 100, 0, 1, 97, 97, 1], + vec![131, 104, 2, 119, 1, 97, 97, 1], encode(Term::from(Tuple::from(vec![ Term::from(Atom::from("a")), Term::from(FixInteger::from(1)) @@ -478,12 +476,8 @@ fn map_test() { let buf = encode(Term::from(map.clone())); // Hashmap Iter is not deterministic, so we need to check both possible outputs assert!( - [ - 131, 116, 0, 0, 0, 2, 97, 1, 97, 2, 100, 0, 1, 97, 100, 0, 1, 98 - ] == buf.as_slice() - || [ - 131, 116, 0, 0, 0, 2, 100, 0, 1, 97, 100, 0, 1, 98, 97, 1, 97, 2 - ] == buf.as_slice() + [131, 116, 0, 0, 0, 2, 97, 1, 97, 2, 119, 1, 97, 119, 1, 98] == buf.as_slice() + || [131, 116, 0, 0, 0, 2, 119, 1, 97, 119, 1, 98, 97, 1, 97, 2] == buf.as_slice() ); //Access From 2df859d8e11a4007b1336a9f0f68d0d7b342eb62 Mon Sep 17 00:00:00 2001 From: Takeru Ohta Date: Sat, 6 Sep 2025 16:23:37 +0900 Subject: [PATCH 3/4] docs: update example to use SMALL_ATOM_UTF8_EXT encoding format --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f7b6052..02ae420 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ //! use std::io::Cursor; //! use eetf::{Term, Atom}; //! -//! let bytes = vec![131, 100, 0, 3, 102, 111, 111]; +//! let bytes = vec![131, 119, 3, 102, 111, 111]; //! let term = Term::decode(Cursor::new(&bytes)).unwrap(); //! assert_eq!(term, Term::from(Atom::from("foo"))); //! ``` @@ -21,7 +21,7 @@ //! let mut buf = Vec::new(); //! let term = Term::from(Atom::from("foo")); //! term.encode(&mut buf).unwrap(); -//! assert_eq!(vec![131, 100, 0, 3, 102, 111, 111], buf); +//! assert_eq!(vec![131, 119, 3, 102, 111, 111], buf); //! ``` //! //! # Reference From 0aa47812d7349a2bb3d75bd1178388d737db5269 Mon Sep 17 00:00:00 2001 From: Takeru Ohta Date: Sat, 6 Sep 2025 16:26:49 +0900 Subject: [PATCH 4/4] refactor: improve atom encoding with better length validation and type safety --- src/codec.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/codec.rs b/src/codec.rs index bdb62b3..5d966c3 100644 --- a/src/codec.rs +++ b/src/codec.rs @@ -624,16 +624,14 @@ impl Encoder { Ok(()) } fn encode_atom(&mut self, x: &Atom) -> EncodeResult { - if x.name.len() > 0xFFFF { - return Err(EncodeError::TooLongAtomName(x.clone())); - } - if let Ok(len) = u8::try_from(x.name.len()) { self.writer.write_u8(SMALL_ATOM_UTF8_EXT)?; self.writer.write_u8(len)?; - } else { + } else if let Ok(len) = u16::try_from(x.name.len()) { self.writer.write_u8(ATOM_UTF8_EXT)?; - self.writer.write_u16(x.name.len() as u16)?; + self.writer.write_u16(len)?; + } else { + return Err(EncodeError::TooLongAtomName(x.clone())); } self.writer.write_all(x.name.as_bytes())?; Ok(())