diff --git a/Changelog.md b/Changelog.md index 093531193..688f3a2f5 100644 --- a/Changelog.md +++ b/Changelog.md @@ -27,6 +27,8 @@ struct and can be applied at once. When `serde-types` feature is enabled, config ### Bug Fixes +- [#923]: Implement correct skipping of well-formed DTD. + ### Misc Changes - [#908]: Increase minimal supported `serde` version from 1.0.139 to 1.0.180. @@ -41,6 +43,7 @@ struct and can be applied at once. When `serde-types` feature is enabled, config [#908]: https://github.com/tafia/quick-xml/pull/908 [#913]: https://github.com/tafia/quick-xml/pull/913 [#924]: https://github.com/tafia/quick-xml/pull/924 +[#923]: https://github.com/tafia/quick-xml/issues/923 ## 0.38.4 -- 2025-11-11 diff --git a/src/parser/comment.rs b/src/parser/comment.rs new file mode 100644 index 000000000..045172906 --- /dev/null +++ b/src/parser/comment.rs @@ -0,0 +1,190 @@ +//! Contains a parser for an XML comment. + +use crate::errors::SyntaxError; +use crate::parser::Parser; + +/// A parser that search a `-->` sequence in the slice. +/// +/// To use a parser create an instance of parser and [`feed`] data into it. +/// After successful search the parser will return [`Some`] with position where +/// comment is ended (the position after `-->`). If search was unsuccessful, +/// a [`None`] will be returned. You typically would expect positive result of +/// search, so that you should feed new data until yo'll get it. +/// +/// NOTE: after successful match the parser does not returned to the initial +/// state and should not be used anymore. Create a new parser if you want to perform +/// new search. +/// +/// # Example +/// +/// ``` +/// # use pretty_assertions::assert_eq; +/// use quick_xml::parser::{CommentParser, Parser}; +/// +/// let mut parser = CommentParser::default(); +/// +/// // Parse `and the text follow...` +/// // splitted into three chunks +/// assert_eq!(parser.feed(b"and the text follow..."), Some(12)); +/// // ^ ^ +/// // 0 11 +/// ``` +/// +/// [`feed`]: Self::feed() +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum CommentParser { + /// The parser does not yet seen any dashes at the end of previous slice. + Seen0, + /// The parser already seen one dash on the end of previous slice. + Seen1, + /// The parser already seen two dashes on the end of previous slice. + Seen2, +} + +impl Default for CommentParser { + #[inline] + fn default() -> Self { + Self::Seen0 + } +} + +impl Parser for CommentParser { + /// Determines the end position of an XML comment in the provided slice. + /// Comments is a pieces of text enclosed in `` braces. + /// Comment ends on the first occurrence of `-->` which cannot be escaped. + /// + /// Returns position after the `-->` or `None` if such sequence was not found. + /// + /// # Parameters + /// - `bytes`: a slice to find the end of a comment. + /// Should contain text in ASCII-compatible encoding + #[inline] + fn feed(&mut self, bytes: &[u8]) -> Option { + let result = match self { + Self::Seen0 => seen0(bytes), + Self::Seen1 => seen1(bytes), + Self::Seen2 => seen2(bytes), + }; + if let Some(r) = result { + return Some(r); + } + if bytes.ends_with(b"--") { + *self = Self::Seen2; + } else { + self.next_state(bytes.last().copied()); + } + None + } + + #[inline] + fn eof_error(self, _content: &[u8]) -> SyntaxError { + SyntaxError::UnclosedComment + } +} + +impl CommentParser { + #[inline] + fn next_state(&mut self, last: Option) { + match (*self, last) { + (Self::Seen0, Some(b'-')) => *self = Self::Seen1, + + (Self::Seen1, Some(b'-')) => *self = Self::Seen2, + (Self::Seen1, Some(_)) => *self = Self::Seen0, + + (Self::Seen2, Some(b'-')) => {} + (Self::Seen2, Some(_)) => *self = Self::Seen0, + + _ => {} + } + } +} + +#[inline] +fn seen0(bytes: &[u8]) -> Option { + for i in memchr::memchr_iter(b'>', bytes) { + if bytes[..i].ends_with(b"--") { + // +1 for `>` which should be included in event + return Some(i + 1); + } + } + None +} + +#[inline] +fn seen1(bytes: &[u8]) -> Option { + // -|-> + if bytes.starts_with(b"->") { + return Some(2); + } + // Even if the first character is `-` it cannot be part of close sequence, + // because we checked that condition above. That means that we can forgot that + // we seen one `-` at the end of the previous chunk. + // -|x... + seen0(bytes) +} + +#[inline] +fn seen2(bytes: &[u8]) -> Option { + match bytes.get(0) { + // --| + None => None, + // --|> + Some(b'>') => Some(1), + // The end sequence here can be matched only if bytes starts with `->` + // which is handled in seen1(). + // --|x... + Some(_) => seen1(bytes), + } +} + +#[test] +fn parse() { + use pretty_assertions::assert_eq; + use CommentParser::*; + + /// Returns `Ok(pos)` with the position in the buffer where element is ended. + /// + /// Returns `Err(internal_state)` if parsing was not done yet. + fn parse_comment(bytes: &[u8], mut parser: CommentParser) -> Result { + match parser.feed(bytes) { + Some(i) => Ok(i), + None => Err(parser), + } + } + + assert_eq!(parse_comment(b"", Seen0), Err(Seen0)); // xx| + assert_eq!(parse_comment(b"", Seen1), Err(Seen1)); // x-| + assert_eq!(parse_comment(b"", Seen2), Err(Seen2)); // --| + + assert_eq!(parse_comment(b"-", Seen0), Err(Seen1)); // xx|- + assert_eq!(parse_comment(b"-", Seen1), Err(Seen2)); // x-|- + assert_eq!(parse_comment(b"-", Seen2), Err(Seen2)); // --|- + + assert_eq!(parse_comment(b">", Seen0), Err(Seen0)); // xx|> + assert_eq!(parse_comment(b">", Seen1), Err(Seen0)); // x-|> + assert_eq!(parse_comment(b">", Seen2), Ok(1)); // --|> + + assert_eq!(parse_comment(b"--", Seen0), Err(Seen2)); // xx|-- + assert_eq!(parse_comment(b"--", Seen1), Err(Seen2)); // x-|-- + assert_eq!(parse_comment(b"--", Seen2), Err(Seen2)); // --|-- + + assert_eq!(parse_comment(b"->", Seen0), Err(Seen0)); // xx|-> + assert_eq!(parse_comment(b"->", Seen1), Ok(2)); // x-|-> + assert_eq!(parse_comment(b"->", Seen2), Ok(2)); // --|-> + + assert_eq!(parse_comment(b"-->", Seen0), Ok(3)); // xx|--> + assert_eq!(parse_comment(b"-->", Seen1), Ok(3)); // x-|--> + assert_eq!(parse_comment(b"-->", Seen2), Ok(3)); // --|--> + + assert_eq!(parse_comment(b">-->", Seen0), Ok(4)); // xx|>--> + assert_eq!(parse_comment(b">-->", Seen1), Ok(4)); // x-|>--> + assert_eq!(parse_comment(b">-->", Seen2), Ok(1)); // --|>--> + + assert_eq!(parse_comment(b"->-->", Seen0), Ok(5)); // xx|->--> + assert_eq!(parse_comment(b"->-->", Seen1), Ok(2)); // x-|->--> + assert_eq!(parse_comment(b"->-->", Seen2), Ok(2)); // --|->--> +} diff --git a/src/parser/dtd.rs b/src/parser/dtd.rs new file mode 100644 index 000000000..3b10758d7 --- /dev/null +++ b/src/parser/dtd.rs @@ -0,0 +1,273 @@ +use crate::parser::{CommentParser, ElementParser, Parser, PiParser}; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum DtdParser { + /// If inside a PubidLiteral or SystemLiteral, it holds the quote type (either `'` or `"`). + /// Otherwise, it holds `0` (this is an initial state). + /// + /// ```text + /// [28] doctypedecl ::= '' + /// ``` + BeforeInternalSubset(u8), + /// Inside of the `intSubset` rule. + /// + /// ```text + /// [28a] DeclSep ::= PEReference | S + /// [28b] intSubset ::= (markupdecl | DeclSep)* + /// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment + /// ``` + InsideOfInternalSubset, + /// After `]` but before `>`. + AfterInternalSubset, + InComment(CommentParser), + InPi(PiParser), + /// ```text + /// [45] elementdecl ::= '' + /// ``` + InElementDecl, + /// This state handles ATTLIST, ENTITY and NOTATION elements, i.e. all elements that can have + /// quotes strings (`'...'` or `"..."`) inside their markup, in which `>` should not be threated + /// as the end of the markup. + /// + /// This state handles the following productions from XML grammar: + /// + /// ### ATTLIST + /// + /// ```text + /// [52] AttlistDecl ::= '' + /// [53] AttDef ::= S Name S AttType S DefaultDecl + /// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) + /// ``` + /// + /// ### ENTITY + /// + /// ```text + /// [70] EntityDecl ::= GEDecl | PEDecl + /// [71] GEDecl ::= '' + /// [72] PEDecl ::= '' + /// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) + /// [74] PEDef ::= EntityValue | ExternalID + /// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral + /// [76] NDataDecl ::= S 'NDATA' S Name + /// ``` + /// + /// ### NOTATION + /// + /// ```text + /// [82] NotationDecl ::= '' + /// ``` + InQuoteSensitive(ElementParser), + /// The state where it was not possible to determine which markup it was during the previous iteration. \ + /// It holds the number of bytes read since the start of the markup. + UndecidedMarkup(usize), + Finished, +} + +impl DtdParser { + /// Skip DTD contents. + /// + /// # Parameters (as same as `reader::BangType::parse`) + /// - `buf`: buffer with data consumed on previous iterations + /// - `chunk`: data read on current iteration and not yet consumed from reader + pub fn feed<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> { + // This method assumes the DTD is well-formed. + // Since this crate does not support parsing DTDs, the inability to read non-well-formed DTDs + // is not particularly problematic; the only point of interest is reporting well-formed DTDs + // to the user without errors. + + let mut cur = chunk; + while !cur.is_empty() { + match *self { + Self::BeforeInternalSubset(0) => { + // Find the + // - start of quoted string ('...' or "...") + // - start of internal subset ([...]) + // - end of DOCTYPE declaration (>) + if let Some(i) = cur + .iter() + .position(|&b| matches!(b, b'\'' | b'"' | b'[' | b'>')) + { + let b = cur[i]; + match b { + b'\'' | b'"' => { + // SystemLiteral or PubidLiteral + *self = Self::BeforeInternalSubset(b); + cur = &cur[i + 1..]; + continue; + } + b'[' => { + *self = Self::InsideOfInternalSubset; + cur = &cur[i + 1..]; + continue; + } + b'>' => { + *self = Self::Finished; + let len = chunk.len() - cur.len() + i; + // +1 for `>` + return Some((&chunk[..len], len + 1)); + } + _ => {} + } + continue; + } + break; + } + // Inside the quoted string (this is PubidLiteral or SystemLiteral) we do not want to + // recognize other special characters (namely [ and >). Find only the closing quote + Self::BeforeInternalSubset(quote) => { + // ExternalID handling + if let Some(i) = memchr::memchr(quote, cur) { + *self = Self::BeforeInternalSubset(0); + cur = &cur[i + 1..]; + continue; + } + break; + } + Self::InsideOfInternalSubset => { + // Find the end of internal subset ([) or the start of the markup inside (<) + if let Some(i) = memchr::memchr2(b']', b'<', cur) { + if cur[i] == b']' { + *self = Self::AfterInternalSubset; + cur = &cur[i + 1..]; // +1 to skip `]` + continue; + } + // +1 to start after `<` + if let Some(skip) = self.switch(&cur[i + 1..]) { + cur = &cur[i + 1 + skip..]; // +1 to skip `<` + continue; + } + // Keep the number of already looked bytes (started from byte after `<`, so -1), + // try to decide after feeding the new chunk + *self = Self::UndecidedMarkup(cur.len() - i - 1); + } + break; + } + Self::AfterInternalSubset => { + if let Some(i) = memchr::memchr(b'>', cur) { + *self = Self::Finished; + let len = chunk.len() - cur.len() + i; + // +1 for `>` + return Some((&chunk[..len], len + 1)); + } + break; + } + Self::InComment(ref mut parser) => { + // If comment is ended, return to the main state, otherwise keep in the current state + if let Some(i) = parser.feed(cur) { + *self = Self::InsideOfInternalSubset; + cur = &cur[i..]; + continue; + } + break; + } + Self::InPi(ref mut parser) => { + // If processing instruction is ended, return to the main state, + // otherwise keep in the current state + if let Some(i) = parser.feed(cur) { + *self = Self::InsideOfInternalSubset; + cur = &cur[i..]; + continue; + } + break; + } + Self::InElementDecl => { + // `` does not have places where `>` could be escaped + // so the first occurrence ends that state + if let Some(i) = memchr::memchr(b'>', cur) { + *self = Self::InsideOfInternalSubset; + cur = &cur[i + 1..]; // +1 for `>` + continue; + } + break; + } + Self::InQuoteSensitive(ref mut parser) => { + // If ATTLIST, ENTITY or NOTATION is ended, return to the main state, + // otherwise keep in the current state + if let Some(i) = parser.feed(cur) { + *self = Self::InsideOfInternalSubset; + cur = &cur[i..]; + continue; + } + break; + } + Self::UndecidedMarkup(skipped) => { + // Buffer is long enough to store the longest possible keyword `!NOTATION` + let mut bytes = [0u8; 9]; + + // Copy the last `skipped` bytes from the previous iteration into buffer, + // for example, "!NOT" (skipped = 4 in that case)... + bytes[..skipped].copy_from_slice(&buf[buf.len() - skipped..]); + + // ...add new bytes to the buffer from current iteration, + // for example, "ATION"... + let end = bytes.len().min(skipped + cur.len()); + bytes[skipped..end].copy_from_slice(&cur[..end - skipped]); + + // ...and try to match over it. + // For example, "!NOTATION" will return 9, and we skip 9-4=5 bytes of "ATION" + if let Some(skip) = self.switch(&bytes[..end]) { + cur = &cur[skip - skipped..]; + continue; + } + *self = Self::UndecidedMarkup(skipped + cur.len()); + break; + } + Self::Finished => break, + } + } + + None + } + + #[inline] + fn switch(&mut self, markup: &[u8]) -> Option { + match markup { + [b'?', ..] => { + // { + // Comment, /// . Contains balance of '<' (+1) and '>' (-1) - DocType(i32), + DocType(DtdParser), } impl BangType { #[inline(always)] @@ -1165,7 +1165,7 @@ impl BangType { Ok(match byte { Some(b'[') => Self::CData, Some(b'-') => Self::Comment, - Some(b'D') | Some(b'd') => Self::DocType(0), + Some(b'D') | Some(b'd') => Self::DocType(DtdParser::BeforeInternalSubset(0)), _ => return Err(SyntaxError::InvalidBangMarkup), }) } @@ -1222,18 +1222,7 @@ impl BangType { } } } - Self::DocType(ref mut balance) => { - for i in memchr::memchr2_iter(b'<', b'>', chunk) { - if chunk[i] == b'<' { - *balance += 1; - } else { - if *balance == 0 { - return Some((&chunk[..i], i + 1)); // +1 for `>` - } - *balance -= 1; - } - } - } + Self::DocType(ref mut parser) => return parser.feed(buf, chunk), } None } @@ -1266,7 +1255,7 @@ mod test { mod read_bang_element { use super::*; use crate::errors::{Error, SyntaxError}; - use crate::reader::BangType; + use crate::reader::{BangType, DtdParser}; use crate::utils::Bytes; /// Checks that reading CDATA content works correctly @@ -1552,7 +1541,7 @@ mod test { .unwrap(); assert_eq!( (ty, Bytes(bytes)), - (BangType::DocType(0), Bytes(b"!DOCTYPE")) + (BangType::DocType(DtdParser::Finished), Bytes(b"!DOCTYPE")) ); assert_eq!(position, 10); } @@ -1626,7 +1615,7 @@ mod test { .unwrap(); assert_eq!( (ty, Bytes(bytes)), - (BangType::DocType(0), Bytes(b"!doctype")) + (BangType::DocType(DtdParser::Finished), Bytes(b"!doctype")) ); assert_eq!(position, 10); } diff --git a/src/reader/state.rs b/src/reader/state.rs index 005d698b8..ff08272de 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -7,7 +7,7 @@ use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesT use crate::parser::{Parser, PiParser}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; -use crate::reader::{BangType, Config, ParseState}; +use crate::reader::{BangType, Config, DtdParser, ParseState}; use crate::utils::{is_whitespace, name_len}; /// A struct that holds a current reader state and a parser configuration. @@ -145,7 +145,7 @@ impl ReaderState { // https://www.w3.org/TR/xml11/#sec-prolog-dtd // HTML5 allows mixed case for doctype declarations: // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state - BangType::DocType(0) if uncased_starts_with(buf, b"!DOCTYPE") => { + BangType::DocType(DtdParser::Finished) if uncased_starts_with(buf, b"!DOCTYPE") => { match buf[8..].iter().position(|&b| !is_whitespace(b)) { Some(start) => Ok(Event::DocType(BytesText::wrap( // Cut of `!DOCTYPE` and any number of spaces from start diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index bbc7c0c04..bcf51350a 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -149,3 +149,291 @@ macro_rules! small_buffers_tests { } }; } + +/// Tests for reader-errors and reader-dtd. +#[macro_export] +macro_rules! event_ok { + ($test:ident($xml:literal) => $pos:literal : $event:expr) => { + mod $test { + use super::*; + + mod reader { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn borrowed() { + let mut reader = Reader::from_str($xml); + reader.config_mut().enable_all_checks(true); + assert_eq!( + (reader.read_event().unwrap(), reader.buffer_position()), + ($event, $pos) + ); + } + + #[test] + fn buffered() { + let mut buf = Vec::new(); + let mut reader = Reader::from_str($xml); + reader.config_mut().enable_all_checks(true); + assert_eq!( + ( + reader.read_event_into(&mut buf).unwrap(), + reader.buffer_position() + ), + ($event, $pos) + ); + } + + #[cfg(feature = "async-tokio")] + #[tokio::test] + async fn async_tokio() { + let mut buf = Vec::new(); + let mut reader = Reader::from_str($xml); + reader.config_mut().enable_all_checks(true); + assert_eq!( + ( + reader.read_event_into_async(&mut buf).await.unwrap(), + reader.buffer_position() + ), + ($event, $pos) + ); + } + } + + mod ns_reader { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn borrowed() { + let mut reader = NsReader::from_str($xml); + reader.config_mut().enable_all_checks(true); + assert_eq!( + ( + reader.read_resolved_event().unwrap().1, + reader.buffer_position() + ), + ($event, $pos) + ); + } + + #[test] + fn buffered() { + let mut buf = Vec::new(); + let mut reader = NsReader::from_str($xml); + reader.config_mut().enable_all_checks(true); + assert_eq!( + ( + reader.read_resolved_event_into(&mut buf).unwrap().1, + reader.buffer_position() + ), + ($event, $pos) + ); + } + + #[cfg(feature = "async-tokio")] + #[tokio::test] + async fn async_tokio() { + let mut buf = Vec::new(); + let mut reader = NsReader::from_str($xml); + reader.config_mut().enable_all_checks(true); + assert_eq!( + ( + reader + .read_resolved_event_into_async(&mut buf) + .await + .unwrap() + .1, + reader.buffer_position() + ), + ($event, $pos) + ); + } + } + } + }; +} + +/// Tests for reader-errors and reader-dtd. +#[macro_export] +macro_rules! syntax_err { + ($test:ident($xml:literal) => $pos:expr, $cause:expr) => { + mod $test { + use super::*; + + mod reader { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn borrowed() { + let mut reader = Reader::from_str($xml); + assert_eq!( + reader + .read_event() + .expect("parser should return `Event::Text`"), + Event::Text(BytesText::new(".")) + ); + match reader.read_event() { + Err(Error::Syntax(cause)) => assert_eq!( + (cause, reader.error_position(), reader.buffer_position()), + ($cause, 1, $pos), + ), + x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), + } + assert_eq!( + reader + .read_event() + .expect("parser should return `Event::Eof` after error"), + Event::Eof + ); + } + + #[test] + fn buffered() { + let mut buf = Vec::new(); + let mut reader = Reader::from_str($xml); + assert_eq!( + reader + .read_event_into(&mut buf) + .expect("parser should return `Event::Text`"), + Event::Text(BytesText::new(".")) + ); + match reader.read_event_into(&mut buf) { + Err(Error::Syntax(cause)) => assert_eq!( + (cause, reader.error_position(), reader.buffer_position()), + ($cause, 1, $pos), + ), + x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), + } + assert_eq!( + reader + .read_event_into(&mut buf) + .expect("parser should return `Event::Eof` after error"), + Event::Eof + ); + } + + #[cfg(feature = "async-tokio")] + #[tokio::test] + async fn async_tokio() { + let mut buf = Vec::new(); + let mut reader = Reader::from_str($xml); + assert_eq!( + reader + .read_event_into_async(&mut buf) + .await + .expect("parser should return `Event::Text`"), + Event::Text(BytesText::new(".")) + ); + match reader.read_event_into_async(&mut buf).await { + Err(Error::Syntax(cause)) => assert_eq!( + (cause, reader.error_position(), reader.buffer_position()), + ($cause, 1, $pos), + ), + x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), + } + assert_eq!( + reader + .read_event_into_async(&mut buf) + .await + .expect("parser should return `Event::Eof` after error"), + Event::Eof + ); + } + } + + mod ns_reader { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn borrowed() { + let mut reader = NsReader::from_str($xml); + assert_eq!( + reader + .read_resolved_event() + .expect("parser should return `Event::Text`") + .1, + Event::Text(BytesText::new(".")) + ); + match reader.read_resolved_event() { + Err(Error::Syntax(cause)) => assert_eq!( + (cause, reader.error_position(), reader.buffer_position()), + ($cause, 1, $pos), + ), + x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), + } + assert_eq!( + reader + .read_resolved_event() + .expect("parser should return `Event::Eof` after error") + .1, + Event::Eof + ); + } + + #[test] + fn buffered() { + let mut buf = Vec::new(); + let mut reader = NsReader::from_str($xml); + assert_eq!( + reader + .read_resolved_event_into(&mut buf) + .expect("parser should return `Event::Text`") + .1, + Event::Text(BytesText::new(".")) + ); + match reader.read_resolved_event_into(&mut buf) { + Err(Error::Syntax(cause)) => assert_eq!( + (cause, reader.error_position(), reader.buffer_position()), + ($cause, 1, $pos), + ), + x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), + } + assert_eq!( + reader + .read_resolved_event_into(&mut buf) + .expect("parser should return `Event::Eof` after error") + .1, + Event::Eof + ); + } + + #[cfg(feature = "async-tokio")] + #[tokio::test] + async fn async_tokio() { + let mut buf = Vec::new(); + let mut reader = NsReader::from_str($xml); + assert_eq!( + reader + .read_resolved_event_into_async(&mut buf) + .await + .expect("parser should return `Event::Text`") + .1, + Event::Text(BytesText::new(".")) + ); + match reader.read_resolved_event_into_async(&mut buf).await { + Err(Error::Syntax(cause)) => assert_eq!( + (cause, reader.error_position(), reader.buffer_position()), + ($cause, 1, $pos), + ), + x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), + } + assert_eq!( + reader + .read_resolved_event_into_async(&mut buf) + .await + .expect("parser should return `Event::Eof` after error") + .1, + Event::Eof + ); + } + } + } + }; + ($test:ident($xml:literal) => $cause:expr) => { + syntax_err!($test($xml) => $xml.len() as u64, $cause); + }; +} diff --git a/tests/issues.rs b/tests/issues.rs index 916ba7ffc..966aa9ad6 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -175,11 +175,13 @@ mod issue514 { #[test] fn issue590() { let mut reader = Reader::from_reader(BufReader::with_capacity( - 16, - &b"]>"[..], - // 0 7 ^15 ^23 - //[ ] = capacity + 24, + &b"]>"[..], + // 0 7 ^23 ^44 + //[ ] = capacity )); + // Ensure, that capacity was not increased unexpectedly + assert_eq!(reader.get_ref().capacity(), 24); let mut buf = Vec::new(); loop { if reader.read_event_into(&mut buf).unwrap() == Event::Eof { @@ -512,3 +514,56 @@ fn issue801() { } } } + +/// Regression tests for https://github.com/tafia/quick-xml/issues/923 +mod issue923 { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn case1() { + let mut reader = Reader::from_str( + r#" + +"> + +]> +&ent;"#, + ); + let mut buf = Vec::new(); + reader.read_event_into(&mut buf).unwrap(); // + reader.read_event_into(&mut buf).unwrap(); + reader.read_event_into(&mut buf).unwrap(); // + reader.read_event_into(&mut buf).unwrap(); + reader.read_event_into(&mut buf).unwrap(); // DTD + reader.read_event_into(&mut buf).unwrap(); + reader.read_event_into(&mut buf).unwrap(); // + reader.read_event_into(&mut buf).unwrap(); // &ent; + reader.read_event_into(&mut buf).unwrap(); // + + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } + + #[test] + fn case2() { + let mut reader = Reader::from_str( + r#" + + +]> +"#, + ); + let mut buf = Vec::new(); + reader.read_event_into(&mut buf).unwrap(); // + reader.read_event_into(&mut buf).unwrap(); + reader.read_event_into(&mut buf).unwrap(); // + reader.read_event_into(&mut buf).unwrap(); + reader.read_event_into(&mut buf).unwrap(); // DTD + reader.read_event_into(&mut buf).unwrap(); + reader.read_event_into(&mut buf).unwrap(); // + + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } +} diff --git a/tests/reader-dtd.rs b/tests/reader-dtd.rs new file mode 100644 index 000000000..6ff4fef21 --- /dev/null +++ b/tests/reader-dtd.rs @@ -0,0 +1,137 @@ +use quick_xml::errors::{Error, SyntaxError}; +use quick_xml::events::{BytesText, Event}; +use quick_xml::reader::{NsReader, Reader}; + +// For event_ok and syntax_err macros +mod helpers; + +macro_rules! ok { + ($test:ident : $pos:literal $xml:literal $event:literal) => { + event_ok!($test ($xml) => $pos : Event::DocType(BytesText::from_escaped($event))); + }; +} + +mod without_internal_subset { + use super::*; + + ok!(simple: 15 + "" + "root" + ); + ok!(with_external_id_1: 21 + r#"['">"# + r#"root ">['""# + ); + ok!(with_external_id_2: 21 + r#"["'>"# + r#"root '>["'"# + ); + ok!(with_external_id_3: 22 + r#"['" >"# + r#"root ">['" "# + ); + ok!(with_external_id_4: 22 + r#"["' >"# + r#"root '>["' "# + ); +} + +ok!(with_external_id_1: 23 + r#"['"[]>"# + r#"root ">['"[]"# +); +ok!(with_external_id_2: 23 + r#"["'[]>"# + r#"root '>["'[]"# +); +ok!(with_external_id_3: 25 + r#"['" [] >"# + r#"root ">['" [] "# +); +ok!(with_external_id_4: 25 + r#"["' [] >"# + r#"root '>["' [] "# +); + +ok!(entity_1: 35 + r#"">]>"# + r#"root [">]"# +); +ok!(entity_2: 35 + r#"]>"# + r#"root []"# +); + +ok!(attlist: 86 + r#" 2 is true" att2 #FIXED '>>> in other quote'>]>"# + r#"root [ 2 is true" att2 #FIXED '>>> in other quote'>]"# +); +ok!(notation: 80 + r#">>some_system_id"'>]>"# + r#"root [>>some_system_id"'>]"# +); +ok!(comment: 51 + r#"]>"# + r#"root []"# +); +ok!(pi: 37 + r#"><<>><><>?>]>"# + r#"root [><<>><><>?>]"# +); +ok!(all_together: 164 + " + + '> + '> + --> + ?> + ] + >" + "e [ + + + '> + '> + --> + ?> + ] + " +); + +ok!(unknown_dtd_markup: 34 + " ] >" + "e [ ] " +); + +mod unclosed { + use super::*; + + syntax_err!(doctype_1(". SyntaxError::UnclosedDoctype); + syntax_err!(doctype_2(".['\" [ ] ") => SyntaxError::UnclosedDoctype); + syntax_err!(doctype_3(".[\"' [ ] ") => SyntaxError::UnclosedDoctype); + + syntax_err!(external_id_1(".[\" ") => SyntaxError::UnclosedDoctype); + syntax_err!(external_id_2(".[' ") => SyntaxError::UnclosedDoctype); + + syntax_err!(internal_subset_1(". SyntaxError::UnclosedDoctype); + syntax_err!(internal_subset_2(".['\" [ ") => SyntaxError::UnclosedDoctype); + syntax_err!(internal_subset_3(".[\"' [ ") => SyntaxError::UnclosedDoctype); + + syntax_err!(element(". SyntaxError::UnclosedDoctype); + syntax_err!(attlist(". SyntaxError::UnclosedDoctype); + + syntax_err!(entity_1(". SyntaxError::UnclosedDoctype); + syntax_err!(entity_2(".' ") => SyntaxError::UnclosedDoctype); + syntax_err!(entity_3(".\" ") => SyntaxError::UnclosedDoctype); + + syntax_err!(notation_1(". SyntaxError::UnclosedDoctype); + syntax_err!(notation_2(".' ") => SyntaxError::UnclosedDoctype); + syntax_err!(notation_3(".\" ") => SyntaxError::UnclosedDoctype); + + syntax_err!(comment_1(". SyntaxError::UnclosedDoctype); + syntax_err!(comment_2(". ") => SyntaxError::UnclosedDoctype); + + syntax_err!(pi_1(". SyntaxError::UnclosedDoctype); + syntax_err!(pi_2(". ") => SyntaxError::UnclosedDoctype); +} diff --git a/tests/reader-errors.rs b/tests/reader-errors.rs index e6fce8bec..b65e70cb8 100644 --- a/tests/reader-errors.rs +++ b/tests/reader-errors.rs @@ -4,283 +4,23 @@ use quick_xml::errors::{Error, SyntaxError}; use quick_xml::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; use quick_xml::reader::{NsReader, Reader}; -macro_rules! ok { - ($test:ident($xml:literal) => $pos:literal : $event:expr) => { - mod $test { - use super::*; - - mod reader { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn borrowed() { - let mut reader = Reader::from_str($xml); - reader.config_mut().enable_all_checks(true); - assert_eq!(reader.read_event().unwrap(), $event); - assert_eq!(reader.buffer_position(), $pos); - } - - #[test] - fn buffered() { - let mut buf = Vec::new(); - let mut reader = Reader::from_str($xml); - reader.config_mut().enable_all_checks(true); - assert_eq!(reader.read_event_into(&mut buf).unwrap(), $event); - assert_eq!(reader.buffer_position(), $pos); - } - - #[cfg(feature = "async-tokio")] - #[tokio::test] - async fn async_tokio() { - let mut buf = Vec::new(); - let mut reader = Reader::from_str($xml); - reader.config_mut().enable_all_checks(true); - assert_eq!( - reader.read_event_into_async(&mut buf).await.unwrap(), - $event - ); - assert_eq!(reader.buffer_position(), $pos); - } - } - - mod ns_reader { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn borrowed() { - let mut reader = NsReader::from_str($xml); - reader.config_mut().enable_all_checks(true); - assert_eq!(reader.read_resolved_event().unwrap().1, $event); - assert_eq!(reader.buffer_position(), $pos); - } - - #[test] - fn buffered() { - let mut buf = Vec::new(); - let mut reader = NsReader::from_str($xml); - reader.config_mut().enable_all_checks(true); - assert_eq!(reader.read_resolved_event_into(&mut buf).unwrap().1, $event); - assert_eq!(reader.buffer_position(), $pos); - } - - #[cfg(feature = "async-tokio")] - #[tokio::test] - async fn async_tokio() { - let mut buf = Vec::new(); - let mut reader = NsReader::from_str($xml); - reader.config_mut().enable_all_checks(true); - assert_eq!( - reader - .read_resolved_event_into_async(&mut buf) - .await - .unwrap() - .1, - $event - ); - assert_eq!(reader.buffer_position(), $pos); - } - } - } - }; -} +// For event_ok and syntax_err macros +mod helpers; mod syntax { use super::*; - macro_rules! err { - ($test:ident($xml:literal) => $pos:expr, $cause:expr) => { - mod $test { - use super::*; - - mod reader { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn borrowed() { - let mut reader = Reader::from_str($xml); - assert_eq!( - reader - .read_event() - .expect("parser should return `Event::Text`"), - Event::Text(BytesText::new(".")) - ); - match reader.read_event() { - Err(Error::Syntax(cause)) => assert_eq!( - (cause, reader.error_position(), reader.buffer_position()), - ($cause, 1, $pos), - ), - x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), - } - assert_eq!( - reader - .read_event() - .expect("parser should return `Event::Eof` after error"), - Event::Eof - ); - } - - #[test] - fn buffered() { - let mut buf = Vec::new(); - let mut reader = Reader::from_str($xml); - assert_eq!( - reader - .read_event_into(&mut buf) - .expect("parser should return `Event::Text`"), - Event::Text(BytesText::new(".")) - ); - match reader.read_event_into(&mut buf) { - Err(Error::Syntax(cause)) => assert_eq!( - (cause, reader.error_position(), reader.buffer_position()), - ($cause, 1, $pos), - ), - x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), - } - assert_eq!( - reader - .read_event_into(&mut buf) - .expect("parser should return `Event::Eof` after error"), - Event::Eof - ); - } - - #[cfg(feature = "async-tokio")] - #[tokio::test] - async fn async_tokio() { - let mut buf = Vec::new(); - let mut reader = Reader::from_str($xml); - assert_eq!( - reader - .read_event_into_async(&mut buf) - .await - .expect("parser should return `Event::Text`"), - Event::Text(BytesText::new(".")) - ); - match reader.read_event_into_async(&mut buf).await { - Err(Error::Syntax(cause)) => assert_eq!( - (cause, reader.error_position(), reader.buffer_position()), - ($cause, 1, $pos), - ), - x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), - } - assert_eq!( - reader - .read_event_into_async(&mut buf) - .await - .expect("parser should return `Event::Eof` after error"), - Event::Eof - ); - } - } - - mod ns_reader { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn borrowed() { - let mut reader = NsReader::from_str($xml); - assert_eq!( - reader - .read_resolved_event() - .expect("parser should return `Event::Text`") - .1, - Event::Text(BytesText::new(".")) - ); - match reader.read_resolved_event() { - Err(Error::Syntax(cause)) => assert_eq!( - (cause, reader.error_position(), reader.buffer_position()), - ($cause, 1, $pos), - ), - x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), - } - assert_eq!( - reader - .read_resolved_event() - .expect("parser should return `Event::Eof` after error") - .1, - Event::Eof - ); - } - - #[test] - fn buffered() { - let mut buf = Vec::new(); - let mut reader = NsReader::from_str($xml); - assert_eq!( - reader - .read_resolved_event_into(&mut buf) - .expect("parser should return `Event::Text`") - .1, - Event::Text(BytesText::new(".")) - ); - match reader.read_resolved_event_into(&mut buf) { - Err(Error::Syntax(cause)) => assert_eq!( - (cause, reader.error_position(), reader.buffer_position()), - ($cause, 1, $pos), - ), - x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), - } - assert_eq!( - reader - .read_resolved_event_into(&mut buf) - .expect("parser should return `Event::Eof` after error") - .1, - Event::Eof - ); - } - - #[cfg(feature = "async-tokio")] - #[tokio::test] - async fn async_tokio() { - let mut buf = Vec::new(); - let mut reader = NsReader::from_str($xml); - assert_eq!( - reader - .read_resolved_event_into_async(&mut buf) - .await - .expect("parser should return `Event::Text`") - .1, - Event::Text(BytesText::new(".")) - ); - match reader.read_resolved_event_into_async(&mut buf).await { - Err(Error::Syntax(cause)) => assert_eq!( - (cause, reader.error_position(), reader.buffer_position()), - ($cause, 1, $pos), - ), - x => panic!("Expected `Err(Syntax(_))`, but got {:?}", x), - } - assert_eq!( - reader - .read_resolved_event_into_async(&mut buf) - .await - .expect("parser should return `Event::Eof` after error") - .1, - Event::Eof - ); - } - } - } - }; - ($test:ident($xml:literal) => $cause:expr) => { - err!($test($xml) => $xml.len() as u64, $cause); - }; - } - mod tag { use super::*; - err!(unclosed1(".<") => SyntaxError::UnclosedTag); - err!(unclosed2(". SyntaxError::UnclosedTag); - err!(unclosed3(". SyntaxError::UnclosedTag); - err!(unclosed4(".< ") => SyntaxError::UnclosedTag); - err!(unclosed5(".<\t") => SyntaxError::UnclosedTag); - err!(unclosed6(".<\r") => SyntaxError::UnclosedTag); - err!(unclosed7(".<\n") => SyntaxError::UnclosedTag); - err!(unclosed8(".< \t\r\nx") => SyntaxError::UnclosedTag); + syntax_err!(unclosed1(".<") => SyntaxError::UnclosedTag); + syntax_err!(unclosed2(". SyntaxError::UnclosedTag); + syntax_err!(unclosed3(". SyntaxError::UnclosedTag); + syntax_err!(unclosed4(".< ") => SyntaxError::UnclosedTag); + syntax_err!(unclosed5(".<\t") => SyntaxError::UnclosedTag); + syntax_err!(unclosed6(".<\r") => SyntaxError::UnclosedTag); + syntax_err!(unclosed7(".<\n") => SyntaxError::UnclosedTag); + syntax_err!(unclosed8(".< \t\r\nx") => SyntaxError::UnclosedTag); /// Closed tags can be tested only in pair with open tags, because otherwise /// `IllFormedError::UnmatchedEndTag` will be raised @@ -330,64 +70,64 @@ mod syntax { } // Incorrect after-bang symbol is detected early, so buffer_position() stay at `!` - err!(unclosed_bang1(". 2, SyntaxError::InvalidBangMarkup); - err!(unclosed_bang2(".") => 2, SyntaxError::InvalidBangMarkup); - err!(unclosed_bang3(". 2, SyntaxError::InvalidBangMarkup); - err!(unclosed_bang4(".") => 2, SyntaxError::InvalidBangMarkup); + syntax_err!(unclosed_bang1(". 2, SyntaxError::InvalidBangMarkup); + syntax_err!(unclosed_bang2(".") => 2, SyntaxError::InvalidBangMarkup); + syntax_err!(unclosed_bang3(". 2, SyntaxError::InvalidBangMarkup); + syntax_err!(unclosed_bang4(".") => 2, SyntaxError::InvalidBangMarkup); /// https://www.w3.org/TR/xml11/#NT-Comment mod comment { use super::*; - err!(unclosed01(". SyntaxError::UnclosedComment); - err!(unclosed02(".") => SyntaxError::UnclosedComment); - err!(unclosed07(".") => SyntaxError::UnclosedComment); - err!(unclosed10(".") => 7: Event::Comment(BytesText::new(""))); - ok!(normal2("rest") => 7: Event::Comment(BytesText::new(""))); + syntax_err!(unclosed01(". SyntaxError::UnclosedComment); + syntax_err!(unclosed02(".") => SyntaxError::UnclosedComment); + syntax_err!(unclosed07(".") => SyntaxError::UnclosedComment); + syntax_err!(unclosed10(".") => 7: Event::Comment(BytesText::new(""))); + event_ok!(normal2("rest") => 7: Event::Comment(BytesText::new(""))); } /// https://www.w3.org/TR/xml11/#NT-CDSect mod cdata { use super::*; - err!(unclosed01(". SyntaxError::UnclosedCData); - err!(unclosed02(". SyntaxError::UnclosedCData); - err!(unclosed03(". SyntaxError::UnclosedCData); - err!(unclosed04(".") => SyntaxError::UnclosedCData); - err!(unclosed05(". SyntaxError::UnclosedCData); - err!(unclosed06(". SyntaxError::UnclosedCData); - err!(unclosed07(".") => SyntaxError::UnclosedCData); - err!(unclosed08(". SyntaxError::UnclosedCData); - err!(unclosed09(". SyntaxError::UnclosedCData); - err!(unclosed10(".") => SyntaxError::UnclosedCData); - err!(unclosed11(". SyntaxError::UnclosedCData); - err!(unclosed12(". SyntaxError::UnclosedCData); - err!(unclosed13(".") => SyntaxError::UnclosedCData); - err!(unclosed14(". SyntaxError::UnclosedCData); - err!(unclosed15(". SyntaxError::UnclosedCData); - err!(unclosed16(".") => SyntaxError::UnclosedCData); - err!(unclosed17(". SyntaxError::UnclosedCData); - err!(unclosed18(". SyntaxError::UnclosedCData); - err!(unclosed19(".") => SyntaxError::UnclosedCData); - err!(unclosed20(". SyntaxError::UnclosedCData); - err!(unclosed21(". SyntaxError::UnclosedCData); - err!(unclosed22(".") => SyntaxError::UnclosedCData); - err!(unclosed23(". SyntaxError::UnclosedCData); - err!(unclosed24(". SyntaxError::UnclosedCData); - err!(unclosed25(".") => SyntaxError::UnclosedCData); - - err!(lowercase(".") => SyntaxError::UnclosedCData); - - ok!(normal1("") => 12: Event::CData(BytesCData::new(""))); - ok!(normal2("rest") => 12: Event::CData(BytesCData::new(""))); + syntax_err!(unclosed01(". SyntaxError::UnclosedCData); + syntax_err!(unclosed02(". SyntaxError::UnclosedCData); + syntax_err!(unclosed03(". SyntaxError::UnclosedCData); + syntax_err!(unclosed04(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed05(". SyntaxError::UnclosedCData); + syntax_err!(unclosed06(". SyntaxError::UnclosedCData); + syntax_err!(unclosed07(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed08(". SyntaxError::UnclosedCData); + syntax_err!(unclosed09(". SyntaxError::UnclosedCData); + syntax_err!(unclosed10(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed11(". SyntaxError::UnclosedCData); + syntax_err!(unclosed12(". SyntaxError::UnclosedCData); + syntax_err!(unclosed13(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed14(". SyntaxError::UnclosedCData); + syntax_err!(unclosed15(". SyntaxError::UnclosedCData); + syntax_err!(unclosed16(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed17(". SyntaxError::UnclosedCData); + syntax_err!(unclosed18(". SyntaxError::UnclosedCData); + syntax_err!(unclosed19(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed20(". SyntaxError::UnclosedCData); + syntax_err!(unclosed21(". SyntaxError::UnclosedCData); + syntax_err!(unclosed22(".") => SyntaxError::UnclosedCData); + syntax_err!(unclosed23(". SyntaxError::UnclosedCData); + syntax_err!(unclosed24(". SyntaxError::UnclosedCData); + syntax_err!(unclosed25(".") => SyntaxError::UnclosedCData); + + syntax_err!(lowercase(".") => SyntaxError::UnclosedCData); + + event_ok!(normal1("") => 12: Event::CData(BytesCData::new(""))); + event_ok!(normal2("rest") => 12: Event::CData(BytesCData::new(""))); } /// According to the grammar, only upper-case letters allowed for DOCTYPE writing. @@ -396,82 +136,82 @@ mod syntax { mod doctype { use super::*; - err!(unclosed01(". SyntaxError::UnclosedDoctype); - err!(unclosed02(". SyntaxError::UnclosedDoctype); - err!(unclosed03(". SyntaxError::UnclosedDoctype); - err!(unclosed04(".") => SyntaxError::UnclosedDoctype); - err!(unclosed05(". SyntaxError::UnclosedDoctype); - err!(unclosed06(". SyntaxError::UnclosedDoctype); - err!(unclosed07(".") => SyntaxError::UnclosedDoctype); - err!(unclosed08(". SyntaxError::UnclosedDoctype); - err!(unclosed09(". SyntaxError::UnclosedDoctype); - err!(unclosed10(".") => SyntaxError::UnclosedDoctype); - err!(unclosed11(". SyntaxError::UnclosedDoctype); - err!(unclosed12(". SyntaxError::UnclosedDoctype); - err!(unclosed13(".") => SyntaxError::UnclosedDoctype); - err!(unclosed14(". SyntaxError::UnclosedDoctype); - err!(unclosed15(". SyntaxError::UnclosedDoctype); - err!(unclosed16(".") => SyntaxError::UnclosedDoctype); - err!(unclosed17(". SyntaxError::UnclosedDoctype); - err!(unclosed18(". SyntaxError::UnclosedDoctype); - err!(unclosed19(".") => SyntaxError::UnclosedDoctype); - err!(unclosed20(". SyntaxError::UnclosedDoctype); - err!(unclosed21(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed01(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed02(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed03(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed04(".") => SyntaxError::UnclosedDoctype); + syntax_err!(unclosed05(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed06(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed07(".") => SyntaxError::UnclosedDoctype); + syntax_err!(unclosed08(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed09(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed10(".") => SyntaxError::UnclosedDoctype); + syntax_err!(unclosed11(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed12(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed13(".") => SyntaxError::UnclosedDoctype); + syntax_err!(unclosed14(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed15(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed16(".") => SyntaxError::UnclosedDoctype); + syntax_err!(unclosed17(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed18(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed19(".") => SyntaxError::UnclosedDoctype); + syntax_err!(unclosed20(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed21(". SyntaxError::UnclosedDoctype); // results in IllFormed(MissingDoctypeName), checked below - err!(unclosed22(". SyntaxError::UnclosedDoctype); + syntax_err!(unclosed22(". SyntaxError::UnclosedDoctype); // According to the grammar, XML declaration MUST contain at least one space // and an element name, but we do not consider this as a _syntax_ error. - ok!(normal1("") => 12: Event::DocType(BytesText::new("e"))); - ok!(normal2("rest") => 12: Event::DocType(BytesText::new("e"))); + event_ok!(normal1("") => 12: Event::DocType(BytesText::new("e"))); + event_ok!(normal2("rest") => 12: Event::DocType(BytesText::new("e"))); } /// https://www.w3.org/TR/xml11/#NT-PI mod pi { use super::*; - err!(unclosed01(". SyntaxError::UnclosedPI); - err!(unclosed02(". SyntaxError::UnclosedPI); - err!(unclosed03(".") => SyntaxError::UnclosedPI); - err!(unclosed04(". SyntaxError::UnclosedPI); - err!(unclosed05(". SyntaxError::UnclosedPI); - err!(unclosed06(". SyntaxError::UnclosedPI); - err!(unclosed07(". SyntaxError::UnclosedPI); - err!(unclosed08(". SyntaxError::UnclosedPI); - err!(unclosed09(". SyntaxError::UnclosedPI); - err!(unclosed10(". SyntaxError::UnclosedPI); + syntax_err!(unclosed01(". SyntaxError::UnclosedPI); + syntax_err!(unclosed02(". SyntaxError::UnclosedPI); + syntax_err!(unclosed03(".") => SyntaxError::UnclosedPI); + syntax_err!(unclosed04(". SyntaxError::UnclosedPI); + syntax_err!(unclosed05(". SyntaxError::UnclosedPI); + syntax_err!(unclosed06(". SyntaxError::UnclosedPI); + syntax_err!(unclosed07(". SyntaxError::UnclosedPI); + syntax_err!(unclosed08(". SyntaxError::UnclosedPI); + syntax_err!(unclosed09(". SyntaxError::UnclosedPI); + syntax_err!(unclosed10(". SyntaxError::UnclosedPI); // According to the grammar, processing instruction MUST contain a non-empty // target name, but we do not consider this as a _syntax_ error. - ok!(normal_empty1("") => 4: Event::PI(BytesPI::new(""))); - ok!(normal_empty2("rest") => 4: Event::PI(BytesPI::new(""))); - ok!(normal_xmlx1("") => 8: Event::PI(BytesPI::new("xmlx"))); - ok!(normal_xmlx2("rest") => 8: Event::PI(BytesPI::new("xmlx"))); + event_ok!(normal_empty1("") => 4: Event::PI(BytesPI::new(""))); + event_ok!(normal_empty2("rest") => 4: Event::PI(BytesPI::new(""))); + event_ok!(normal_xmlx1("") => 8: Event::PI(BytesPI::new("xmlx"))); + event_ok!(normal_xmlx2("rest") => 8: Event::PI(BytesPI::new("xmlx"))); } /// https://www.w3.org/TR/xml11/#NT-prolog mod decl { use super::*; - err!(unclosed1(". SyntaxError::UnclosedPI); - err!(unclosed2(". SyntaxError::UnclosedPI); - err!(unclosed3(". SyntaxError::UnclosedXmlDecl); - err!(unclosed4(". SyntaxError::UnclosedXmlDecl); - err!(unclosed5(". SyntaxError::UnclosedXmlDecl); - err!(unclosed6(". SyntaxError::UnclosedXmlDecl); - err!(unclosed7(". SyntaxError::UnclosedXmlDecl); - err!(unclosed8(". SyntaxError::UnclosedXmlDecl); + syntax_err!(unclosed1(". SyntaxError::UnclosedPI); + syntax_err!(unclosed2(". SyntaxError::UnclosedPI); + syntax_err!(unclosed3(". SyntaxError::UnclosedXmlDecl); + syntax_err!(unclosed4(". SyntaxError::UnclosedXmlDecl); + syntax_err!(unclosed5(". SyntaxError::UnclosedXmlDecl); + syntax_err!(unclosed6(". SyntaxError::UnclosedXmlDecl); + syntax_err!(unclosed7(". SyntaxError::UnclosedXmlDecl); + syntax_err!(unclosed8(". SyntaxError::UnclosedXmlDecl); // "xmls" is a PI target, not an XML declaration - err!(unclosed9(". SyntaxError::UnclosedPI); + syntax_err!(unclosed9(". SyntaxError::UnclosedPI); // According to the grammar, XML declaration MUST contain at least one space // and `version` attribute, but we do not consider this as a _syntax_ error. - ok!(normal1("") => 7: Event::Decl(BytesDecl::from_start(BytesStart::new("xml")))); - ok!(normal2("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))); - ok!(normal3("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\t", 3)))); - ok!(normal4("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\r", 3)))); - ok!(normal5("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3)))); - ok!(normal6("rest") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3)))); + event_ok!(normal1("") => 7: Event::Decl(BytesDecl::from_start(BytesStart::new("xml")))); + event_ok!(normal2("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))); + event_ok!(normal3("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\t", 3)))); + event_ok!(normal4("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\r", 3)))); + event_ok!(normal5("") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3)))); + event_ok!(normal6("rest") => 8: Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml\n", 3)))); } /// Tests for UTF-16 encoded XML declarations. @@ -927,13 +667,13 @@ mod ill_formed { // ^= 9 err!(missing_doctype_name2("") => 13: IllFormedError::MissingDoctypeName); // ^= 13 - ok!(missing_doctype_name3("") => 15: Event::DocType(BytesText::new("x"))); + event_ok!(missing_doctype_name3("") => 15: Event::DocType(BytesText::new("x"))); err2!(unmatched_end_tag1(".") => 1: IllFormedError::UnmatchedEndTag("".to_string())); err2!(unmatched_end_tag2(".") => 1: IllFormedError::UnmatchedEndTag("end".to_string())); err2!(unmatched_end_tag3(".") => 1: IllFormedError::UnmatchedEndTag("end".to_string())); - ok!(mismatched_end_tag1("") => 7: Event::Start(BytesStart::new("start"))); + event_ok!(mismatched_end_tag1("") => 7: Event::Start(BytesStart::new("start"))); err2!(mismatched_end_tag2("") => 7: IllFormedError::MismatchedEndTag { // ^= 7 expected: "start".to_string(), @@ -950,7 +690,7 @@ mod ill_formed { found: "end".to_string(), }); - ok!(double_hyphen_in_comment1("") => 7: Event::Comment(BytesText::new(""))); + event_ok!(double_hyphen_in_comment1("") => 7: Event::Comment(BytesText::new(""))); err!(double_hyphen_in_comment2("") => 4: IllFormedError::DoubleHyphenInComment); // ^= 4 err!(double_hyphen_in_comment3("") => 5: IllFormedError::DoubleHyphenInComment); @@ -970,16 +710,16 @@ mod ill_formed { err2!(unclosed_hex2(".") => 1: IllFormedError::UnclosedReference); // We do not check correctness of references during parsing - ok!(empty("&;") => 2: Event::GeneralRef(BytesRef::new(""))); - ok!(normal1("&x;") => 3: Event::GeneralRef(BytesRef::new("x"))); - ok!(normal2("&x;rest") => 3: Event::GeneralRef(BytesRef::new("x"))); - ok!(num("&#;") => 3: Event::GeneralRef(BytesRef::new("#"))); - ok!(dec("") => 4: Event::GeneralRef(BytesRef::new("#2"))); - ok!(hex1("&#x;") => 4: Event::GeneralRef(BytesRef::new("#x"))); - ok!(hex2("") => 5: Event::GeneralRef(BytesRef::new("#xF"))); + event_ok!(empty("&;") => 2: Event::GeneralRef(BytesRef::new(""))); + event_ok!(normal1("&x;") => 3: Event::GeneralRef(BytesRef::new("x"))); + event_ok!(normal2("&x;rest") => 3: Event::GeneralRef(BytesRef::new("x"))); + event_ok!(num("&#;") => 3: Event::GeneralRef(BytesRef::new("#"))); + event_ok!(dec("") => 4: Event::GeneralRef(BytesRef::new("#2"))); + event_ok!(hex1("&#x;") => 4: Event::GeneralRef(BytesRef::new("#x"))); + event_ok!(hex2("") => 5: Event::GeneralRef(BytesRef::new("#xF"))); // XML specification explicitly allowed any number of leading zeroes - ok!(long_dec(" ") => 44: Event::GeneralRef(BytesRef::new("#00000000000000000000000000000000000000032"))); - ok!(long_hex(" ") => 45: Event::GeneralRef(BytesRef::new("#x00000000000000000000000000000000000000020"))); + event_ok!(long_dec(" ") => 44: Event::GeneralRef(BytesRef::new("#00000000000000000000000000000000000000032"))); + event_ok!(long_hex(" ") => 45: Event::GeneralRef(BytesRef::new("#x00000000000000000000000000000000000000020"))); } }