1use crate::unescape::unescape;
7use std::borrow::Cow;
8use std::error::Error;
9use std::fmt::{Display, Formatter};
10use xmlparser::{ElementEnd, Token, Tokenizer};
11
/// Nesting level of a token inside the document being decoded.
///
/// [`Document`] starts counting at `0` and reports the first element start it sees at depth `0`.
pub type Depth = usize;
13
/// Internal classification of the ways XML decoding can fail.
///
/// Kept private; callers only ever see the wrapping [`XmlDecodeError`].
#[derive(Debug)]
enum XmlDecodeErrorKind {
    /// The underlying `xmlparser` tokenizer reported an error.
    InvalidXml(xmlparser::Error),
    /// An escape sequence could not be decoded; `esc` holds the offending text.
    InvalidEscape { esc: String },
    /// A free-form error message supplied through [`XmlDecodeError::custom`].
    Custom(Cow<'static, str>),
    /// Any other error, boxed and exposed through `Error::source`.
    Unhandled(Box<dyn std::error::Error + Send + Sync + 'static>),
}
24
/// Error returned when XML decoding fails.
///
/// The concrete failure kind is private; use the `Display` output and
/// `Error::source` to inspect it.
#[derive(Debug)]
pub struct XmlDecodeError {
    // What went wrong; never exposed directly.
    kind: XmlDecodeErrorKind,
}
29
30impl Display for XmlDecodeError {
31 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
32 match &self.kind {
33 XmlDecodeErrorKind::InvalidXml(_) => write!(f, "XML parse error"),
34 XmlDecodeErrorKind::InvalidEscape { esc } => write!(f, "invalid XML escape: {esc}"),
35 XmlDecodeErrorKind::Custom(msg) => write!(f, "error parsing XML: {msg}"),
36 XmlDecodeErrorKind::Unhandled(_) => write!(f, "error parsing XML"),
37 }
38 }
39}
40
41impl Error for XmlDecodeError {
42 fn source(&self) -> Option<&(dyn Error + 'static)> {
43 match &self.kind {
44 XmlDecodeErrorKind::InvalidXml(source) => Some(source as _),
45 XmlDecodeErrorKind::Unhandled(source) => Some(source.as_ref() as _),
46 XmlDecodeErrorKind::InvalidEscape { .. } | XmlDecodeErrorKind::Custom(..) => None,
47 }
48 }
49}
50
51impl XmlDecodeError {
52 pub(crate) fn invalid_xml(error: xmlparser::Error) -> Self {
53 Self {
54 kind: XmlDecodeErrorKind::InvalidXml(error),
55 }
56 }
57
58 pub(crate) fn invalid_escape(esc: impl Into<String>) -> Self {
59 Self {
60 kind: XmlDecodeErrorKind::InvalidEscape { esc: esc.into() },
61 }
62 }
63
64 pub fn custom(msg: impl Into<Cow<'static, str>>) -> Self {
65 Self {
66 kind: XmlDecodeErrorKind::Custom(msg.into()),
67 }
68 }
69
70 pub fn unhandled(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
71 Self {
72 kind: XmlDecodeErrorKind::Unhandled(error.into()),
73 }
74 }
75}
76
/// A possibly namespace-qualified XML name, split into its two components.
#[derive(PartialEq, Debug)]
pub struct Name<'a> {
    /// Namespace prefix (the part before `:`); empty when the name has none.
    pub prefix: &'a str,
    /// Local part of the name (the part after `:`).
    pub local: &'a str,
}

impl Name<'_> {
    /// Checks whether `tag_name` refers to this name.
    ///
    /// * A `tag_name` without a `:` is compared against the local part only; the
    ///   prefix is ignored.
    /// * Otherwise `tag_name` is split at its first `:` and both the prefix and the
    ///   local part must be equal.
    pub fn matches(&self, tag_name: &str) -> bool {
        if let Some((wanted_prefix, wanted_local)) = tag_name.split_once(':') {
            self.prefix == wanted_prefix && self.local == wanted_local
        } else {
            self.local == tag_name
        }
    }
}
97
/// A single attribute attached to a start element.
#[derive(Debug, PartialEq)]
pub struct Attr<'a> {
    // Attribute name, split into prefix and local part.
    name: Name<'a>,
    // Attribute value as produced by `unescape`; may borrow from the input.
    value: Cow<'a, str>,
}
104
/// The opening tag of an element: its name, attributes and position in the tree.
#[derive(Debug, PartialEq)]
pub struct StartEl<'a> {
    // Element name, split into prefix and local part.
    name: Name<'a>,
    // Attributes in the order they were tokenized.
    attributes: Vec<Attr<'a>>,
    // True when the element was self-closing (`<a/>`), i.e. it has no body.
    closed: bool,
    // Depth at which the element start token was reported.
    depth: Depth,
}
112
113impl<'a> StartEl<'a> {
121 pub fn depth(&self) -> Depth {
122 self.depth
123 }
124
125 fn new(local: &'a str, prefix: &'a str, depth: Depth) -> Self {
126 Self {
127 name: Name { prefix, local },
128 attributes: vec![],
129 closed: false,
130 depth,
131 }
132 }
133
134 pub fn attr<'b>(&'b self, key: &'b str) -> Option<&'b str> {
138 self.attributes
139 .iter()
140 .find(|attr| attr.name.matches(key))
141 .map(|attr| attr.value.as_ref())
142 }
143
144 pub fn matches(&self, pat: &str) -> bool {
147 self.name.matches(pat)
148 }
149
150 pub fn local(&self) -> &str {
157 self.name.local
158 }
159
160 pub fn prefix(&self) -> &str {
166 self.name.prefix
167 }
168
169 fn end_el(&self, el: ElementEnd<'_>, depth: Depth) -> bool {
171 if depth != self.depth {
172 return false;
173 }
174 match el {
175 ElementEnd::Open => false,
176 ElementEnd::Close(prefix, local) => {
177 prefix.as_str() == self.name.prefix && local.as_str() == self.name.local
178 }
179 ElementEnd::Empty => false,
180 }
181 }
182}
183
/// An XML document being decoded.
///
/// Wraps an `xmlparser` tokenizer and tracks the current element depth while
/// tokens are pulled from it.
pub struct Document<'a> {
    // Source of raw XML tokens.
    tokenizer: Tokenizer<'a>,
    // Number of elements currently open.
    depth: Depth,
}
192
193impl<'a> TryFrom<&'a [u8]> for Document<'a> {
194 type Error = XmlDecodeError;
195
196 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
197 Ok(Document::new(
198 std::str::from_utf8(value).map_err(XmlDecodeError::unhandled)?,
199 ))
200 }
201}
202
203impl<'inp> Document<'inp> {
204 pub fn new(doc: &'inp str) -> Self {
205 Document {
206 tokenizer: Tokenizer::from(doc),
207 depth: 0,
208 }
209 }
210
211 pub fn next_start_element<'a>(&'a mut self) -> Option<StartEl<'inp>> {
227 next_start_element(self)
228 }
229
230 pub fn root_element<'a>(&'a mut self) -> Result<ScopedDecoder<'inp, 'a>, XmlDecodeError> {
232 let start_el = self
233 .next_start_element()
234 .ok_or_else(|| XmlDecodeError::custom("no root element"))?;
235 Ok(ScopedDecoder {
236 doc: self,
237 start_el,
238 terminated: false,
239 })
240 }
241
242 pub fn scoped_to<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
248 ScopedDecoder {
249 doc: self,
250 start_el,
251 terminated: false,
252 }
253 }
254}
255
/// Opaque wrapper around an `xmlparser` token.
///
/// The wrapped token is private, so code outside this module cannot inspect it directly.
#[derive(Debug)]
pub struct XmlToken<'inp>(Token<'inp>);
260
261impl<'inp> Iterator for Document<'inp> {
272 type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;
273 fn next<'a>(&'a mut self) -> Option<Result<(XmlToken<'inp>, Depth), XmlDecodeError>> {
274 let tok = self.tokenizer.next()?;
275 let tok = match tok {
276 Err(e) => return Some(Err(XmlDecodeError::invalid_xml(e))),
277 Ok(tok) => tok,
278 };
279 match tok {
281 Token::ElementEnd {
282 end: ElementEnd::Close(_, _),
283 ..
284 } => {
285 self.depth -= 1;
286 }
287 Token::ElementEnd {
288 end: ElementEnd::Empty,
289 ..
290 } => self.depth -= 1,
291 t @ Token::ElementStart { .. } => {
292 self.depth += 1;
293 return Some(Ok((XmlToken(t), self.depth - 1)));
296 }
297 _ => {}
298 }
299 Some(Ok((XmlToken(tok), self.depth)))
300 }
301}
302
/// A token iterator restricted to the body of a single element.
///
/// Iteration stops once the closing tag matching `start_el` has been read.
pub struct ScopedDecoder<'inp, 'a> {
    // Underlying document that tokens are pulled from.
    doc: &'a mut Document<'inp>,
    // The element whose body this decoder covers.
    start_el: StartEl<'inp>,
    // Set once the scope has ended; no further tokens are read afterwards.
    terminated: bool,
}
312
313impl Drop for ScopedDecoder<'_, '_> {
316 fn drop(&mut self) {
317 for _ in self {}
318 }
319}
320
321impl<'inp> ScopedDecoder<'inp, '_> {
322 pub fn start_el<'a>(&'a self) -> &'a StartEl<'inp> {
324 &self.start_el
325 }
326
327 pub fn next_tag<'a>(&'a mut self) -> Option<ScopedDecoder<'inp, 'a>> {
342 let next_tag = next_start_element(self)?;
343 Some(self.nested_decoder(next_tag))
344 }
345
346 fn nested_decoder<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
347 ScopedDecoder {
348 doc: self.doc,
349 start_el,
350 terminated: false,
351 }
352 }
353}
354
355impl<'inp> Iterator for ScopedDecoder<'inp, '_> {
356 type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;
357
358 fn next(&mut self) -> Option<Self::Item> {
359 if self.start_el.closed {
360 self.terminated = true;
361 }
362 if self.terminated {
363 return None;
364 }
365 let (tok, depth) = match self.doc.next() {
366 Some(Ok((tok, depth))) => (tok, depth),
367 other => return other,
368 };
369
370 match tok.0 {
371 Token::ElementEnd { end, .. } if self.start_el.end_el(end, depth) => {
372 self.terminated = true;
373 return None;
374 }
375 _ => {}
376 }
377 Some(Ok((tok, depth)))
378 }
379}
380
381fn next_start_element<'a, 'inp>(
383 tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
384) -> Option<StartEl<'inp>> {
385 let mut out = StartEl::new("", "", 0);
386 loop {
387 match tokens.next()? {
388 Ok((XmlToken(Token::ElementStart { local, prefix, .. }), depth)) => {
389 out.name.local = local.as_str();
390 out.name.prefix = prefix.as_str();
391 out.depth = depth;
392 }
393 Ok((
394 XmlToken(Token::Attribute {
395 prefix,
396 local,
397 value,
398 ..
399 }),
400 _,
401 )) => out.attributes.push(Attr {
402 name: Name {
403 local: local.as_str(),
404 prefix: prefix.as_str(),
405 },
406 value: unescape(value.as_str()).ok()?,
407 }),
408 Ok((
409 XmlToken(Token::ElementEnd {
410 end: ElementEnd::Open,
411 ..
412 }),
413 _,
414 )) => break,
415 Ok((
416 XmlToken(Token::ElementEnd {
417 end: ElementEnd::Empty,
418 ..
419 }),
420 _,
421 )) => {
422 out.closed = true;
423 break;
424 }
425 _ => {}
426 }
427 }
428 Some(out)
429}
430
431pub fn try_data<'a, 'inp>(
436 tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
437) -> Result<Cow<'inp, str>, XmlDecodeError> {
438 loop {
439 match tokens.next().map(|opt| opt.map(|opt| opt.0)) {
440 None => return Ok(Cow::Borrowed("")),
441 Some(Ok(XmlToken(Token::Text { text }))) => return unescape(text.as_str()),
442 Some(Ok(e @ XmlToken(Token::ElementStart { .. }))) => {
443 return Err(XmlDecodeError::custom(format!(
444 "looking for a data element, found: {e:?}"
445 )))
446 }
447 Some(Err(e)) => return Err(e),
448 _ => {}
449 }
450 }
451}
452
#[cfg(test)]
mod test {
    use crate::decode::{try_data, Attr, Depth, Document, Name, StartEl};

    /// Builds the `StartEl` expected for a self-closing element (`<a/>`).
    fn closed<'a>(local: &'a str, prefix: &'a str, depth: Depth) -> StartEl<'a> {
        let mut s = StartEl::new(local, prefix, depth);
        s.closed = true;
        s
    }

    /// A scoped decoder yields the child tags of its element and then stops.
    #[test]
    fn scoped_tokens() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().expect("valid document");
        assert_eq!(root.start_el().local(), "Response");
        assert_eq!(root.next_tag().expect("tag exists").start_el().local(), "A");
        assert!(root.next_tag().is_none());
    }

    /// A nested element with the same name as its parent must not end the parent's scope.
    #[test]
    fn handle_depth_properly() {
        let xml = r#"<Response><Response></Response><A/></Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid document");
        assert_eq!(
            scoped.next_tag().unwrap().start_el(),
            &StartEl::new("Response", "", 1)
        );
        let closed_a = closed("A", "", 1);
        assert_eq!(scoped.next_tag().unwrap().start_el(), &closed_a);
        assert!(scoped.next_tag().is_none())
    }

    /// A self-closing root is marked as closed and has no children.
    #[test]
    fn self_closing() {
        let xml = r#"<Response/>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid doc");
        assert!(scoped.start_el.closed);
        assert!(scoped.next_tag().is_none())
    }

    /// Dropping a partially read scope skips the rest of that scope.
    #[test]
    fn terminate_scope() {
        let xml = r#"<Response><Struct><A></A><Also/></Struct><More/></Response>"#;
        let mut doc = Document::new(xml);
        let mut response_iter = doc.root_element().expect("valid doc");
        let mut struct_iter = response_iter.next_tag().unwrap();
        assert_eq!(
            struct_iter.next_tag().as_ref().map(|t| t.start_el()),
            Some(&StartEl::new("A", "", 2))
        );
        // `response_iter` is mutably borrowed until `struct_iter` is dropped; the drop
        // also consumes the unread remainder of `<Struct>`.
        drop(struct_iter);
        assert_eq!(
            response_iter.next_tag().unwrap().start_el(),
            &closed("More", "", 1)
        );
    }

    /// Asking for data where a nested element comes first is an error.
    #[test]
    fn read_data_invalid() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        try_data(&mut resp).expect_err("no data");
    }

    /// Plain text content is returned as-is.
    #[test]
    fn read_data() {
        let xml = r#"<Response>hello</Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), "hello");
    }

    /// Whitespace surrounding text content is preserved.
    #[test]
    fn read_data_whitespace() {
        let xml = r#"<Response> hello </Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), " hello ");
    }

    /// Whitespace between elements is skipped when looking for the next tag, while
    /// whitespace that is an element's only content is returned as data.
    #[test]
    fn ignore_insignificant_whitespace() {
        let xml = r#"<Response> <A> </A> </Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        let mut a = resp.next_tag().expect("should be a");
        let data = try_data(&mut a).expect("valid");
        assert_eq!(data, " ");
    }

    /// Attributes are collected with their prefix and local name split apart.
    #[test]
    fn read_attributes() {
        let xml = r#"<Response xsi:type="CanonicalUser">hello</Response>"#;
        let mut tokenizer = Document::new(xml);
        let root = tokenizer.root_element().unwrap();

        assert_eq!(
            root.start_el().attributes,
            vec![Attr {
                name: Name {
                    prefix: "xsi",
                    local: "type"
                },
                value: "CanonicalUser".into()
            }]
        )
    }

    /// Entity escapes are decoded in both text content and attribute values.
    #[test]
    fn unescape_data() {
        // The fixture must contain real entity escapes: literal quotes and `>` inside
        // the attribute would be malformed XML and would not exercise unescaping.
        let xml = r#"<Response key="&quot;hey&quot;&gt;">&gt;</Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        assert_eq!(try_data(&mut root).unwrap(), ">");
        assert_eq!(root.start_el().attr("key"), Some("\"hey\">"));
    }

    /// A self-closing child yields no nested tags and does not swallow its sibling.
    #[test]
    fn nested_self_closer() {
        let xml = r#"<XmlListsInputOutput>
 <stringList/>
 <stringSet></stringSet>
 </XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut string_list = root.next_tag().unwrap();
        assert_eq!(string_list.start_el(), &closed("stringList", "", 1));
        assert!(string_list.next_tag().is_none());
        drop(string_list);
        assert_eq!(
            root.next_tag().unwrap().start_el(),
            &StartEl::new("stringSet", "", 1)
        );
    }

    /// Only direct children are reported by `next_tag`, even when a nested element
    /// reuses the name of its parent.
    #[test]
    fn confusing_nested_same_name_tag() {
        let root_tags = &["a", "b", "c", "d"];
        // The inner `<b>` could be mistaken for the end of the outer `<b>` if depth
        // were not taken into account.
        let xml = r#"<XmlListsInputOutput>
 <a/>
 <b>
 <c/>
 <b></b>
 <here/>
 </b>
 <c></c>
 <d>more</d>
 </XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut cmp = vec![];
        while let Some(tag) = root.next_tag() {
            cmp.push(tag.start_el().local().to_owned());
        }
        assert_eq!(root_tags, cmp.as_slice());
    }
}