aws_smithy_json/
deserialize.rs

1/*
2 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 */
5
6use crate::deserialize::error::{DeserializeError as Error, DeserializeErrorKind as ErrorKind};
7use aws_smithy_types::Number;
8use ErrorKind::*;
9
10pub mod error;
11pub mod token;
12
13pub use token::{EscapeError, EscapedStr, Offset, Token};
14
15/// JSON token parser as a Rust iterator
16///
17/// This parser will parse and yield exactly one [`Token`] per iterator `next()` call.
18/// Validation is done on the fly, so it is possible for it to parse an invalid JSON document
19/// until it gets to the first [`Error`].
20///
21/// JSON string values are left escaped in the [`Token::ValueString`] as an [`EscapedStr`],
22/// which is a new type around a slice of original `input` bytes so that the caller can decide
23/// when to unescape and allocate into a [`String`].
24///
25/// The parser *will* accept multiple valid JSON values. For example, `b"null true"` will
26/// yield `ValueNull` and `ValueTrue`. It is the responsibility of the caller to handle this for
27/// their use-case.
28pub fn json_token_iter(input: &[u8]) -> JsonTokenIterator<'_> {
29    JsonTokenIterator {
30        input,
31        index: 0,
32        state_stack: vec![State::Initial],
33    }
34}
35
/// Internal parser state for the iterator. Used to keep context between successive `next` calls.
///
/// States are kept on a stack inside [`JsonTokenIterator`]: entering an object or array
/// pushes a state, and leaving it pops that state again.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum State {
    /// Entry point. Expecting any JSON value.
    Initial,
    /// Expecting the next token to be the *first* value in an array, or the end of the array.
    ArrayFirstValueOrEnd,
    /// Expecting the next token to be the next value in an array, or the end of the array.
    ArrayNextValueOrEnd,
    /// Expecting the next token to be the *first* key in the object, or the end of the object.
    ObjectFirstKeyOrEnd,
    /// Expecting the next token to be the next object key, or the end of the object.
    ObjectNextKeyOrEnd,
    /// Expecting the next token to be the value of a field in an object.
    ObjectFieldValue,
}
52
/// An iterator over a `&[u8]` that yields `Result<Token, Error>` with [Token] being JSON tokens.
/// Construct with [json_token_iter].
pub struct JsonTokenIterator<'a> {
    /// The raw JSON bytes being tokenized.
    input: &'a [u8],
    /// Position within `input` of the next byte to examine.
    index: usize,
    /// Stack of parser states; the last element is the current state.
    state_stack: Vec<State>,
}
60
61impl<'a> JsonTokenIterator<'a> {
62    /// Previews the next byte.
63    fn peek_byte(&self) -> Option<u8> {
64        if self.index >= self.input.len() {
65            None
66        } else {
67            Some(self.input[self.index])
68        }
69    }
70
71    /// Expects there to be another byte coming up, and previews it.
72    /// If there isn't, an `UnexpectedEOS` error is returned.
73    fn peek_expect(&self) -> Result<u8, Error> {
74        self.peek_byte().ok_or_else(|| self.error(UnexpectedEos))
75    }
76
77    /// Advances to the next byte in the stream.
78    fn advance(&mut self) {
79        if self.index < self.input.len() {
80            self.index += 1;
81        }
82    }
83
84    /// Advances and returns the next byte in the stream.
85    fn next_byte(&mut self) -> Option<u8> {
86        let next = self.peek_byte();
87        self.advance();
88        next
89    }
90
91    /// Expects there to be another byte coming up, and returns it while advancing.
92    /// If there isn't, an `UnexpectedEOS` error is returned.
93    fn next_expect(&mut self) -> Result<u8, Error> {
94        self.next_byte().ok_or_else(|| self.error(UnexpectedEos))
95    }
96
97    /// Creates an error at the given `offset` in the stream.
98    fn error_at(&self, offset: usize, kind: ErrorKind) -> Error {
99        Error::new(kind, Some(offset))
100    }
101
102    /// Creates an error at the current offset in the stream.
103    fn error(&self, kind: ErrorKind) -> Error {
104        self.error_at(self.index, kind)
105    }
106
107    /// Advances until it hits a non-whitespace character or the end of the slice.
108    fn discard_whitespace(&mut self) {
109        while let Some(byte) = self.peek_byte() {
110            match byte {
111                b' ' | b'\t' | b'\r' | b'\n' => {
112                    self.advance();
113                }
114                _ => break,
115            }
116        }
117    }
118
119    /// Returns the top of the state stack (current state).
120    fn state(&self) -> State {
121        self.state_stack[self.state_stack.len() - 1]
122    }
123
124    /// Replaces the top of the state stack with a new `state`.
125    fn replace_state(&mut self, state: State) {
126        self.state_stack.pop();
127        self.state_stack.push(state);
128    }
129
130    /// Returns current offset
131    fn offset(&self) -> Offset {
132        Offset(self.index)
133    }
134
135    /// Discards the '{' character and pushes the `ObjectFirstKeyOrEnd` state.
136    fn start_object(&mut self) -> Token<'a> {
137        let offset = self.offset();
138        let byte = self.next_byte();
139        debug_assert_eq!(byte, Some(b'{'));
140        self.state_stack.push(State::ObjectFirstKeyOrEnd);
141        Token::StartObject { offset }
142    }
143
144    /// Discards the '}' character and pops the current state.
145    fn end_object(&mut self) -> Token<'a> {
146        let offset = self.offset();
147        let (byte, state) = (self.next_byte(), self.state_stack.pop());
148        debug_assert_eq!(byte, Some(b'}'));
149        debug_assert!(
150            state == Some(State::ObjectFirstKeyOrEnd) || state == Some(State::ObjectNextKeyOrEnd)
151        );
152        Token::EndObject { offset }
153    }
154
155    /// Discards the '[' character and pushes the `ArrayFirstValueOrEnd` state.
156    fn start_array(&mut self) -> Token<'a> {
157        let offset = self.offset();
158        let byte = self.next_byte();
159        debug_assert_eq!(byte, Some(b'['));
160        self.state_stack.push(State::ArrayFirstValueOrEnd);
161        Token::StartArray { offset }
162    }
163
164    /// Discards the ']' character and pops the current state.
165    fn end_array(&mut self) -> Token<'a> {
166        let offset = self.offset();
167        let (byte, state) = (self.next_byte(), self.state_stack.pop());
168        debug_assert_eq!(byte, Some(b']'));
169        debug_assert!(
170            state == Some(State::ArrayFirstValueOrEnd) || state == Some(State::ArrayNextValueOrEnd)
171        );
172        Token::EndArray { offset }
173    }
174
175    /// Reads a JSON string out of the stream.
176    fn read_string(&mut self) -> Result<&'a str, Error> {
177        // Skip the starting quote
178        let quote_byte = self.next_byte();
179        debug_assert_eq!(quote_byte, Some(b'\"'));
180
181        // Read bytes until a non-escaped end-quote, unescaping sequences as needed on the fly
182        let start = self.index;
183        loop {
184            match self.peek_expect()? {
185                b'"' => {
186                    let value = std::str::from_utf8(&self.input[start..self.index])
187                        .map_err(|_| self.error(InvalidUtf8))?;
188                    self.advance();
189                    return Ok(value);
190                }
191                b'\\' => match self.next_expect()? {
192                    b'\\' | b'/' | b'"' | b'b' | b'f' | b'n' | b'r' | b't' => self.advance(),
193                    b'u' => {
194                        if self.index + 4 > self.input.len() {
195                            return Err(self.error_at(self.input.len(), UnexpectedEos));
196                        }
197                        self.index += 4;
198                    }
199                    byte => return Err(self.error(InvalidEscape(byte.into()))),
200                },
201                byte @ 0x00..=0x1F => return Err(self.error(UnexpectedControlCharacter(byte))),
202                _ => self.advance(),
203            }
204        }
205    }
206
207    /// Expects the given literal to be next in the stream.
208    fn expect_literal(&mut self, expected: &[u8]) -> Result<(), Error> {
209        let (start, end) = (self.index, self.index + expected.len());
210        if end > self.input.len() {
211            return Err(self.error_at(self.input.len(), UnexpectedEos));
212        }
213        if expected != &self.input[start..end] {
214            return Err(self.error_at(
215                start,
216                ExpectedLiteral(std::str::from_utf8(expected).unwrap().into()),
217            ));
218        }
219        self.index = end;
220        Ok(())
221    }
222
223    /// Expects a literal `null` next in the stream.
224    fn expect_null(&mut self) -> Result<Token<'a>, Error> {
225        let offset = self.offset();
226        self.expect_literal(b"null")?;
227        Ok(Token::ValueNull { offset })
228    }
229
230    /// Expects a boolean `true` / `false` to be next in the stream and returns its value.
231    fn expect_bool(&mut self) -> Result<Token<'a>, Error> {
232        let offset = self.offset();
233        match self.peek_expect()? {
234            b't' => {
235                self.expect_literal(b"true")?;
236                Ok(Token::ValueBool {
237                    offset,
238                    value: true,
239                })
240            }
241            b'f' => {
242                self.expect_literal(b"false")?;
243                Ok(Token::ValueBool {
244                    offset,
245                    value: false,
246                })
247            }
248            _ => unreachable!(
249                "this function must only be called when the next character is 't' or 'f'"
250            ),
251        }
252    }
253
254    /// Advances passed the exponent part of a floating point number.
255    fn skip_exponent(&mut self) {
256        self.advance();
257        match self.peek_byte() {
258            Some(b'-') => self.advance(),
259            Some(b'+') => self.advance(),
260            _ => {}
261        }
262        while let Some(b'0'..=b'9') = self.peek_byte() {
263            self.advance();
264        }
265    }
266
267    /// Advances passed the decimal part of a floating point number.
268    fn skip_decimal(&mut self) {
269        self.advance();
270        while let Some(byte) = self.peek_byte() {
271            match byte {
272                b'0'..=b'9' => self.advance(),
273                b'e' | b'E' => self.skip_exponent(),
274                _ => break,
275            }
276        }
277    }
278
279    /// Starting from the current location in the stream, this advances until
280    /// it finds a character that doesn't look like its part of a number, and then
281    /// returns `(start_index, end_index, negative, floating)`, with `start_index`
282    /// and `end_index` representing the slice of the stream that is the number,
283    /// `negative` whether or not it is a negative number, and `floating` whether or not
284    /// the number contains a decimal point and/or an exponent.
285    fn scan_number(&mut self) -> (usize, usize, bool, bool) {
286        let start_index = self.index;
287        let negative = if self.peek_byte() == Some(b'-') {
288            self.advance();
289            true
290        } else {
291            false
292        };
293        let mut floating = false;
294        while let Some(byte) = self.peek_byte() {
295            match byte {
296                b'0'..=b'9' => self.advance(),
297                b'.' => {
298                    floating = true;
299                    self.skip_decimal();
300                }
301                b'e' | b'E' => {
302                    floating = true;
303                    self.skip_exponent();
304                }
305                _ => break,
306            }
307        }
308        (start_index, self.index, negative, floating)
309    }
310
311    /// Expects a number in the stream, and returns its value.
312    fn expect_number(&mut self) -> Result<Token<'a>, Error> {
313        let offset = self.offset();
314        let (start, end, negative, floating) = self.scan_number();
315        let number_slice = &self.input[start..end];
316
317        // Unsafe: we examined every character in the range, and they are all number characters
318        debug_assert!(std::str::from_utf8(number_slice).is_ok());
319        let number_str = unsafe { std::str::from_utf8_unchecked(number_slice) };
320
321        use std::str::FromStr;
322        Ok(Token::ValueNumber {
323            offset,
324            value: if floating {
325                Number::Float(
326                    f64::from_str(number_str)
327                        .map_err(|_| self.error_at(start, InvalidNumber))
328                        .and_then(|f| {
329                            must_be_finite(f).map_err(|_| self.error_at(start, InvalidNumber))
330                        })?,
331                )
332            } else if negative {
333                // If the negative value overflows, then stuff it into an f64
334                match u64::from_str(&number_str[1..]) {
335                    Ok(positive) => {
336                        // Check if the positive value fits in i64's negative range
337                        if positive <= i64::MAX as u64 {
338                            Number::NegInt(-(positive as i64))
339                        } else if positive == (i64::MAX as u64) + 1 {
340                            // Special case: i64::MIN
341                            Number::NegInt(i64::MIN)
342                        } else {
343                            // Too large for i64, use f64
344                            Number::Float(-(positive as f64))
345                        }
346                    }
347                    Err(_) => {
348                        // Number too large for u64, parse as f64 (may be infinity)
349                        Number::Float(
350                            f64::from_str(number_str)
351                                .map_err(|_| self.error_at(start, InvalidNumber))?,
352                        )
353                    }
354                }
355            } else {
356                // Try to parse as u64, fall back to f64 if too large
357                match u64::from_str(number_str) {
358                    Ok(n) => Number::PosInt(n),
359                    Err(_) => {
360                        // Number too large for u64, parse as f64 (may be infinity)
361                        Number::Float(
362                            f64::from_str(number_str)
363                                .map_err(|_| self.error_at(start, InvalidNumber))?,
364                        )
365                    }
366                }
367            },
368        })
369    }
370
371    /// Reads a value from the stream and returns the next token. For objects and arrays,
372    /// the entire object or array will not be ready, but rather, a [Token::StartObject]/[Token::StartArray]
373    /// will be returned.
374    fn read_value(&mut self) -> Result<Token<'a>, Error> {
375        self.discard_whitespace();
376        let offset = self.offset();
377        match self.peek_expect()? {
378            b'{' => Ok(self.start_object()),
379            b'[' => Ok(self.start_array()),
380            b'"' => self.read_string().map(|s| Token::ValueString {
381                offset,
382                value: EscapedStr::new(s),
383            }),
384            byte => {
385                let value = match byte {
386                    b'n' => self.expect_null(),
387                    b't' | b'f' => self.expect_bool(),
388                    b'-' | (b'0'..=b'9') => self.expect_number(),
389                    byte => Err(self.error(UnexpectedToken(
390                        byte.into(),
391                        "'{', '[', '\"', 'null', 'true', 'false', <number>",
392                    ))),
393                }?;
394                // Verify there are no unexpected trailers on the end of the value
395                if let Some(byte) = self.peek_byte() {
396                    match byte {
397                        b' ' | b'\t' | b'\r' | b'\n' | b'}' | b']' | b',' => {}
398                        _ => {
399                            return Err(self.error(UnexpectedToken(
400                                byte.into(),
401                                "<whitespace>, '}', ']', ','",
402                            )))
403                        }
404                    }
405                }
406                Ok(value)
407            }
408        }
409    }
410
411    /// Handles the [State::ArrayFirstValueOrEnd] state.
412    fn state_array_first_value_or_end(&mut self) -> Result<Token<'a>, Error> {
413        match self.peek_expect()? {
414            b']' => Ok(self.end_array()),
415            _ => {
416                self.replace_state(State::ArrayNextValueOrEnd);
417                self.read_value()
418            }
419        }
420    }
421
422    /// Handles the [State::ArrayNextValueOrEnd] state.
423    fn state_array_next_value_or_end(&mut self) -> Result<Token<'a>, Error> {
424        match self.peek_expect()? {
425            b']' => Ok(self.end_array()),
426            b',' => {
427                self.advance();
428                self.read_value()
429            }
430            byte => Err(self.error(UnexpectedToken(byte.into(), "']', ','"))),
431        }
432    }
433
434    /// Expects an object key.
435    fn object_key(&mut self) -> Result<Token<'a>, Error> {
436        let offset = self.offset();
437        match self.peek_expect()? {
438            b'"' => {
439                self.replace_state(State::ObjectFieldValue);
440                self.read_string().map(|s| Token::ObjectKey {
441                    offset,
442                    key: EscapedStr::new(s),
443                })
444            }
445            byte => Err(self.error(UnexpectedToken(byte.into(), "'\"'"))),
446        }
447    }
448
449    /// Handles the [State::ObjectFirstKeyOrEnd] state.
450    fn state_object_first_key_or_end(&mut self) -> Result<Token<'a>, Error> {
451        match self.peek_expect()? {
452            b'}' => Ok(self.end_object()),
453            _ => self.object_key(),
454        }
455    }
456
457    /// Handles the [State::ObjectNextKeyOrEnd] state.
458    fn state_object_next_key_or_end(&mut self) -> Result<Token<'a>, Error> {
459        match self.peek_expect()? {
460            b'}' => Ok(self.end_object()),
461            b',' => {
462                self.advance();
463                self.discard_whitespace();
464                self.object_key()
465            }
466            byte => Err(self.error(UnexpectedToken(byte.into(), "'}', ','"))),
467        }
468    }
469
470    /// Handles the [State::ObjectFieldValue] state.
471    fn state_object_field_value(&mut self) -> Result<Token<'a>, Error> {
472        match self.peek_expect()? {
473            b':' => {
474                self.advance();
475                self.replace_state(State::ObjectNextKeyOrEnd);
476                self.read_value()
477            }
478            byte => Err(self.error(UnexpectedToken(byte.into(), "':'"))),
479        }
480    }
481}
482
483impl<'a> Iterator for JsonTokenIterator<'a> {
484    type Item = Result<Token<'a>, Error>;
485
486    fn next(&mut self) -> Option<Self::Item> {
487        debug_assert!(self.index <= self.input.len());
488        if self.index == self.input.len() {
489            return None;
490        }
491
492        self.discard_whitespace();
493        let result = match self.state() {
494            State::Initial => self.peek_byte().map(|_| self.read_value()),
495            State::ArrayFirstValueOrEnd => Some(self.state_array_first_value_or_end()),
496            State::ArrayNextValueOrEnd => Some(self.state_array_next_value_or_end()),
497            State::ObjectFirstKeyOrEnd => Some(self.state_object_first_key_or_end()),
498            State::ObjectNextKeyOrEnd => Some(self.state_object_next_key_or_end()),
499            State::ObjectFieldValue => Some(self.state_object_field_value()),
500        };
501        // Invalidate the stream if we encountered an error
502        if result.as_ref().map(|r| r.is_err()).unwrap_or(false) {
503            self.index = self.input.len();
504        }
505        result
506    }
507}
508
/// Passes `f` through as `Ok` when it is finite; infinities and NaN yield `Err(())`.
fn must_be_finite(f: f64) -> Result<f64, ()> {
    Some(f).filter(|value| value.is_finite()).ok_or(())
}
516
/// Passes `f` through as `Ok` when it is an infinity or NaN; finite values yield `Err(())`.
fn must_not_be_finite(f: f64) -> Result<f64, ()> {
    match f.is_finite() {
        false => Ok(f),
        true => Err(()),
    }
}
524
525#[cfg(test)]
526mod tests {
527    use crate::deserialize::error::{DeserializeError as Error, DeserializeErrorKind as ErrorKind};
528    use crate::deserialize::token::expect_number_as_string_or_null;
529    use crate::deserialize::token::test::{
530        end_array, end_object, object_key, start_array, start_object, value_bool, value_null,
531        value_number, value_string,
532    };
533    use crate::deserialize::{json_token_iter, EscapedStr, Token};
534    use aws_smithy_types::Number;
535    use proptest::prelude::*;
536
537    #[track_caller]
538    fn expect_token(
539        expected: Option<Result<Token<'_>, Error>>,
540        actual: Option<Result<Token<'_>, Error>>,
541    ) {
542        let (expected, actual) = (
543            expected.transpose().expect("err in expected"),
544            actual.transpose().expect("err in actual"),
545        );
546        assert_eq!(expected, actual);
547    }
548
549    macro_rules! expect_err {
550        ($kind:pat, $offset:expr, $value:expr) => {
551            let err: Error = $value.transpose().err().expect("expected error");
552            assert!(matches!(err.kind, $kind));
553            assert_eq!($offset, err.offset);
554        };
555    }
556
557    #[test]
558    fn test_empty() {
559        assert!(json_token_iter(b"").next().is_none());
560        assert!(json_token_iter(b" ").next().is_none());
561        assert!(json_token_iter(b"\t").next().is_none());
562    }
563
564    #[test]
565    fn test_empty_string() {
566        let mut iter = json_token_iter(b"\"\"");
567        expect_token(value_string(0, ""), iter.next());
568        expect_token(None, iter.next());
569
570        let mut iter = json_token_iter(b" \r\n\t \"\"  ");
571        expect_token(value_string(5, ""), iter.next());
572        expect_token(None, iter.next());
573    }
574
575    #[test]
576    fn test_empty_array() {
577        let mut iter = json_token_iter(b"[]");
578        expect_token(start_array(0), iter.next());
579        expect_token(end_array(1), iter.next());
580        expect_token(None, iter.next());
581    }
582
583    #[test]
584    fn test_empty_object() {
585        let mut iter = json_token_iter(b"{}");
586        expect_token(start_object(0), iter.next());
587        expect_token(end_object(1), iter.next());
588        expect_token(None, iter.next());
589    }
590
591    #[test]
592    fn test_null() {
593        expect_token(value_null(1), json_token_iter(b" null ").next());
594
595        let mut iter = json_token_iter(b"[null, null,null]");
596        expect_token(start_array(0), iter.next());
597        expect_token(value_null(1), iter.next());
598        expect_token(value_null(7), iter.next());
599        expect_token(value_null(12), iter.next());
600        expect_token(end_array(16), iter.next());
601        expect_token(None, iter.next());
602
603        assert!(json_token_iter(b"n").next().unwrap().is_err());
604        assert!(json_token_iter(b"nul").next().unwrap().is_err());
605        assert!(json_token_iter(b"nulll").next().unwrap().is_err());
606    }
607
608    #[test]
609    fn test_bools() {
610        assert!(json_token_iter(b"tru").next().unwrap().is_err());
611        assert!(json_token_iter(b"truee").next().unwrap().is_err());
612        assert!(json_token_iter(b"f").next().unwrap().is_err());
613        assert!(json_token_iter(b"falsee").next().unwrap().is_err());
614        expect_token(value_bool(1, true), json_token_iter(b" true ").next());
615        expect_token(value_bool(0, false), json_token_iter(b"false").next());
616
617        let mut iter = json_token_iter(b"[true,false]");
618        expect_token(start_array(0), iter.next());
619        expect_token(value_bool(1, true), iter.next());
620        expect_token(value_bool(6, false), iter.next());
621        expect_token(end_array(11), iter.next());
622        expect_token(None, iter.next());
623    }
624
625    proptest! {
626        #[test]
627        fn string_prop_test(input in ".*") {
628            let json: String = serde_json::to_string(&input).unwrap();
629            let mut iter = json_token_iter(json.as_bytes());
630            expect_token(value_string(0, &json[1..(json.len() - 1)]), iter.next());
631            expect_token(None, iter.next());
632        }
633
634        #[test]
635        fn integer_prop_test(input: i64) {
636            let json = serde_json::to_string(&input).unwrap();
637            let mut iter = json_token_iter(json.as_bytes());
638            let expected = if input < 0 {
639                Number::NegInt(input)
640            } else {
641                Number::PosInt(input as u64)
642            };
643            expect_token(value_number(0, expected), iter.next());
644            expect_token(None, iter.next());
645        }
646
647        #[test]
648        fn float_prop_test(input: f64) {
649            let json = serde_json::to_string(&input).unwrap();
650            let mut iter = json_token_iter(json.as_bytes());
651            expect_token(value_number(0, Number::Float(input)), iter.next());
652            expect_token(None, iter.next());
653        }
654    }
655
656    #[test]
657    fn valid_numbers() {
658        let expect = |number, input| {
659            expect_token(value_number(0, number), json_token_iter(input).next());
660        };
661        expect(Number::Float(0.0), b"0.");
662        expect(Number::Float(0.0), b"0e0");
663        expect(Number::Float(0.0), b"0E0");
664        expect(Number::Float(10.0), b"1E1");
665        expect(Number::Float(10.0), b"1E+1");
666        expect(Number::Float(100.0), b"1e+2");
667
668        expect(Number::NegInt(-50000), b"-50000");
669        expect(
670            Number::Float(-18446744073709551615.0),
671            b"-18446744073709551615",
672        );
673    }
674
675    // These cases actually shouldn't parse according to the spec, but it's easier
676    // to be lenient on these, and it doesn't really impact the SDK use-case.
677    #[test]
678    fn invalid_numbers_we_are_intentionally_accepting() {
679        let expect = |number, input| {
680            expect_token(value_number(0, number), json_token_iter(input).next());
681        };
682
683        expect(Number::NegInt(-1), b"-01");
684        expect(Number::Float(-2.0), b"-2.");
685        expect(Number::Float(0.0), b"0.e1");
686        expect(Number::Float(0.002), b"2.e-3");
687        expect(Number::Float(2000.0), b"2.e3");
688        expect(Number::NegInt(-12), b"-012");
689        expect(Number::Float(-0.123), b"-.123");
690        expect(Number::Float(1.0), b"1.");
691        expect(Number::PosInt(12), b"012");
692    }
693
694    #[test]
695    fn invalid_numbers() {
696        macro_rules! unexpected_token {
697            ($input:expr, $token:pat, $offset:expr, $msg:pat) => {
698                let tokens: Vec<Result<Token<'_>, Error>> = json_token_iter($input).collect();
699                assert_eq!(1, tokens.len());
700                expect_err!(
701                    ErrorKind::UnexpectedToken($token, $msg),
702                    Some($offset),
703                    tokens.into_iter().next()
704                );
705            };
706        }
707
708        let invalid_number = |input, offset| {
709            let tokens: Vec<Result<Token<'_>, Error>> = json_token_iter(input).collect();
710            assert_eq!(1, tokens.len());
711            expect_err!(
712                ErrorKind::InvalidNumber,
713                Some(offset),
714                tokens.into_iter().next()
715            );
716        };
717
718        unexpected_token!(
719            b".",
720            '.',
721            0,
722            "'{', '[', '\"', 'null', 'true', 'false', <number>"
723        );
724        unexpected_token!(
725            b".0",
726            '.',
727            0,
728            "'{', '[', '\"', 'null', 'true', 'false', <number>"
729        );
730        unexpected_token!(b"0-05", '-', 1, "<whitespace>, '}', ']', ','");
731        unexpected_token!(b"0x05", 'x', 1, "<whitespace>, '}', ']', ','");
732        unexpected_token!(b"123.invalid", 'i', 4, "<whitespace>, '}', ']', ','");
733        unexpected_token!(b"123invalid", 'i', 3, "<whitespace>, '}', ']', ','");
734        unexpected_token!(
735            b"asdf",
736            'a',
737            0,
738            "'{', '[', '\"', 'null', 'true', 'false', <number>"
739        );
740
741        invalid_number(b"-a", 0);
742        invalid_number(b"1e", 0);
743        invalid_number(b"1e-", 0);
744
745        // Number parsing fails before it even looks at the trailer because of invalid exponent
746        invalid_number(b"123.0Einvalid", 0);
747    }
748
749    #[test]
750    fn test_unclosed_array() {
751        let mut iter = json_token_iter(br#" [null "#);
752        expect_token(start_array(1), iter.next());
753        expect_token(value_null(2), iter.next());
754        expect_err!(ErrorKind::UnexpectedEos, Some(7), iter.next());
755    }
756
757    #[test]
758    fn test_array_with_items() {
759        let mut iter = json_token_iter(b"[[], {}, \"test\"]");
760        expect_token(start_array(0), iter.next());
761        expect_token(start_array(1), iter.next());
762        expect_token(end_array(2), iter.next());
763        expect_token(start_object(5), iter.next());
764        expect_token(end_object(6), iter.next());
765        expect_token(value_string(9, "test"), iter.next());
766        expect_token(end_array(15), iter.next());
767        expect_token(None, iter.next());
768    }
769
770    #[test]
771    fn test_object_with_items() {
772        let mut tokens = json_token_iter(
773            br#"{ "some_int": 5,
774                  "some_float": 5.2,
775                  "some_negative": -5,
776                  "some_negative_float": -2.4,
777                  "some_string": "test",
778                  "some_struct": { "nested": "asdf" },
779                  "some_array": ["one", "two"] }"#,
780        );
781        expect_token(start_object(0), tokens.next());
782        expect_token(object_key(2, "some_int"), tokens.next());
783        expect_token(value_number(14, Number::PosInt(5)), tokens.next());
784        expect_token(object_key(35, "some_float"), tokens.next());
785        expect_token(value_number(49, Number::Float(5.2)), tokens.next());
786        expect_token(object_key(72, "some_negative"), tokens.next());
787        expect_token(value_number(89, Number::NegInt(-5)), tokens.next());
788        expect_token(object_key(111, "some_negative_float"), tokens.next());
789        expect_token(value_number(134, Number::Float(-2.4)), tokens.next());
790        expect_token(object_key(158, "some_string"), tokens.next());
791        expect_token(value_string(173, "test"), tokens.next());
792        expect_token(object_key(199, "some_struct"), tokens.next());
793        expect_token(start_object(214), tokens.next());
794        expect_token(object_key(216, "nested"), tokens.next());
795        expect_token(value_string(226, "asdf"), tokens.next());
796        expect_token(end_object(233), tokens.next());
797        expect_token(object_key(254, "some_array"), tokens.next());
798        expect_token(start_array(268), tokens.next());
799        expect_token(value_string(269, "one"), tokens.next());
800        expect_token(value_string(276, "two"), tokens.next());
801        expect_token(end_array(281), tokens.next());
802        expect_token(end_object(283), tokens.next());
803        expect_token(None, tokens.next());
804    }
805
806    #[test]
807    fn test_object_trailing_comma() {
808        let mut iter = json_token_iter(br#" { "test": "trailing", } "#);
809        expect_token(start_object(1), iter.next());
810        expect_token(object_key(3, "test"), iter.next());
811        expect_token(value_string(11, "trailing"), iter.next());
812        expect_err!(
813            ErrorKind::UnexpectedToken('}', "'\"'"),
814            Some(23),
815            iter.next()
816        );
817        assert!(iter.next().is_none());
818    }
819
820    #[test]
821    fn test_object_no_colon() {
822        let mut iter = json_token_iter(br#" {"test" "#);
823        expect_token(start_object(1), iter.next());
824        expect_token(object_key(2, "test"), iter.next());
825        expect_err!(ErrorKind::UnexpectedEos, Some(9), iter.next());
826        expect_token(None, iter.next());
827    }
828
829    #[test]
830    fn unescaped_ctrl_characters() {
831        assert!(json_token_iter(b"\"test\x00test\"")
832            .next()
833            .unwrap()
834            .is_err());
835        assert!(json_token_iter(b"\"test\ntest\"").next().unwrap().is_err());
836        assert!(json_token_iter(b"\"test\ttest\"").next().unwrap().is_err());
837    }
838
839    #[test]
840    fn escaped_str() {
841        let escaped = EscapedStr::new("foo\\nbar");
842        assert_eq!("foo\\nbar", escaped.as_escaped_str());
843        assert_eq!("foo\nbar", escaped.to_unescaped().unwrap());
844    }
845
846    #[test]
847    fn test_integer_overflow_to_float() {
848        // Positive integer larger than u64::MAX should parse as Float
849        let input = b"18450000000000000000";
850        let mut iter = json_token_iter(input);
851        match iter.next() {
852            Some(Ok(Token::ValueNumber {
853                value: Number::Float(f),
854                ..
855            })) => {
856                assert!(f.is_finite());
857                assert!(f > 0.0);
858            }
859            other => panic!("Expected Float token, got {:?}", other),
860        }
861
862        // Negative integer smaller than i64::MIN should parse as Float
863        let input = b"-9223372036854775809";
864        let mut iter = json_token_iter(input);
865        match iter.next() {
866            Some(Ok(Token::ValueNumber {
867                value: Number::Float(f),
868                ..
869            })) => {
870                assert!(f.is_finite());
871                assert!(f < 0.0);
872            }
873            other => panic!("Expected Float token, got {:?}", other),
874        }
875
876        // Extremely large number should parse as infinity
877        let large_num = b"100000000000000000000000000000000000000000000000000000000000000\
878        0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\
879        0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\
880        00000000000000000000000000000000000000000000000000000000000000000000000";
881        let mut iter = json_token_iter(large_num);
882        match iter.next() {
883            Some(Ok(Token::ValueNumber {
884                value: Number::Float(f),
885                ..
886            })) => {
887                assert_eq!(f, f64::INFINITY);
888            }
889            other => panic!("Expected Float(infinity) token, got {:?}", other),
890        }
891    }
892
893    #[test]
894    fn test_integer_within_range() {
895        // Numbers that fit in u64/i64 should still parse as PosInt/NegInt
896        let input = b"9007199254740993";
897        let mut iter = json_token_iter(input);
898        match iter.next() {
899            Some(Ok(Token::ValueNumber {
900                value: Number::PosInt(n),
901                ..
902            })) => {
903                assert_eq!(n, 9007199254740993);
904            }
905            other => panic!("Expected PosInt token, got {:?}", other),
906        }
907
908        let input = b"-9223372036854775808";
909        let mut iter = json_token_iter(input);
910        match iter.next() {
911            Some(Ok(Token::ValueNumber {
912                value: Number::NegInt(n),
913                ..
914            })) => {
915                assert_eq!(n, i64::MIN);
916            }
917            other => panic!("Expected NegInt token, got {:?}", other),
918        }
919    }
920
921    #[test]
922    fn test_integer_boundaries() {
923        // Zero
924        let input = b"0";
925        let mut iter = json_token_iter(input);
926        match iter.next() {
927            Some(Ok(Token::ValueNumber {
928                value: Number::PosInt(0),
929                ..
930            })) => {}
931            other => panic!("Expected PosInt(0), got {:?}", other),
932        }
933
934        // Regular negative number
935        let input = b"-123";
936        let mut iter = json_token_iter(input);
937        match iter.next() {
938            Some(Ok(Token::ValueNumber {
939                value: Number::NegInt(-123),
940                ..
941            })) => {}
942            other => panic!("Expected NegInt(-123), got {:?}", other),
943        }
944
945        // i64::MAX (largest positive i64)
946        let input = b"9223372036854775807";
947        let mut iter = json_token_iter(input);
948        match iter.next() {
949            Some(Ok(Token::ValueNumber {
950                value: Number::PosInt(n),
951                ..
952            })) => {
953                assert_eq!(n, i64::MAX as u64);
954            }
955            other => panic!("Expected PosInt(i64::MAX), got {:?}", other),
956        }
957
958        // i64::MIN + 1 (edge case for negative range check)
959        let input = b"-9223372036854775807";
960        let mut iter = json_token_iter(input);
961        match iter.next() {
962            Some(Ok(Token::ValueNumber {
963                value: Number::NegInt(n),
964                ..
965            })) => {
966                assert_eq!(n, i64::MIN + 1);
967            }
968            other => panic!("Expected NegInt(i64::MIN + 1), got {:?}", other),
969        }
970
971        // u64::MAX (fits in u64, should be PosInt)
972        let input = b"18446744073709551615";
973        let mut iter = json_token_iter(input);
974        match iter.next() {
975            Some(Ok(Token::ValueNumber {
976                value: Number::PosInt(n),
977                ..
978            })) => {
979                assert_eq!(n, u64::MAX);
980            }
981            other => panic!("Expected PosInt(u64::MAX), got {:?}", other),
982        }
983    }
984
985    #[cfg(test)]
986    mod proptest_tests {
987        use super::*;
988
989        proptest! {
990            #[test]
991            fn positive_integers_within_u64_parse_as_posint(n in 0u64..=u64::MAX) {
992                let input = n.to_string();
993                let input_bytes = input.as_bytes();
994                let mut iter = json_token_iter(input_bytes);
995
996                match iter.next() {
997                    Some(Ok(Token::ValueNumber { value: Number::PosInt(parsed), .. })) => {
998                        prop_assert_eq!(parsed, n);
999                    }
1000                    other => {
1001                        return Err(proptest::test_runner::TestCaseError::fail(
1002                            format!("Expected PosInt({}), got {:?}", n, other)
1003                        ));
1004                    }
1005                }
1006            }
1007
1008            #[test]
1009            fn negative_integers_within_i64_parse_as_negint(n in i64::MIN..=i64::MAX) {
1010                if n >= 0 {
1011                    return Ok(());
1012                }
1013
1014                let input = n.to_string();
1015                let input_bytes = input.as_bytes();
1016                let mut iter = json_token_iter(input_bytes);
1017
1018                match iter.next() {
1019                    Some(Ok(Token::ValueNumber { value: Number::NegInt(parsed), .. })) => {
1020                        prop_assert_eq!(parsed, n);
1021                    }
1022                    other => {
1023                        return Err(proptest::test_runner::TestCaseError::fail(
1024                            format!("Expected NegInt({}), got {:?}", n, other)
1025                        ));
1026                    }
1027                }
1028            }
1029
1030            #[test]
1031            fn large_integers_overflow_to_float(
1032                // u64::MAX = 18_446_744_073_709_551_615 (20 digits)
1033                // Generate numbers with 21+ digits to guarantee overflow
1034                num_str in "1[0-9]{20,49}"
1035            ) {
1036                let input_bytes = num_str.as_bytes();
1037                let mut iter = json_token_iter(input_bytes);
1038
1039                match iter.next() {
1040                    Some(Ok(Token::ValueNumber { value: Number::Float(f), .. })) => {
1041                        prop_assert!(f.is_finite());
1042                        prop_assert!(f > 0.0);
1043                    }
1044                    other => {
1045                        return Err(proptest::test_runner::TestCaseError::fail(
1046                            format!("Expected Float for large number, got {:?}", other)
1047                        ));
1048                    }
1049                }
1050
1051                // Validate expect_number_as_string_or_null extracts the correct string
1052                let mut iter = json_token_iter(input_bytes);
1053                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
1054                prop_assert_eq!(result, Some(num_str.as_str()));
1055            }
1056        }
1057    }
1058}