aws_smithy_json/deserialize/
token.rs

1/*
2 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 */
5
6use crate::deserialize::error::DeserializeError as Error;
7use crate::deserialize::must_not_be_finite;
8use crate::escape::unescape_string;
9pub use crate::escape::EscapeError;
10use aws_smithy_types::date_time::Format;
11use aws_smithy_types::primitive::Parse;
12use aws_smithy_types::{base64, Blob, DateTime, Document, Number};
13use std::borrow::Cow;
14use std::collections::HashMap;
15use std::iter::Peekable;
16
17/// New-type around `&str` that indicates the string is an escaped JSON string.
18/// Provides functions for retrieving the string in either form.
19#[derive(Debug, PartialEq, Eq, Copy, Clone)]
20pub struct EscapedStr<'a>(&'a str);
21
22impl<'a> EscapedStr<'a> {
23    pub fn new(value: &'a str) -> EscapedStr<'a> {
24        EscapedStr(value)
25    }
26
27    /// Returns the escaped string value
28    pub fn as_escaped_str(&self) -> &'a str {
29        self.0
30    }
31
32    /// Unescapes the string and returns it.
33    /// If the string doesn't need unescaping, it will be returned directly.
34    pub fn to_unescaped(self) -> Result<Cow<'a, str>, EscapeError> {
35        unescape_string(self.0)
36    }
37}
38
39/// Represents the location of a token
40#[derive(Debug, Eq, PartialEq, Copy, Clone)]
41pub struct Offset(pub usize);
42
43impl Offset {
44    /// Creates a custom error from the offset
45    pub fn error(&self, msg: Cow<'static, str>) -> Error {
46        Error::custom(msg).with_offset(self.0)
47    }
48}
49
50/// Enum representing the different JSON tokens that can be returned by
51/// [`crate::deserialize::json_token_iter`].
52#[derive(Debug, PartialEq)]
53pub enum Token<'a> {
54    StartArray {
55        offset: Offset,
56    },
57    EndArray {
58        offset: Offset,
59    },
60    ObjectKey {
61        offset: Offset,
62        key: EscapedStr<'a>,
63    },
64    StartObject {
65        offset: Offset,
66    },
67    EndObject {
68        offset: Offset,
69    },
70    ValueBool {
71        offset: Offset,
72        value: bool,
73    },
74    ValueNull {
75        offset: Offset,
76    },
77    ValueNumber {
78        offset: Offset,
79        value: Number,
80    },
81    ValueString {
82        offset: Offset,
83        value: EscapedStr<'a>,
84    },
85}
86
87impl Token<'_> {
88    pub fn offset(&self) -> Offset {
89        use Token::*;
90        *match self {
91            StartArray { offset } => offset,
92            EndArray { offset } => offset,
93            ObjectKey { offset, .. } => offset,
94            StartObject { offset } => offset,
95            EndObject { offset } => offset,
96            ValueBool { offset, .. } => offset,
97            ValueNull { offset } => offset,
98            ValueNumber { offset, .. } => offset,
99            ValueString { offset, .. } => offset,
100        }
101    }
102
103    /// Builds an error from the token's offset
104    pub fn error(&self, msg: Cow<'static, str>) -> Error {
105        self.offset().error(msg)
106    }
107}
108
109macro_rules! expect_fn {
110    ($name:ident, $token:ident, $doc:tt) => {
111        #[doc=$doc]
112        pub fn $name(token_result: Option<Result<Token<'_>, Error>>) -> Result<(), Error> {
113            match token_result.transpose()? {
114                Some(Token::$token { .. }) => Ok(()),
115                Some(token) => {
116                    Err(token.error(Cow::Borrowed(concat!("expected ", stringify!($token)))))
117                }
118                None => Err(Error::custom(concat!("expected ", stringify!($token)))),
119            }
120        }
121    };
122}
123
124expect_fn!(
125    expect_start_object,
126    StartObject,
127    "Expects a [Token::StartObject] token and returns an error if it's not present."
128);
129expect_fn!(
130    expect_start_array,
131    StartArray,
132    "Expects a [Token::StartArray] token and returns an error if it's not present."
133);
134
135macro_rules! expect_value_or_null_fn {
136    ($name:ident, $token:ident, $typ:ident, $doc:tt) => {
137        #[doc=$doc]
138        #[allow(unknown_lints)]
139        #[allow(mismatched_lifetime_syntaxes)]
140        pub fn $name(token: Option<Result<Token<'_>, Error>>) -> Result<Option<$typ>, Error> {
141            match token.transpose()? {
142                Some(Token::ValueNull { .. }) => Ok(None),
143                Some(Token::$token { value, .. }) => Ok(Some(value)),
144                _ => Err(Error::custom(concat!(
145                    "expected ",
146                    stringify!($token),
147                    " or ValueNull"
148                ))),
149            }
150        }
151    };
152}
153
154expect_value_or_null_fn!(expect_bool_or_null, ValueBool, bool, "Expects a [Token::ValueBool] or [Token::ValueNull], and returns the bool value if it's not null.");
155expect_value_or_null_fn!(expect_string_or_null, ValueString, EscapedStr, "Expects a [Token::ValueString] or [Token::ValueNull], and returns the [EscapedStr] value if it's not null.");
156
157/// Expects a [Token::ValueString], [Token::ValueNumber] or [Token::ValueNull].
158///
159/// If the value is a string, it MUST be `Infinity`, `-Infinity` or `Nan`.
160/// If the value is a number, it is returned directly
161pub fn expect_number_or_null(
162    token: Option<Result<Token<'_>, Error>>,
163) -> Result<Option<Number>, Error> {
164    match token.transpose()? {
165        Some(Token::ValueNull { .. }) => Ok(None),
166        Some(Token::ValueNumber { value, .. }) => Ok(Some(value)),
167        Some(Token::ValueString { value, offset }) => match value.to_unescaped() {
168            Err(err) => Err(Error::custom_source( "expected a valid string, escape was invalid", err).with_offset(offset.0)),
169            Ok(v) => f64::parse_smithy_primitive(v.as_ref())
170                // disregard the exact error
171                .map_err(|_|())
172                // only infinite / NaN can be used as strings
173                .and_then(must_not_be_finite)
174                .map(|float| Some(aws_smithy_types::Number::Float(float)))
175                // convert to a helpful error
176                .map_err(|_| {
177                    Error::custom(
178                        format!(
179                        "only `Infinity`, `-Infinity`, `NaN` can represent a float as a string but found `{v}`"
180                    )).with_offset(offset.0)
181                }),
182        },
183        _ => Err(Error::custom(
184            "expected ValueString, ValueNumber, or ValueNull",
185        )),
186    }
187}
188
189/// Expects a [Token::ValueNumber] or [Token::ValueNull], and returns the number as a string
190/// to preserve arbitrary precision.
191///
192/// This function extracts the raw JSON number string without converting it to u64/i64/f64,
193/// which would cause precision loss for numbers larger than those types can represent.
194/// This is essential for BigInteger and BigDecimal support.
195///
196/// # Arguments
197/// * `token` - The token to extract the number from
198/// * `input` - The original JSON input bytes (needed to extract the raw number string)
199///
200/// # Returns
201/// * `Ok(Some(string))` - The number as a string slice
202/// * `Ok(None)` - If the token is null
203/// * `Err` - If the token is not a number or null
204pub fn expect_number_as_string_or_null<'a>(
205    token: Option<Result<Token<'a>, Error>>,
206    input: &'a [u8],
207) -> Result<Option<&'a str>, Error> {
208    match token.transpose()? {
209        Some(Token::ValueNull { .. }) => Ok(None),
210        Some(Token::ValueNumber { offset, .. }) => {
211            let start = offset.0;
212            let mut end = start;
213
214            // Skip optional minus sign
215            if end < input.len() && input[end] == b'-' {
216                end += 1;
217            }
218
219            // Scan digits, decimal point, exponent
220            while end < input.len() {
221                match input[end] {
222                    b'0'..=b'9' | b'.' | b'e' | b'E' | b'+' | b'-' => end += 1,
223                    _ => break,
224                }
225            }
226
227            let number_slice = &input[start..end];
228            let number_str = std::str::from_utf8(number_slice)
229                .map_err(|_| Error::custom("invalid UTF-8 in number"))?;
230            Ok(Some(number_str))
231        }
232        _ => Err(Error::custom("expected ValueNumber or ValueNull")),
233    }
234}
235
236/// Expects a [Token::ValueString] or [Token::ValueNull]. If the value is a string, it interprets it as a base64 encoded [Blob] value.
237pub fn expect_blob_or_null(token: Option<Result<Token<'_>, Error>>) -> Result<Option<Blob>, Error> {
238    Ok(match expect_string_or_null(token)? {
239        Some(value) => Some(Blob::new(
240            base64::decode(value.as_escaped_str())
241                .map_err(|err| Error::custom_source("failed to decode base64", err))?,
242        )),
243        None => None,
244    })
245}
246
247/// Expects a [Token::ValueNull], [Token::ValueString], or [Token::ValueNumber] depending
248/// on the passed in `timestamp_format`. If there is a non-null value, it interprets it as an
249/// [`DateTime` ] in the requested format.
250pub fn expect_timestamp_or_null(
251    token: Option<Result<Token<'_>, Error>>,
252    timestamp_format: Format,
253) -> Result<Option<DateTime>, Error> {
254    Ok(match timestamp_format {
255        Format::EpochSeconds => expect_number_or_null(token)?
256            .map(|v| v.to_f64_lossy())
257            .map(|v| {
258                if v.is_nan() {
259                    Err(Error::custom("NaN is not a valid epoch"))
260                } else if v.is_infinite() {
261                    Err(Error::custom("infinity is not a valid epoch"))
262                } else {
263                    Ok(DateTime::from_secs_f64(v))
264                }
265            })
266            .transpose()?,
267        Format::DateTime | Format::HttpDate | Format::DateTimeWithOffset => {
268            expect_string_or_null(token)?
269                .map(|v| DateTime::from_str(v.as_escaped_str(), timestamp_format))
270                .transpose()
271                .map_err(|err| Error::custom_source("failed to parse timestamp", err))?
272        }
273    })
274}
275
276/// Expects and parses a complete document value.
277pub fn expect_document<'a, I>(tokens: &mut Peekable<I>) -> Result<Document, Error>
278where
279    I: Iterator<Item = Result<Token<'a>, Error>>,
280{
281    expect_document_inner(tokens, 0)
282}
283
284const MAX_DOCUMENT_RECURSION: usize = 256;
285
286fn expect_document_inner<'a, I>(tokens: &mut Peekable<I>, depth: usize) -> Result<Document, Error>
287where
288    I: Iterator<Item = Result<Token<'a>, Error>>,
289{
290    if depth >= MAX_DOCUMENT_RECURSION {
291        return Err(Error::custom(
292            "exceeded max recursion depth while parsing document",
293        ));
294    }
295    match tokens.next().transpose()? {
296        Some(Token::ValueNull { .. }) => Ok(Document::Null),
297        Some(Token::ValueBool { value, .. }) => Ok(Document::Bool(value)),
298        Some(Token::ValueNumber { value, .. }) => Ok(Document::Number(value)),
299        Some(Token::ValueString { value, .. }) => {
300            Ok(Document::String(value.to_unescaped()?.into_owned()))
301        }
302        Some(Token::StartObject { .. }) => {
303            let mut object = HashMap::new();
304            loop {
305                match tokens.next().transpose()? {
306                    Some(Token::EndObject { .. }) => break,
307                    Some(Token::ObjectKey { key, .. }) => {
308                        let key = key.to_unescaped()?.into_owned();
309                        let value = expect_document_inner(tokens, depth + 1)?;
310                        object.insert(key, value);
311                    }
312                    _ => return Err(Error::custom("expected object key or end object")),
313                }
314            }
315            Ok(Document::Object(object))
316        }
317        Some(Token::StartArray { .. }) => {
318            let mut array = Vec::new();
319            loop {
320                match tokens.peek() {
321                    Some(Ok(Token::EndArray { .. })) => {
322                        tokens.next().transpose().unwrap();
323                        break;
324                    }
325                    _ => array.push(expect_document_inner(tokens, depth + 1)?),
326                }
327            }
328            Ok(Document::Array(array))
329        }
330        Some(Token::EndObject { .. }) | Some(Token::ObjectKey { .. }) => {
331            unreachable!("end object and object key are handled in start object")
332        }
333        Some(Token::EndArray { .. }) => unreachable!("end array is handled in start array"),
334        None => Err(Error::custom("expected value")),
335    }
336}
337
338/// Skips an entire value in the token stream. Errors if it isn't a value.
339pub fn skip_value<'a>(
340    tokens: &mut impl Iterator<Item = Result<Token<'a>, Error>>,
341) -> Result<(), Error> {
342    skip_inner(0, tokens)
343}
344
345/// Assumes a start object/array token has already been consumed and skips tokens until
346/// until its corresponding end object/array token is found.
347pub fn skip_to_end<'a>(
348    tokens: &mut impl Iterator<Item = Result<Token<'a>, Error>>,
349) -> Result<(), Error> {
350    skip_inner(1, tokens)
351}
352
353fn skip_inner<'a>(
354    depth: isize,
355    tokens: &mut impl Iterator<Item = Result<Token<'a>, Error>>,
356) -> Result<(), Error> {
357    loop {
358        match tokens.next().transpose()? {
359            Some(Token::StartObject { .. }) | Some(Token::StartArray { .. }) => {
360                skip_inner(depth + 1, tokens)?;
361                if depth == 0 {
362                    break;
363                }
364            }
365            Some(Token::EndObject { .. }) | Some(Token::EndArray { .. }) => {
366                debug_assert!(depth > 0);
367                break;
368            }
369            Some(Token::ValueNull { .. })
370            | Some(Token::ValueBool { .. })
371            | Some(Token::ValueNumber { .. })
372            | Some(Token::ValueString { .. }) => {
373                if depth == 0 {
374                    break;
375                }
376            }
377            Some(Token::ObjectKey { .. }) => {}
378            _ => return Err(Error::custom("expected value")),
379        }
380    }
381    Ok(())
382}
383
384#[cfg(test)]
385pub mod test {
386    use super::*;
387    use crate::deserialize::error::DeserializeErrorKind as ErrorKind;
388    use crate::deserialize::error::DeserializeErrorKind::UnexpectedToken;
389    use crate::deserialize::json_token_iter;
390
391    pub fn start_array<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
392        Some(Ok(Token::StartArray {
393            offset: Offset(offset),
394        }))
395    }
396
397    pub fn end_array<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
398        Some(Ok(Token::EndArray {
399            offset: Offset(offset),
400        }))
401    }
402
403    pub fn start_object<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
404        Some(Ok(Token::StartObject {
405            offset: Offset(offset),
406        }))
407    }
408
409    pub fn end_object<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
410        Some(Ok(Token::EndObject {
411            offset: Offset(offset),
412        }))
413    }
414
415    pub fn object_key(offset: usize, key: &str) -> Option<Result<Token<'_>, Error>> {
416        Some(Ok(Token::ObjectKey {
417            offset: Offset(offset),
418            key: EscapedStr::new(key),
419        }))
420    }
421
422    pub fn value_bool<'a>(offset: usize, boolean: bool) -> Option<Result<Token<'a>, Error>> {
423        Some(Ok(Token::ValueBool {
424            offset: Offset(offset),
425            value: boolean,
426        }))
427    }
428
429    pub fn value_number<'a>(offset: usize, number: Number) -> Option<Result<Token<'a>, Error>> {
430        Some(Ok(Token::ValueNumber {
431            offset: Offset(offset),
432            value: number,
433        }))
434    }
435
436    pub fn value_null<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
437        Some(Ok(Token::ValueNull {
438            offset: Offset(offset),
439        }))
440    }
441
442    pub fn value_string(offset: usize, string: &str) -> Option<Result<Token<'_>, Error>> {
443        Some(Ok(Token::ValueString {
444            offset: Offset(offset),
445            value: EscapedStr::new(string),
446        }))
447    }
448
449    #[track_caller]
450    fn expect_err_custom<T>(message: &str, offset: Option<usize>, result: Result<T, Error>) {
451        let err = result.err().expect("expected error");
452        let (actual_message, actual_offset) = match &err.kind {
453            ErrorKind::Custom { message, .. } => (message.as_ref(), err.offset),
454            _ => panic!("expected ErrorKind::Custom, got {err:?}"),
455        };
456        assert_eq!((message, offset), (actual_message, actual_offset));
457    }
458
459    #[test]
460    fn skip_simple_value() {
461        let mut tokens = json_token_iter(b"null true");
462        skip_value(&mut tokens).unwrap();
463        assert!(matches!(
464            tokens.next(),
465            Some(Ok(Token::ValueBool { value: true, .. }))
466        ))
467    }
468
469    #[test]
470    fn skip_array() {
471        let mut tokens = json_token_iter(b"[1, 2, 3, 4] true");
472        skip_value(&mut tokens).unwrap();
473        assert!(matches!(
474            tokens.next(),
475            Some(Ok(Token::ValueBool { value: true, .. }))
476        ))
477    }
478
479    #[test]
480    fn skip_object() {
481        let mut tokens = json_token_iter(b"{\"one\": 5, \"two\": 3} true");
482        skip_value(&mut tokens).unwrap();
483        assert!(matches!(
484            tokens.next(),
485            Some(Ok(Token::ValueBool { value: true, .. }))
486        ))
487    }
488
489    #[test]
490    fn test_skip_to_end() {
491        let tokens = json_token_iter(b"{\"one\": { \"two\": [] }, \"three\":2 }");
492        let mut tokens = tokens.skip(2);
493        assert!(matches!(tokens.next(), Some(Ok(Token::StartObject { .. }))));
494        skip_to_end(&mut tokens).unwrap();
495        match tokens.next() {
496            Some(Ok(Token::ObjectKey { key, .. })) => {
497                assert_eq!("three", key.as_escaped_str());
498            }
499            _ => panic!("expected object key three"),
500        }
501    }
502
503    #[test]
504    fn test_non_finite_floats() {
505        let mut tokens = json_token_iter(b"inf");
506        tokens
507            .next()
508            .expect("there is a token")
509            .expect_err("but it is invalid, ensure that Rust float boundary cases don't parse");
510    }
511
512    #[test]
513    fn mismatched_braces() {
514        // The skip_value function doesn't need to explicitly handle these cases since
515        // token iterator's parser handles them. This test confirms that assumption.
516        assert!(matches!(
517            skip_value(&mut json_token_iter(br#"[{"foo": 5]}"#)),
518            Err(Error {
519                kind: UnexpectedToken(']', "'}', ','"),
520                offset: Some(10)
521            })
522        ));
523        assert!(matches!(
524            skip_value(&mut json_token_iter(br#"{"foo": 5]}"#)),
525            Err(Error {
526                kind: UnexpectedToken(']', "'}', ','"),
527                offset: Some(9)
528            })
529        ));
530        assert!(matches!(
531            skip_value(&mut json_token_iter(br#"[5,6}"#)),
532            Err(Error {
533                kind: UnexpectedToken('}', "']', ','"),
534                offset: Some(4)
535            })
536        ));
537    }
538
539    #[test]
540    fn skip_nested() {
541        let mut tokens = json_token_iter(
542            br#"
543            {"struct": {"foo": 5, "bar": 11, "arr": [1, 2, 3, {}, 5, []]},
544             "arr": [[], [[]], [{"arr":[]}]],
545             "simple": "foo"}
546            true
547        "#,
548        );
549        skip_value(&mut tokens).unwrap();
550        assert!(matches!(
551            tokens.next(),
552            Some(Ok(Token::ValueBool { value: true, .. }))
553        ))
554    }
555
556    #[test]
557    fn test_expect_start_object() {
558        expect_err_custom(
559            "expected StartObject",
560            Some(2),
561            expect_start_object(value_bool(2, true)),
562        );
563        assert!(expect_start_object(start_object(0)).is_ok());
564    }
565
566    #[test]
567    fn test_expect_start_array() {
568        expect_err_custom(
569            "expected StartArray",
570            Some(2),
571            expect_start_array(value_bool(2, true)),
572        );
573        assert!(expect_start_array(start_array(0)).is_ok());
574    }
575
576    #[test]
577    fn test_expect_string_or_null() {
578        assert_eq!(None, expect_string_or_null(value_null(0)).unwrap());
579        assert_eq!(
580            Some(EscapedStr("test\\n")),
581            expect_string_or_null(value_string(0, "test\\n")).unwrap()
582        );
583        expect_err_custom(
584            "expected ValueString or ValueNull",
585            None,
586            expect_string_or_null(value_bool(0, true)),
587        );
588    }
589
590    #[test]
591    fn test_expect_number_or_null() {
592        assert_eq!(None, expect_number_or_null(value_null(0)).unwrap());
593        assert_eq!(
594            Some(Number::PosInt(5)),
595            expect_number_or_null(value_number(0, Number::PosInt(5))).unwrap()
596        );
597        expect_err_custom(
598            "expected ValueString, ValueNumber, or ValueNull",
599            None,
600            expect_number_or_null(value_bool(0, true)),
601        );
602        assert_eq!(
603            Some(Number::Float(f64::INFINITY)),
604            expect_number_or_null(value_string(0, "Infinity")).unwrap()
605        );
606        expect_err_custom(
607            "only `Infinity`, `-Infinity`, `NaN` can represent a float as a string but found `123`",
608            Some(0),
609            expect_number_or_null(value_string(0, "123")),
610        );
611        match expect_number_or_null(value_string(0, "NaN")) {
612            Ok(Some(Number::Float(v))) if v.is_nan() => {
613                // ok
614            }
615            not_ok => {
616                panic!("expected nan, found: {not_ok:?}")
617            }
618        }
619    }
620
621    #[test]
622    fn test_expect_blob_or_null() {
623        assert_eq!(None, expect_blob_or_null(value_null(0)).unwrap());
624        assert_eq!(
625            Some(Blob::new(b"hello!".to_vec())),
626            expect_blob_or_null(value_string(0, "aGVsbG8h")).unwrap()
627        );
628        expect_err_custom(
629            "expected ValueString or ValueNull",
630            None,
631            expect_blob_or_null(value_bool(0, true)),
632        );
633    }
634
635    #[test]
636    fn test_expect_timestamp_or_null() {
637        assert_eq!(
638            None,
639            expect_timestamp_or_null(value_null(0), Format::HttpDate).unwrap()
640        );
641        for (invalid, display_name) in &[
642            ("NaN", "NaN"),
643            ("Infinity", "infinity"),
644            ("-Infinity", "infinity"),
645        ] {
646            expect_err_custom(
647                format!("{display_name} is not a valid epoch").as_str(),
648                None,
649                expect_timestamp_or_null(value_string(0, invalid), Format::EpochSeconds),
650            );
651        }
652        assert_eq!(
653            Some(DateTime::from_secs_f64(2048.0)),
654            expect_timestamp_or_null(value_number(0, Number::Float(2048.0)), Format::EpochSeconds)
655                .unwrap()
656        );
657        assert_eq!(
658            Some(DateTime::from_secs_f64(1445412480.0)),
659            expect_timestamp_or_null(
660                value_string(0, "Wed, 21 Oct 2015 07:28:00 GMT"),
661                Format::HttpDate
662            )
663            .unwrap()
664        );
665        assert_eq!(
666            Some(DateTime::from_secs_f64(1445412480.0)),
667            expect_timestamp_or_null(value_string(0, "2015-10-21T07:28:00Z"), Format::DateTime)
668                .unwrap()
669        );
670        expect_err_custom(
671                "only `Infinity`, `-Infinity`, `NaN` can represent a float as a string but found `wrong`",
672                Some(0),
673            expect_timestamp_or_null(value_string(0, "wrong"), Format::EpochSeconds)
674        );
675        expect_err_custom(
676            "expected ValueString or ValueNull",
677            None,
678            expect_timestamp_or_null(value_number(0, Number::Float(0.0)), Format::DateTime),
679        );
680    }
681
682    #[test]
683    fn test_expect_document() {
684        let test = |value| expect_document(&mut json_token_iter(value).peekable()).unwrap();
685        assert_eq!(Document::Null, test(b"null"));
686        assert_eq!(Document::Bool(true), test(b"true"));
687        assert_eq!(Document::Number(Number::Float(3.2)), test(b"3.2"));
688        assert_eq!(Document::String("Foo\nBar".into()), test(b"\"Foo\\nBar\""));
689        assert_eq!(Document::Array(Vec::new()), test(b"[]"));
690        assert_eq!(Document::Object(HashMap::new()), test(b"{}"));
691        assert_eq!(
692            Document::Array(vec![
693                Document::Number(Number::PosInt(1)),
694                Document::Bool(false),
695                Document::String("s".into()),
696                Document::Array(Vec::new()),
697                Document::Object(HashMap::new()),
698            ]),
699            test(b"[1,false,\"s\",[],{}]")
700        );
701        assert_eq!(
702            Document::Object(
703                vec![
704                    ("num".to_string(), Document::Number(Number::PosInt(1))),
705                    ("bool".to_string(), Document::Bool(true)),
706                    ("string".to_string(), Document::String("s".into())),
707                    (
708                        "array".to_string(),
709                        Document::Array(vec![
710                            Document::Object(
711                                vec![("foo".to_string(), Document::Bool(false))]
712                                    .into_iter()
713                                    .collect(),
714                            ),
715                            Document::Object(
716                                vec![("bar".to_string(), Document::Bool(true))]
717                                    .into_iter()
718                                    .collect(),
719                            ),
720                        ])
721                    ),
722                    (
723                        "nested".to_string(),
724                        Document::Object(
725                            vec![("test".to_string(), Document::Null),]
726                                .into_iter()
727                                .collect()
728                        )
729                    ),
730                ]
731                .into_iter()
732                .collect()
733            ),
734            test(
735                br#"
736                { "num": 1,
737                  "bool": true,
738                  "string": "s",
739                  "array":
740                      [{ "foo": false },
741                       { "bar": true }],
742                  "nested": { "test": null } }
743                "#
744            )
745        );
746    }
747
748    #[test]
749    fn test_document_recursion_limit() {
750        let mut value = String::new();
751        value.extend(std::iter::repeat_n('[', 300));
752        value.extend(std::iter::repeat_n(']', 300));
753        expect_err_custom(
754            "exceeded max recursion depth while parsing document",
755            None,
756            expect_document(&mut json_token_iter(value.as_bytes()).peekable()),
757        );
758
759        value = String::new();
760        value.extend(std::iter::repeat_n("{\"t\":", 300));
761        value.push('1');
762        value.extend(std::iter::repeat_n('}', 300));
763        expect_err_custom(
764            "exceeded max recursion depth while parsing document",
765            None,
766            expect_document(&mut json_token_iter(value.as_bytes()).peekable()),
767        );
768    }
769
770    #[test]
771    fn test_expect_number_as_string_preserves_precision() {
772        use crate::deserialize::json_token_iter;
773
774        // Test large integer that fits in u64 but would lose precision in f64
775        // f64 has 53 bits of precision, so numbers > 2^53 lose precision
776        let input = b"18450000000000000000"; // 2^53 + 1, loses precision in f64
777        let mut iter = json_token_iter(input);
778        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
779        assert_eq!(result, Some("18450000000000000000"));
780
781        // Test large negative integer
782        let input = b"-9007199254740993";
783        let mut iter = json_token_iter(input);
784        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
785        assert_eq!(result, Some("-9007199254740993"));
786
787        // Test decimal with many digits
788        let input = b"123456789.123456789";
789        let mut iter = json_token_iter(input);
790        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
791        assert_eq!(result, Some("123456789.123456789"));
792
793        // Test scientific notation
794        let input = b"1.23e+50";
795        let mut iter = json_token_iter(input);
796        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
797        assert_eq!(result, Some("1.23e+50"));
798
799        // Test negative scientific notation
800        let input = b"-1.23e-50";
801        let mut iter = json_token_iter(input);
802        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
803        assert_eq!(result, Some("-1.23e-50"));
804
805        // Test null
806        let input = b"null";
807        let mut iter = json_token_iter(input);
808        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
809        assert_eq!(result, None);
810
811        // Test small numbers still work
812        let input = b"42";
813        let mut iter = json_token_iter(input);
814        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
815        assert_eq!(result, Some("42"));
816
817        // Test zero
818        let input = b"0";
819        let mut iter = json_token_iter(input);
820        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
821        assert_eq!(result, Some("0"));
822    }
823
824    #[test]
825    fn test_expect_number_as_string_error_cases() {
826        use crate::deserialize::json_token_iter;
827
828        // Test error when token is a string (not a number)
829        let input = b"\"not a number\"";
830        let mut iter = json_token_iter(input);
831        let result = expect_number_as_string_or_null(iter.next(), input);
832        assert!(result.is_err());
833
834        // Test error when token is a boolean
835        let input = b"true";
836        let mut iter = json_token_iter(input);
837        let result = expect_number_as_string_or_null(iter.next(), input);
838        assert!(result.is_err());
839
840        // Test error when token is an object
841        let input = b"{}";
842        let mut iter = json_token_iter(input);
843        let result = expect_number_as_string_or_null(iter.next(), input);
844        assert!(result.is_err());
845
846        // Test error when token is an array
847        let input = b"[]";
848        let mut iter = json_token_iter(input);
849        let result = expect_number_as_string_or_null(iter.next(), input);
850        assert!(result.is_err());
851    }
852
853    // Property-based tests to validate with random inputs
854    mod proptest_tests {
855        use super::*;
856        use crate::deserialize::json_token_iter;
857        use proptest::prelude::*;
858
859        proptest! {
860            #[test]
861            fn extracted_large_integer_matches_input(
862                // Generate 20-100 digit numbers (way bigger than i64 max: 19 digits)
863                num_str in "[1-9][0-9]{19,99}"
864            ) {
865                let input_bytes = num_str.as_bytes();
866                let mut iter = json_token_iter(input_bytes);
867                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
868
869                prop_assert_eq!(result, Some(num_str.as_str()));
870            }
871
872            #[test]
873            fn extracted_large_negative_integer_matches_input(
874                // Generate negative numbers with 20-100 digits
875                num_str in "-[1-9][0-9]{19,99}"
876            ) {
877                let input_bytes = num_str.as_bytes();
878                let mut iter = json_token_iter(input_bytes);
879                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
880
881                prop_assert_eq!(result, Some(num_str.as_str()));
882            }
883
884            #[test]
885            fn extracted_scientific_notation_matches_input(
886                mantissa in -999999999i64..999999999i64,
887                exponent in -100i32..100i32
888            ) {
889                let input = format!("{}e{}", mantissa, exponent);
890                let input_bytes = input.as_bytes();
891
892                let mut iter = json_token_iter(input_bytes);
893                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
894
895                prop_assert_eq!(result, Some(input.as_str()));
896            }
897
898            #[test]
899            fn null_always_returns_none(
900                // Generate random whitespace/formatting around null
901                prefix in "[ \t\n\r]*",
902                suffix in "[ \t\n\r]*"
903            ) {
904                let input = format!("{}null{}", prefix, suffix);
905                let input_bytes = input.as_bytes();
906
907                let mut iter = json_token_iter(input_bytes);
908                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
909
910                prop_assert_eq!(result, None);
911            }
912        }
913    }
914}