aws_smithy_json/deserialize/
token.rs

1/*
2 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 */
5
6use crate::deserialize::error::DeserializeError as Error;
7use crate::deserialize::must_not_be_finite;
8use crate::escape::unescape_string;
9pub use crate::escape::EscapeError;
10use aws_smithy_types::date_time::Format;
11use aws_smithy_types::primitive::Parse;
12use aws_smithy_types::{base64, Blob, DateTime, Document, Number};
13use std::borrow::Cow;
14use std::collections::HashMap;
15use std::iter::Peekable;
16
17/// New-type around `&str` that indicates the string is an escaped JSON string.
18/// Provides functions for retrieving the string in either form.
19#[derive(Debug, PartialEq, Eq, Copy, Clone)]
20pub struct EscapedStr<'a>(&'a str);
21
22impl<'a> EscapedStr<'a> {
23    pub fn new(value: &'a str) -> EscapedStr<'a> {
24        EscapedStr(value)
25    }
26
27    /// Returns the escaped string value
28    pub fn as_escaped_str(&self) -> &'a str {
29        self.0
30    }
31
32    /// Unescapes the string and returns it.
33    /// If the string doesn't need unescaping, it will be returned directly.
34    pub fn to_unescaped(self) -> Result<Cow<'a, str>, EscapeError> {
35        unescape_string(self.0)
36    }
37}
38
39/// Represents the location of a token
40#[derive(Debug, Eq, PartialEq, Copy, Clone)]
41pub struct Offset(pub usize);
42
43impl Offset {
44    /// Creates a custom error from the offset
45    pub fn error(&self, msg: Cow<'static, str>) -> Error {
46        Error::custom(msg).with_offset(self.0)
47    }
48}
49
/// Enum representing the different JSON tokens that can be returned by
/// [`crate::deserialize::json_token_iter`].
///
/// Every variant records the [`Offset`] of the token; variants that carry data
/// additionally expose it through a `key` or `value` field.
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    /// Opens an array; the matching close is [`Token::EndArray`].
    StartArray {
        offset: Offset,
    },
    /// Closes an array.
    EndArray {
        offset: Offset,
    },
    /// Key of an object member, still in escaped form.
    ObjectKey {
        offset: Offset,
        key: EscapedStr<'a>,
    },
    /// Opens an object; the matching close is [`Token::EndObject`].
    StartObject {
        offset: Offset,
    },
    /// Closes an object.
    EndObject {
        offset: Offset,
    },
    /// Boolean value.
    ValueBool {
        offset: Offset,
        value: bool,
    },
    /// Null value.
    ValueNull {
        offset: Offset,
    },
    /// Numeric value, already converted to a [`Number`].
    ValueNumber {
        offset: Offset,
        value: Number,
    },
    /// String value, still in escaped form.
    ValueString {
        offset: Offset,
        value: EscapedStr<'a>,
    },
}
86
87impl Token<'_> {
88    pub fn offset(&self) -> Offset {
89        use Token::*;
90        *match self {
91            StartArray { offset } => offset,
92            EndArray { offset } => offset,
93            ObjectKey { offset, .. } => offset,
94            StartObject { offset } => offset,
95            EndObject { offset } => offset,
96            ValueBool { offset, .. } => offset,
97            ValueNull { offset } => offset,
98            ValueNumber { offset, .. } => offset,
99            ValueString { offset, .. } => offset,
100        }
101    }
102
103    /// Builds an error from the token's offset
104    pub fn error(&self, msg: Cow<'static, str>) -> Error {
105        self.offset().error(msg)
106    }
107}
108
109macro_rules! expect_fn {
110    ($name:ident, $token:ident, $doc:tt) => {
111        #[doc=$doc]
112        pub fn $name(token_result: Option<Result<Token<'_>, Error>>) -> Result<(), Error> {
113            match token_result.transpose()? {
114                Some(Token::$token { .. }) => Ok(()),
115                Some(token) => {
116                    Err(token.error(Cow::Borrowed(concat!("expected ", stringify!($token)))))
117                }
118                None => Err(Error::custom(concat!("expected ", stringify!($token)))),
119            }
120        }
121    };
122}
123
124expect_fn!(
125    expect_start_object,
126    StartObject,
127    "Expects a [Token::StartObject] token and returns an error if it's not present."
128);
129expect_fn!(
130    expect_start_array,
131    StartArray,
132    "Expects a [Token::StartArray] token and returns an error if it's not present."
133);
134
135macro_rules! expect_value_or_null_fn {
136    ($name:ident, $token:ident, $typ:ident, $doc:tt) => {
137        #[doc=$doc]
138        #[allow(unknown_lints)]
139        #[allow(mismatched_lifetime_syntaxes)]
140        pub fn $name(token: Option<Result<Token<'_>, Error>>) -> Result<Option<$typ>, Error> {
141            match token.transpose()? {
142                Some(Token::ValueNull { .. }) => Ok(None),
143                Some(Token::$token { value, .. }) => Ok(Some(value)),
144                _ => Err(Error::custom(concat!(
145                    "expected ",
146                    stringify!($token),
147                    " or ValueNull"
148                ))),
149            }
150        }
151    };
152}
153
154expect_value_or_null_fn!(expect_bool_or_null, ValueBool, bool, "Expects a [Token::ValueBool] or [Token::ValueNull], and returns the bool value if it's not null.");
155expect_value_or_null_fn!(expect_string_or_null, ValueString, EscapedStr, "Expects a [Token::ValueString] or [Token::ValueNull], and returns the [EscapedStr] value if it's not null.");
156
157/// Expects a [Token::ValueString], [Token::ValueNumber] or [Token::ValueNull].
158///
159/// If the value is a string, it MUST be `Infinity`, `-Infinity` or `Nan`.
160/// If the value is a number, it is returned directly
161pub fn expect_number_or_null(
162    token: Option<Result<Token<'_>, Error>>,
163) -> Result<Option<Number>, Error> {
164    match token.transpose()? {
165        Some(Token::ValueNull { .. }) => Ok(None),
166        Some(Token::ValueNumber { value, offset, .. }) => {
167            // Validate finite numbers - error on infinity/NaN
168            match value {
169                Number::Float(f) if !f.is_finite() => {
170                    Err(Error::custom("number must be finite").with_offset(offset.0))
171                }
172                _ => Ok(Some(value)),
173            }
174        }
175        Some(Token::ValueString { value, offset }) => match value.to_unescaped() {
176            Err(err) => Err(Error::custom_source( "expected a valid string, escape was invalid", err).with_offset(offset.0)),
177            Ok(v) => f64::parse_smithy_primitive(v.as_ref())
178                // disregard the exact error
179                .map_err(|_|())
180                // only infinite / NaN can be used as strings
181                .and_then(must_not_be_finite)
182                .map(|float| Some(aws_smithy_types::Number::Float(float)))
183                // convert to a helpful error
184                .map_err(|_| {
185                    Error::custom(
186                        format!(
187                        "only `Infinity`, `-Infinity`, `NaN` can represent a float as a string but found `{v}`"
188                    )).with_offset(offset.0)
189                }),
190        },
191        _ => Err(Error::custom(
192            "expected ValueString, ValueNumber, or ValueNull",
193        )),
194    }
195}
196
197/// Expects a [Token::ValueNumber] or [Token::ValueNull], and returns the number as a string
198/// to preserve arbitrary precision.
199///
200/// This function extracts the raw JSON number string without converting it to u64/i64/f64,
201/// which would cause precision loss for numbers larger than those types can represent.
202/// This is essential for BigInteger and BigDecimal support.
203///
204/// # Arguments
205/// * `token` - The token to extract the number from
206/// * `input` - The original JSON input bytes (needed to extract the raw number string)
207///
208/// # Returns
209/// * `Ok(Some(string))` - The number as a string slice
210/// * `Ok(None)` - If the token is null
211/// * `Err` - If the token is not a number or null
212pub fn expect_number_as_string_or_null<'a>(
213    token: Option<Result<Token<'a>, Error>>,
214    input: &'a [u8],
215) -> Result<Option<&'a str>, Error> {
216    match token.transpose()? {
217        Some(Token::ValueNull { .. }) => Ok(None),
218        Some(Token::ValueNumber { offset, .. }) => {
219            let start = offset.0;
220            let mut end = start;
221
222            // Skip optional minus sign
223            if end < input.len() && input[end] == b'-' {
224                end += 1;
225            }
226
227            // Scan digits, decimal point, exponent
228            while end < input.len() {
229                match input[end] {
230                    b'0'..=b'9' | b'.' | b'e' | b'E' | b'+' | b'-' => end += 1,
231                    _ => break,
232                }
233            }
234
235            let number_slice = &input[start..end];
236            let number_str = std::str::from_utf8(number_slice)
237                .map_err(|_| Error::custom("invalid UTF-8 in number"))?;
238            Ok(Some(number_str))
239        }
240        _ => Err(Error::custom("expected ValueNumber or ValueNull")),
241    }
242}
243
244/// Expects a [Token::ValueString] or [Token::ValueNull]. If the value is a string, it interprets it as a base64 encoded [Blob] value.
245pub fn expect_blob_or_null(token: Option<Result<Token<'_>, Error>>) -> Result<Option<Blob>, Error> {
246    Ok(match expect_string_or_null(token)? {
247        Some(value) => Some(Blob::new(
248            base64::decode(value.as_escaped_str())
249                .map_err(|err| Error::custom_source("failed to decode base64", err))?,
250        )),
251        None => None,
252    })
253}
254
255/// Expects a [Token::ValueNull], [Token::ValueString], or [Token::ValueNumber] depending
256/// on the passed in `timestamp_format`. If there is a non-null value, it interprets it as an
257/// [`DateTime` ] in the requested format.
258pub fn expect_timestamp_or_null(
259    token: Option<Result<Token<'_>, Error>>,
260    timestamp_format: Format,
261) -> Result<Option<DateTime>, Error> {
262    Ok(match timestamp_format {
263        Format::EpochSeconds => expect_number_or_null(token)?
264            .map(|v| v.to_f64_lossy())
265            .map(|v| {
266                if v.is_nan() {
267                    Err(Error::custom("NaN is not a valid epoch"))
268                } else if v.is_infinite() {
269                    Err(Error::custom("infinity is not a valid epoch"))
270                } else {
271                    Ok(DateTime::from_secs_f64(v))
272                }
273            })
274            .transpose()?,
275        Format::DateTime | Format::HttpDate | Format::DateTimeWithOffset => {
276            expect_string_or_null(token)?
277                .map(|v| DateTime::from_str(v.as_escaped_str(), timestamp_format))
278                .transpose()
279                .map_err(|err| Error::custom_source("failed to parse timestamp", err))?
280        }
281    })
282}
283
284/// Expects and parses a complete document value.
285pub fn expect_document<'a, I>(tokens: &mut Peekable<I>) -> Result<Document, Error>
286where
287    I: Iterator<Item = Result<Token<'a>, Error>>,
288{
289    expect_document_inner(tokens, 0)
290}
291
292const MAX_DOCUMENT_RECURSION: usize = 256;
293
294fn expect_document_inner<'a, I>(tokens: &mut Peekable<I>, depth: usize) -> Result<Document, Error>
295where
296    I: Iterator<Item = Result<Token<'a>, Error>>,
297{
298    if depth >= MAX_DOCUMENT_RECURSION {
299        return Err(Error::custom(
300            "exceeded max recursion depth while parsing document",
301        ));
302    }
303    match tokens.next().transpose()? {
304        Some(Token::ValueNull { .. }) => Ok(Document::Null),
305        Some(Token::ValueBool { value, .. }) => Ok(Document::Bool(value)),
306        Some(Token::ValueNumber { value, .. }) => Ok(Document::Number(value)),
307        Some(Token::ValueString { value, .. }) => {
308            Ok(Document::String(value.to_unescaped()?.into_owned()))
309        }
310        Some(Token::StartObject { .. }) => {
311            let mut object = HashMap::new();
312            loop {
313                match tokens.next().transpose()? {
314                    Some(Token::EndObject { .. }) => break,
315                    Some(Token::ObjectKey { key, .. }) => {
316                        let key = key.to_unescaped()?.into_owned();
317                        let value = expect_document_inner(tokens, depth + 1)?;
318                        object.insert(key, value);
319                    }
320                    _ => return Err(Error::custom("expected object key or end object")),
321                }
322            }
323            Ok(Document::Object(object))
324        }
325        Some(Token::StartArray { .. }) => {
326            let mut array = Vec::new();
327            loop {
328                match tokens.peek() {
329                    Some(Ok(Token::EndArray { .. })) => {
330                        tokens.next().transpose().unwrap();
331                        break;
332                    }
333                    _ => array.push(expect_document_inner(tokens, depth + 1)?),
334                }
335            }
336            Ok(Document::Array(array))
337        }
338        Some(Token::EndObject { .. }) | Some(Token::ObjectKey { .. }) => {
339            unreachable!("end object and object key are handled in start object")
340        }
341        Some(Token::EndArray { .. }) => unreachable!("end array is handled in start array"),
342        None => Err(Error::custom("expected value")),
343    }
344}
345
346/// Skips an entire value in the token stream. Errors if it isn't a value.
347pub fn skip_value<'a>(
348    tokens: &mut impl Iterator<Item = Result<Token<'a>, Error>>,
349) -> Result<(), Error> {
350    skip_inner(0, tokens)
351}
352
353/// Assumes a start object/array token has already been consumed and skips tokens until
354/// until its corresponding end object/array token is found.
355pub fn skip_to_end<'a>(
356    tokens: &mut impl Iterator<Item = Result<Token<'a>, Error>>,
357) -> Result<(), Error> {
358    skip_inner(1, tokens)
359}
360
361fn skip_inner<'a>(
362    depth: isize,
363    tokens: &mut impl Iterator<Item = Result<Token<'a>, Error>>,
364) -> Result<(), Error> {
365    loop {
366        match tokens.next().transpose()? {
367            Some(Token::StartObject { .. }) | Some(Token::StartArray { .. }) => {
368                skip_inner(depth + 1, tokens)?;
369                if depth == 0 {
370                    break;
371                }
372            }
373            Some(Token::EndObject { .. }) | Some(Token::EndArray { .. }) => {
374                debug_assert!(depth > 0);
375                break;
376            }
377            Some(Token::ValueNull { .. })
378            | Some(Token::ValueBool { .. })
379            | Some(Token::ValueNumber { .. })
380            | Some(Token::ValueString { .. }) => {
381                if depth == 0 {
382                    break;
383                }
384            }
385            Some(Token::ObjectKey { .. }) => {}
386            _ => return Err(Error::custom("expected value")),
387        }
388    }
389    Ok(())
390}
391
392#[cfg(test)]
393pub mod test {
394    use super::*;
395    use crate::deserialize::error::DeserializeErrorKind as ErrorKind;
396    use crate::deserialize::error::DeserializeErrorKind::UnexpectedToken;
397    use crate::deserialize::json_token_iter;
398
399    pub fn start_array<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
400        Some(Ok(Token::StartArray {
401            offset: Offset(offset),
402        }))
403    }
404
405    pub fn end_array<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
406        Some(Ok(Token::EndArray {
407            offset: Offset(offset),
408        }))
409    }
410
411    pub fn start_object<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
412        Some(Ok(Token::StartObject {
413            offset: Offset(offset),
414        }))
415    }
416
417    pub fn end_object<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
418        Some(Ok(Token::EndObject {
419            offset: Offset(offset),
420        }))
421    }
422
423    pub fn object_key(offset: usize, key: &str) -> Option<Result<Token<'_>, Error>> {
424        Some(Ok(Token::ObjectKey {
425            offset: Offset(offset),
426            key: EscapedStr::new(key),
427        }))
428    }
429
430    pub fn value_bool<'a>(offset: usize, boolean: bool) -> Option<Result<Token<'a>, Error>> {
431        Some(Ok(Token::ValueBool {
432            offset: Offset(offset),
433            value: boolean,
434        }))
435    }
436
437    pub fn value_number<'a>(offset: usize, number: Number) -> Option<Result<Token<'a>, Error>> {
438        Some(Ok(Token::ValueNumber {
439            offset: Offset(offset),
440            value: number,
441        }))
442    }
443
444    pub fn value_null<'a>(offset: usize) -> Option<Result<Token<'a>, Error>> {
445        Some(Ok(Token::ValueNull {
446            offset: Offset(offset),
447        }))
448    }
449
450    pub fn value_string(offset: usize, string: &str) -> Option<Result<Token<'_>, Error>> {
451        Some(Ok(Token::ValueString {
452            offset: Offset(offset),
453            value: EscapedStr::new(string),
454        }))
455    }
456
457    #[track_caller]
458    fn expect_err_custom<T>(message: &str, offset: Option<usize>, result: Result<T, Error>) {
459        let err = result.err().expect("expected error");
460        let (actual_message, actual_offset) = match &err.kind {
461            ErrorKind::Custom { message, .. } => (message.as_ref(), err.offset),
462            _ => panic!("expected ErrorKind::Custom, got {err:?}"),
463        };
464        assert_eq!((message, offset), (actual_message, actual_offset));
465    }
466
467    #[test]
468    fn skip_simple_value() {
469        let mut tokens = json_token_iter(b"null true");
470        skip_value(&mut tokens).unwrap();
471        assert!(matches!(
472            tokens.next(),
473            Some(Ok(Token::ValueBool { value: true, .. }))
474        ))
475    }
476
477    #[test]
478    fn skip_array() {
479        let mut tokens = json_token_iter(b"[1, 2, 3, 4] true");
480        skip_value(&mut tokens).unwrap();
481        assert!(matches!(
482            tokens.next(),
483            Some(Ok(Token::ValueBool { value: true, .. }))
484        ))
485    }
486
487    #[test]
488    fn skip_object() {
489        let mut tokens = json_token_iter(b"{\"one\": 5, \"two\": 3} true");
490        skip_value(&mut tokens).unwrap();
491        assert!(matches!(
492            tokens.next(),
493            Some(Ok(Token::ValueBool { value: true, .. }))
494        ))
495    }
496
497    #[test]
498    fn test_skip_to_end() {
499        let tokens = json_token_iter(b"{\"one\": { \"two\": [] }, \"three\":2 }");
500        let mut tokens = tokens.skip(2);
501        assert!(matches!(tokens.next(), Some(Ok(Token::StartObject { .. }))));
502        skip_to_end(&mut tokens).unwrap();
503        match tokens.next() {
504            Some(Ok(Token::ObjectKey { key, .. })) => {
505                assert_eq!("three", key.as_escaped_str());
506            }
507            _ => panic!("expected object key three"),
508        }
509    }
510
511    #[test]
512    fn test_non_finite_floats() {
513        let mut tokens = json_token_iter(b"inf");
514        tokens
515            .next()
516            .expect("there is a token")
517            .expect_err("but it is invalid, ensure that Rust float boundary cases don't parse");
518    }
519
520    #[test]
521    fn mismatched_braces() {
522        // The skip_value function doesn't need to explicitly handle these cases since
523        // token iterator's parser handles them. This test confirms that assumption.
524        assert!(matches!(
525            skip_value(&mut json_token_iter(br#"[{"foo": 5]}"#)),
526            Err(Error {
527                kind: UnexpectedToken(']', "'}', ','"),
528                offset: Some(10)
529            })
530        ));
531        assert!(matches!(
532            skip_value(&mut json_token_iter(br#"{"foo": 5]}"#)),
533            Err(Error {
534                kind: UnexpectedToken(']', "'}', ','"),
535                offset: Some(9)
536            })
537        ));
538        assert!(matches!(
539            skip_value(&mut json_token_iter(br#"[5,6}"#)),
540            Err(Error {
541                kind: UnexpectedToken('}', "']', ','"),
542                offset: Some(4)
543            })
544        ));
545    }
546
547    #[test]
548    fn skip_nested() {
549        let mut tokens = json_token_iter(
550            br#"
551            {"struct": {"foo": 5, "bar": 11, "arr": [1, 2, 3, {}, 5, []]},
552             "arr": [[], [[]], [{"arr":[]}]],
553             "simple": "foo"}
554            true
555        "#,
556        );
557        skip_value(&mut tokens).unwrap();
558        assert!(matches!(
559            tokens.next(),
560            Some(Ok(Token::ValueBool { value: true, .. }))
561        ))
562    }
563
564    #[test]
565    fn test_expect_start_object() {
566        expect_err_custom(
567            "expected StartObject",
568            Some(2),
569            expect_start_object(value_bool(2, true)),
570        );
571        assert!(expect_start_object(start_object(0)).is_ok());
572    }
573
574    #[test]
575    fn test_expect_start_array() {
576        expect_err_custom(
577            "expected StartArray",
578            Some(2),
579            expect_start_array(value_bool(2, true)),
580        );
581        assert!(expect_start_array(start_array(0)).is_ok());
582    }
583
584    #[test]
585    fn test_expect_string_or_null() {
586        assert_eq!(None, expect_string_or_null(value_null(0)).unwrap());
587        assert_eq!(
588            Some(EscapedStr("test\\n")),
589            expect_string_or_null(value_string(0, "test\\n")).unwrap()
590        );
591        expect_err_custom(
592            "expected ValueString or ValueNull",
593            None,
594            expect_string_or_null(value_bool(0, true)),
595        );
596    }
597
598    #[test]
599    fn test_expect_number_or_null() {
600        assert_eq!(None, expect_number_or_null(value_null(0)).unwrap());
601        assert_eq!(
602            Some(Number::PosInt(5)),
603            expect_number_or_null(value_number(0, Number::PosInt(5))).unwrap()
604        );
605        expect_err_custom(
606            "expected ValueString, ValueNumber, or ValueNull",
607            None,
608            expect_number_or_null(value_bool(0, true)),
609        );
610        assert_eq!(
611            Some(Number::Float(f64::INFINITY)),
612            expect_number_or_null(value_string(0, "Infinity")).unwrap()
613        );
614        expect_err_custom(
615            "only `Infinity`, `-Infinity`, `NaN` can represent a float as a string but found `123`",
616            Some(0),
617            expect_number_or_null(value_string(0, "123")),
618        );
619        match expect_number_or_null(value_string(0, "NaN")) {
620            Ok(Some(Number::Float(v))) if v.is_nan() => {
621                // ok
622            }
623            not_ok => {
624                panic!("expected nan, found: {not_ok:?}")
625            }
626        }
627
628        // Test that infinity in ValueNumber token returns an error
629        let result = expect_number_or_null(value_number(0, Number::Float(f64::INFINITY)));
630        assert!(result.is_err(), "Expected error for infinity token");
631    }
632
633    #[test]
634    fn test_expect_blob_or_null() {
635        assert_eq!(None, expect_blob_or_null(value_null(0)).unwrap());
636        assert_eq!(
637            Some(Blob::new(b"hello!".to_vec())),
638            expect_blob_or_null(value_string(0, "aGVsbG8h")).unwrap()
639        );
640        expect_err_custom(
641            "expected ValueString or ValueNull",
642            None,
643            expect_blob_or_null(value_bool(0, true)),
644        );
645    }
646
647    #[test]
648    fn test_expect_timestamp_or_null() {
649        assert_eq!(
650            None,
651            expect_timestamp_or_null(value_null(0), Format::HttpDate).unwrap()
652        );
653        for (invalid, display_name) in &[
654            ("NaN", "NaN"),
655            ("Infinity", "infinity"),
656            ("-Infinity", "infinity"),
657        ] {
658            expect_err_custom(
659                format!("{display_name} is not a valid epoch").as_str(),
660                None,
661                expect_timestamp_or_null(value_string(0, invalid), Format::EpochSeconds),
662            );
663        }
664        assert_eq!(
665            Some(DateTime::from_secs_f64(2048.0)),
666            expect_timestamp_or_null(value_number(0, Number::Float(2048.0)), Format::EpochSeconds)
667                .unwrap()
668        );
669        assert_eq!(
670            Some(DateTime::from_secs_f64(1445412480.0)),
671            expect_timestamp_or_null(
672                value_string(0, "Wed, 21 Oct 2015 07:28:00 GMT"),
673                Format::HttpDate
674            )
675            .unwrap()
676        );
677        assert_eq!(
678            Some(DateTime::from_secs_f64(1445412480.0)),
679            expect_timestamp_or_null(value_string(0, "2015-10-21T07:28:00Z"), Format::DateTime)
680                .unwrap()
681        );
682        expect_err_custom(
683                "only `Infinity`, `-Infinity`, `NaN` can represent a float as a string but found `wrong`",
684                Some(0),
685            expect_timestamp_or_null(value_string(0, "wrong"), Format::EpochSeconds)
686        );
687        expect_err_custom(
688            "expected ValueString or ValueNull",
689            None,
690            expect_timestamp_or_null(value_number(0, Number::Float(0.0)), Format::DateTime),
691        );
692    }
693
694    #[test]
695    fn test_expect_document() {
696        let test = |value| expect_document(&mut json_token_iter(value).peekable()).unwrap();
697        assert_eq!(Document::Null, test(b"null"));
698        assert_eq!(Document::Bool(true), test(b"true"));
699        assert_eq!(Document::Number(Number::Float(3.2)), test(b"3.2"));
700        assert_eq!(Document::String("Foo\nBar".into()), test(b"\"Foo\\nBar\""));
701        assert_eq!(Document::Array(Vec::new()), test(b"[]"));
702        assert_eq!(Document::Object(HashMap::new()), test(b"{}"));
703        assert_eq!(
704            Document::Array(vec![
705                Document::Number(Number::PosInt(1)),
706                Document::Bool(false),
707                Document::String("s".into()),
708                Document::Array(Vec::new()),
709                Document::Object(HashMap::new()),
710            ]),
711            test(b"[1,false,\"s\",[],{}]")
712        );
713        assert_eq!(
714            Document::Object(
715                vec![
716                    ("num".to_string(), Document::Number(Number::PosInt(1))),
717                    ("bool".to_string(), Document::Bool(true)),
718                    ("string".to_string(), Document::String("s".into())),
719                    (
720                        "array".to_string(),
721                        Document::Array(vec![
722                            Document::Object(
723                                vec![("foo".to_string(), Document::Bool(false))]
724                                    .into_iter()
725                                    .collect(),
726                            ),
727                            Document::Object(
728                                vec![("bar".to_string(), Document::Bool(true))]
729                                    .into_iter()
730                                    .collect(),
731                            ),
732                        ])
733                    ),
734                    (
735                        "nested".to_string(),
736                        Document::Object(
737                            vec![("test".to_string(), Document::Null),]
738                                .into_iter()
739                                .collect()
740                        )
741                    ),
742                ]
743                .into_iter()
744                .collect()
745            ),
746            test(
747                br#"
748                { "num": 1,
749                  "bool": true,
750                  "string": "s",
751                  "array":
752                      [{ "foo": false },
753                       { "bar": true }],
754                  "nested": { "test": null } }
755                "#
756            )
757        );
758    }
759
760    #[test]
761    fn test_document_recursion_limit() {
762        let mut value = String::new();
763        value.extend(std::iter::repeat_n('[', 300));
764        value.extend(std::iter::repeat_n(']', 300));
765        expect_err_custom(
766            "exceeded max recursion depth while parsing document",
767            None,
768            expect_document(&mut json_token_iter(value.as_bytes()).peekable()),
769        );
770
771        value = String::new();
772        value.extend(std::iter::repeat_n("{\"t\":", 300));
773        value.push('1');
774        value.extend(std::iter::repeat_n('}', 300));
775        expect_err_custom(
776            "exceeded max recursion depth while parsing document",
777            None,
778            expect_document(&mut json_token_iter(value.as_bytes()).peekable()),
779        );
780    }
781
782    #[test]
783    fn test_expect_number_as_string_preserves_precision() {
784        use crate::deserialize::json_token_iter;
785
786        // Test large integer that fits in u64 but would lose precision in f64
787        // f64 has 53 bits of precision, so numbers > 2^53 lose precision
788        let input = b"18450000000000000000"; // 2^53 + 1, loses precision in f64
789        let mut iter = json_token_iter(input);
790        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
791        assert_eq!(result, Some("18450000000000000000"));
792
793        // Test large negative integer
794        let input = b"-9007199254740993";
795        let mut iter = json_token_iter(input);
796        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
797        assert_eq!(result, Some("-9007199254740993"));
798
799        // Test decimal with many digits
800        let input = b"123456789.123456789";
801        let mut iter = json_token_iter(input);
802        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
803        assert_eq!(result, Some("123456789.123456789"));
804
805        // Test scientific notation
806        let input = b"1.23e+50";
807        let mut iter = json_token_iter(input);
808        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
809        assert_eq!(result, Some("1.23e+50"));
810
811        // Test negative scientific notation
812        let input = b"-1.23e-50";
813        let mut iter = json_token_iter(input);
814        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
815        assert_eq!(result, Some("-1.23e-50"));
816
817        // Test null
818        let input = b"null";
819        let mut iter = json_token_iter(input);
820        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
821        assert_eq!(result, None);
822
823        // Test small numbers still work
824        let input = b"42";
825        let mut iter = json_token_iter(input);
826        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
827        assert_eq!(result, Some("42"));
828
829        // Test zero
830        let input = b"0";
831        let mut iter = json_token_iter(input);
832        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
833        assert_eq!(result, Some("0"));
834
835        // Test lowercase e in scientific notation is preserved
836        let input = b"2.5e-8";
837        let mut iter = json_token_iter(input);
838        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
839        assert_eq!(result, Some("2.5e-8"));
840
841        // Test uppercase E in scientific notation is preserved
842        let input = b"2.5E-8";
843        let mut iter = json_token_iter(input);
844        let result = expect_number_as_string_or_null(iter.next(), input).unwrap();
845        assert_eq!(result, Some("2.5E-8"));
846    }
847
848    #[test]
849    fn test_expect_number_as_string_error_cases() {
850        use crate::deserialize::json_token_iter;
851
852        // Test error when token is a string (not a number)
853        let input = b"\"not a number\"";
854        let mut iter = json_token_iter(input);
855        let result = expect_number_as_string_or_null(iter.next(), input);
856        assert!(result.is_err());
857
858        // Test error when token is a boolean
859        let input = b"true";
860        let mut iter = json_token_iter(input);
861        let result = expect_number_as_string_or_null(iter.next(), input);
862        assert!(result.is_err());
863
864        // Test error when token is an object
865        let input = b"{}";
866        let mut iter = json_token_iter(input);
867        let result = expect_number_as_string_or_null(iter.next(), input);
868        assert!(result.is_err());
869
870        // Test error when token is an array
871        let input = b"[]";
872        let mut iter = json_token_iter(input);
873        let result = expect_number_as_string_or_null(iter.next(), input);
874        assert!(result.is_err());
875    }
876
877    // Property-based tests to validate with random inputs
878    mod proptest_tests {
879        use super::*;
880        use crate::deserialize::json_token_iter;
881        use proptest::prelude::*;
882
883        proptest! {
884            #[test]
885            fn extracted_large_integer_matches_input(
886                // Generate 20-100 digit numbers (way bigger than i64 max: 19 digits)
887                num_str in "[1-9][0-9]{19,99}"
888            ) {
889                let input_bytes = num_str.as_bytes();
890                let mut iter = json_token_iter(input_bytes);
891                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
892
893                prop_assert_eq!(result, Some(num_str.as_str()));
894            }
895
896            #[test]
897            fn extracted_large_negative_integer_matches_input(
898                // Generate negative numbers with 20-100 digits
899                num_str in "-[1-9][0-9]{19,99}"
900            ) {
901                let input_bytes = num_str.as_bytes();
902                let mut iter = json_token_iter(input_bytes);
903                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
904
905                prop_assert_eq!(result, Some(num_str.as_str()));
906            }
907
908            #[test]
909            fn extracted_scientific_notation_matches_input(
910                mantissa in -999999999i64..999999999i64,
911                exponent in -100i32..100i32
912            ) {
913                let input = format!("{}e{}", mantissa, exponent);
914                let input_bytes = input.as_bytes();
915
916                let mut iter = json_token_iter(input_bytes);
917                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
918
919                prop_assert_eq!(result, Some(input.as_str()));
920            }
921
922            #[test]
923            fn null_always_returns_none(
924                // Generate random whitespace/formatting around null
925                prefix in "[ \t\n\r]*",
926                suffix in "[ \t\n\r]*"
927            ) {
928                let input = format!("{}null{}", prefix, suffix);
929                let input_bytes = input.as_bytes();
930
931                let mut iter = json_token_iter(input_bytes);
932                let result = expect_number_as_string_or_null(iter.next(), input_bytes)?;
933
934                prop_assert_eq!(result, None);
935            }
936        }
937    }
938}