aws_smithy_schema/schema/serde/
deserializer.rs

1/*
2 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 */
5
6//! Shape deserialization interfaces for the Smithy data model.
7
8use super::error::SerdeError;
9use crate::Schema;
10use aws_smithy_types::{BigDecimal, BigInteger, Blob, DateTime, Document};
11
12/// Deserializes Smithy shapes from a serial format.
13///
14/// This trait provides a format-agnostic API for deserializing the Smithy data model.
15/// Implementations read from a serial format and create data objects based on schemas.
16///
17/// The deserializer uses a consumer pattern for aggregate types (structures, lists, maps)
18/// to avoid trait object limitations and enable efficient deserialization without
19/// intermediate allocations.
20///
21/// # Consumer Pattern
22///
23/// For aggregate types, the deserializer calls a consumer function for each element/member.
24/// The consumer receives mutable state and updates it with each deserialized value.
25/// This pattern:
26/// - Avoids trait object issues with generic methods
27/// - Enables zero-cost abstractions (closures can be inlined)
28/// - Allows caller to control deserialization order and state management
29/// - Matches the SEP's recommendation for compiled typed languages
30/// - Uses `&mut dyn ShapeDeserializer` so composite deserializers (e.g., HTTP
31///   binding + body) can transparently delegate without the consumer knowing
32///   the concrete deserializer type. This enables runtime protocol swapping.
33///
34/// # Example
35///
36/// ```ignore
37/// // Deserializing a structure
38/// let mut builder = MyStructBuilder::default();
39/// deserializer.read_struct(
40///     &MY_STRUCT_SCHEMA,
41///     &mut |member, deser| {
42///         match member.member_index() {
43///             Some(0) => builder.field1 = Some(deser.read_string(member)?),
44///             Some(1) => builder.field2 = Some(deser.read_integer(member)?),
45///             _ => {}
46///         }
47///         Ok(())
48///     },
49/// )?;
50/// let my_struct = builder.build();
51/// ```
52/// Maximum pre-allocation size for containers, used to prevent denial-of-service
53/// from untrusted payloads claiming excessively large sizes.
54pub const MAX_CONTAINER_PREALLOC: usize = 10_000;
55
56/// Caps a raw container size at [`MAX_CONTAINER_PREALLOC`].
57///
58/// Implementations of [`ShapeDeserializer::container_size`] SHOULD use this
59/// when returning a size derived from untrusted input (e.g., a CBOR length header).
60pub fn capped_container_size(raw: usize) -> usize {
61    raw.min(MAX_CONTAINER_PREALLOC)
62}
63
64pub trait ShapeDeserializer {
65    /// Reads a structure from the deserializer.
66    ///
67    /// The consumer is called for each member with the member schema and a
68    /// `&mut dyn ShapeDeserializer` to read the member value. Using `dyn`
69    /// allows composite deserializers (e.g., HTTP binding + body) to
70    /// transparently delegate without the consumer knowing the concrete type.
71    fn read_struct(
72        &mut self,
73        schema: &Schema,
74        state: &mut dyn FnMut(&Schema, &mut dyn ShapeDeserializer) -> Result<(), SerdeError>,
75    ) -> Result<(), SerdeError>;
76
77    /// Reads a list from the deserializer.
78    ///
79    /// The consumer is called for each element with a `&mut dyn ShapeDeserializer`.
80    fn read_list(
81        &mut self,
82        schema: &Schema,
83        state: &mut dyn FnMut(&mut dyn ShapeDeserializer) -> Result<(), SerdeError>,
84    ) -> Result<(), SerdeError>;
85
86    /// Reads a map from the deserializer.
87    ///
88    /// The consumer is called for each entry with the key and a `&mut dyn ShapeDeserializer`.
89    fn read_map(
90        &mut self,
91        schema: &Schema,
92        state: &mut dyn FnMut(String, &mut dyn ShapeDeserializer) -> Result<(), SerdeError>,
93    ) -> Result<(), SerdeError>;
94
95    /// Reads a boolean value.
96    fn read_boolean(&mut self, schema: &Schema) -> Result<bool, SerdeError>;
97
98    /// Reads a byte (i8) value.
99    fn read_byte(&mut self, schema: &Schema) -> Result<i8, SerdeError>;
100
101    /// Reads a short (i16) value.
102    fn read_short(&mut self, schema: &Schema) -> Result<i16, SerdeError>;
103
104    /// Reads an integer (i32) value.
105    fn read_integer(&mut self, schema: &Schema) -> Result<i32, SerdeError>;
106
107    /// Reads a long (i64) value.
108    fn read_long(&mut self, schema: &Schema) -> Result<i64, SerdeError>;
109
110    /// Reads a float (f32) value.
111    fn read_float(&mut self, schema: &Schema) -> Result<f32, SerdeError>;
112
113    /// Reads a double (f64) value.
114    fn read_double(&mut self, schema: &Schema) -> Result<f64, SerdeError>;
115
116    /// Reads a big integer value.
117    fn read_big_integer(&mut self, schema: &Schema) -> Result<BigInteger, SerdeError>;
118
119    /// Reads a big decimal value.
120    fn read_big_decimal(&mut self, schema: &Schema) -> Result<BigDecimal, SerdeError>;
121
122    /// Reads a string value.
123    fn read_string(&mut self, schema: &Schema) -> Result<String, SerdeError>;
124
125    /// Reads a blob (byte array) value.
126    fn read_blob(&mut self, schema: &Schema) -> Result<Blob, SerdeError>;
127
128    /// Reads a timestamp value.
129    fn read_timestamp(&mut self, schema: &Schema) -> Result<DateTime, SerdeError>;
130
131    /// Reads a document value.
132    fn read_document(&mut self, schema: &Schema) -> Result<Document, SerdeError>;
133
134    /// Checks if the current value is null.
135    ///
136    /// This is used for sparse collections where null values are significant.
137    fn is_null(&self) -> bool;
138
139    /// Consumes a null value, advancing past it.
140    ///
141    /// This should be called after `is_null()` returns true to advance the
142    /// deserializer past the null token.
143    fn read_null(&mut self) -> Result<(), SerdeError> {
144        Ok(())
145    }
146
147    /// Returns the size of the current container if known.
148    ///
149    /// This is an optimization hint that allows pre-allocating collections
150    /// with the correct capacity. Returns `None` if the size is unknown or
151    /// not applicable.
152    ///
153    /// Implementations SHOULD cap the returned value at a reasonable maximum
154    /// (e.g., 10,000) to prevent denial-of-service from untrusted payloads
155    /// that claim excessively large container sizes (e.g., a CBOR header
156    /// declaring billions of elements). Use [`capped_container_size`] to apply
157    /// a standard cap.
158    fn container_size(&self) -> Option<usize>;
159
160    // --- Collection helper methods ---
161    //
162    // This is a **closed set** of helpers for the most common AWS collection
163    // patterns. No additional helpers will be added. New collection patterns
164    // should use the generic `read_list`/`read_map` with closures.
165    //
166    // These exist for two reasons:
167    // 1. Code size: each helper replaces ~6-8 lines of closure boilerplate in
168    //    generated code, yielding ~43% reduction for collection-heavy models.
169    // 2. Performance: codec implementations (e.g., `JsonDeserializer`) override
170    //    these to call concrete `read_string`/`read_integer`/etc. methods
171    //    directly, eliminating per-element vtable dispatch. This requires the
172    //    methods to be on the core trait (not an extension trait) since they
173    //    are called through `&mut dyn ShapeDeserializer` in generated code.
174
175    /// Reads a list of strings.
176    fn read_string_list(&mut self, schema: &Schema) -> Result<Vec<String>, SerdeError> {
177        let mut out = Vec::new();
178        self.read_list(schema, &mut |deser| {
179            out.push(deser.read_string(schema)?);
180            Ok(())
181        })?;
182        Ok(out)
183    }
184
185    /// Reads a list of blobs.
186    fn read_blob_list(
187        &mut self,
188        schema: &Schema,
189    ) -> Result<Vec<aws_smithy_types::Blob>, SerdeError> {
190        let mut out = Vec::new();
191        self.read_list(schema, &mut |deser| {
192            out.push(deser.read_blob(schema)?);
193            Ok(())
194        })?;
195        Ok(out)
196    }
197
198    /// Reads a list of integers.
199    fn read_integer_list(&mut self, schema: &Schema) -> Result<Vec<i32>, SerdeError> {
200        let mut out = Vec::new();
201        self.read_list(schema, &mut |deser| {
202            out.push(deser.read_integer(schema)?);
203            Ok(())
204        })?;
205        Ok(out)
206    }
207
208    /// Reads a list of longs.
209    fn read_long_list(&mut self, schema: &Schema) -> Result<Vec<i64>, SerdeError> {
210        let mut out = Vec::new();
211        self.read_list(schema, &mut |deser| {
212            out.push(deser.read_long(schema)?);
213            Ok(())
214        })?;
215        Ok(out)
216    }
217
218    /// Reads a map with string values.
219    fn read_string_string_map(
220        &mut self,
221        schema: &Schema,
222    ) -> Result<std::collections::HashMap<String, String>, SerdeError> {
223        let mut out = std::collections::HashMap::new();
224        self.read_map(schema, &mut |key, deser| {
225            out.insert(key, deser.read_string(schema)?);
226            Ok(())
227        })?;
228        Ok(out)
229    }
230}