aws_smithy_schema/schema/serde/deserializer.rs
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

//! Shape deserialization interfaces for the Smithy data model.
7
8use super::error::SerdeError;
9use crate::Schema;
10use aws_smithy_types::{BigDecimal, BigInteger, Blob, DateTime, Document};
11
12/// Deserializes Smithy shapes from a serial format.
13///
14/// This trait provides a format-agnostic API for deserializing the Smithy data model.
15/// Implementations read from a serial format and create data objects based on schemas.
16///
17/// The deserializer uses a consumer pattern for aggregate types (structures, lists, maps)
18/// to avoid trait object limitations and enable efficient deserialization without
19/// intermediate allocations.
20///
21/// # Consumer Pattern
22///
23/// For aggregate types, the deserializer calls a consumer function for each element/member.
24/// The consumer receives mutable state and updates it with each deserialized value.
25/// This pattern:
26/// - Avoids trait object issues with generic methods
27/// - Enables zero-cost abstractions (closures can be inlined)
28/// - Allows caller to control deserialization order and state management
29/// - Matches the SEP's recommendation for compiled typed languages
30/// - Uses `&mut dyn ShapeDeserializer` so composite deserializers (e.g., HTTP
31/// binding + body) can transparently delegate without the consumer knowing
32/// the concrete deserializer type. This enables runtime protocol swapping.
33///
34/// # Example
35///
36/// ```ignore
37/// // Deserializing a structure
38/// let mut builder = MyStructBuilder::default();
39/// deserializer.read_struct(
40/// &MY_STRUCT_SCHEMA,
41/// &mut |member, deser| {
42/// match member.member_index() {
43/// Some(0) => builder.field1 = Some(deser.read_string(member)?),
44/// Some(1) => builder.field2 = Some(deser.read_integer(member)?),
45/// _ => {}
46/// }
47/// Ok(())
48/// },
49/// )?;
50/// let my_struct = builder.build();
51/// ```
52/// Maximum pre-allocation size for containers, used to prevent denial-of-service
53/// from untrusted payloads claiming excessively large sizes.
54pub const MAX_CONTAINER_PREALLOC: usize = 10_000;
55
56/// Caps a raw container size at [`MAX_CONTAINER_PREALLOC`].
57///
58/// Implementations of [`ShapeDeserializer::container_size`] SHOULD use this
59/// when returning a size derived from untrusted input (e.g., a CBOR length header).
60pub fn capped_container_size(raw: usize) -> usize {
61 raw.min(MAX_CONTAINER_PREALLOC)
62}
63
64pub trait ShapeDeserializer {
65 /// Reads a structure from the deserializer.
66 ///
67 /// The consumer is called for each member with the member schema and a
68 /// `&mut dyn ShapeDeserializer` to read the member value. Using `dyn`
69 /// allows composite deserializers (e.g., HTTP binding + body) to
70 /// transparently delegate without the consumer knowing the concrete type.
71 fn read_struct(
72 &mut self,
73 schema: &Schema,
74 state: &mut dyn FnMut(&Schema, &mut dyn ShapeDeserializer) -> Result<(), SerdeError>,
75 ) -> Result<(), SerdeError>;
76
77 /// Reads a list from the deserializer.
78 ///
79 /// The consumer is called for each element with a `&mut dyn ShapeDeserializer`.
80 fn read_list(
81 &mut self,
82 schema: &Schema,
83 state: &mut dyn FnMut(&mut dyn ShapeDeserializer) -> Result<(), SerdeError>,
84 ) -> Result<(), SerdeError>;
85
86 /// Reads a map from the deserializer.
87 ///
88 /// The consumer is called for each entry with the key and a `&mut dyn ShapeDeserializer`.
89 fn read_map(
90 &mut self,
91 schema: &Schema,
92 state: &mut dyn FnMut(String, &mut dyn ShapeDeserializer) -> Result<(), SerdeError>,
93 ) -> Result<(), SerdeError>;
94
95 /// Reads a boolean value.
96 fn read_boolean(&mut self, schema: &Schema) -> Result<bool, SerdeError>;
97
98 /// Reads a byte (i8) value.
99 fn read_byte(&mut self, schema: &Schema) -> Result<i8, SerdeError>;
100
101 /// Reads a short (i16) value.
102 fn read_short(&mut self, schema: &Schema) -> Result<i16, SerdeError>;
103
104 /// Reads an integer (i32) value.
105 fn read_integer(&mut self, schema: &Schema) -> Result<i32, SerdeError>;
106
107 /// Reads a long (i64) value.
108 fn read_long(&mut self, schema: &Schema) -> Result<i64, SerdeError>;
109
110 /// Reads a float (f32) value.
111 fn read_float(&mut self, schema: &Schema) -> Result<f32, SerdeError>;
112
113 /// Reads a double (f64) value.
114 fn read_double(&mut self, schema: &Schema) -> Result<f64, SerdeError>;
115
116 /// Reads a big integer value.
117 fn read_big_integer(&mut self, schema: &Schema) -> Result<BigInteger, SerdeError>;
118
119 /// Reads a big decimal value.
120 fn read_big_decimal(&mut self, schema: &Schema) -> Result<BigDecimal, SerdeError>;
121
122 /// Reads a string value.
123 fn read_string(&mut self, schema: &Schema) -> Result<String, SerdeError>;
124
125 /// Reads a blob (byte array) value.
126 fn read_blob(&mut self, schema: &Schema) -> Result<Blob, SerdeError>;
127
128 /// Reads a timestamp value.
129 fn read_timestamp(&mut self, schema: &Schema) -> Result<DateTime, SerdeError>;
130
131 /// Reads a document value.
132 fn read_document(&mut self, schema: &Schema) -> Result<Document, SerdeError>;
133
134 /// Checks if the current value is null.
135 ///
136 /// This is used for sparse collections where null values are significant.
137 fn is_null(&self) -> bool;
138
139 /// Consumes a null value, advancing past it.
140 ///
141 /// This should be called after `is_null()` returns true to advance the
142 /// deserializer past the null token.
143 fn read_null(&mut self) -> Result<(), SerdeError> {
144 Ok(())
145 }
146
147 /// Returns the size of the current container if known.
148 ///
149 /// This is an optimization hint that allows pre-allocating collections
150 /// with the correct capacity. Returns `None` if the size is unknown or
151 /// not applicable.
152 ///
153 /// Implementations SHOULD cap the returned value at a reasonable maximum
154 /// (e.g., 10,000) to prevent denial-of-service from untrusted payloads
155 /// that claim excessively large container sizes (e.g., a CBOR header
156 /// declaring billions of elements). Use [`capped_container_size`] to apply
157 /// a standard cap.
158 fn container_size(&self) -> Option<usize>;
159
160 // --- Collection helper methods ---
161 //
162 // This is a **closed set** of helpers for the most common AWS collection
163 // patterns. No additional helpers will be added. New collection patterns
164 // should use the generic `read_list`/`read_map` with closures.
165 //
166 // These exist for two reasons:
167 // 1. Code size: each helper replaces ~6-8 lines of closure boilerplate in
168 // generated code, yielding ~43% reduction for collection-heavy models.
169 // 2. Performance: codec implementations (e.g., `JsonDeserializer`) override
170 // these to call concrete `read_string`/`read_integer`/etc. methods
171 // directly, eliminating per-element vtable dispatch. This requires the
172 // methods to be on the core trait (not an extension trait) since they
173 // are called through `&mut dyn ShapeDeserializer` in generated code.
174
175 /// Reads a list of strings.
176 fn read_string_list(&mut self, schema: &Schema) -> Result<Vec<String>, SerdeError> {
177 let mut out = Vec::new();
178 self.read_list(schema, &mut |deser| {
179 out.push(deser.read_string(schema)?);
180 Ok(())
181 })?;
182 Ok(out)
183 }
184
185 /// Reads a list of blobs.
186 fn read_blob_list(
187 &mut self,
188 schema: &Schema,
189 ) -> Result<Vec<aws_smithy_types::Blob>, SerdeError> {
190 let mut out = Vec::new();
191 self.read_list(schema, &mut |deser| {
192 out.push(deser.read_blob(schema)?);
193 Ok(())
194 })?;
195 Ok(out)
196 }
197
198 /// Reads a list of integers.
199 fn read_integer_list(&mut self, schema: &Schema) -> Result<Vec<i32>, SerdeError> {
200 let mut out = Vec::new();
201 self.read_list(schema, &mut |deser| {
202 out.push(deser.read_integer(schema)?);
203 Ok(())
204 })?;
205 Ok(out)
206 }
207
208 /// Reads a list of longs.
209 fn read_long_list(&mut self, schema: &Schema) -> Result<Vec<i64>, SerdeError> {
210 let mut out = Vec::new();
211 self.read_list(schema, &mut |deser| {
212 out.push(deser.read_long(schema)?);
213 Ok(())
214 })?;
215 Ok(out)
216 }
217
218 /// Reads a map with string values.
219 fn read_string_string_map(
220 &mut self,
221 schema: &Schema,
222 ) -> Result<std::collections::HashMap<String, String>, SerdeError> {
223 let mut out = std::collections::HashMap::new();
224 self.read_map(schema, &mut |key, deser| {
225 out.insert(key, deser.read_string(schema)?);
226 Ok(())
227 })?;
228 Ok(out)
229 }
230}