base64/engine/
mod.rs

1//! Provides the [Engine] abstraction and out of the box implementations.
2#[cfg(any(feature = "alloc", test))]
3use crate::chunked_encoder;
4use crate::{
5    encode::{encode_with_padding, EncodeSliceError},
6    encoded_len, DecodeError, DecodeSliceError,
7};
8#[cfg(any(feature = "alloc", test))]
9use alloc::vec::Vec;
10
11#[cfg(any(feature = "alloc", test))]
12use alloc::{string::String, vec};
13
14pub mod general_purpose;
15
16#[cfg(test)]
17mod naive;
18
19#[cfg(test)]
20mod tests;
21
22pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23
24/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25///
26/// Different implementations offer different characteristics. The library currently ships with
27/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29///
30/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31/// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32/// issues, and to avoid repeating the cost of engine setup.
33///
34/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
35// When adding an implementation of Engine, include them in the engine test suite:
36// - add an implementation of [engine::tests::EngineWrapper]
37// - add the implementation to the `all_engines` macro
38// All tests run on all engines listed in the macro.
39pub trait Engine: Send + Sync {
40    /// The config type used by this engine
41    type Config: Config;
42    /// The decode estimate used by this engine
43    type DecodeEstimate: DecodeEstimate;
44
45    /// This is not meant to be called directly; it is only for `Engine` implementors.
46    /// See the other `encode*` functions on this trait.
47    ///
48    /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
49    ///
50    /// `output` will be long enough to hold the encoded data.
51    ///
52    /// Returns the number of bytes written.
53    ///
54    /// No padding should be written; that is handled separately.
55    ///
56    /// Must not write any bytes into the output slice other than the encoded data.
57    #[doc(hidden)]
58    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59
60    /// This is not meant to be called directly; it is only for `Engine` implementors.
61    ///
62    /// As an optimization to prevent the decoded length from being calculated twice, it is
63    /// sometimes helpful to have a conservative estimate of the decoded size before doing the
64    /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
65    #[doc(hidden)]
66    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67
68    /// This is not meant to be called directly; it is only for `Engine` implementors.
69    /// See the other `decode*` functions on this trait.
70    ///
71    /// Decode `input` base64 bytes into the `output` buffer.
72    ///
73    /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
74    /// calculating it again (expensive on short inputs).`
75    ///
76    /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
77    /// function must also handle the final possibly partial chunk.
78    /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
79    /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
80    /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
81    ///
82    /// Decoding must not write any bytes into the output slice other than the decoded data.
83    ///
84    /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
85    /// errors unless the engine is configured otherwise.
86    #[doc(hidden)]
87    fn internal_decode(
88        &self,
89        input: &[u8],
90        output: &mut [u8],
91        decode_estimate: Self::DecodeEstimate,
92    ) -> Result<DecodeMetadata, DecodeSliceError>;
93
94    /// Returns the config for this engine.
95    fn config(&self) -> &Self::Config;
96
97    /// Encode arbitrary octets as base64 using the provided `Engine`.
98    /// Returns a `String`.
99    ///
100    /// # Example
101    ///
102    /// ```rust
103    /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
104    ///
105    /// let b64 = general_purpose::STANDARD.encode(b"hello world~");
106    /// println!("{}", b64);
107    ///
108    /// const CUSTOM_ENGINE: engine::GeneralPurpose =
109    ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
110    ///
111    /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
112    /// ```
113    #[cfg(any(feature = "alloc", test))]
114    #[inline]
115    fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
116        fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
117        where
118            E: Engine + ?Sized,
119        {
120            let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
121                .expect("integer overflow when calculating buffer size");
122
123            let mut buf = vec![0; encoded_size];
124
125            encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
126
127            String::from_utf8(buf).expect("Invalid UTF8")
128        }
129
130        inner(self, input.as_ref())
131    }
132
133    /// Encode arbitrary octets as base64 into a supplied `String`.
134    /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
135    ///
136    /// # Example
137    ///
138    /// ```rust
139    /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
140    /// const CUSTOM_ENGINE: engine::GeneralPurpose =
141    ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
142    ///
143    /// fn main() {
144    ///     let mut buf = String::new();
145    ///     general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
146    ///     println!("{}", buf);
147    ///
148    ///     buf.clear();
149    ///     CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
150    ///     println!("{}", buf);
151    /// }
152    /// ```
153    #[cfg(any(feature = "alloc", test))]
154    #[inline]
155    fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
156        fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
157        where
158            E: Engine + ?Sized,
159        {
160            let mut sink = chunked_encoder::StringSink::new(output_buf);
161
162            chunked_encoder::ChunkedEncoder::new(engine)
163                .encode(input_bytes, &mut sink)
164                .expect("Writing to a String shouldn't fail");
165        }
166
167        inner(self, input.as_ref(), output_buf)
168    }
169
170    /// Encode arbitrary octets as base64 into a supplied slice.
171    /// Writes into the supplied output buffer.
172    ///
173    /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
174    /// or statically-allocated buffer).
175    ///
176    /// # Example
177    ///
178    #[cfg_attr(feature = "alloc", doc = "```")]
179    #[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
180    /// use base64::{Engine as _, engine::general_purpose};
181    /// let s = b"hello internet!";
182    /// let mut buf = Vec::new();
183    /// // make sure we'll have a slice big enough for base64 + padding
184    /// buf.resize(s.len() * 4 / 3 + 4, 0);
185    ///
186    /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
187    ///
188    /// // shorten our vec down to just what was written
189    /// buf.truncate(bytes_written);
190    ///
191    /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
192    /// ```
193    #[inline]
194    fn encode_slice<T: AsRef<[u8]>>(
195        &self,
196        input: T,
197        output_buf: &mut [u8],
198    ) -> Result<usize, EncodeSliceError> {
199        fn inner<E>(
200            engine: &E,
201            input_bytes: &[u8],
202            output_buf: &mut [u8],
203        ) -> Result<usize, EncodeSliceError>
204        where
205            E: Engine + ?Sized,
206        {
207            let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
208                .expect("usize overflow when calculating buffer size");
209
210            if output_buf.len() < encoded_size {
211                return Err(EncodeSliceError::OutputSliceTooSmall);
212            }
213
214            let b64_output = &mut output_buf[0..encoded_size];
215
216            encode_with_padding(input_bytes, b64_output, engine, encoded_size);
217
218            Ok(encoded_size)
219        }
220
221        inner(self, input.as_ref(), output_buf)
222    }
223
224    /// Decode the input into a new `Vec`.
225    ///
226    /// # Example
227    ///
228    /// ```rust
229    /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
230    ///
231    /// let bytes = general_purpose::STANDARD
232    ///     .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
233    /// println!("{:?}", bytes);
234    ///
235    /// // custom engine setup
236    /// let bytes_url = engine::GeneralPurpose::new(
237    ///              &alphabet::URL_SAFE,
238    ///              general_purpose::NO_PAD)
239    ///     .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
240    /// println!("{:?}", bytes_url);
241    /// ```
242    #[cfg(any(feature = "alloc", test))]
243    #[inline]
244    fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
245        fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
246        where
247            E: Engine + ?Sized,
248        {
249            let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
250            let mut buffer = vec![0; estimate.decoded_len_estimate()];
251
252            let bytes_written = engine
253                .internal_decode(input_bytes, &mut buffer, estimate)
254                .map_err(|e| match e {
255                    DecodeSliceError::DecodeError(e) => e,
256                    DecodeSliceError::OutputSliceTooSmall => {
257                        unreachable!("Vec is sized conservatively")
258                    }
259                })?
260                .decoded_len;
261
262            buffer.truncate(bytes_written);
263
264            Ok(buffer)
265        }
266
267        inner(self, input.as_ref())
268    }
269
270    /// Decode the `input` into the supplied `buffer`.
271    ///
272    /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
273    /// Returns a `Result` containing an empty tuple, aka `()`.
274    ///
275    /// # Example
276    ///
277    /// ```rust
278    /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
279    /// const CUSTOM_ENGINE: engine::GeneralPurpose =
280    ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
281    ///
282    /// fn main() {
283    ///     use base64::Engine;
284    ///     let mut buffer = Vec::<u8>::new();
285    ///     // with the default engine
286    ///     general_purpose::STANDARD
287    ///         .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
288    ///     println!("{:?}", buffer);
289    ///
290    ///     buffer.clear();
291    ///
292    ///     // with a custom engine
293    ///     CUSTOM_ENGINE.decode_vec(
294    ///         "aGVsbG8gaW50ZXJuZXR-Cg==",
295    ///         &mut buffer,
296    ///     ).unwrap();
297    ///     println!("{:?}", buffer);
298    /// }
299    /// ```
300    #[cfg(any(feature = "alloc", test))]
301    #[inline]
302    fn decode_vec<T: AsRef<[u8]>>(
303        &self,
304        input: T,
305        buffer: &mut Vec<u8>,
306    ) -> Result<(), DecodeError> {
307        fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
308        where
309            E: Engine + ?Sized,
310        {
311            let starting_output_len = buffer.len();
312            let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
313
314            let total_len_estimate = estimate
315                .decoded_len_estimate()
316                .checked_add(starting_output_len)
317                .expect("Overflow when calculating output buffer length");
318
319            buffer.resize(total_len_estimate, 0);
320
321            let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
322
323            let bytes_written = engine
324                .internal_decode(input_bytes, buffer_slice, estimate)
325                .map_err(|e| match e {
326                    DecodeSliceError::DecodeError(e) => e,
327                    DecodeSliceError::OutputSliceTooSmall => {
328                        unreachable!("Vec is sized conservatively")
329                    }
330                })?
331                .decoded_len;
332
333            buffer.truncate(starting_output_len + bytes_written);
334
335            Ok(())
336        }
337
338        inner(self, input.as_ref(), buffer)
339    }
340
341    /// Decode the input into the provided output slice.
342    ///
343    /// Returns the number of bytes written to the slice, or an error if `output` is smaller than
344    /// the estimated decoded length.
345    ///
346    /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
347    ///
348    /// See [crate::decoded_len_estimate] for calculating buffer sizes.
349    ///
350    /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
351    /// if the output buffer is too small.
352    #[inline]
353    fn decode_slice<T: AsRef<[u8]>>(
354        &self,
355        input: T,
356        output: &mut [u8],
357    ) -> Result<usize, DecodeSliceError> {
358        fn inner<E>(
359            engine: &E,
360            input_bytes: &[u8],
361            output: &mut [u8],
362        ) -> Result<usize, DecodeSliceError>
363        where
364            E: Engine + ?Sized,
365        {
366            engine
367                .internal_decode(
368                    input_bytes,
369                    output,
370                    engine.internal_decoded_len_estimate(input_bytes.len()),
371                )
372                .map(|dm| dm.decoded_len)
373        }
374
375        inner(self, input.as_ref(), output)
376    }
377
378    /// Decode the input into the provided output slice.
379    ///
380    /// Returns the number of bytes written to the slice.
381    ///
382    /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
383    ///
384    /// See [crate::decoded_len_estimate] for calculating buffer sizes.
385    ///
386    /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
387    /// buffer is too small.
388    ///
389    /// # Panics
390    ///
391    /// Panics if the provided output buffer is too small for the decoded data.
392    #[inline]
393    fn decode_slice_unchecked<T: AsRef<[u8]>>(
394        &self,
395        input: T,
396        output: &mut [u8],
397    ) -> Result<usize, DecodeError> {
398        fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
399        where
400            E: Engine + ?Sized,
401        {
402            engine
403                .internal_decode(
404                    input_bytes,
405                    output,
406                    engine.internal_decoded_len_estimate(input_bytes.len()),
407                )
408                .map(|dm| dm.decoded_len)
409                .map_err(|e| match e {
410                    DecodeSliceError::DecodeError(e) => e,
411                    DecodeSliceError::OutputSliceTooSmall => {
412                        panic!("Output slice is too small")
413                    }
414                })
415        }
416
417        inner(self, input.as_ref(), output)
418    }
419}
420
/// The smallest set of configuration every engine has to expose.
pub trait Config {
    /// Whether padding should be appended once the encoded output has been produced
    /// (`true` means padding is added).
    ///
    /// The engine's own encode step does not add padding itself: an engine may be asked to
    /// encode just one chunk of a larger output, so it cannot always tell when the output is
    /// "done" and would need padding (if configured).
    // Passing this as an extra parameter at encode time would also work, but that would leak
    // an implementation detail to the user; handing over a single thing (the engine) to any
    // part of the API is hopefully more convenient.
    fn encode_padding(&self) -> bool;
}
433
/// An engine implementation's decode estimate. This exists only for engine implementors;
/// users do not need to interact with it.
///
/// When constructing one, implementors may stash relevant data in it so that the same values
/// do not have to be calculated again during the actual decoding.
pub trait DecodeEstimate {
    /// A conservative estimate of the decoded length (erring on the side of too big), suitable
    /// for pre-allocating buffers, etc.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// In other words, the final quad of tokens to decode may be assumed to be complete, with
    /// no padding.
    fn decoded_len_estimate(&self) -> usize;
}
447
/// Selects how padding bytes in the input are treated while decoding.
///
/// Every [Engine] has to support at least the behavior described by
/// [DecodePaddingMode::RequireCanonical]; supporting the other modes is optional.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Accepts canonical padding, and also accepts any fewer padding bytes than that.
    Indifferent,
    /// Requires canonical padding (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Requires that no padding is present -- for when you want predictable padding, without
    /// any wasted bytes.
    RequireNone,
}
461
/// Describes the outcome of a decode operation.
#[derive(Debug, PartialEq, Eq)]
pub struct DecodeMetadata {
    /// How many decoded bytes were output
    pub(crate) decoded_len: usize,
    /// Where in the input the first padding byte sits, if there is one
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the decoded byte count and the optional offset of the first
    /// padding byte.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        Self {
            decoded_len,
            padding_offset,
        }
    }
}