base64/
encode.rs

1#[cfg(any(feature = "alloc", test))]
2use alloc::string::String;
3use core::fmt;
4#[cfg(any(feature = "std", test))]
5use std::error;
6
7#[cfg(any(feature = "alloc", test))]
8use crate::engine::general_purpose::STANDARD;
9use crate::engine::{Config, Engine};
10use crate::PAD_BYTE;
11
/// Base64-encode `input` with the [`STANDARD` engine](STANDARD), returning the text as a `String`.
///
/// Deprecated convenience wrapper; it forwards directly to [Engine::encode].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
#[cfg(any(feature = "alloc", test))]
pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
    // Spell out the trait call so it is obvious which method does the work.
    Engine::encode(&STANDARD, input)
}
21
/// Base64-encode `input` with the caller-supplied `engine`, returning the text as a `String`.
///
/// Deprecated convenience wrapper; it forwards directly to [Engine::encode].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
#[cfg(any(feature = "alloc", test))]
pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String {
    <E as Engine>::encode(engine, input)
}
31
/// Base64-encode `input` with `engine`, handing `output_buf` to the engine as the destination.
///
/// Deprecated convenience wrapper; it forwards directly to [Engine::encode_string].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode_string")]
#[cfg(any(feature = "alloc", test))]
pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>(
    input: T,
    output_buf: &mut String,
    engine: &E,
) {
    <E as Engine>::encode_string(engine, input, output_buf)
}
45
46/// Encode arbitrary octets as base64 into a supplied slice.
47///
48/// See [Engine::encode_slice].
49#[allow(unused)]
50#[deprecated(since = "0.21.0", note = "Use Engine::encode_slice")]
51pub fn encode_engine_slice<E: Engine, T: AsRef<[u8]>>(
52    input: T,
53    output_buf: &mut [u8],
54    engine: &E,
55) -> Result<usize, EncodeSliceError> {
56    engine.encode_slice(input, output_buf)
57}
58
59/// B64-encode and pad (if configured).
60///
61/// This helper exists to avoid recalculating encoded_size, which is relatively expensive on short
62/// inputs.
63///
64/// `encoded_size` is the encoded size calculated for `input`.
65///
66/// `output` must be of size `encoded_size`.
67///
68/// All bytes in `output` will be written to since it is exactly the size of the output.
69pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
70    input: &[u8],
71    output: &mut [u8],
72    engine: &E,
73    expected_encoded_size: usize,
74) {
75    debug_assert_eq!(expected_encoded_size, output.len());
76
77    let b64_bytes_written = engine.internal_encode(input, output);
78
79    let padding_bytes = if engine.config().encode_padding() {
80        add_padding(b64_bytes_written, &mut output[b64_bytes_written..])
81    } else {
82        0
83    };
84
85    let encoded_bytes = b64_bytes_written
86        .checked_add(padding_bytes)
87        .expect("usize overflow when calculating b64 length");
88
89    debug_assert_eq!(expected_encoded_size, encoded_bytes);
90}
91
/// Compute how many base64 bytes are produced for `bytes_len` input bytes.
///
/// With `padding` set, a trailing partial group of input is rounded up to a full 4-byte output
/// group; without it, a 1-byte remainder yields 2 output bytes and a 2-byte remainder yields 3.
///
/// Returns `None` if the encoded length can't be represented in `usize`. This will happen for
/// input lengths in approximately the top quarter of the range of `usize`.
pub const fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
    let full_groups = bytes_len / 3;
    let leftover = bytes_len % 3;

    // `?` is disallowed in const, and `let Some(_) = _ else` requires 1.65.0, so unwrap by hand
    // in a form that also works on 1.48.
    let base_len = match full_groups.checked_mul(4) {
        Some(len) => len,
        None => return None,
    };

    if leftover == 0 {
        return Some(base_len);
    }

    let tail_len = if padding {
        4
    } else if leftover == 1 {
        2
    } else {
        // the only other possible remainder is 2
        3
    };

    base_len.checked_add(tail_len)
}
126
127/// Write padding characters.
128/// `unpadded_output_len` is the size of the unpadded but base64 encoded data.
129/// `output` is the slice where padding should be written, of length at least 2.
130///
131/// Returns the number of padding bytes written.
132pub(crate) fn add_padding(unpadded_output_len: usize, output: &mut [u8]) -> usize {
133    let pad_bytes = (4 - (unpadded_output_len % 4)) % 4;
134    // for just a couple bytes, this has better performance than using
135    // .fill(), or iterating over mutable refs, which call memset()
136    #[allow(clippy::needless_range_loop)]
137    for i in 0..pad_bytes {
138        output[i] = PAD_BYTE;
139    }
140
141    pad_bytes
142}
143
/// Failure modes when encoding into a caller-provided slice.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum EncodeSliceError {
    /// The destination slice does not have enough room for the encoded output.
    OutputSliceTooSmall,
}
150
151impl fmt::Display for EncodeSliceError {
152    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
153        match self {
154            Self::OutputSliceTooSmall => write!(f, "Output slice too small"),
155        }
156    }
157}
158
// The trait's default methods are sufficient; the impl only exists where `std` is available.
#[cfg(any(feature = "std", test))]
impl error::Error for EncodeSliceError {
}
161
#[cfg(test)]
mod tests {
    use super::*;

    use crate::{
        alphabet,
        engine::general_purpose::{GeneralPurpose, NO_PAD, STANDARD},
        tests::{assert_encode_sanity, random_engine},
    };
    use rand::{
        distributions::{Distribution, Uniform},
        Rng, SeedableRng,
    };
    use std::str;

    const URL_SAFE_NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);

    /// Appends `count` random bytes to `buf`.
    fn push_random_bytes<R: Rng>(buf: &mut Vec<u8>, count: usize, rng: &mut R) {
        buf.extend((0..count).map(|_| rng.gen::<u8>()));
    }

    /// Padded lengths always round up to a multiple of 4.
    #[test]
    fn encoded_size_correct_standard() {
        assert_encoded_length(0, 0, &STANDARD, true);

        assert_encoded_length(1, 4, &STANDARD, true);
        assert_encoded_length(2, 4, &STANDARD, true);
        assert_encoded_length(3, 4, &STANDARD, true);

        assert_encoded_length(4, 8, &STANDARD, true);
        assert_encoded_length(5, 8, &STANDARD, true);
        assert_encoded_length(6, 8, &STANDARD, true);

        assert_encoded_length(7, 12, &STANDARD, true);
        assert_encoded_length(8, 12, &STANDARD, true);
        assert_encoded_length(9, 12, &STANDARD, true);

        assert_encoded_length(54, 72, &STANDARD, true);

        assert_encoded_length(55, 76, &STANDARD, true);
        assert_encoded_length(56, 76, &STANDARD, true);
        assert_encoded_length(57, 76, &STANDARD, true);

        assert_encoded_length(58, 80, &STANDARD, true);
    }

    /// Unpadded lengths only include the symbols needed for a trailing partial group.
    #[test]
    fn encoded_size_correct_no_pad() {
        assert_encoded_length(0, 0, &URL_SAFE_NO_PAD_ENGINE, false);

        assert_encoded_length(1, 2, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(2, 3, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(3, 4, &URL_SAFE_NO_PAD_ENGINE, false);

        assert_encoded_length(4, 6, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(5, 7, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(6, 8, &URL_SAFE_NO_PAD_ENGINE, false);

        assert_encoded_length(7, 10, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(8, 11, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(9, 12, &URL_SAFE_NO_PAD_ENGINE, false);

        assert_encoded_length(54, 72, &URL_SAFE_NO_PAD_ENGINE, false);

        assert_encoded_length(55, 74, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(56, 75, &URL_SAFE_NO_PAD_ENGINE, false);
        assert_encoded_length(57, 76, &URL_SAFE_NO_PAD_ENGINE, false);

        assert_encoded_length(58, 78, &URL_SAFE_NO_PAD_ENGINE, false);
    }

    /// The largest input length cannot be represented once encoded.
    #[test]
    fn encoded_size_overflow() {
        assert_eq!(None, encoded_len(usize::MAX, true));
    }

    /// Encoding into a `String` that already has content must append, not overwrite.
    #[test]
    fn encode_engine_string_into_nonempty_buffer_doesnt_clobber_prefix() {
        let mut orig_data = Vec::new();
        let mut prefix = String::new();
        let mut encoded_data_no_prefix = String::new();
        let mut encoded_data_with_prefix = String::new();
        let mut decoded = Vec::new();

        let prefix_len_range = Uniform::new(0, 1000);
        let input_len_range = Uniform::new(0, 1000);

        let mut rng = rand::rngs::SmallRng::from_entropy();

        for _ in 0..10_000 {
            orig_data.clear();
            prefix.clear();
            encoded_data_no_prefix.clear();
            encoded_data_with_prefix.clear();
            decoded.clear();

            let input_len = input_len_range.sample(&mut rng);
            push_random_bytes(&mut orig_data, input_len, &mut rng);

            let prefix_len = prefix_len_range.sample(&mut rng);
            for _ in 0..prefix_len {
                // getting convenient random single-byte printable chars that aren't base64 is
                // annoying
                prefix.push('#');
            }
            encoded_data_with_prefix.push_str(&prefix);

            let engine = random_engine(&mut rng);
            engine.encode_string(&orig_data, &mut encoded_data_no_prefix);
            engine.encode_string(&orig_data, &mut encoded_data_with_prefix);

            assert_eq!(
                encoded_data_no_prefix.len() + prefix_len,
                encoded_data_with_prefix.len()
            );
            assert_encode_sanity(
                &encoded_data_no_prefix,
                engine.config().encode_padding(),
                input_len,
            );
            assert_encode_sanity(
                &encoded_data_with_prefix[prefix_len..],
                engine.config().encode_padding(),
                input_len,
            );

            // append plain encode onto prefix
            prefix.push_str(&encoded_data_no_prefix);

            assert_eq!(prefix, encoded_data_with_prefix);

            engine
                .decode_vec(&encoded_data_no_prefix, &mut decoded)
                .unwrap();
            assert_eq!(orig_data, decoded);
        }
    }

    /// Encoding into an oversized slice must leave everything past the encoded bytes untouched.
    #[test]
    fn encode_engine_slice_into_nonempty_buffer_doesnt_clobber_suffix() {
        let mut orig_data = Vec::new();
        let mut encoded_data = Vec::new();
        let mut encoded_data_original_state = Vec::new();
        let mut decoded = Vec::new();

        let input_len_range = Uniform::new(0, 1000);

        let mut rng = rand::rngs::SmallRng::from_entropy();

        for _ in 0..10_000 {
            orig_data.clear();
            encoded_data.clear();
            encoded_data_original_state.clear();
            decoded.clear();

            let input_len = input_len_range.sample(&mut rng);
            push_random_bytes(&mut orig_data, input_len, &mut rng);

            // plenty of existing garbage in the encoded buffer
            push_random_bytes(&mut encoded_data, 10 * input_len, &mut rng);

            encoded_data_original_state.extend_from_slice(&encoded_data);

            let engine = random_engine(&mut rng);

            let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();

            assert_eq!(
                encoded_size,
                engine.encode_slice(&orig_data, &mut encoded_data).unwrap()
            );

            assert_encode_sanity(
                str::from_utf8(&encoded_data[0..encoded_size]).unwrap(),
                engine.config().encode_padding(),
                input_len,
            );

            assert_eq!(
                &encoded_data[encoded_size..],
                &encoded_data_original_state[encoded_size..]
            );

            engine
                .decode_vec(&encoded_data[0..encoded_size], &mut decoded)
                .unwrap();
            assert_eq!(orig_data, decoded);
        }
    }

    /// `internal_encode` must emit valid UTF-8 and must not touch bytes past what it reports
    /// as written.
    #[test]
    fn encode_to_slice_random_valid_utf8() {
        let mut input = Vec::new();
        let mut output = Vec::new();

        let input_len_range = Uniform::new(0, 1000);

        let mut rng = rand::rngs::SmallRng::from_entropy();

        for _ in 0..10_000 {
            input.clear();
            output.clear();

            let input_len = input_len_range.sample(&mut rng);
            push_random_bytes(&mut input, input_len, &mut rng);

            let engine = random_engine(&mut rng);

            // fill up the output buffer with garbage; the size comes from the engine under test
            // so the buffer always matches that engine's padding configuration
            let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
            push_random_bytes(&mut output, encoded_size, &mut rng);

            let orig_output_buf = output.clone();

            let bytes_written = engine.internal_encode(&input, &mut output);

            // make sure the part beyond bytes_written is the same garbage it was before
            assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);

            // make sure the encoded bytes are UTF-8
            let _ = str::from_utf8(&output[0..bytes_written]).unwrap();
        }
    }

    /// `encode_with_padding` must emit valid UTF-8 and stay inside the slice it was given.
    #[test]
    fn encode_with_padding_random_valid_utf8() {
        let mut input = Vec::new();
        let mut output = Vec::new();

        let input_len_range = Uniform::new(0, 1000);

        let mut rng = rand::rngs::SmallRng::from_entropy();

        for _ in 0..10_000 {
            input.clear();
            output.clear();

            let input_len = input_len_range.sample(&mut rng);
            push_random_bytes(&mut input, input_len, &mut rng);

            let engine = random_engine(&mut rng);

            // fill up the output buffer with garbage
            let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap();
            push_random_bytes(&mut output, encoded_size + 1000, &mut rng);

            let orig_output_buf = output.clone();

            encode_with_padding(&input, &mut output[0..encoded_size], &engine, encoded_size);

            // make sure the part beyond b64 is the same garbage it was before
            assert_eq!(orig_output_buf[encoded_size..], output[encoded_size..]);

            // make sure the encoded bytes are UTF-8
            let _ = str::from_utf8(&output[0..encoded_size]).unwrap();
        }
    }

    /// `add_padding` must only write the bytes it reports, and those bytes must be valid UTF-8.
    #[test]
    fn add_padding_random_valid_utf8() {
        let mut output = Vec::new();

        let mut rng = rand::rngs::SmallRng::from_entropy();

        // cover our bases for length % 4
        for unpadded_output_len in 0..20 {
            output.clear();

            // fill output with random
            push_random_bytes(&mut output, 100, &mut rng);

            let orig_output_buf = output.clone();

            let bytes_written = add_padding(unpadded_output_len, &mut output);

            // make sure the part beyond bytes_written is the same garbage it was before
            assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);

            // make sure the encoded bytes are UTF-8
            let _ = str::from_utf8(&output[0..bytes_written]).unwrap();
        }
    }

    /// Checks that `encoded_len` predicts `enc_len` for `input_len` bytes and that actually
    /// encoding random input of that length with `engine` produces output of the same length.
    ///
    /// `padded` must agree with the padding behavior of `engine`.
    fn assert_encoded_length<E: Engine>(
        input_len: usize,
        enc_len: usize,
        engine: &E,
        padded: bool,
    ) {
        assert_eq!(enc_len, encoded_len(input_len, padded).unwrap());

        let mut bytes: Vec<u8> = Vec::new();
        let mut rng = rand::rngs::SmallRng::from_entropy();
        push_random_bytes(&mut bytes, input_len, &mut rng);

        let encoded = engine.encode(&bytes);
        assert_encode_sanity(&encoded, padded, input_len);

        assert_eq!(enc_len, encoded.len());
    }

    /// The IMAP alphabet output should equal the standard alphabet output with `/` swapped
    /// for `,` on this input.
    #[test]
    fn encode_imap() {
        assert_eq!(
            &GeneralPurpose::new(&alphabet::IMAP_MUTF7, NO_PAD).encode(b"\xFB\xFF"),
            &GeneralPurpose::new(&alphabet::STANDARD, NO_PAD)
                .encode(b"\xFB\xFF")
                .replace('/', ",")
        );
    }
}