//! Buffered Base64 decoder.

use crate::{
    encoding,
    line_ending::{CHAR_CR, CHAR_LF},
    Encoding,
    Error::{self, InvalidLength},
    MIN_LINE_WIDTH,
};
use core::{cmp, marker::PhantomData};

#[cfg(feature = "alloc")]
use {alloc::vec::Vec, core::iter};

#[cfg(feature = "std")]
use std::io;

#[cfg(doc)]
use crate::{Base64, Base64Unpadded};

/// Stateful Base64 decoder with support for buffered, incremental decoding.
///
/// The `E` type parameter can be any type which impls [`Encoding`] such as
/// [`Base64`] or [`Base64Unpadded`].
///
/// The decoder borrows its input for the lifetime `'i` and hands out decoded
/// bytes through [`Decoder::decode`]; output requests do not need to be
/// aligned to Base64 block boundaries.
#[derive(Clone)]
pub struct Decoder<'i, E: Encoding> {
    /// Current line being processed.
    ///
    /// Starts out empty and is refilled from `line_reader` once consumed.
    line: Line<'i>,

    /// Base64 input data reader.
    ///
    /// Yields the lines of the input which have not been visited yet.
    line_reader: LineReader<'i>,

    /// Length of the remaining data after Base64 decoding.
    ///
    /// Computed up front when the decoder is constructed and reduced by each
    /// successful call to `decode`.
    remaining_len: usize,

    /// Block buffer used for non-block-aligned data.
    ///
    /// Holds the decoded bytes of a single block which could not all be
    /// written straight into the caller's output buffer.
    block_buffer: BlockBuffer,

    /// Phantom parameter for the Base64 encoding in use.
    encoding: PhantomData<E>,
}

impl<'i, E: Encoding> Decoder<'i, E> {
    /// Build a decoder over a byte slice holding one contiguous run of
    /// Base64 (i.e. input which is not split into lines).
    ///
    /// The total decoded length is computed eagerly, so problems with the
    /// final Base64 block are reported here rather than on first use.
    ///
    /// # Returns
    /// - `Ok(decoder)` on success.
    /// - `Err(Error::InvalidLength)` if the input buffer is empty.
    /// - Any error produced while computing the decoded length of the input.
    pub fn new(input: &'i [u8]) -> Result<Self, Error> {
        let reader = LineReader::new_unwrapped(input)?;
        let decoded_total = reader.decoded_len::<E>()?;

        let decoder = Self {
            line: Line::default(),
            line_reader: reader,
            remaining_len: decoded_total,
            block_buffer: BlockBuffer::default(),
            encoding: PhantomData,
        };

        Ok(decoder)
    }

    /// Build a decoder over a byte slice holding Base64 which is wrapped
    /// into lines of `line_width` characters.
    ///
    /// Trailing newlines are not supported and must be removed in advance.
    ///
    /// Newlines are handled according to what are roughly [RFC7468] conventions:
    ///
    /// ```text
    /// [parsers] MUST handle different newline conventions
    /// ```
    ///
    /// RFC7468 allows any of the following as newlines, and allows a mixture
    /// of different types of newlines:
    ///
    /// ```text
    /// eol        = CRLF / CR / LF
    /// ```
    ///
    /// # Returns
    /// - `Ok(decoder)` on success.
    /// - `Err(Error::InvalidLength)` if the input buffer is empty or
    ///   `line_width` is smaller than the minimum supported line width.
    /// - Any error produced while walking the lines to compute the decoded
    ///   length of the input.
    ///
    /// [RFC7468]: https://datatracker.ietf.org/doc/html/rfc7468
    pub fn new_wrapped(input: &'i [u8], line_width: usize) -> Result<Self, Error> {
        let reader = LineReader::new_wrapped(input, line_width)?;
        let decoded_total = reader.decoded_len::<E>()?;

        let decoder = Self {
            line: Line::default(),
            line_reader: reader,
            remaining_len: decoded_total,
            block_buffer: BlockBuffer::default(),
            encoding: PhantomData,
        };

        Ok(decoder)
    }

    /// Fill the provided buffer with data decoded from Base64.
    ///
    /// Enough Base64 input data must remain to fill the entire buffer.
    /// Requesting zero bytes always succeeds, including after all of the
    /// input has been consumed.
    ///
    /// On failure the decoder may already have consumed part of its input,
    /// and [`Decoder::remaining_len`] is left unchanged because it is only
    /// updated once the whole request has been satisfied.
    ///
    /// # Returns
    /// - `Ok(bytes)` if the expected amount of data was read; `bytes` is the
    ///   filled `out` buffer.
    /// - `Err(Error::InvalidLength)` if the exact amount of data couldn't be read
    /// - Any error reported by the underlying [`Encoding`] while decoding.
    pub fn decode<'o>(&mut self, out: &'o mut [u8]) -> Result<&'o [u8], Error> {
        // Only a non-empty request is unsatisfiable once the input is
        // exhausted; an empty request is trivially complete.
        if self.is_finished() && !out.is_empty() {
            return Err(InvalidLength);
        }

        let mut out_pos = 0;

        while out_pos < out.len() {
            // If there's data in the block buffer, use it
            if !self.block_buffer.is_empty() {
                let out_rem = out.len().checked_sub(out_pos).ok_or(InvalidLength)?;
                let bytes = self.block_buffer.take(out_rem)?;
                out[out_pos..][..bytes.len()].copy_from_slice(bytes);
                out_pos = out_pos.checked_add(bytes.len()).ok_or(InvalidLength)?;
            }

            // Advance the line reader if necessary
            if self.line.is_empty() && !self.line_reader.is_empty() {
                self.advance_line()?;
            }

            // Attempt to decode a stride of block-aligned data: whole 4-byte
            // input blocks map onto whole 3-byte output blocks, so the
            // stride is bounded by whichever side has fewer blocks available.
            let in_blocks = self.line.len() / 4;
            let out_rem = out.len().checked_sub(out_pos).ok_or(InvalidLength)?;
            let out_blocks = out_rem / 3;
            let blocks = cmp::min(in_blocks, out_blocks);
            let in_aligned = self.line.take(blocks.checked_mul(4).ok_or(InvalidLength)?);

            if !in_aligned.is_empty() {
                let out_buf = &mut out[out_pos..][..blocks.checked_mul(3).ok_or(InvalidLength)?];
                let decoded_len = self.perform_decode(in_aligned, out_buf)?.len();
                out_pos = out_pos.checked_add(decoded_len).ok_or(InvalidLength)?;
            }

            if out_pos < out.len() {
                if self.is_finished() {
                    // If we're out of input then we've been requested to decode
                    // more data than is actually available.
                    return Err(InvalidLength);
                } else {
                    // If we still have data available but haven't completely
                    // filled the output slice, we're in a situation where
                    // either the input or output isn't block-aligned, so fill
                    // the internal block buffer.
                    self.fill_block_buffer()?;
                }
            }
        }

        // The whole request was satisfied: account for the bytes handed out.
        self.remaining_len = self
            .remaining_len
            .checked_sub(out.len())
            .ok_or(InvalidLength)?;

        Ok(out)
    }

    /// Decode all remaining Base64 data, appending the result to `buf`.
    ///
    /// Existing contents of `buf` are left untouched; the decoded bytes are
    /// placed after them.
    ///
    /// # Returns
    /// - `Ok(bytes)` on success, where `bytes` is the newly appended portion
    ///   of `buf`. This is an empty slice if no data remained to be decoded.
    /// - `Err(Error::InvalidLength)` if the resulting length would overflow
    ///   `usize` or the remaining input is shorter than expected.
    /// - Any other error reported by [`Decoder::decode`]. In that case `buf`
    ///   has already been extended by the expected number of bytes.
    #[cfg(feature = "alloc")]
    pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec<u8>) -> Result<&'o [u8], Error> {
        let start_len = buf.len();
        let remaining_len = self.remaining_len();

        // Nothing left to decode: succeed with an empty slice instead of
        // asking `decode` to read from exhausted input.
        if remaining_len == 0 {
            return Ok(&buf[start_len..]);
        }

        let total_len = start_len.checked_add(remaining_len).ok_or(InvalidLength)?;

        // `Vec::reserve` takes the number of *additional* elements relative
        // to the current length (not the current capacity), so request room
        // for exactly the bytes which are about to be appended.
        if total_len > buf.capacity() {
            buf.reserve(remaining_len);
        }

        // Append `remaining_len` zeroes to the vector as the decode target
        buf.extend(iter::repeat(0).take(remaining_len));
        self.decode(&mut buf[start_len..])?;
        Ok(&buf[start_len..])
    }

    /// Get the length of the remaining data after Base64 decoding.
    ///
    /// This is the number of decoded bytes which have not been handed out
    /// yet. Decreases every time data is successfully decoded.
    pub fn remaining_len(&self) -> usize {
        self.remaining_len
    }

    /// Has all of the input data been decoded?
    ///
    /// True only once the current line, the line reader and the block buffer
    /// have all been drained.
    pub fn is_finished(&self) -> bool {
        let pending_input = !self.line.is_empty() || !self.line_reader.is_empty();
        let pending_output = !self.block_buffer.is_empty();

        !(pending_input || pending_output)
    }

    /// Decode a single Base64 block into the internal block buffer.
    ///
    /// Used when either the input or the output is not block-aligned. If the
    /// current line holds less than a full block and more lines follow, the
    /// block is stitched together from the end of this line and the start
    /// of the next one.
    fn fill_block_buffer(&mut self) -> Result<(), Error> {
        let mut decoded_block = [0u8; BlockBuffer::SIZE];
        let straddles_lines = self.line.len() < 4 && !self.line_reader.is_empty();

        let decoded = if straddles_lines {
            // Reassemble an input block which is split across two lines
            let mut quad = [0u8; 4];

            // Whatever is left of the current line forms the head of the block
            let head = self.line.take(4);
            quad[..head.len()].copy_from_slice(head);

            // The remainder comes from the start of the following line
            self.advance_line()?;
            let wanted = 4usize.checked_sub(head.len()).ok_or(InvalidLength)?;
            let tail = self.line.take(wanted);
            quad[head.len()..][..tail.len()].copy_from_slice(tail);

            let quad_len = tail.len().checked_add(head.len()).ok_or(InvalidLength)?;
            self.perform_decode(&quad[..quad_len], &mut decoded_block)?
        } else {
            let block = self.line.take(4);
            self.perform_decode(block, &mut decoded_block)?
        };

        self.block_buffer.fill(decoded)
    }

    /// Replace the (exhausted) current line with the next one from the reader.
    ///
    /// Returns `Err(InvalidLength)` if the reader has no further lines, or
    /// the reader's own error if the next line is malformed.
    fn advance_line(&mut self) -> Result<(), Error> {
        debug_assert!(self.line.is_empty(), "expected line buffer to be empty");

        match self.line_reader.next() {
            Some(next_line) => {
                self.line = next_line?;
                Ok(())
            }
            None => Err(InvalidLength),
        }
    }

    /// Decode `src` into `dst`, returning the decoded portion of `dst`.
    ///
    /// While input remains, the `Unpadded` counterpart of the encoding is
    /// used; the encoding `E` itself is applied only once everything has
    /// been consumed (presumably so that padding is only accepted in the
    /// final block — confirm against the `Encoding` trait).
    fn perform_decode<'o>(&self, src: &[u8], dst: &'o mut [u8]) -> Result<&'o [u8], Error> {
        if !self.is_finished() {
            return E::Unpadded::decode(src, dst);
        }

        E::decode(src, dst)
    }
}

/// `std::io::Read` adapter: exposes the decoded bytes as a byte stream.
///
/// Decoding errors are converted into `io::Error` by the `?` operator, which
/// relies on a conversion from this crate's `Error` defined elsewhere.
#[cfg(feature = "std")]
impl<'i, E: Encoding> io::Read for Decoder<'i, E> {
    /// Read up to `buf.len()` decoded bytes, returning `Ok(0)` once all of
    /// the input has been decoded.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.is_finished() {
            return Ok(0);
        }
        // Never ask `decode` for more than what remains: clamp the target to
        // `remaining_len` when the caller's buffer is larger than that.
        let slice = match buf.get_mut(..self.remaining_len()) {
            Some(bytes) => bytes,
            None => buf,
        };

        self.decode(slice)?;
        Ok(slice.len())
    }

    /// Append all remaining decoded bytes to `buf`, returning how many were
    /// added (`0` if the decoder is already finished).
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        if self.is_finished() {
            return Ok(0);
        }
        Ok(self.decode_to_end(buf)?.len())
    }

    /// Fill `buf` completely with decoded bytes, failing if not enough data
    /// remains.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        // An empty buffer is always "exactly filled", even once the decoder
        // is finished; this matches the default `Read::read_exact`.
        if buf.is_empty() {
            return Ok(());
        }

        self.decode(buf)?;
        Ok(())
    }
}

/// Base64 decode buffer for a 1-block input.
///
/// This handles a partially decoded block of data, i.e. data which has been
/// decoded but not read.
#[derive(Clone, Default, Debug)]
struct BlockBuffer {
    /// 3 decoded bytes from a 4-byte Base64-encoded input.
    decoded: [u8; Self::SIZE],

    /// Length of the buffer.
    length: usize,

    /// Position within the buffer.
    position: usize,
}

impl BlockBuffer {
    /// Size of the buffer in bytes.
    const SIZE: usize = 3;

    /// Fill the buffer by decoding up to 3 bytes of decoded Base64 input.
    fn fill(&mut self, decoded_input: &[u8]) -> Result<(), Error> {
        debug_assert!(self.is_empty());

        if decoded_input.len() > Self::SIZE {
            return Err(InvalidLength);
        }

        self.position = 0;
        self.length = decoded_input.len();
        self.decoded[..decoded_input.len()].copy_from_slice(decoded_input);
        Ok(())
    }

    /// Take a specified number of bytes from the buffer.
    ///
    /// Returns as many bytes as possible, or an empty slice if the buffer has
    /// already been read to completion.
    fn take(&mut self, mut nbytes: usize) -> Result<&[u8], Error> {
        debug_assert!(self.position <= self.length);
        let start_pos = self.position;
        let remaining_len = self.length.checked_sub(start_pos).ok_or(InvalidLength)?;

        if nbytes > remaining_len {
            nbytes = remaining_len;
        }

        self.position = self.position.checked_add(nbytes).ok_or(InvalidLength)?;
        Ok(&self.decoded[start_pos..][..nbytes])
    }

    /// Have all of the bytes in this buffer been consumed?
    fn is_empty(&self) -> bool {
        self.position == self.length
    }
}

/// A single line of linewrapped data, providing a read buffer.
#[derive(Clone, Debug)]
pub struct Line<'i> {
    /// Remaining data in the line
    remaining: &'i [u8],
}

impl<'i> Default for Line<'i> {
    fn default() -> Self {
        Self::new(&[])
    }
}

impl<'i> Line<'i> {
    /// Create a new line which wraps the given input data.
    fn new(bytes: &'i [u8]) -> Self {
        Self { remaining: bytes }
    }

    /// Take up to `nbytes` from this line buffer.
    fn take(&mut self, nbytes: usize) -> &'i [u8] {
        let (bytes, rest) = if nbytes < self.remaining.len() {
            self.remaining.split_at(nbytes)
        } else {
            (self.remaining, [].as_ref())
        };

        self.remaining = rest;
        bytes
    }

    /// Slice off a tail of a given length.
    fn slice_tail(&self, nbytes: usize) -> Result<&'i [u8], Error> {
        let offset = self.len().checked_sub(nbytes).ok_or(InvalidLength)?;
        self.remaining.get(offset..).ok_or(InvalidLength)
    }

    /// Get the number of bytes remaining in this line.
    fn len(&self) -> usize {
        self.remaining.len()
    }

    /// Is the buffer for this line empty?
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Trim the newline off the end of this line.
    fn trim_end(&self) -> Self {
        Line::new(match self.remaining {
            [line @ .., CHAR_CR, CHAR_LF] => line,
            [line @ .., CHAR_CR] => line,
            [line @ .., CHAR_LF] => line,
            line => line,
        })
    }
}

/// Iterator over multi-line Base64 input.
///
/// Yields one [`Line`] at a time; contiguous (unwrapped) input is yielded
/// as a single line.
#[derive(Clone)]
struct LineReader<'i> {
    /// Remaining linewrapped data to be processed.
    remaining: &'i [u8],

    /// Line width.
    ///
    /// `None` for unwrapped input, `Some(width)` when every line except
    /// possibly the last is expected to be exactly `width` bytes long.
    line_width: Option<usize>,
}

impl<'i> LineReader<'i> {
    /// Create a new reader which operates over contiguous unwrapped data.
    ///
    /// Returns `Err(InvalidLength)` if `bytes` is empty.
    fn new_unwrapped(bytes: &'i [u8]) -> Result<Self, Error> {
        if bytes.is_empty() {
            return Err(InvalidLength);
        }

        Ok(Self {
            remaining: bytes,
            line_width: None,
        })
    }

    /// Create a new reader which operates over linewrapped data.
    fn new_wrapped(bytes: &'i [u8], line_width: usize) -> Result<Self, Error> {
        if line_width < MIN_LINE_WIDTH {
            return Err(InvalidLength);
        }

        let mut reader = Self::new_unwrapped(bytes)?;
        reader.line_width = Some(line_width);
        Ok(reader)
    }

    /// Is this line reader empty?
    ///
    /// True once no unread input bytes are left, i.e. no further lines can
    /// be produced.
    fn is_empty(&self) -> bool {
        self.remaining.is_empty()
    }

    /// Compute how many bytes the data behind this reader decodes to.
    ///
    /// The reader itself is not advanced; a clone is iterated instead. To
    /// get an exact figure the final Base64 block is actually decoded with
    /// `E`, so errors in that block (or in the line structure) surface here.
    fn decoded_len<E: Encoding>(&self) -> Result<usize, Error> {
        let mut reader = self.clone();

        let mut current = match reader.next().transpose()? {
            Some(first) => first,
            None => return Ok(0),
        };

        // Last 4 bytes of the line preceding `current`, kept so that a final
        // block which straddles a line break can be reassembled.
        let mut carry = [0u8; 4];
        let mut encoded_len = current.len();

        for next_line in reader {
            let following = next_line?;
            carry.copy_from_slice(current.slice_tail(4)?);
            current = following;
            encoded_len = encoded_len
                .checked_add(current.len())
                .ok_or(InvalidLength)?;
        }

        // `current` is now the last line. Number of Base64 bytes in the last
        // block (fewer than 4 only when the encoding is unpadded).
        let tail_block_len = match encoded_len % 4 {
            0 => 4,
            partial => partial,
        };

        // Decoded length of everything before the last block
        let body_len = encoding::decoded_len(
            encoded_len
                .checked_sub(tail_block_len)
                .ok_or(InvalidLength)?,
        );

        // Decoded length of the last block, found by decoding it
        let mut scratch = [0u8; 3];
        let tail_decoded_len = if current.len() < tail_block_len {
            // The last block began on the previous line: stitch together the
            // carried-over bytes and the whole of the last line.
            let carried = tail_block_len
                .checked_sub(current.len())
                .ok_or(InvalidLength)?;
            let offset = 4usize.checked_sub(carried).ok_or(InvalidLength)?;

            // Shift the carried bytes to the front of the buffer
            carry.copy_within(offset.., 0);
            carry[carried..][..current.len()].copy_from_slice(current.remaining);

            let assembled_len = carried.checked_add(current.len()).ok_or(InvalidLength)?;
            E::decode(&carry[..assembled_len], &mut scratch)?.len()
        } else {
            let tail_block = current.slice_tail(tail_block_len)?;
            E::decode(tail_block, &mut scratch)?.len()
        };

        body_len.checked_add(tail_decoded_len).ok_or(InvalidLength)
    }
}

impl<'i> Iterator for LineReader<'i> {
    type Item = Result<Line<'i>, Error>;

    fn next(&mut self) -> Option<Result<Line<'i>, Error>> {
        if let Some(line_width) = self.line_width {
            let rest = match self.remaining.get(line_width..) {
                None | Some([]) => {
                    if self.remaining.is_empty() {
                        return None;
                    } else {
                        let line = Line::new(self.remaining).trim_end();
                        self.remaining = &[];
                        return Some(Ok(line));
                    }
                }
                Some([CHAR_CR, CHAR_LF, rest @ ..]) => rest,
                Some([CHAR_CR, rest @ ..]) => rest,
                Some([CHAR_LF, rest @ ..]) => rest,
                _ => {
                    // Expected a leading newline
                    return Some(Err(Error::InvalidEncoding));
                }
            };

            let line = Line::new(&self.remaining[..line_width]);
            self.remaining = rest;
            Some(Ok(line))
        } else if !self.remaining.is_empty() {
            let line = Line::new(self.remaining).trim_end();
            self.remaining = b"";

            if line.is_empty() {
                None
            } else {
                Some(Ok(line))
            }
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::{alphabet::Alphabet, test_vectors::*, Base64, Base64Unpadded, Decoder};

    #[cfg(feature = "std")]
    use {alloc::vec::Vec, std::io::Read};

    /// Line width passed to `new_wrapped` for the multi-line test vectors.
    const WRAP_WIDTH: usize = 70;

    #[test]
    fn decode_padded() {
        let make = || Decoder::<Base64>::new(PADDED_BASE64.as_bytes()).unwrap();
        decode_test(PADDED_BIN, make)
    }

    #[test]
    fn decode_unpadded() {
        let make = || Decoder::<Base64Unpadded>::new(UNPADDED_BASE64.as_bytes()).unwrap();
        decode_test(UNPADDED_BIN, make)
    }

    #[test]
    fn decode_multiline_padded() {
        let input = MULTILINE_PADDED_BASE64.as_bytes();
        let make = || Decoder::<Base64>::new_wrapped(input, WRAP_WIDTH).unwrap();
        decode_test(MULTILINE_PADDED_BIN, make)
    }

    #[test]
    fn decode_multiline_unpadded() {
        let input = MULTILINE_UNPADDED_BASE64.as_bytes();
        let make = || Decoder::<Base64Unpadded>::new_wrapped(input, WRAP_WIDTH).unwrap();
        decode_test(MULTILINE_UNPADDED_BIN, make)
    }

    #[cfg(feature = "std")]
    #[test]
    fn read_multiline_padded() {
        let input = MULTILINE_PADDED_BASE64.as_bytes();
        let mut decoder = Decoder::<Base64>::new_wrapped(input, WRAP_WIDTH).unwrap();

        let mut sink = Vec::new();
        let read_len = decoder.read_to_end(&mut sink).unwrap();

        assert_eq!(read_len, MULTILINE_PADDED_BIN.len());
        assert_eq!(sink.as_slice(), MULTILINE_PADDED_BIN);
    }

    /// Shared driver for the decoding tests.
    ///
    /// For every chunk size from 1 up to (but not including) the expected
    /// length, builds a fresh decoder with `f` and decodes the data chunk by
    /// chunk, checking each chunk and the remaining-length bookkeeping.
    fn decode_test<'a, F, V>(expected: &[u8], f: F)
    where
        F: Fn() -> Decoder<'a, V>,
        V: Alphabet,
    {
        for chunk_size in 1..expected.len() {
            let mut decoder = f();
            let mut scratch = [0u8; 1024];
            let mut outstanding = decoder.remaining_len();

            for want in expected.chunks(chunk_size) {
                assert!(!decoder.is_finished());
                let got = decoder.decode(&mut scratch[..want.len()]).unwrap();
                assert_eq!(want, got);

                outstanding -= got.len();
                assert_eq!(outstanding, decoder.remaining_len());
            }

            assert!(decoder.is_finished());
            assert_eq!(decoder.remaining_len(), 0);
        }
    }
}