flate2/gz/
bufread.rs

1use std::cmp;
2use std::io;
3use std::io::prelude::*;
4use std::mem;
5
6#[cfg(feature = "tokio")]
7use futures::Poll;
8#[cfg(feature = "tokio")]
9use tokio_io::{AsyncRead, AsyncWrite};
10
11use super::{GzBuilder, GzHeader};
12use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
13use crate::crc::CrcReader;
14use crate::deflate;
15use crate::Compression;
16
/// Copies as many bytes as will fit from `from[*pos..]` into the front of
/// `into`.
///
/// `pos` is a cursor into `from`. It is advanced by the number of bytes
/// copied, and that count is returned. A cursor at or beyond the end of
/// `from` copies nothing and returns 0 instead of panicking.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `get` yields `None` for an out-of-range cursor, which is treated the
    // same as "nothing left to copy".
    let pending = from.get(*pos..).unwrap_or(&[]);
    let n = cmp::min(into.len(), pending.len());
    into[..n].copy_from_slice(&pending[..n]);
    *pos += n;
    n
}
25
/// Builds the error reported when a gzip checksum does not match the data.
pub(crate) fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
32
/// Builds the error reported when the fixed part of a gzip header is invalid.
fn bad_header() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    io::Error::new(kind, "invalid gzip header")
}
36
/// Reads exactly two bytes from `r` and decodes them as a little-endian
/// `u16`.
///
/// Fails with whatever error `read_exact` reports, including
/// `UnexpectedEof` when fewer than two bytes are available.
fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> {
    let mut raw = [0u8; 2];
    r.read_exact(&mut raw)?;
    Ok(u16::from_le_bytes(raw))
}
42
43pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> {
44    let mut crc_reader = CrcReader::new(r);
45    let mut header = [0; 10];
46    crc_reader.read_exact(&mut header)?;
47
48    let id1 = header[0];
49    let id2 = header[1];
50    if id1 != 0x1f || id2 != 0x8b {
51        return Err(bad_header());
52    }
53    let cm = header[2];
54    if cm != 8 {
55        return Err(bad_header());
56    }
57
58    let flg = header[3];
59    let mtime = ((header[4] as u32) << 0)
60        | ((header[5] as u32) << 8)
61        | ((header[6] as u32) << 16)
62        | ((header[7] as u32) << 24);
63    let _xfl = header[8];
64    let os = header[9];
65
66    let extra = if flg & FEXTRA != 0 {
67        let xlen = read_le_u16(&mut crc_reader)?;
68        let mut extra = vec![0; xlen as usize];
69        crc_reader.read_exact(&mut extra)?;
70        Some(extra)
71    } else {
72        None
73    };
74    let filename = if flg & FNAME != 0 {
75        // wow this is slow
76        let mut b = Vec::new();
77        for byte in crc_reader.by_ref().bytes() {
78            let byte = byte?;
79            if byte == 0 {
80                break;
81            }
82            b.push(byte);
83        }
84        Some(b)
85    } else {
86        None
87    };
88    let comment = if flg & FCOMMENT != 0 {
89        // wow this is slow
90        let mut b = Vec::new();
91        for byte in crc_reader.by_ref().bytes() {
92            let byte = byte?;
93            if byte == 0 {
94                break;
95            }
96            b.push(byte);
97        }
98        Some(b)
99    } else {
100        None
101    };
102
103    if flg & FHCRC != 0 {
104        let calced_crc = crc_reader.crc().sum() as u16;
105        let stored_crc = read_le_u16(&mut crc_reader)?;
106        if calced_crc != stored_crc {
107            return Err(corrupt());
108        }
109    }
110
111    Ok(GzHeader {
112        extra: extra,
113        filename: filename,
114        comment: comment,
115        operating_system: os,
116        mtime: mtime,
117    })
118}
119
/// A gzip streaming encoder
///
/// This structure wraps a [`BufRead`] source of uncompressed data and
/// implements [`Read`]: reading from it yields the gzip-compressed form of
/// that data (header, deflate stream, then trailer).
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a `Vec<u8>` or an error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
///     let f = File::open("examples/hello_world.txt")?;
///     let b = BufReader::new(f);
///     let mut gz = GzEncoder::new(b, Compression::fast());
///     let mut buffer = Vec::new();
///     gz.read_to_end(&mut buffer)?;
///     Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate compressor reading from the source through a CRC-tracking
    // wrapper; the wrapper's sum and byte count feed the trailer.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Pre-built gzip header bytes, emitted before any compressed data.
    header: Vec<u8>,
    // Cursor into `header` while the header is being emitted; reset to 0 and
    // reused as the cursor into the 8-byte trailer once `eof` is set.
    pos: usize,
    // Set once `inner` has reported end of stream; from then on only the
    // trailer is produced.
    eof: bool,
}
157
158pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
159    let crc = CrcReader::new(r);
160    GzEncoder {
161        inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
162        header: header,
163        pos: 0,
164        eof: false,
165    }
166}
167
168impl<R: BufRead> GzEncoder<R> {
169    /// Creates a new encoder which will use the given compression level.
170    ///
171    /// The encoder is not configured specially for the emitted header. For
172    /// header configuration, see the `GzBuilder` type.
173    ///
174    /// The data read from the stream `r` will be compressed and available
175    /// through the returned reader.
176    pub fn new(r: R, level: Compression) -> GzEncoder<R> {
177        GzBuilder::new().buf_read(r, level)
178    }
179
180    fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
181        if self.pos == 8 {
182            return Ok(0);
183        }
184        let crc = self.inner.get_ref().crc();
185        let ref arr = [
186            (crc.sum() >> 0) as u8,
187            (crc.sum() >> 8) as u8,
188            (crc.sum() >> 16) as u8,
189            (crc.sum() >> 24) as u8,
190            (crc.amount() >> 0) as u8,
191            (crc.amount() >> 8) as u8,
192            (crc.amount() >> 16) as u8,
193            (crc.amount() >> 24) as u8,
194        ];
195        Ok(copy(into, arr, &mut self.pos))
196    }
197}
198
199impl<R> GzEncoder<R> {
200    /// Acquires a reference to the underlying reader.
201    pub fn get_ref(&self) -> &R {
202        self.inner.get_ref().get_ref()
203    }
204
205    /// Acquires a mutable reference to the underlying reader.
206    ///
207    /// Note that mutation of the reader may result in surprising results if
208    /// this encoder is continued to be used.
209    pub fn get_mut(&mut self) -> &mut R {
210        self.inner.get_mut().get_mut()
211    }
212
213    /// Returns the underlying stream, consuming this encoder
214    pub fn into_inner(self) -> R {
215        self.inner.into_inner().into_inner()
216    }
217}
218
/// Splits an 8-byte buffer into two little-endian `u32` values: bytes 0..4
/// first, bytes 4..8 second.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    // Fold from the most significant byte down so each step shifts the
    // accumulator up by one byte.
    let le_u32 = |bytes: &[u8]| {
        bytes
            .iter()
            .rev()
            .fold(0u32, |acc, &b| (acc << 8) | u32::from(b))
    };
    (le_u32(&buf[..4]), le_u32(&buf[4..]))
}
231
232impl<R: BufRead> Read for GzEncoder<R> {
233    fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
234        let mut amt = 0;
235        if self.eof {
236            return self.read_footer(into);
237        } else if self.pos < self.header.len() {
238            amt += copy(into, &self.header, &mut self.pos);
239            if amt == into.len() {
240                return Ok(amt);
241            }
242            let tmp = into;
243            into = &mut tmp[amt..];
244        }
245        match self.inner.read(into)? {
246            0 => {
247                self.eof = true;
248                self.pos = 0;
249                self.read_footer(into)
250            }
251            n => Ok(amt + n),
252        }
253    }
254}
255
256impl<R: BufRead + Write> Write for GzEncoder<R> {
257    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
258        self.get_mut().write(buf)
259    }
260
261    fn flush(&mut self) -> io::Result<()> {
262        self.get_mut().flush()
263    }
264}
265
/// A gzip streaming decoder
///
/// This structure wraps a [`BufRead`] source of compressed data and
/// implements [`Read`]: reading from it yields the uncompressed data.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = GzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Current position in the header / body / trailer state machine.
    inner: GzState,
    // Parsed header of the current member, once available.
    header: Option<GzHeader>,
    // Deflate decoder over the source, wrapped so that the decompressed
    // output is checksummed and counted for trailer verification.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // When true, decoding continues with the next member after a trailer
    // instead of stopping.
    multi: bool,
}
306
/// Decoding progress of a `GzDecoder`.
#[derive(Debug)]
enum GzState {
    /// The header has not been fully parsed yet. Holds the header bytes
    /// already pulled from the reader so parsing can be retried after a
    /// `WouldBlock` error.
    Header(Vec<u8>),
    /// The header has been parsed; compressed data is being decoded.
    Body,
    /// The compressed data is exhausted and the 8-byte trailer is being
    /// collected. The `usize` is the number of trailer bytes read so far.
    Finished(usize, [u8; 8]),
    /// Header parsing failed in the constructor; the error is returned by
    /// the next `read`.
    Err(io::Error),
    /// Nothing more will be produced; `read` returns `Ok(0)`.
    End,
}
315
/// Replay-and-record reader used while parsing a header.
///
/// Reads are served first from the bytes that were already in `buf` when the
/// adapter was created. Once those are used up, reads go to `reader`, and
/// every byte obtained that way is also appended to `buf` so a later attempt
/// can replay it.
struct Buffer<'a, T: 'a> {
    buf: &'a mut Vec<u8>,
    buf_cur: usize,
    buf_max: usize,
    reader: &'a mut T,
}

impl<'a, T> Buffer<'a, T> {
    /// Wraps `reader`, replaying the current contents of `buf` first.
    fn new(buf: &'a mut Vec<u8>, reader: &'a mut T) -> Buffer<'a, T> {
        // Only the bytes present right now are replayed; anything appended
        // later is a recording of fresh input.
        let buf_max = buf.len();
        Buffer {
            buf,
            buf_cur: 0,
            buf_max,
            reader,
        }
    }
}

impl<'a, T: Read> Read for Buffer<'a, T> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.buf_cur != self.buf_max {
            // Replay phase: hand out previously recorded bytes.
            let served = {
                let mut replay = &self.buf[self.buf_cur..self.buf_max];
                replay.read(buf)?
            };
            self.buf_cur += served;
            return Ok(served);
        }

        // Record phase: read fresh input and remember it.
        let fresh = self.reader.read(buf)?;
        self.buf.extend_from_slice(&buf[..fresh]);
        Ok(fresh)
    }
}
350
351impl<R: BufRead> GzDecoder<R> {
352    /// Creates a new decoder from the given reader, immediately parsing the
353    /// gzip header.
354    pub fn new(mut r: R) -> GzDecoder<R> {
355        let mut buf = Vec::with_capacity(10); // minimum header length
356        let mut header = None;
357
358        let result = {
359            let mut reader = Buffer::new(&mut buf, &mut r);
360            read_gz_header(&mut reader)
361        };
362
363        let state = match result {
364            Ok(hdr) => {
365                header = Some(hdr);
366                GzState::Body
367            }
368            Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(buf),
369            Err(err) => GzState::Err(err),
370        };
371
372        GzDecoder {
373            inner: state,
374            reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
375            multi: false,
376            header,
377        }
378    }
379
380    fn multi(mut self, flag: bool) -> GzDecoder<R> {
381        self.multi = flag;
382        self
383    }
384}
385
386impl<R> GzDecoder<R> {
387    /// Returns the header associated with this stream, if it was valid
388    pub fn header(&self) -> Option<&GzHeader> {
389        self.header.as_ref()
390    }
391
392    /// Acquires a reference to the underlying reader.
393    pub fn get_ref(&self) -> &R {
394        self.reader.get_ref().get_ref()
395    }
396
397    /// Acquires a mutable reference to the underlying stream.
398    ///
399    /// Note that mutation of the stream may result in surprising results if
400    /// this encoder is continued to be used.
401    pub fn get_mut(&mut self) -> &mut R {
402        self.reader.get_mut().get_mut()
403    }
404
405    /// Consumes this decoder, returning the underlying reader.
406    pub fn into_inner(self) -> R {
407        self.reader.into_inner().into_inner()
408    }
409}
410
impl<R: BufRead> Read for GzDecoder<R> {
    /// Reads decompressed bytes into `into`, stepping the decoder's state
    /// machine until data is produced, the stream ends, or an error occurs.
    ///
    /// Returns `Ok(0)` when `into` is empty or once the `End` state has been
    /// reached. A `WouldBlock` error leaves the decoder in the state it was
    /// in, so the call can be retried. Any other error returned from inside
    /// a state leaves the decoder in `End`, so later calls return `Ok(0)`.
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        let GzDecoder {
            inner,
            header,
            reader,
            multi,
        } = self;

        loop {
            // Take the current state out, leaving `End` behind. Each arm
            // either evaluates to the next state (stored by this assignment)
            // or writes `*inner` itself before returning early.
            *inner = match mem::replace(inner, GzState::End) {
                GzState::Header(mut buf) => {
                    // Retry header parsing: bytes already in `buf` are
                    // replayed first, then more are pulled from the
                    // underlying reader and recorded into `buf`.
                    let result = {
                        let mut reader = Buffer::new(&mut buf, reader.get_mut().get_mut());
                        read_gz_header(&mut reader)
                    };
                    let hdr = result.map_err(|err| {
                        // Keep what has been buffered so the next call can
                        // resume.
                        if io::ErrorKind::WouldBlock == err.kind() {
                            *inner = GzState::Header(buf);
                        }

                        err
                    })?;
                    *header = Some(hdr);
                    GzState::Body
                }
                GzState::Body => {
                    // An empty destination must not be mistaken for end of
                    // the compressed data.
                    if into.is_empty() {
                        *inner = GzState::Body;
                        return Ok(0);
                    }

                    let n = reader.read(into).map_err(|err| {
                        if io::ErrorKind::WouldBlock == err.kind() {
                            *inner = GzState::Body;
                        }

                        err
                    })?;

                    match n {
                        // No more decompressed data: move on to the trailer.
                        0 => GzState::Finished(0, [0; 8]),
                        n => {
                            *inner = GzState::Body;
                            return Ok(n);
                        }
                    }
                }
                GzState::Finished(pos, mut buf) => {
                    if pos < buf.len() {
                        // Trailer incomplete: read more of it straight from
                        // the underlying reader, not through the deflate
                        // decoder or CRC reader.
                        let n = reader
                            .get_mut()
                            .get_mut()
                            .read(&mut buf[pos..])
                            .and_then(|n| {
                                // Input ended before all 8 trailer bytes
                                // arrived.
                                if n == 0 {
                                    Err(io::ErrorKind::UnexpectedEof.into())
                                } else {
                                    Ok(n)
                                }
                            })
                            .map_err(|err| {
                                if io::ErrorKind::WouldBlock == err.kind() {
                                    *inner = GzState::Finished(pos, buf);
                                }

                                err
                            })?;

                        GzState::Finished(pos + n, buf)
                    } else {
                        // Full trailer available: compare the stored
                        // checksum and length with what the CRC reader
                        // observed.
                        let (crc, amt) = finish(&buf);

                        if crc != reader.crc().sum() || amt != reader.crc().amount() {
                            return Err(corrupt());
                        } else if *multi {
                            // In multi-member mode, an empty `fill_buf`
                            // means there is no further member.
                            let is_eof = reader
                                .get_mut()
                                .get_mut()
                                .fill_buf()
                                .map(|buf| buf.is_empty())
                                .map_err(|err| {
                                    if io::ErrorKind::WouldBlock == err.kind() {
                                        *inner = GzState::Finished(pos, buf);
                                    }

                                    err
                                })?;

                            if is_eof {
                                GzState::End
                            } else {
                                // Another member follows: reset the CRC
                                // reader and the decoder, drop the old
                                // header, and parse the next one.
                                reader.reset();
                                reader.get_mut().reset_data();
                                header.take();
                                GzState::Header(Vec::with_capacity(10))
                            }
                        } else {
                            GzState::End
                        }
                    }
                }
                // Error recorded by the constructor; `End` was already left
                // in place above, so it is reported once.
                GzState::Err(err) => return Err(err),
                GzState::End => return Ok(0),
            };
        }
    }
}
519
// Only compiled with the `tokio` feature. No methods are overridden: the
// trait's provided defaults are used as-is.
#[cfg(feature = "tokio")]
impl<R: AsyncRead + BufRead> AsyncRead for GzDecoder<R> {}
522
523impl<R: BufRead + Write> Write for GzDecoder<R> {
524    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
525        self.get_mut().write(buf)
526    }
527
528    fn flush(&mut self) -> io::Result<()> {
529        self.get_mut().flush()
530    }
531}
532
#[cfg(feature = "tokio")]
impl<R: AsyncWrite + BufRead> AsyncWrite for GzDecoder<R> {
    /// Delegates shutdown to the underlying stream.
    fn shutdown(&mut self) -> Poll<(), io::Error> {
        let stream = self.get_mut();
        stream.shutdown()
    }
}
539
/// A gzip streaming decoder that decodes all members of a multistream
///
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
/// decode all consecutive members while `GzDecoder` will only decompress
/// the first gzip member. The multistream format is commonly used in
/// bioinformatics, for example when using the BGZF compressed data.
///
/// This structure wraps a [`BufRead`] source and implements [`Read`]: reading
/// from it consumes all gzip members from the underlying reader and yields
/// their uncompressed data.
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// #   e.write_all(b"Hello World").unwrap();
/// #   let bytes = e.finish().unwrap();
/// #   println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
///    let mut gz = MultiGzDecoder::new(&bytes[..]);
///    let mut s = String::new();
///    gz.read_to_string(&mut s)?;
///    Ok(s)
/// }
/// ```
// Thin wrapper around a `GzDecoder` that has multi-member mode switched on.
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
582
583impl<R: BufRead> MultiGzDecoder<R> {
584    /// Creates a new decoder from the given reader, immediately parsing the
585    /// (first) gzip header. If the gzip stream contains multiple members all will
586    /// be decoded.
587    pub fn new(r: R) -> MultiGzDecoder<R> {
588        MultiGzDecoder(GzDecoder::new(r).multi(true))
589    }
590}
591
592impl<R> MultiGzDecoder<R> {
593    /// Returns the current header associated with this stream, if it's valid
594    pub fn header(&self) -> Option<&GzHeader> {
595        self.0.header()
596    }
597
598    /// Acquires a reference to the underlying reader.
599    pub fn get_ref(&self) -> &R {
600        self.0.get_ref()
601    }
602
603    /// Acquires a mutable reference to the underlying stream.
604    ///
605    /// Note that mutation of the stream may result in surprising results if
606    /// this encoder is continued to be used.
607    pub fn get_mut(&mut self) -> &mut R {
608        self.0.get_mut()
609    }
610
611    /// Consumes this decoder, returning the underlying reader.
612    pub fn into_inner(self) -> R {
613        self.0.into_inner()
614    }
615}
616
617impl<R: BufRead> Read for MultiGzDecoder<R> {
618    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
619        self.0.read(into)
620    }
621}
622
// Only compiled with the `tokio` feature. No methods are overridden: the
// trait's provided defaults are used as-is.
#[cfg(feature = "tokio")]
impl<R: AsyncRead + BufRead> AsyncRead for MultiGzDecoder<R> {}
625
626impl<R: BufRead + Write> Write for MultiGzDecoder<R> {
627    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
628        self.get_mut().write(buf)
629    }
630
631    fn flush(&mut self) -> io::Result<()> {
632        self.get_mut().flush()
633    }
634}
635
#[cfg(feature = "tokio")]
impl<R: AsyncWrite + BufRead> AsyncWrite for MultiGzDecoder<R> {
    /// Delegates shutdown to the underlying stream.
    fn shutdown(&mut self) -> Poll<(), io::Error> {
        AsyncWrite::shutdown(self.get_mut())
    }
}