xml/
util.rs

1use std::io::{self, Read};
2use std::str;
3use std::fmt;
4
/// Reasons why reading a single UTF-8 character from a byte stream can fail.
#[derive(Debug)]
pub enum CharReadError {
    /// The stream ended before a complete character could be decoded.
    UnexpectedEof,
    /// The bytes that were read are not valid UTF-8.
    Utf8(str::Utf8Error),
    /// The underlying reader returned an error.
    Io(io::Error),
}
11
12impl From<str::Utf8Error> for CharReadError {
13    fn from(e: str::Utf8Error) -> CharReadError {
14        CharReadError::Utf8(e)
15    }
16}
17
18impl From<io::Error> for CharReadError {
19    fn from(e: io::Error) -> CharReadError {
20        CharReadError::Io(e)
21    }
22}
23
24impl fmt::Display for CharReadError {
25    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26        use self::CharReadError::*;
27        match *self {
28            UnexpectedEof => write!(f, "unexpected end of stream"),
29            Utf8(ref e) => write!(f, "UTF-8 decoding error: {}", e),
30            Io(ref e) => write!(f, "I/O error: {}", e)
31        }
32    }
33}
34
35pub fn next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError> {
36    const MAX_CODEPOINT_LEN: usize = 4;
37
38    let mut bytes = source.bytes();
39    let mut buf = [0u8; MAX_CODEPOINT_LEN];
40    let mut pos = 0;
41
42    loop {
43        let next = match bytes.next() {
44            Some(Ok(b)) => b,
45            Some(Err(e)) => return Err(e.into()),
46            None if pos == 0 => return Ok(None),
47            None => return Err(CharReadError::UnexpectedEof)
48        };
49        buf[pos] = next;
50        pos += 1;
51
52        match str::from_utf8(&buf[..pos]) {
53            Ok(s) => return Ok(s.chars().next()),  // always Some(..)
54            Err(_) if pos < MAX_CODEPOINT_LEN => {},
55            Err(e) => return Err(e.into())
56        }
57    }
58}
59
#[cfg(test)]
mod tests {
    /// Exercises `next_char_from` on valid input of each UTF-8 width class
    /// used here, on empty input, on truncated and invalid byte sequences,
    /// and on a reader that always fails.
    #[test]
    fn test_next_char_from() {
        use std::io;

        let mut bytes: &[u8] = "correct".as_bytes();    // correct ASCII
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('c'));

        let mut bytes: &[u8] = "правильно".as_bytes();  // correct BMP
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('п'));

        let mut bytes: &[u8] = "😊".as_bytes();          // correct non-BMP
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('😊'));

        let mut bytes: &[u8] = b"";                     // empty
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), None);

        let mut bytes: &[u8] = b"\xf0\x9f\x98";         // incomplete code point
        match super::next_char_from(&mut bytes).unwrap_err() {
            super::CharReadError::UnexpectedEof => {},
            e => panic!("Unexpected result: {:?}", e)
        };

        let mut bytes: &[u8] = b"\xff\x9f\x98\x32";     // invalid code point
        match super::next_char_from(&mut bytes).unwrap_err() {
            super::CharReadError::Utf8(_) => {},
            e => panic!("Unexpected result: {:?}", e)
        };

        // error during read
        struct ErrorReader;
        impl io::Read for ErrorReader {
            // The buffer is never touched: every read fails immediately.
            fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
                Err(io::Error::new(io::ErrorKind::Other, "test error"))
            }
        }

        let mut r = ErrorReader;
        match super::next_char_from(&mut r).unwrap_err() {
            // Compare the rendered message rather than the deprecated
            // `Error::description()`.
            super::CharReadError::Io(ref e) if e.kind() == io::ErrorKind::Other &&
                                               e.to_string() == "test error" => {},
            e => panic!("Unexpected result: {:?}", e)
        }
    }
}