http/uri/
scheme.rs

1use std::convert::TryFrom;
2use std::fmt;
3use std::hash::{Hash, Hasher};
4use std::str::FromStr;
5
6use bytes::Bytes;
7
8use super::{ErrorKind, InvalidUri};
9use crate::byte_str::ByteStr;
10
11/// Represents the scheme component of a URI
12#[derive(Clone)]
13pub struct Scheme {
14    pub(super) inner: Scheme2,
15}
16
17#[derive(Clone, Debug)]
18pub(super) enum Scheme2<T = Box<ByteStr>> {
19    None,
20    Standard(Protocol),
21    Other(T),
22}
23
24#[derive(Copy, Clone, Debug)]
25pub(super) enum Protocol {
26    Http,
27    Https,
28}
29
30impl Scheme {
31    /// HTTP protocol scheme
32    pub const HTTP: Scheme = Scheme {
33        inner: Scheme2::Standard(Protocol::Http),
34    };
35
36    /// HTTP protocol over TLS.
37    pub const HTTPS: Scheme = Scheme {
38        inner: Scheme2::Standard(Protocol::Https),
39    };
40
41    pub(super) fn empty() -> Self {
42        Scheme {
43            inner: Scheme2::None,
44        }
45    }
46
47    /// Return a str representation of the scheme
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// # use http::uri::*;
53    /// let scheme: Scheme = "http".parse().unwrap();
54    /// assert_eq!(scheme.as_str(), "http");
55    /// ```
56    #[inline]
57    pub fn as_str(&self) -> &str {
58        use self::Protocol::*;
59        use self::Scheme2::*;
60
61        match self.inner {
62            Standard(Http) => "http",
63            Standard(Https) => "https",
64            Other(ref v) => &v[..],
65            None => unreachable!(),
66        }
67    }
68}
69
70impl<'a> TryFrom<&'a [u8]> for Scheme {
71    type Error = InvalidUri;
72    #[inline]
73    fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
74        use self::Scheme2::*;
75
76        match Scheme2::parse_exact(s)? {
77            None => Err(ErrorKind::InvalidScheme.into()),
78            Standard(p) => Ok(Standard(p).into()),
79            Other(_) => {
80                let bytes = Bytes::copy_from_slice(s);
81
82                // Safety: postcondition on parse_exact() means that s and
83                // hence bytes are valid UTF-8.
84                let string = unsafe { ByteStr::from_utf8_unchecked(bytes) };
85
86                Ok(Other(Box::new(string)).into())
87            }
88        }
89    }
90}
91
92impl<'a> TryFrom<&'a str> for Scheme {
93    type Error = InvalidUri;
94    #[inline]
95    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
96        TryFrom::try_from(s.as_bytes())
97    }
98}
99
100impl FromStr for Scheme {
101    type Err = InvalidUri;
102
103    fn from_str(s: &str) -> Result<Self, Self::Err> {
104        TryFrom::try_from(s)
105    }
106}
107
108impl fmt::Debug for Scheme {
109    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110        fmt::Debug::fmt(self.as_str(), f)
111    }
112}
113
114impl fmt::Display for Scheme {
115    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116        f.write_str(self.as_str())
117    }
118}
119
120impl AsRef<str> for Scheme {
121    #[inline]
122    fn as_ref(&self) -> &str {
123        self.as_str()
124    }
125}
126
127impl PartialEq for Scheme {
128    fn eq(&self, other: &Scheme) -> bool {
129        use self::Protocol::*;
130        use self::Scheme2::*;
131
132        match (&self.inner, &other.inner) {
133            (&Standard(Http), &Standard(Http)) => true,
134            (&Standard(Https), &Standard(Https)) => true,
135            (&Other(ref a), &Other(ref b)) => a.eq_ignore_ascii_case(b),
136            (&None, _) | (_, &None) => unreachable!(),
137            _ => false,
138        }
139    }
140}
141
142impl Eq for Scheme {}
143
144/// Case-insensitive equality
145///
146/// # Examples
147///
148/// ```
149/// # use http::uri::Scheme;
150/// let scheme: Scheme = "HTTP".parse().unwrap();
151/// assert_eq!(scheme, *"http");
152/// ```
153impl PartialEq<str> for Scheme {
154    fn eq(&self, other: &str) -> bool {
155        self.as_str().eq_ignore_ascii_case(other)
156    }
157}
158
159/// Case-insensitive equality
160impl PartialEq<Scheme> for str {
161    fn eq(&self, other: &Scheme) -> bool {
162        other == self
163    }
164}
165
166/// Case-insensitive hashing
167impl Hash for Scheme {
168    fn hash<H>(&self, state: &mut H)
169    where
170        H: Hasher,
171    {
172        match self.inner {
173            Scheme2::None => (),
174            Scheme2::Standard(Protocol::Http) => state.write_u8(1),
175            Scheme2::Standard(Protocol::Https) => state.write_u8(2),
176            Scheme2::Other(ref other) => {
177                other.len().hash(state);
178                for &b in other.as_bytes() {
179                    state.write_u8(b.to_ascii_lowercase());
180                }
181            }
182        }
183    }
184}
185
186impl<T> Scheme2<T> {
187    pub(super) fn is_none(&self) -> bool {
188        match *self {
189            Scheme2::None => true,
190            _ => false,
191        }
192    }
193}
194
// Upper bound, in bytes, on the length of a scheme. Keeping schemes short
// leaves room for further optimizations later.
const MAX_SCHEME_LEN: usize = 64;
198
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// SCHEME_CHARS is a lookup table, indexed by byte value, of the characters
// that may appear in the scheme part of a URI. An entry is 0 for an invalid
// character; for a valid character the entry is the character itself (i.e.
// the entry for 43 is b'+' because b'+' == 43u8).
//
// b':' also has a non-zero entry although it is not a scheme character: it
// acts as a sentinel so that the parsers can recognise the end of the scheme
// with the same table lookup.
//
// b'~' is deliberately NOT in the table: the grammar above (RFC 3986,
// section 3.1) does not allow it in a scheme.
//
// An important characteristic of this table is that all entries above 127
// are invalid. This makes every valid entry a single-byte UTF-8 code point,
// which means that a slice consisting only of valid entries is valid UTF-8.
const SCHEME_CHARS: [u8; 256] = [
    //  0      1      2      3      4      5      6      7      8      9
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //   x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  1x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  2x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  3x
        0,     0,     0,  b'+',     0,  b'-',  b'.',     0,  b'0',  b'1', //  4x
     b'2',  b'3',  b'4',  b'5',  b'6',  b'7',  b'8',  b'9',  b':',     0, //  5x
        0,     0,     0,     0,     0,  b'A',  b'B',  b'C',  b'D',  b'E', //  6x
     b'F',  b'G',  b'H',  b'I',  b'J',  b'K',  b'L',  b'M',  b'N',  b'O', //  7x
     b'P',  b'Q',  b'R',  b'S',  b'T',  b'U',  b'V',  b'W',  b'X',  b'Y', //  8x
     b'Z',     0,     0,     0,     0,     0,     0,  b'a',  b'b',  b'c', //  9x
     b'd',  b'e',  b'f',  b'g',  b'h',  b'i',  b'j',  b'k',  b'l',  b'm', // 10x
     b'n',  b'o',  b'p',  b'q',  b'r',  b's',  b't',  b'u',  b'v',  b'w', // 11x
     b'x',  b'y',  b'z',     0,     0,     0,     0,     0,     0,     0, // 12x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 13x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 14x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 15x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 16x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 17x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 18x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 19x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 20x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 21x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 22x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 23x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 24x
        0,     0,     0,     0,     0,     0                              // 25x
];
236
237impl Scheme2<usize> {
238    // Postcondition: On all Ok() returns, s is valid UTF-8
239    fn parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri> {
240        match s {
241            b"http" => Ok(Protocol::Http.into()),
242            b"https" => Ok(Protocol::Https.into()),
243            _ => {
244                if s.len() > MAX_SCHEME_LEN {
245                    return Err(ErrorKind::SchemeTooLong.into());
246                }
247
248                // check that each byte in s is a SCHEME_CHARS which implies
249                // that it is a valid single byte UTF-8 code point.
250                for &b in s {
251                    match SCHEME_CHARS[b as usize] {
252                        b':' => {
253                            // Don't want :// here
254                            return Err(ErrorKind::InvalidScheme.into());
255                        }
256                        0 => {
257                            return Err(ErrorKind::InvalidScheme.into());
258                        }
259                        _ => {}
260                    }
261                }
262
263                Ok(Scheme2::Other(()))
264            }
265        }
266    }
267
268    pub(super) fn parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri> {
269        if s.len() >= 7 {
270            // Check for HTTP
271            if s[..7].eq_ignore_ascii_case(b"http://") {
272                // Prefix will be striped
273                return Ok(Protocol::Http.into());
274            }
275        }
276
277        if s.len() >= 8 {
278            // Check for HTTPs
279            if s[..8].eq_ignore_ascii_case(b"https://") {
280                return Ok(Protocol::Https.into());
281            }
282        }
283
284        if s.len() > 3 {
285            for i in 0..s.len() {
286                let b = s[i];
287
288                match SCHEME_CHARS[b as usize] {
289                    b':' => {
290                        // Not enough data remaining
291                        if s.len() < i + 3 {
292                            break;
293                        }
294
295                        // Not a scheme
296                        if &s[i + 1..i + 3] != b"//" {
297                            break;
298                        }
299
300                        if i > MAX_SCHEME_LEN {
301                            return Err(ErrorKind::SchemeTooLong.into());
302                        }
303
304                        // Return scheme
305                        return Ok(Scheme2::Other(i));
306                    }
307                    // Invald scheme character, abort
308                    0 => break,
309                    _ => {}
310                }
311            }
312        }
313
314        Ok(Scheme2::None)
315    }
316}
317
318impl Protocol {
319    pub(super) fn len(&self) -> usize {
320        match *self {
321            Protocol::Http => 4,
322            Protocol::Https => 5,
323        }
324    }
325}
326
327impl<T> From<Protocol> for Scheme2<T> {
328    fn from(src: Protocol) -> Self {
329        Scheme2::Standard(src)
330    }
331}
332
333#[doc(hidden)]
334impl From<Scheme2> for Scheme {
335    fn from(src: Scheme2) -> Self {
336        Scheme { inner: src }
337    }
338}
339
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn scheme_eq_to_str() {
        assert_eq!(&scheme("http"), "http");
        assert_eq!(&scheme("https"), "https");
        assert_eq!(&scheme("ftp"), "ftp");
        assert_eq!(&scheme("my+funky+scheme"), "my+funky+scheme");
    }

    #[test]
    fn scheme_eq_ignores_ascii_case() {
        // Scheme vs. str
        assert_eq!(&scheme("FTP"), "ftp");
        // Scheme vs. Scheme
        assert_eq!(scheme("My+Funky+Scheme"), scheme("my+funky+scheme"));
    }

    #[test]
    fn invalid_scheme_is_error() {
        Scheme::try_from("my_funky_scheme").expect_err("Unexpectedly valid Scheme");

        // A standalone scheme must not carry the ':' separator.
        Scheme::try_from("http:").expect_err("Unexpectedly valid Scheme");

        // Invalid UTF-8
        Scheme::try_from([0xC0].as_ref()).expect_err("Unexpectedly valid Scheme");
    }

    #[test]
    fn scheme_length_limit() {
        // Exactly MAX_SCHEME_LEN bytes is still accepted ...
        let longest = "a".repeat(MAX_SCHEME_LEN);
        assert_eq!(&scheme(&longest), longest.as_str());

        // ... one more byte is rejected.
        let too_long = "a".repeat(MAX_SCHEME_LEN + 1);
        Scheme::try_from(too_long.as_str()).expect_err("Unexpectedly valid Scheme");
    }

    /// Parses `s` into a `Scheme`, panicking with the offending input on
    /// failure. The message is only formatted when parsing actually fails.
    fn scheme(s: &str) -> Scheme {
        s.parse()
            .unwrap_or_else(|_| panic!("Invalid scheme: {}", s))
    }
}