// encode_unicode/errors.rs

1/* Copyright 2016 The encode_unicode Developers
2 *
3 * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4 * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5 * http://opensource.org/licenses/MIT>, at your option. This file may not be
6 * copied, modified, or distributed except according to those terms.
7 */
8
9
10//! Boilerplatey error types
11
12extern crate core;
13use self::core::fmt::{self,Display,Formatter};
14#[cfg(feature="std")]
15use std::error::Error;
16
17
// Implements the textual side of an error type from one expression.
//
// `$err` is the error type and `$desc` is an expression that can be called
// with `&$err` and returns a `&'static str` (typically a closure).
//
// * Without the `std` feature, `description()` is generated as an inherent
//   method.
// * With the `std` feature, the same body is generated as
//   `Error::description()` instead.
// * `Display` always prints exactly the string returned by `description()`.
macro_rules! description {($err:ty, $desc:expr) => {
    #[cfg(not(feature="std"))]
    impl $err {
        /// Get a short, static description of the error.
        pub fn description(&self) -> &'static str {
            ($desc)(self)
        }
    }
    #[cfg(feature="std")]
    impl Error for $err {
        fn description(&self) -> &'static str {
            ($desc)(self)
        }
    }
    impl Display for $err {
        fn fmt(&self,  fmtr: &mut Formatter) -> fmt::Result {
            write!(fmtr, "{}", self.description())
        }
    }
}}
38
39
// Declares a public unit-struct error type that has one fixed description.
//
// The invocation must supply exactly two doc attributes (two `///` lines)
// before the type name, followed by `Name => "description"`.
// The struct derives `Clone`, `Copy`, `Debug`, `PartialEq` and `Eq`, and the
// description is wired up through `description!`.
macro_rules! single_cause {(#[$doc1:meta] #[$doc2:meta] $err:ident => $desc:expr) => {
    // Rust 1.15 doesn't understand $(#[$doc:meta])* $:ident
    #[$doc1]
    #[$doc2]
    #[derive(Clone,Copy, Debug, PartialEq,Eq)]
    pub struct $err;
    // The struct carries no data, so the closure ignores its argument.
    description!{$err, |_| $desc }
}}
48
49
50single_cause!{
51    /// Cannot tell whether an `u16` needs an extra unit,
52    /// because it's a trailing surrogate itself.
53    InvalidUtf16FirstUnit => "is a trailing surrogate"
54}
55
56single_cause!{
57    /// Cannot create an `Utf8Char` or `Utf16Char` from the first codepoint of a str,
58    /// because there are none.
59    EmptyStrError => "is empty"
60}
61
62single_cause!{
63    /// Cannot create an `Utf8Char` from a standalone `u8`
64    /// that is not an ASCII character.
65    NonAsciiError => "is not an ASCII character"
66}
67
68single_cause!{
69    /// Cannot create an `Utf16Char` from a standalone `u16` that is not a
70    /// codepoint in the basic multilingual plane, but part of a suurrogate pair.
71    NonBMPError => "is not a codepoint in the basic multilingual plane"
72}
73
74
75
// Declares a public field-less error enum where each variant has a fixed
// description.
//
// Invocation grammar:
//
//     simple!{/// exactly one doc line for the type
//         TypeName {
//             /// any number of doc lines per variant
//             ::Variant => "description",
//             // ...at least one variant; the trailing comma is required
//         }}
//
// The leading `::` before each variant name is part of the macro's pattern
// and is not emitted into the generated enum.
// The enum derives `Clone`, `Copy`, `Debug`, `PartialEq` and `Eq`, and the
// descriptions are wired up through `description!`.
macro_rules! simple {(#[$tydoc:meta] $err:ident  {
                          $($(#[$vardoc:meta])* ::$variant:ident => $string:expr),+,
                      } ) => {
    #[$tydoc]
    #[derive(Clone,Copy, Debug, PartialEq,Eq)]
    pub enum $err {
        $($(#[$vardoc])* $variant),*
    }
    // Map every variant to its string with an exhaustive match.
    description!{$err, |e: &$err| match *e {$($err::$variant=>$string),*} }
}}
86
87
88simple!{/// Reasons why an `u32` is not a valid UTF codepoint.
89    InvalidCodepoint {
90        /// It's reserved for UTF-16 surrogate pairs."
91        ::Utf16Reserved => "is reserved for UTF-16 surrogate pairs",
92        /// It's higher than the highest codepoint (which is 0x10ffff).
93        ::TooHigh => "is higher than the highest codepoint",
94    }}
95use self::InvalidCodepoint::*;
96impl InvalidCodepoint {
97    /// Get the range of values for which this error would be given.
98    pub fn error_range(self) -> (u32,u32) {match self {
99        Utf16Reserved => (0xd8_00, 0xdf_ff),
100        TooHigh => (0x00_10_ff_ff, 0xff_ff_ff_ff),
101    }}
102}
103
104
105simple!{/// Reasons why a `[u16; 2]` doesn't form a valid UTF-16 codepoint.
106    InvalidUtf16Array {
107        /// The first unit is a trailing/low surrogate, which is never valid.
108        ::FirstIsTrailingSurrogate => "the first unit is a trailing surrogate, which is never valid",
109        /// The second unit is needed, but is not a trailing surrogate.
110        ::SecondIsNotTrailingSurrogate => "the second unit is needed but is not a trailing surrogate",
111    }}
112
113simple!{/// Reasons why one or two `u16`s are not valid UTF-16, in sinking precedence.
114    InvalidUtf16Tuple {
115        /// The first unit is a trailing/low surrogate, which is never valid.
116        ///
117        /// Note that the value of a low surrogate is actually higher than a high surrogate.
118        ::FirstIsTrailingSurrogate => "the first unit is a trailing / low surrogate, which is never valid",
119        /// You provided a second unit, but the first one stands on its own.
120        ::SuperfluousSecond => "the second unit is superfluous",
121        /// The first and only unit requires a second unit.
122        ::MissingSecond => "the first unit requires a second unit",
123        /// The first unit requires a second unit, but it's not a trailing/low surrogate.
124        ///
125        /// Note that the value of a low surrogate is actually higher than a high surrogate.
126        ::InvalidSecond => "the required second unit is not a trailing / low surrogate",
127    }}
128
129
130simple!{/// Reasons why a slice of `u16`s doesn't start with valid UTF-16.
131    InvalidUtf16Slice {
132        /// The slice is empty.
133        ::EmptySlice => "the slice is empty",
134        /// The first unit is a low surrogate.
135        ::FirstLowSurrogate => "the first unit is a trailing surrogate",
136        /// The first and only unit requires a second unit.
137        ::MissingSecond => "the first and only unit requires a second one",
138        /// The first unit requires a second one, but it's not a trailing surrogate.
139        ::SecondNotLowSurrogate => "the required second unit is not a trailing surrogate",
140    }}
141
142simple!{/// Types of invalid sequences encountered by `Utf16CharParser`.
143    Utf16PairError {
144        /// A trailing surrogate was not preceeded by a leading surrogate.
145        ::UnexpectedTrailingSurrogate => "a trailing surrogate was not preceeded by a leading surrogate",
146        /// A leading surrogate was followed by an unit that was not a trailing surrogate.
147        ::UnmatchedLeadingSurrogate => "a leading surrogate was followed by an unit that was not a trailing surrogate",
148        /// A trailing surrogate was expected when the end was reached.
149        ::Incomplete => "a trailing surrogate was expected when the end was reached",
150    }}
151
152
153simple!{/// Reasons why `Utf8Char::from_str()` or `Utf16Char::from_str()` failed.
154    FromStrError {
155        /// `Utf8Char` or `Utf16Char` cannot store more than a single codepoint.
156        ::MultipleCodepoints => "has more than one codepoint",
157        /// `Utf8Char` or `Utf16Char` cannot be empty.
158        ::Empty => "is empty",
159    }}
160
161
162simple!{/// Reasons why a byte is not the start of a UTF-8 codepoint.
163    InvalidUtf8FirstByte {
164        /// Sequences cannot be longer than 4 bytes. Is given for values >= 240.
165        ::TooLongSeqence => "is greater than 247 (UTF-8 sequences cannot be longer than four bytes)",
166        /// This byte belongs to a previous sequence. Is given for values between 128 and 192 (exclusive).
167        ::ContinuationByte => "is a continuation of a previous sequence",
168    }}
169use self::InvalidUtf8FirstByte::*;
170
171
172
173macro_rules! complex {
174($err:ty
175 {$($sub:ty => $to:expr,)*}
176 {$($desc:pat => $string:expr),+,}
177 => $use_cause:expr =>
178 {$($cause:pat => $result:expr),+,} $(#[$causedoc:meta])*
179) => {
180    $(impl From<$sub> for $err {
181          fn from(error: $sub) -> $err {
182              $to(error)
183          }
184      })*
185    #[cfg(not(feature="std"))]
186    impl $err {
187        #[allow(missing_docs)]
188        pub fn description(&self) -> &'static str {
189            match *self{ $($desc => $string,)* }
190        }
191        /// A hack to avoid two Display impls
192        fn cause(&self) -> Option<&Display> {None}
193    }
194    #[cfg(feature="std")]
195    impl Error for $err {
196        fn description(&self) -> &'static str {
197            match *self{ $($desc => $string,)* }
198        }
199        $(#[$causedoc])*
200        fn cause(&self) -> Option<&Error> {
201            match *self{ $($cause => $result,)* }
202        }
203    }
204    impl Display for $err {
205        fn fmt(&self,  fmtr: &mut Formatter) -> fmt::Result {
206            match (self.cause(), $use_cause) {
207                (Some(d),true) => write!(fmtr, "{}: {}", self.description(), d),
208                        _      => write!(fmtr, "{}", self.description()),
209            }
210        }
211    }
212}}
213
214
215/// Reasons why a byte sequence is not valid UTF-8, excluding invalid codepoint.
216/// In sinking precedence.
217#[derive(Clone,Copy, Debug, PartialEq,Eq)]
218pub enum InvalidUtf8 {
219    /// Something is wrong with the first byte.
220    FirstByte(InvalidUtf8FirstByte),
221    /// The byte at index 1...3 should be a continuation byte,
222    /// but dosesn't fit the pattern 0b10xx_xxxx.
223    NotAContinuationByte(usize),
224    /// There are too many leading zeros: it could be a byte shorter.
225    ///
226    /// [Decoding this could allow someone to input otherwise prohibited
227    /// characters and sequences, such as "../"](https://tools.ietf.org/html/rfc3629#section-10).
228    OverLong,
229}
230use self::InvalidUtf8::*;
231complex!{InvalidUtf8 {
232        InvalidUtf8FirstByte => FirstByte,
233    } {
234        FirstByte(TooLongSeqence) => "the first byte is greater than 239 (UTF-8 sequences cannot be longer than four bytes)",
235        FirstByte(ContinuationByte) => "the first byte is a continuation of a previous sequence",
236        OverLong => "the sequence contains too many zeros and could be shorter",
237        NotAContinuationByte(_) => "the sequence is too short",
238    } => false => {
239        FirstByte(ref cause) => Some(cause),
240        _ => None,
241    }/// Returns `Some` if the error is a `InvalidUtf8FirstByte`.
242}
243
244
245/// Reasons why a byte array is not valid UTF-8, in sinking precedence.
246#[derive(Clone,Copy, Debug, PartialEq,Eq)]
247pub enum InvalidUtf8Array {
248    /// Not a valid UTF-8 sequence.
249    Utf8(InvalidUtf8),
250    /// Not a valid unicode codepoint.
251    Codepoint(InvalidCodepoint),
252}
253complex!{InvalidUtf8Array {
254        InvalidUtf8 => InvalidUtf8Array::Utf8,
255        InvalidCodepoint => InvalidUtf8Array::Codepoint,
256    } {
257        InvalidUtf8Array::Utf8(_) => "the sequence is invalid UTF-8",
258        InvalidUtf8Array::Codepoint(_) => "the encoded codepoint is invalid",
259    } => true => {
260        InvalidUtf8Array::Utf8(ref u) => Some(u),
261        InvalidUtf8Array::Codepoint(ref c) => Some(c),
262    }/// Always returns `Some`.
263}
264
265
266/// Reasons why a byte slice is not valid UTF-8, in sinking precedence.
267#[derive(Clone,Copy, Debug, PartialEq,Eq)]
268pub enum InvalidUtf8Slice {
269    /// Something is certainly wrong with the first byte.
270    Utf8(InvalidUtf8),
271    /// The encoded codepoint is invalid:
272    Codepoint(InvalidCodepoint),
273    /// The slice is too short; n bytes was required.
274    TooShort(usize),
275}
276complex!{InvalidUtf8Slice {
277        InvalidUtf8 => InvalidUtf8Slice::Utf8,
278        InvalidCodepoint => InvalidUtf8Slice::Codepoint,
279    } {
280        InvalidUtf8Slice::Utf8(_) => "the sequence is invalid UTF-8",
281        InvalidUtf8Slice::Codepoint(_) => "the encoded codepoint is invalid",
282        InvalidUtf8Slice::TooShort(1) => "the slice is empty",
283        InvalidUtf8Slice::TooShort(_) => "the slice is shorter than the sequence",
284    } => true => {
285        InvalidUtf8Slice::Utf8(ref u) => Some(u),
286        InvalidUtf8Slice::Codepoint(ref c) => Some(c),
287        InvalidUtf8Slice::TooShort(_) => None,
288    }
289}