1use utf16_iterators::Utf16Iterator;
10use traits::{CharExt, U16UtfExt};
11use utf8_char::Utf8Char;
12use errors::{InvalidUtf16Slice, InvalidUtf16Tuple, NonBMPError, EmptyStrError, FromStrError};
13extern crate core;
14use self::core::{hash,fmt};
15use self::core::cmp::Ordering;
16use self::core::borrow::Borrow;
17use self::core::ops::Deref;
18use self::core::str::FromStr;
19#[cfg(feature="std")]
20use self::core::iter::FromIterator;
21#[cfg(feature="std")]
22#[allow(deprecated)]
23use std::ascii::AsciiExt;
24#[cfg(feature="ascii")]
25use self::core::char;
26#[cfg(feature="ascii")]
27extern crate ascii;
28#[cfg(feature="ascii")]
29use self::ascii::{AsciiChar,ToAsciiChar,ToAsciiCharError};
30
31
/// A single Unicode codepoint stored as one or two UTF-16 code units.
///
/// `units[0]` always holds the first unit. `units[1]` holds the trailing
/// surrogate when the codepoint needs a surrogate pair and is zero otherwise.
// `Hash`, `Debug`, `Display` and the ordering traits are implemented by hand
// elsewhere in this file instead of being derived.
#[derive(Default, PartialEq, Eq, Clone, Copy)]
pub struct Utf16Char {
    units: [u16; 2],
}
47
48
49 impl FromStr for Utf16Char {
53 type Err = FromStrError;
54 fn from_str(s: &str) -> Result<Self, FromStrError> {
71 match Utf16Char::from_str_start(s) {
72 Ok((u16c,bytes)) if bytes == s.len() => Ok(u16c),
73 Ok((_,_)) => Err(FromStrError::MultipleCodepoints),
74 Err(EmptyStrError) => Err(FromStrError::Empty),
75 }
76 }
77}
78impl From<char> for Utf16Char {
79 fn from(c: char) -> Self {
80 let (first, second) = c.to_utf16_tuple();
81 Utf16Char{ units: [first, second.unwrap_or(0)] }
82 }
83}
84impl From<Utf8Char> for Utf16Char {
85 fn from(utf8: Utf8Char) -> Utf16Char {
86 let (b, utf8_len) = utf8.to_array();
87 match utf8_len {
88 1 => Utf16Char{ units: [b[0] as u16, 0] },
89 4 => {let mut first = 0xd800 - (0x01_00_00u32 >> 10) as u16;
91 first += (b[0] as u16 & 0x07) << 8;
92 first += (b[1] as u16 & 0x3f) << 2;
93 first += (b[2] as u16 & 0x30) >> 4;
94 let mut second = 0xdc00;
95 second |= (b[2] as u16 & 0x0f) << 6;
96 second |= b[3] as u16 & 0x3f;
97 Utf16Char{ units: [first, second] }
98 },
99 _ => { let mut unit = ((b[0] as u16 & 0x1f) << 6) | (b[1] as u16 & 0x3f);
101 if utf8_len == 3 {
102 unit = (unit << 6) | (b[2] as u16 & 0x3f);
103 }
104 Utf16Char{ units: [unit, 0] }
105 },
106 }
107 }
108}
109impl From<Utf16Char> for char {
110 fn from(uc: Utf16Char) -> char {
111 unsafe{ char::from_utf16_tuple_unchecked(uc.to_tuple()) }
112 }
113}
114impl IntoIterator for Utf16Char {
115 type Item=u16;
116 type IntoIter=Utf16Iterator;
117 fn into_iter(self) -> Utf16Iterator {
119 Utf16Iterator::from(self)
120 }
121}
122
#[cfg(feature="std")]
impl Extend<Utf16Char> for Vec<u16> {
    /// Appends the code unit(s) of every character produced by `iter`.
    fn extend<I:IntoIterator<Item=Utf16Char>>(&mut self, iter: I) {
        let chars = iter.into_iter();
        // Every character contributes at least one unit, so the iterator's
        // lower size bound is a safe amount to reserve up front.
        let (at_least, _) = chars.size_hint();
        self.reserve(at_least);
        for u16c in chars {
            // `to_tuple()` reports the second unit as `None` when it is zero.
            let (first, second) = u16c.to_tuple();
            self.push(first);
            if let Some(trailing) = second {
                self.push(trailing);
            }
        }
    }
}
#[cfg(feature="std")]
impl<'a> Extend<&'a Utf16Char> for Vec<u16> {
    /// Appends the code unit(s) of every referenced character.
    ///
    /// `Utf16Char` is `Copy`, so each reference is dereferenced and handed
    /// to the by-value `Extend<Utf16Char>` implementation.
    fn extend<I:IntoIterator<Item=&'a Utf16Char>>(&mut self, iter: I) {
        let owned = iter.into_iter().map(|u16c| *u16c);
        self.extend(owned)
    }
}
#[cfg(feature="std")]
impl FromIterator<Utf16Char> for Vec<u16> {
    /// Collects the code units of every character into a new vector.
    fn from_iter<I:IntoIterator<Item=Utf16Char>>(iter: I) -> Self {
        let mut units: Vec<u16> = Vec::new();
        units.extend(iter);
        units
    }
}
#[cfg(feature="std")]
impl<'a> FromIterator<&'a Utf16Char> for Vec<u16> {
    /// Collects the code units of every referenced character into a new
    /// vector by copying each character and collecting by value.
    fn from_iter<I:IntoIterator<Item=&'a Utf16Char>>(iter: I) -> Self {
        iter.into_iter().map(|u16c| *u16c).collect()
    }
}
156
#[cfg(feature="std")]
impl Extend<Utf16Char> for String {
    /// Appends every character to the string by converting each one to a
    /// `Utf8Char` and extending with those.
    fn extend<I:IntoIterator<Item=Utf16Char>>(&mut self, iter: I) {
        let utf8_chars = iter.into_iter().map(|u16c| Utf8Char::from(u16c));
        self.extend(utf8_chars);
    }
}
#[cfg(feature="std")]
impl<'a> Extend<&'a Utf16Char> for String {
    /// Appends every referenced character to the string; each reference is
    /// dereferenced and passed on by value.
    fn extend<I:IntoIterator<Item=&'a Utf16Char>>(&mut self, iter: I) {
        let owned = iter.into_iter().map(|u16c| *u16c);
        self.extend(owned);
    }
}
#[cfg(feature="std")]
impl FromIterator<Utf16Char> for String {
    /// Builds a new string out of the characters produced by `iter`.
    fn from_iter<I:IntoIterator<Item=Utf16Char>>(iter: I) -> Self {
        let mut text = String::new();
        text.extend(iter);
        text
    }
}
#[cfg(feature="std")]
impl<'a> FromIterator<&'a Utf16Char> for String {
    /// Builds a new string out of the referenced characters by copying each
    /// one and collecting by value.
    fn from_iter<I:IntoIterator<Item=&'a Utf16Char>>(iter: I) -> Self {
        iter.into_iter().map(|u16c| *u16c).collect()
    }
}
183
184
185 impl AsRef<[u16]> for Utf16Char {
189 #[inline]
190 fn as_ref(&self) -> &[u16] {
191 &self.units[..self.len()]
192 }
193}
194impl Borrow<[u16]> for Utf16Char {
195 #[inline]
196 fn borrow(&self) -> &[u16] {
197 self.as_ref()
198 }
199}
200impl Deref for Utf16Char {
201 type Target = [u16];
202 #[inline]
203 fn deref(&self) -> &[u16] {
204 self.as_ref()
205 }
206}
207
208
#[cfg(feature="std")]
#[allow(deprecated)]
impl AsciiExt for Utf16Char {
    type Owned = Self;

    /// True when the first unit is below 128.
    fn is_ascii(&self) -> bool {
        self.units[0] <= 127
    }

    /// Compares two characters after lowercasing ASCII letters on both sides.
    fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
        let (left, right) = (self.to_ascii_lowercase(), other.to_ascii_lowercase());
        left == right
    }

    /// Maps `a`-`z` to `A`-`Z`; every other character is returned unchanged.
    fn to_ascii_uppercase(&self) -> Self {
        let (lo, hi) = (b'a' as u16, b'z' as u16);
        let first = self.units[0];
        if lo <= first && first <= hi {
            Utf16Char { units: [first - lo + b'A' as u16, 0] }
        } else {
            *self
        }
    }

    /// Maps `A`-`Z` to `a`-`z`; every other character is returned unchanged.
    fn to_ascii_lowercase(&self) -> Self {
        let (lo, hi) = (b'A' as u16, b'Z' as u16);
        let first = self.units[0];
        if lo <= first && first <= hi {
            Utf16Char { units: [first - lo + b'a' as u16, 0] }
        } else {
            *self
        }
    }

    /// In-place version of `to_ascii_uppercase()`.
    fn make_ascii_uppercase(&mut self) {
        let upper = self.to_ascii_uppercase();
        *self = upper;
    }

    /// In-place version of `to_ascii_lowercase()`.
    fn make_ascii_lowercase(&mut self) {
        let lower = self.to_ascii_lowercase();
        *self = lower;
    }
}
239
#[cfg(feature="ascii")]
impl From<AsciiChar> for Utf16Char {
    /// Widens the ASCII byte into a single UTF-16 unit.
    #[inline]
    fn from(ac: AsciiChar) -> Self {
        let unit = ac.as_byte() as u16;
        Utf16Char { units: [unit, 0] }
    }
}
#[cfg(feature="ascii")]
impl ToAsciiChar for Utf16Char {
    /// Converts to an `AsciiChar` when the character is ASCII.
    ///
    /// # Errors
    ///
    /// Returns `ToAsciiCharError` when the first unit is 128 or above,
    /// which includes every character stored as a surrogate pair.
    #[inline]
    fn to_ascii_char(self) -> Result<AsciiChar, ToAsciiCharError> {
        // The first unit of a surrogate pair is not a valid `char`, so it
        // must never be passed to `char::from_u32_unchecked()`. Non-ASCII
        // characters are instead replaced by a fixed non-ASCII `char`, which
        // makes the conversion fail without constructing an invalid value.
        let first = self.units[0];
        let c = if first <= 127 { first as u8 as char } else { '\u{80}' };
        AsciiChar::from(c)
    }

    /// Converts to an `AsciiChar` without checking.
    ///
    /// # Safety
    ///
    /// NOTE(review): the first unit is truncated to `u8` and handed to
    /// `AsciiChar::from_unchecked()`; presumably the caller must guarantee
    /// that the character is ASCII — confirm against that function's contract.
    #[inline]
    unsafe fn to_ascii_char_unchecked(self) -> AsciiChar {
        AsciiChar::from_unchecked(self.units[0] as u8)
    }
}
260
261
262 impl hash::Hash for Utf16Char {
266 fn hash<H : hash::Hasher>(&self, state: &mut H) {
267 self.to_char().hash(state);
268 }
269}
270impl fmt::Debug for Utf16Char {
271 fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
272 fmt::Debug::fmt(&self.to_char(), fmtr)
273 }
274}
275impl fmt::Display for Utf16Char {
276 fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
277 fmt::Display::fmt(&Utf8Char::from(*self), fmtr)
278 }
279}
280impl PartialOrd for Utf16Char {
283 #[inline]
284 fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
285 Some(self.cmp(rhs))
286 }
287}
288impl Ord for Utf16Char {
289 #[inline]
290 fn cmp(&self, rhs: &Self) -> Ordering {
291 let lhs = (self.units[0] as u32, self.units[1] as u32);
298 let rhs = (rhs.units[0] as u32, rhs.units[1] as u32);
299 let lhs = (lhs.0 << (lhs.1 >> 12)) + lhs.1;
300 let rhs = (rhs.0 << (rhs.1 >> 12)) + rhs.1;
301 lhs.cmp(&rhs)
302 }
303}
304
305
306 impl PartialEq<char> for Utf16Char {
310 fn eq(&self, u32c: &char) -> bool {
311 *self == Utf16Char::from(*u32c)
312 }
313}
314impl PartialEq<Utf16Char> for char {
315 fn eq(&self, u16c: &Utf16Char) -> bool {
316 Utf16Char::from(*self) == *u16c
317 }
318}
319impl PartialOrd<char> for Utf16Char {
320 fn partial_cmp(&self, u32c: &char) -> Option<Ordering> {
321 self.partial_cmp(&Utf16Char::from(*u32c))
322 }
323}
324impl PartialOrd<Utf16Char> for char {
325 fn partial_cmp(&self, u16c: &Utf16Char) -> Option<Ordering> {
326 Utf16Char::from(*self).partial_cmp(u16c)
327 }
328}
329
330impl PartialEq<Utf8Char> for Utf16Char {
331 fn eq(&self, u8c: &Utf8Char) -> bool {
332 *self == Utf16Char::from(*u8c)
333 }
334}
335impl PartialOrd<Utf8Char> for Utf16Char {
336 fn partial_cmp(&self, u8c: &Utf8Char) -> Option<Ordering> {
337 self.partial_cmp(&Utf16Char::from(*u8c))
338 }
339}
340impl PartialEq<u16> for Utf16Char {
357 fn eq(&self, unit: &u16) -> bool {
358 self.units[0] == *unit && self.units[1] == 0
359 }
360}
361impl PartialEq<u8> for Utf16Char {
372 fn eq(&self, byte: &u8) -> bool {
373 self.units[0] == *byte as u16
374 }
375}
#[cfg(feature = "ascii")]
impl PartialEq<AsciiChar> for Utf16Char {
    /// Compares the first unit with the ASCII character's numeric value.
    #[inline]
    fn eq(&self, ascii: &AsciiChar) -> bool {
        let ascii_unit = *ascii as u16;
        self.units[0] == ascii_unit
    }
}
#[cfg(feature = "ascii")]
impl PartialEq<Utf16Char> for AsciiChar {
    /// Compares the ASCII character's numeric value with the first unit.
    #[inline]
    fn eq(&self, u16c: &Utf16Char) -> bool {
        let ascii_unit = *self as u16;
        ascii_unit == u16c.units[0]
    }
}
#[cfg(feature = "ascii")]
impl PartialOrd<AsciiChar> for Utf16Char {
    /// Orders the first unit against the ASCII character's numeric value.
    #[inline]
    fn partial_cmp(&self, ascii: &AsciiChar) -> Option<Ordering> {
        let ascii_unit = *ascii as u16;
        self.units[0].partial_cmp(&ascii_unit)
    }
}
#[cfg(feature = "ascii")]
impl PartialOrd<Utf16Char> for AsciiChar {
    /// Orders the ASCII character's numeric value against the first unit.
    #[inline]
    fn partial_cmp(&self, u16c: &Utf16Char) -> Option<Ordering> {
        let ascii_unit = *self as u16;
        ascii_unit.partial_cmp(&u16c.units[0])
    }
}
408
409
410 impl Utf16Char {
414 pub fn from_str_start(s: &str) -> Result<(Self,usize), EmptyStrError> {
434 if s.is_empty() {
435 return Err(EmptyStrError);
436 }
437 let b = s.as_bytes();
438 match b[0] {
440 0...127 => {let unit = b[0] as u16;Ok((Utf16Char{ units: [unit, 0] }, 1))
443 },
444 0b1000_0000...0b1101_1111 => {let unit = (((b[1] & 0x3f) as u16) << 0) | (((b[0] & 0x1f) as u16) << 6);Ok((Utf16Char{ units: [unit, 0] }, 2))
448 },
449 0b1110_0000...0b1110_1111 => {let unit = (((b[2] & 0x3f) as u16) << 0) | (((b[1] & 0x3f) as u16) << 6) | (((b[0] & 0x0f) as u16) << 12);Ok((Utf16Char{ units: [unit, 0] }, 3))
454 },
455 _ => {let second = 0xdc00 | (((b[3] & 0x3f) as u16) << 0) | (((b[2] & 0x0f) as u16) << 6);let first = 0xd800-(0x01_00_00u32>>10) as u16+ (((b[2] & 0x30) as u16) >> 4) + (((b[1] & 0x3f) as u16) << 2) + (((b[0] & 0x07) as u16) << 8); Ok((Utf16Char{ units: [first, second] }, 4))
464 }
465 }
466 }
467 pub fn from_slice_start(src: &[u16]) -> Result<(Self,usize), InvalidUtf16Slice> {
470 char::from_utf16_slice_start(src).map(|(_,len)| {
471 let second = if len==2 {src[1]} else {0};
472 (Utf16Char{ units: [src[0], second] }, len)
473 })
474 }
475 pub unsafe fn from_slice_start_unchecked(src: &[u16]) -> (Self,usize) {
481 let first = *src.get_unchecked(0);
482 if first.is_utf16_leading_surrogate() {
483 (Utf16Char{ units: [first, *src.get_unchecked(1)] }, 2)
484 } else {
485 (Utf16Char{ units: [first, 0] }, 1)
486 }
487 }
488 pub fn from_tuple(utf16: (u16,Option<u16>)) -> Result<Self,InvalidUtf16Tuple> {
490 unsafe {char::from_utf16_tuple(utf16).map(|_|
491 Self::from_tuple_unchecked(utf16)
492 )}
493 }
494 pub unsafe fn from_tuple_unchecked(utf16: (u16,Option<u16>)) -> Self {
500 Utf16Char{ units: [utf16.0, utf16.1.unwrap_or(0)] }
501 }
502 pub fn from_bmp(bmp_codepoint: u16) -> Result<Self,NonBMPError> {
520 if bmp_codepoint & 0xf800 != 0xd800 {
521 Ok(Utf16Char{ units: [bmp_codepoint, 0] })
522 } else {
523 Err(NonBMPError)
524 }
525 }
526 #[inline]
534 pub unsafe fn from_bmp_unchecked(bmp_codepoint: u16) -> Self {
535 Utf16Char{ units: [bmp_codepoint, 0] }
536 }
537
538 #[inline]
543 pub fn len(self) -> usize {
544 1 + (self.units[1] as usize >> 15)
545 }
546 #[inline]
550 pub fn is_ascii(&self) -> bool {
551 self.units[0] <= 127
552 }
553 #[cfg(feature="std")]
557 pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
558 self.to_ascii_lowercase() == other.to_ascii_lowercase()
559 }
560 #[cfg(feature="std")]
565 pub fn to_ascii_uppercase(&self) -> Self {
566 let n = self.units[0].wrapping_sub(b'a' as u16);
567 if n < 26 {Utf16Char{ units: [n+b'A' as u16, 0] }}
568 else {*self}
569 }
570 #[cfg(feature="std")]
575 pub fn to_ascii_lowercase(&self) -> Self {
576 let n = self.units[0].wrapping_sub(b'A' as u16);
577 if n < 26 {Utf16Char{ units: [n+b'a' as u16, 0] }}
578 else {*self}
579 }
580 #[cfg(feature="std")]
585 pub fn make_ascii_uppercase(&mut self) {
586 *self = self.to_ascii_uppercase()
587 }
588 #[cfg(feature="std")]
593 pub fn make_ascii_lowercase(&mut self) {
594 *self = self.to_ascii_lowercase();
595 }
596
597 pub fn to_char(self) -> char {
599 self.into()
600 }
601 pub fn to_slice(self, dst: &mut[u16]) -> usize {
609 let extra = self.units[1] as usize >> 15;
611 match dst.get_mut(extra) {
612 Some(first) => *first = self.units[extra],
613 None => panic!("The provided buffer is too small.")
614 }
615 if extra != 0 {dst[0] = self.units[0];}
616 extra+1
617 }
618 #[inline]
620 pub fn to_tuple(self) -> (u16,Option<u16>) {
621 (self.units[0], if self.units[1]==0 {None} else {Some(self.units[1])})
622 }
623}