unicase/unicode/
mod.rs
1use alloc::string::String;
2use core::cmp::Ordering;
3use core::hash::{Hash, Hasher};
4
5use self::map::lookup;
6mod map;
7
/// Newtype around a string-like value `S`.
///
/// The comparison, ordering and hashing impls in this module do not look at
/// the wrapped string's raw contents; they operate on the sequence of chars
/// obtained by passing every char of the string through `map::lookup`.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10
11impl<S: AsRef<str>> Unicode<S> {
12 pub fn to_folded_case(&self) -> String {
13 self.0.as_ref().chars().flat_map(lookup).collect()
14 }
15}
16
17impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
18 #[inline]
19 fn eq(&self, other: &Unicode<S2>) -> bool {
20 let mut left = self.0.as_ref().chars().flat_map(lookup);
21 let mut right = other.0.as_ref().chars().flat_map(lookup);
22
23 loop {
25 let x = match left.next() {
26 None => return right.next().is_none(),
27 Some(val) => val,
28 };
29
30 let y = match right.next() {
31 None => return false,
32 Some(val) => val,
33 };
34
35 if x != y {
36 return false;
37 }
38 }
39 }
40}
41
42impl<S: AsRef<str>> Eq for Unicode<S> {}
43
44impl<T: AsRef<str>> PartialOrd for Unicode<T> {
45 #[inline]
46 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
47 Some(self.cmp(other))
48 }
49}
50
51impl<T: AsRef<str>> Ord for Unicode<T> {
52 #[inline]
53 fn cmp(&self, other: &Self) -> Ordering {
54 let self_chars = self.0.as_ref().chars().flat_map(lookup);
55 let other_chars = other.0.as_ref().chars().flat_map(lookup);
56 self_chars.cmp(other_chars)
57 }
58}
59
60impl<S: AsRef<str>> Hash for Unicode<S> {
61 #[inline]
62 fn hash<H: Hasher>(&self, hasher: &mut H) {
63 let mut buf = [0; 4];
64 for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
65 let len = char_to_utf8(c, &mut buf);
66 for &b in &buf[..len] {
70 hasher.write_u8(b);
71 }
72 }
73 hasher.write_u8(0xFF);
75 }
76}
77
/// Encodes `c` as UTF-8 into the start of `dst` and returns how many bytes
/// (1 to 4) were written.
///
/// Bytes of `dst` past the returned length are left untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Four bytes always suffice for a single `char`, so `encode_utf8` cannot
    // panic on this buffer.
    c.encode_utf8(dst).len()
}
106
mod fold {
    /// The result of folding a single char: zero to three replacement chars.
    ///
    /// `Fold` is its own iterator. Calling `next` yields the stored chars
    /// front to back, shrinking the value to the next smaller variant each
    /// time until it reaches `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the first remaining char and keeps the rest, in order.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Emit front to back like the other variants. Yielding
                    // `three` first would rotate the sequence, so a char with
                    // a three-char fold would not compare equal to its own
                    // folded spelling.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of chars still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
170
#[cfg(test)]
mod tests {
    use super::Unicode;

    /// Asserts that two strings compare equal once both are wrapped in
    /// `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 (LATIN SMALL LIGATURE FL) full-folds to the two chars "fl".
        // Written as an escape so the ligature cannot be silently normalized
        // to plain ASCII, which would make this assertion trivially true.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[test]
    fn test_to_folded_case() {
        assert_eq!(Unicode("Maße").to_folded_case(), "masse");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}