1use fprint::TypeFingerprint;
5use serde::{Deserialize, Serialize};
6use std::collections::VecDeque;
7use std::hash::{Hash, Hasher};
8
9mod lookup;
10mod nfd;
11use unicode_gen;
12
13pub struct CaseFoldIterator<I: Iterator<Item = char>> {
15 input: I,
17 buf: VecDeque<char>,
18}
19
20impl<I: Iterator<Item = char>> Iterator for CaseFoldIterator<I> {
21 type Item = char;
22
23 fn next(&mut self) -> Option<char> {
24 if let Some(ch) = self.buf.pop_front() {
25 return Some(ch);
26 }
27 self.input.next().map(|ch| {
28 if let Some(mapping) = crate::lookup::casefold(ch) {
29 let mut chars = mapping.chars();
30 let first = chars.next().unwrap();
31 self.buf.extend(chars);
32 first
33 } else {
34 ch
35 }
36 })
37 }
38}
39
40pub fn casefold<I: Iterator<Item = char>>(input: I) -> CaseFoldIterator<I> {
41 CaseFoldIterator { input, buf: VecDeque::new() }
42}
43
44pub fn utf8_bytes(c: char) -> impl Iterator<Item = u8> {
47 let mut buf = [0; 4];
48 let len = c.encode_utf8(&mut buf).len();
49 buf.into_iter().take(len)
50}
51
52pub fn casefold_cmp(a: &str, b: &str) -> std::cmp::Ordering {
58 let a_it = nfd::nfd(casefold(a.chars()).filter(|x| !lookup::default_ignorable(*x)));
59 let b_it = nfd::nfd(casefold(b.chars()).filter(|x| !lookup::default_ignorable(*x)));
60 a_it.cmp(b_it)
61}
62
63#[derive(arbitrary::Arbitrary, Clone, Eq, Serialize, Deserialize, TypeFingerprint)]
65pub struct CasefoldString(String);
66impl CasefoldString {
67 pub fn new(s: String) -> Self {
68 Self(s)
69 }
70}
71impl std::fmt::Debug for CasefoldString {
72 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
73 write!(f, "CasefoldString(\"{}\")", self.0)
74 }
75}
76impl std::fmt::Display for CasefoldString {
77 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
78 write!(f, "{}", self.0)
79 }
80}
81#[repr(transparent)]
91pub struct CasefoldStr(str);
92
93impl CasefoldStr {
94 pub fn new(s: &str) -> &Self {
95 unsafe { &*(s as *const str as *const CasefoldStr) }
98 }
99
100 pub fn as_str(&self) -> &str {
101 &self.0
102 }
103
104 pub fn casefold_normalized_chars(&self) -> impl Iterator<Item = char> + '_ {
105 nfd::nfd(casefold(self.0.chars()).filter(|x| !lookup::default_ignorable(*x)))
106 }
107}
108
109impl std::fmt::Debug for CasefoldStr {
110 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
111 write!(f, "CasefoldStr(\"{}\")", &self.0)
112 }
113}
114
115impl std::fmt::Display for CasefoldStr {
116 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
117 write!(f, "{}", &self.0)
118 }
119}
120
121impl std::cmp::PartialEq for CasefoldStr {
122 fn eq(&self, rhs: &Self) -> bool {
123 casefold_cmp(self.as_str(), rhs.as_str()).is_eq()
124 }
125}
126impl std::cmp::Eq for CasefoldStr {}
127
128impl std::cmp::Ord for CasefoldStr {
129 fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
130 casefold_cmp(self.as_str(), rhs.as_str())
131 }
132}
133impl std::cmp::PartialOrd for CasefoldStr {
134 fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
135 Some(self.cmp(rhs))
136 }
137}
138
139impl ToOwned for CasefoldStr {
140 type Owned = CasefoldString;
141 fn to_owned(&self) -> Self::Owned {
142 CasefoldString::new(self.as_str().to_owned())
143 }
144}
145
146impl Hash for CasefoldStr {
147 fn hash<H: Hasher>(&self, state: &mut H) {
148 for ch in self.casefold_normalized_chars() {
149 ch.hash(state);
150 }
151 }
152}
153
154impl CasefoldString {
155 pub fn as_str(&self) -> &str {
156 &self.0
157 }
158}
159
160impl std::ops::Deref for CasefoldString {
161 type Target = CasefoldStr;
162 fn deref(&self) -> &CasefoldStr {
163 CasefoldStr::new(&self.0)
164 }
165}
166
167impl std::borrow::Borrow<CasefoldStr> for CasefoldString {
168 fn borrow(&self) -> &CasefoldStr {
169 &**self
170 }
171}
172
173impl std::cmp::PartialEq for CasefoldString {
174 fn eq(&self, rhs: &Self) -> bool {
175 **self == **rhs
176 }
177}
178
179impl std::cmp::Ord for CasefoldString {
180 fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
181 (**self).cmp(&**rhs)
182 }
183}
184
185impl std::cmp::PartialOrd for CasefoldString {
186 fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
187 Some(self.cmp(rhs))
188 }
189}
190
191impl Hash for CasefoldString {
193 fn hash<H>(&self, state: &mut H)
194 where
195 H: Hasher,
196 {
197 (**self).hash(state);
198 }
199}
200
201impl From<&str> for CasefoldString {
202 fn from(item: &str) -> Self {
203 CasefoldString(item.into())
204 }
205}
206
207impl<'a> From<&'a str> for &'a CasefoldStr {
208 fn from(item: &'a str) -> Self {
209 CasefoldStr::new(item)
210 }
211}
212#[cfg(test)]
213mod test {
214 use super::*;
215 use std::hash::{Hash, Hasher};
216
217 fn get_hash<T: Hash + ?Sized>(t: &T) -> u64 {
218 let mut s = std::collections::hash_map::DefaultHasher::new();
219 t.hash(&mut s);
220 s.finish()
221 }
222
223 #[test]
224 fn test_casefold() {
225 assert_eq!(casefold("Hello There".chars()).collect::<String>(), "hello there");
226 assert_eq!(casefold("HELLO There".chars()).collect::<String>(), "hello there");
227 }
228
229 #[test]
230 fn test_casefold_cmp() {
231 assert_eq!(casefold_cmp("Hello", "hello"), std::cmp::Ordering::Equal);
232 assert_eq!(casefold_cmp("Hello There", "hello"), std::cmp::Ordering::Greater);
233 assert_eq!(casefold_cmp("hello there", "hello"), std::cmp::Ordering::Greater);
234 assert_eq!(casefold_cmp("hello\u{00AD}", "hello"), std::cmp::Ordering::Equal);
235 assert_eq!(casefold_cmp("\u{03AA}", "\u{0399}\u{0308}"), std::cmp::Ordering::Equal);
236
237 assert_eq!(
239 casefold_cmp("\u{308}\u{05ae}Hello", "\u{05ae}\u{308}hello"),
240 std::cmp::Ordering::Equal
241 );
242 }
243
244 #[test]
245 fn test_casefoldstring() {
246 let a = CasefoldString::new("Hello There".to_owned());
247 let b = CasefoldString::new("hello there".to_owned());
248 let c = CasefoldString::new("hello".to_owned());
249 let d = CasefoldString::new("\u{03AA}".to_owned());
250 let e = CasefoldString::new("\u{0399}\u{0308}".to_owned());
251 assert_eq!(a, a);
253 assert_eq!(a, b);
254 assert_eq!(d, e);
255 assert!(a > c);
256 assert!(b > c);
257 assert_eq!(a.0, format!("{}", a));
258 assert_eq!("CasefoldString(\"Hello There\")", format!("{:?}", a));
260 assert_eq!(format!("{}", "Hello There".to_owned()), format!("{}", a));
262 }
263
264 #[test]
265 fn test_casefold_hash_equality() {
266 let a = CasefoldString::new("hello\u{00AD}".to_owned());
268 let b = CasefoldString::new("hello".to_owned());
269 assert_eq!(a, b);
270 assert_eq!(get_hash(&a), get_hash(&b));
271 }
272
273 #[test]
274 fn test_casefoldstr() {
275 use std::borrow::Borrow;
276
277 let a = CasefoldString::new("Hello There".to_owned());
278 let b = CasefoldStr::new("hello there");
279 let c = CasefoldStr::new("hello");
280
281 let borrowed_a: &CasefoldStr = a.borrow();
283 assert_eq!(borrowed_a, b);
284 assert!(borrowed_a > c);
285
286 assert_eq!(b, b);
288 assert_eq!(CasefoldStr::new("Hello"), CasefoldStr::new("hello"));
289 assert!(b > c);
290
291 assert_eq!(b.as_str(), "hello there");
293 assert_ne!(b.as_str(), "Hello There");
294
295 let owned_b: CasefoldString = b.to_owned();
297 assert_eq!(owned_b, a);
298
299 assert_eq!(get_hash(&a), get_hash(borrowed_a));
301 assert_eq!(get_hash(&owned_b), get_hash(b));
302 }
303
304 #[test]
305 fn test_casefold_str_normalized() {
306 let raw = "Hello\u{00AD} World\u{2126}";
308 let cf_str = CasefoldStr::new(raw);
309
310 let normalized_chars: String = cf_str.casefold_normalized_chars().collect();
311
312 let expected_str = "hello world\u{03c9}";
319 assert_eq!(normalized_chars, expected_str);
320 }
321}