1use fprint::TypeFingerprint;
5use serde::{Deserialize, Serialize};
6use std::collections::VecDeque;
7use std::hash::{Hash, Hasher};
8
9mod lookup;
10mod nfd;
11use unicode_gen;
12
13pub struct CaseFoldIterator<I: Iterator<Item = char>> {
15 input: I,
17 buf: VecDeque<char>,
18}
19
20impl<I: Iterator<Item = char>> Iterator for CaseFoldIterator<I> {
21 type Item = char;
22
23 fn next(&mut self) -> Option<char> {
24 if let Some(ch) = self.buf.pop_front() {
25 return Some(ch);
26 }
27 self.input.next().map(|ch| {
28 if let Some(mapping) = crate::lookup::casefold(ch) {
29 let mut chars = mapping.chars();
30 let first = chars.next().unwrap();
31 self.buf.extend(chars);
32 first
33 } else {
34 ch
35 }
36 })
37 }
38}
39
40pub fn casefold<I: Iterator<Item = char>>(input: I) -> CaseFoldIterator<I> {
42 CaseFoldIterator { input, buf: VecDeque::new() }
43}
44
45pub fn casefold_cmp(a: &str, b: &str) -> std::cmp::Ordering {
51 let a_it = nfd::nfd(casefold(a.chars()).filter(|x| !lookup::default_ignorable(*x)));
52 let b_it = nfd::nfd(casefold(b.chars()).filter(|x| !lookup::default_ignorable(*x)));
53 a_it.cmp(b_it)
54}
55
56#[derive(arbitrary::Arbitrary, Clone, Eq, Serialize, Deserialize, TypeFingerprint)]
58pub struct CasefoldString(String);
59impl CasefoldString {
60 pub fn new(s: String) -> Self {
61 Self(s)
62 }
63}
64impl std::fmt::Debug for CasefoldString {
65 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
66 write!(f, "CasefoldString(\"{}\")", self.0)
67 }
68}
69impl std::fmt::Display for CasefoldString {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
71 write!(f, "{}", self.0)
72 }
73}
74impl std::ops::Deref for CasefoldString {
75 type Target = str;
76 fn deref(&self) -> &str {
77 &self.0
78 }
79}
80impl std::cmp::PartialEq for CasefoldString {
81 fn eq(&self, rhs: &Self) -> bool {
82 casefold_cmp(&self.0, &rhs.0) == std::cmp::Ordering::Equal
83 }
84}
85impl std::cmp::Ord for CasefoldString {
86 fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
87 casefold_cmp(&self.0, &rhs.0)
88 }
89}
90impl std::cmp::PartialOrd for CasefoldString {
91 fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
92 Some(casefold_cmp(&self.0, &rhs.0))
93 }
94}
95
96pub fn casefold_hash(s: &str, state: &mut impl Hasher) {
97 let normalized_chars = nfd::nfd(casefold(s.chars()).filter(|x| !lookup::default_ignorable(*x)));
98 for ch in normalized_chars {
99 ch.hash(state);
100 }
101}
102
103impl Hash for CasefoldString {
105 fn hash<H>(&self, state: &mut H)
106 where
107 H: std::hash::Hasher,
108 {
109 casefold_hash(&self.0, state);
110 }
111}
112
113impl From<&str> for CasefoldString {
114 fn from(item: &str) -> Self {
115 CasefoldString(item.into())
116 }
117}
118
119#[cfg(test)]
120mod test {
121 use super::*;
122
123 #[test]
124 fn test_casefold() {
125 assert_eq!(casefold("Hello There".chars()).collect::<String>(), "hello there");
126 assert_eq!(casefold("HELLO There".chars()).collect::<String>(), "hello there");
127 }
128
129 #[test]
130 fn test_casefold_cmp() {
131 assert_eq!(casefold_cmp("Hello", "hello"), std::cmp::Ordering::Equal);
132 assert_eq!(casefold_cmp("Hello There", "hello"), std::cmp::Ordering::Greater);
133 assert_eq!(casefold_cmp("hello there", "hello"), std::cmp::Ordering::Greater);
134 assert_eq!(casefold_cmp("hello\u{00AD}", "hello"), std::cmp::Ordering::Equal);
135 assert_eq!(casefold_cmp("\u{03AA}", "\u{0399}\u{0308}"), std::cmp::Ordering::Equal);
136
137 assert_eq!(
139 casefold_cmp("\u{308}\u{05ae}Hello", "\u{05ae}\u{308}hello"),
140 std::cmp::Ordering::Equal
141 );
142 }
143
144 #[test]
145 fn test_casefoldstring() {
146 let a = CasefoldString::new("Hello There".to_owned());
147 let b = CasefoldString::new("hello there".to_owned());
148 let c = CasefoldString::new("hello".to_owned());
149 let d = CasefoldString::new("\u{03AA}".to_owned());
150 let e = CasefoldString::new("\u{0399}\u{0308}".to_owned());
151 assert_eq!(a, a);
153 assert_eq!(a, b);
154 assert_eq!(d, e);
155 assert!(a > c);
156 assert!(b > c);
157 assert_eq!(a.0, format!("{}", a));
158 assert_eq!("CasefoldString(\"Hello There\")", format!("{:?}", a));
160 assert_eq!(format!("{}", "Hello There".to_owned()), format!("{}", a));
162 }
163
164 #[test]
165 fn test_casefold_hash_equality() {
166 use std::hash::{Hash, Hasher};
167 fn get_hash<T: Hash>(t: &T) -> u64 {
168 let mut s = std::collections::hash_map::DefaultHasher::new();
169 t.hash(&mut s);
170 s.finish()
171 }
172 let a = CasefoldString::new("hello\u{00AD}".to_owned());
174 let b = CasefoldString::new("hello".to_owned());
175 assert_eq!(a, b);
176 assert_eq!(get_hash(&a), get_hash(&b));
177 }
178}