fxfs_unicode/
lib.rs
1use fprint::TypeFingerprint;
5use serde::{Deserialize, Serialize};
6use std::collections::VecDeque;
7
8mod lookup;
9mod nfd;
10use unicode_gen;
11
12pub struct CaseFoldIterator<I: Iterator<Item = char>> {
14 input: I,
16 buf: VecDeque<char>,
17}
18
19impl<I: Iterator<Item = char>> Iterator for CaseFoldIterator<I> {
20 type Item = char;
21
22 fn next(&mut self) -> Option<char> {
23 if let Some(ch) = self.buf.pop_front() {
24 return Some(ch);
25 }
26 self.input.next().map(|ch| {
27 if let Some(mapping) = crate::lookup::casefold(ch) {
28 let mut chars = mapping.chars();
29 let first = chars.next().unwrap();
30 self.buf.extend(chars);
31 first
32 } else {
33 ch
34 }
35 })
36 }
37}
38
39pub fn casefold<I: Iterator<Item = char>>(input: I) -> CaseFoldIterator<I> {
41 CaseFoldIterator { input, buf: VecDeque::new() }
42}
43
44pub fn casefold_cmp(a: &str, b: &str) -> std::cmp::Ordering {
50 let a_it = nfd::nfd(casefold(a.chars()).filter(|x| !lookup::default_ignorable(*x)));
51 let b_it = nfd::nfd(casefold(b.chars()).filter(|x| !lookup::default_ignorable(*x)));
52 a_it.cmp(b_it)
53}
54
55#[derive(arbitrary::Arbitrary, Clone, Eq, Serialize, Deserialize, TypeFingerprint)]
57pub struct CasefoldString(String);
58impl CasefoldString {
59 pub fn new(s: String) -> Self {
60 Self(s)
61 }
62}
63impl std::fmt::Debug for CasefoldString {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
65 write!(f, "CasefoldString(\"{}\")", self.0)
66 }
67}
68impl std::fmt::Display for CasefoldString {
69 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
70 write!(f, "{}", self.0)
71 }
72}
73impl std::ops::Deref for CasefoldString {
74 type Target = str;
75 fn deref(&self) -> &str {
76 &self.0
77 }
78}
79impl std::cmp::PartialEq for CasefoldString {
80 fn eq(&self, rhs: &Self) -> bool {
81 casefold_cmp(&self.0, &rhs.0) == std::cmp::Ordering::Equal
82 }
83}
84impl std::cmp::Ord for CasefoldString {
85 fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
86 casefold_cmp(&self.0, &rhs.0)
87 }
88}
89impl std::cmp::PartialOrd for CasefoldString {
90 fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
91 Some(casefold_cmp(&self.0, &rhs.0))
92 }
93}
94
95impl std::hash::Hash for CasefoldString {
97 fn hash<H>(&self, state: &mut H)
98 where
99 H: std::hash::Hasher,
100 {
101 for ch in casefold(self.0.chars()) {
102 ch.hash(state);
103 }
104 }
105}
106
107impl From<&str> for CasefoldString {
108 fn from(item: &str) -> Self {
109 CasefoldString(item.into())
110 }
111}
112
113#[cfg(test)]
114mod test {
115 use super::*;
116
117 #[test]
118 fn test_casefold() {
119 assert_eq!(casefold("Hello There".chars()).collect::<String>(), "hello there");
120 assert_eq!(casefold("HELLO There".chars()).collect::<String>(), "hello there");
121 }
122
123 #[test]
124 fn test_casefold_cmp() {
125 assert_eq!(casefold_cmp("Hello", "hello"), std::cmp::Ordering::Equal);
126 assert_eq!(casefold_cmp("Hello There", "hello"), std::cmp::Ordering::Greater);
127 assert_eq!(casefold_cmp("hello there", "hello"), std::cmp::Ordering::Greater);
128 assert_eq!(casefold_cmp("hello\u{00AD}", "hello"), std::cmp::Ordering::Equal);
129 assert_eq!(casefold_cmp("\u{03AA}", "\u{0399}\u{0308}"), std::cmp::Ordering::Equal);
130
131 assert_eq!(
133 casefold_cmp("\u{308}\u{05ae}Hello", "\u{05ae}\u{308}hello"),
134 std::cmp::Ordering::Equal
135 );
136 }
137
138 #[test]
139 fn test_casefoldstring() {
140 let a = CasefoldString::new("Hello There".to_owned());
141 let b = CasefoldString::new("hello there".to_owned());
142 let c = CasefoldString::new("hello".to_owned());
143 let d = CasefoldString::new("\u{03AA}".to_owned());
144 let e = CasefoldString::new("\u{0399}\u{0308}".to_owned());
145 assert_eq!(a, a);
147 assert_eq!(a, b);
148 assert_eq!(d, e);
149 assert!(a > c);
150 assert!(b > c);
151 assert_eq!(a.0, format!("{}", a));
152 assert_eq!("CasefoldString(\"Hello There\")", format!("{:?}", a));
154 assert_eq!(format!("{}", "Hello There".to_owned()), format!("{}", a));
156 }
157}