unicase/
lib.rs

Help
1#![cfg_attr(test, deny(missing_docs))]
2#![cfg_attr(test, deny(warnings))]
3#![cfg_attr(feature = "nightly", feature(test))]
4#![no_std]
5
6//! # UniCase
7//!
8//! UniCase provides a way of specifying strings that are case-insensitive.
9//!
10//! UniCase supports full [Unicode case
11//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
12//! utilize faster ASCII case comparisons, if both strings are ASCII.
13//!
14//! Using the `UniCase::new()` constructor will check the string to see if it
15//! is all ASCII. When a `UniCase` is compared against another, if both are
16//! ASCII, it will use the faster comparison.
17//!
//! This crate also provides the `Ascii` type, which always uses ASCII case
//! comparisons. Use it when the text is already known to be ASCII.
20//!
21//! ## Example
22//!
23//! ```rust
24//! use unicase::UniCase;
25//!
26//! let a = UniCase::new("Maße");
27//! let b = UniCase::new("MASSE");
28//! let c = UniCase::new("mase");
29//!
30//! assert_eq!(a, b);
31//! assert!(b != c);
32//! ```
33//!
34//! ## Ascii
35//!
36//! ```rust
37//! use unicase::Ascii;
38//!
39//! let a = Ascii::new("foobar");
40//! let b = Ascii::new("FoObAr");
41//!
42//! assert_eq!(a, b);
43//! ```
44
45#[cfg(test)]
46extern crate std;
47#[cfg(feature = "nightly")]
48extern crate test;
49
50extern crate alloc;
51use alloc::string::String;
52
53use alloc::borrow::Cow;
54use core::cmp::Ordering;
55use core::fmt;
56use core::hash::{Hash, Hasher};
57use core::ops::{Deref, DerefMut};
58use core::str::FromStr;
59
60use self::unicode::Unicode;
61
62mod ascii;
63mod unicode;
64
/// Case Insensitive wrapper of strings.
///
/// Internally this holds a single private `Encoding<S>`, which records
/// whether the wrapped value goes through the ASCII or the Unicode wrapper.
#[derive(Clone, Copy)]
pub struct UniCase<S>(Encoding<S>);
68
/// Case Insensitive wrapper of Ascii strings.
///
/// A tuple struct around the wrapped value `S`. `Clone`, `Copy`, `Debug` and
/// `Default` are derived, so each is available when `S` provides it.
#[derive(Clone, Copy, Debug, Default)]
pub struct Ascii<S>(S);
72
73/// Compare two string-like types for case-less equality, using unicode folding.
74///
75/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
76///
77/// Note: This will perform a scan for ASCII characters before doing the
78/// the comparison. See `UniCase` for more information.
79#[inline]
80pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
81    UniCase::new(left) == UniCase::new(right)
82}
83
84/// Compare two string-like types for case-less equality, ignoring ASCII case.
85///
86/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
87#[inline]
88pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
89    Ascii(left) == Ascii(right)
90}
91
/// Private tag stored inside `UniCase`: the same value type `S`, held either
/// by the `Ascii` wrapper or by the `Unicode` wrapper.
#[derive(Clone, Copy, Debug)]
enum Encoding<S> {
    /// The value is held by the `Ascii` wrapper.
    Ascii(Ascii<S>),
    /// The value is held by the `Unicode` wrapper.
    Unicode(Unicode<S>),
}
97
// Borrows the value stored inside an `Encoding`, whichever variant it is.
//
// `inner!(mut place)` produces a mutable borrow of the wrapped value and
// `inner!(place)` produces a shared borrow. Both variants keep the value in
// field `.0` of their wrapper.
macro_rules! inner {
    (mut $enc:expr) => {{
        match &mut $enc {
            Encoding::Ascii(wrapped) => &mut wrapped.0,
            Encoding::Unicode(wrapped) => &mut wrapped.0,
        }
    }};
    ($enc:expr) => {{
        match &$enc {
            Encoding::Ascii(wrapped) => &wrapped.0,
            Encoding::Unicode(wrapped) => &wrapped.0,
        }
    }};
}
112
113impl<S: AsRef<str> + Default> Default for UniCase<S> {
114    fn default() -> Self {
115        Self::new(Default::default())
116    }
117}
118
119impl<S: AsRef<str>> UniCase<S> {
120    /// Creates a new `UniCase`.
121    ///
122    /// Note: This scans the text to determine if it is all ASCII or not.
123    pub fn new(s: S) -> UniCase<S> {
124        if s.as_ref().is_ascii() {
125            UniCase(Encoding::Ascii(Ascii(s)))
126        } else {
127            UniCase(Encoding::Unicode(Unicode(s)))
128        }
129    }
130
131    /// Returns a copy of this string where each character is mapped to its
132    /// Unicode CaseFolding equivalent.
133    ///
134    /// # Note
135    ///
136    /// Unicode Case Folding is meant for string storage and matching, not for
137    /// display.
138    pub fn to_folded_case(&self) -> String {
139        match self.0 {
140            Encoding::Ascii(ref s) => s.0.as_ref().to_ascii_lowercase(),
141            Encoding::Unicode(ref s) => s.to_folded_case(),
142        }
143    }
144}
145
146impl<S> UniCase<S> {
147    /// Creates a new `UniCase`, skipping the ASCII check.
148    pub const fn unicode(s: S) -> UniCase<S> {
149        UniCase(Encoding::Unicode(Unicode(s)))
150    }
151
152    /// Creates a new `UniCase` which performs only ASCII case folding.
153    pub const fn ascii(s: S) -> UniCase<S> {
154        UniCase(Encoding::Ascii(Ascii(s)))
155    }
156
157    /// Return `true` if this instance will only perform ASCII case folding.
158    pub fn is_ascii(&self) -> bool {
159        match self.0 {
160            Encoding::Ascii(_) => true,
161            Encoding::Unicode(_) => false,
162        }
163    }
164
165    /// Unwraps the inner value held by this `UniCase`.
166    #[inline]
167    pub fn into_inner(self) -> S {
168        match self.0 {
169            Encoding::Ascii(s) => s.0,
170            Encoding::Unicode(s) => s.0,
171        }
172    }
173}
174
175impl<S> Deref for UniCase<S> {
176    type Target = S;
177    #[inline]
178    fn deref<'a>(&'a self) -> &'a S {
179        inner!(self.0)
180    }
181}
182
183impl<S> DerefMut for UniCase<S> {
184    #[inline]
185    fn deref_mut<'a>(&'a mut self) -> &'a mut S {
186        inner!(mut self.0)
187    }
188}
189
190impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
191    #[inline]
192    fn as_ref(&self) -> &str {
193        inner!(self.0).as_ref()
194    }
195}
196
197impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
198    #[inline]
199    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
200        fmt::Debug::fmt(inner!(self.0), fmt)
201    }
202}
203
204impl<S: fmt::Display> fmt::Display for UniCase<S> {
205    #[inline]
206    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
207        fmt::Display::fmt(inner!(self.0), fmt)
208    }
209}
210
211impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
212    #[inline]
213    fn eq(&self, other: &UniCase<S2>) -> bool {
214        match (&self.0, &other.0) {
215            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x == y,
216            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x == y,
217            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => &Unicode(x.as_ref()) == y,
218            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => x == &Unicode(y.as_ref()),
219        }
220    }
221}
222
// Marker impl: declares the `PartialEq` comparison on `UniCase<S>` to be a
// full equivalence relation. `Eq` has no methods of its own.
impl<S: AsRef<str>> Eq for UniCase<S> {}
224
225impl<S: AsRef<str>> Hash for UniCase<S> {
226    #[inline]
227    fn hash<H: Hasher>(&self, hasher: &mut H) {
228        match self.0 {
229            Encoding::Ascii(ref s) => s.hash(hasher),
230            Encoding::Unicode(ref s) => s.hash(hasher),
231        }
232    }
233}
234
235impl<S> From<Ascii<S>> for UniCase<S> {
236    fn from(ascii: Ascii<S>) -> Self {
237        UniCase(Encoding::Ascii(ascii))
238    }
239}
240
// Generates `impl From<$src> for UniCase<$dst>`.
//
// The source value is turned into `$dst` by calling the method named after
// the `;` (defaulting to `into` when omitted) and is then wrapped with
// `UniCase::unicode`, so no ASCII scan happens.
macro_rules! from_impl {
    ($src:ty => $dst:ty; $convert:ident) => {
        impl<'a> From<$src> for UniCase<$dst> {
            fn from(value: $src) -> Self {
                let converted: $dst = value.$convert();
                UniCase::unicode(converted)
            }
        }
    };
    ($src:ty => $dst:ty) => {
        from_impl!($src => $dst; into);
    };
}
251
// Generates `impl Into<$target> for UniCase<$target>`, which unwraps the
// `UniCase` through `into_inner`.
//
// NOTE(review): this implements `Into` directly rather than `From`;
// presumably deliberate (coherence / compiler-compatibility) - confirm
// before changing.
macro_rules! into_impl {
    ($target:ty) => {
        impl<'a> Into<$target> for UniCase<$target> {
            fn into(self) -> $target {
                UniCase::into_inner(self)
            }
        }
    };
}
261
262impl<S: AsRef<str>> From<S> for UniCase<S> {
263    fn from(s: S) -> Self {
264        UniCase::unicode(s)
265    }
266}
267
// Conversions into a `UniCase` whose wrapped type differs from the source
// type. The method after `;` performs the conversion (`into` when omitted).
from_impl!(&'a str => Cow<'a, str>);
from_impl!(String => Cow<'a, str>);
from_impl!(&'a str => String);
from_impl!(Cow<'a, str> => String; into_owned);
from_impl!(&'a String => &'a str; as_ref);

// Conversions from a `UniCase` back to the value it wraps.
into_impl!(&'a str);
into_impl!(String);
into_impl!(Cow<'a, str>);
277
278impl<T: AsRef<str>> PartialOrd for UniCase<T> {
279    #[inline]
280    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
281        Some(self.cmp(other))
282    }
283}
284
285impl<T: AsRef<str>> Ord for UniCase<T> {
286    #[inline]
287    fn cmp(&self, other: &Self) -> Ordering {
288        match (&self.0, &other.0) {
289            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x.cmp(y),
290            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x.cmp(y),
291            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => {
292                Unicode(x.as_ref()).cmp(&Unicode(y.0.as_ref()))
293            }
294            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => {
295                Unicode(x.0.as_ref()).cmp(&Unicode(y.as_ref()))
296            }
297        }
298    }
299}
300
301impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
302    type Err = <S as FromStr>::Err;
303    fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
304        s.parse().map(UniCase::new)
305    }
306}
307
308#[cfg(test)]
309mod tests {
310    use super::UniCase;
311    use std::borrow::ToOwned;
312    use std::collections::hash_map::DefaultHasher;
313    use std::hash::{Hash, Hasher};
314    use std::string::String;
315
316    fn hash<T: Hash>(t: &T) -> u64 {
317        let mut s = DefaultHasher::new();
318        t.hash(&mut s);
319        s.finish()
320    }
321
322    #[test]
323    fn test_copy_for_refs() {
324        fn foo<T>(_: UniCase<T>) {}
325
326        let a = UniCase::new("foobar");
327        foo(a);
328        foo(a);
329    }
330
331    #[test]
332    fn test_eq_ascii() {
333        let a = UniCase::new("foobar");
334        let b = UniCase::new("FOOBAR");
335        let c = UniCase::ascii("FoObAr");
336
337        assert_eq!(a, b);
338        assert_eq!(b, a);
339        assert_eq!(a, c);
340        assert_eq!(c, a);
341        assert_eq!(hash(&a), hash(&b));
342        assert_eq!(hash(&a), hash(&c));
343        assert!(a.is_ascii());
344        assert!(b.is_ascii());
345        assert!(c.is_ascii());
346    }
347
348    #[test]
349    fn test_eq_unicode() {
350        let a = UniCase::new("στιγμας");
351        let b = UniCase::new("στιγμασ");
352        assert_eq!(a, b);
353        assert_eq!(b, a);
354        assert_eq!(hash(&a), hash(&b));
355    }
356
357    #[test]
358    fn test_eq_unicode_left_is_substring() {
359        // https://github.com/seanmonstar/unicase/issues/38
360        let a = UniCase::unicode("foo");
361        let b = UniCase::unicode("foobar");
362
363        assert!(a != b);
364        assert!(b != a);
365    }
366
367    #[cfg(feature = "nightly")]
368    #[bench]
369    fn bench_unicase_ascii(b: &mut ::test::Bencher) {
370        b.bytes = b"foobar".len() as u64;
371        let x = UniCase::new("foobar");
372        let y = UniCase::new("FOOBAR");
373        b.iter(|| assert_eq!(x, y));
374    }
375
376    #[cfg(feature = "nightly")]
377    static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
378
379    #[cfg(feature = "nightly")]
380    #[inline(never)]
381    fn is_ascii(bytes: &[u8]) -> bool {
382        #[allow(unused, deprecated)]
383        use std::ascii::AsciiExt;
384        bytes.is_ascii()
385    }
386
387    #[cfg(feature = "nightly")]
388    #[bench]
389    fn bench_is_ascii(b: &mut ::test::Bencher) {
390        b.iter(|| assert!(is_ascii(SUBJECT)));
391    }
392
393    #[cfg(feature = "nightly")]
394    #[bench]
395    fn bench_is_utf8(b: &mut ::test::Bencher) {
396        b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
397    }
398
399    #[test]
400    fn test_case_cmp() {
401        assert!(UniCase::new("a") < UniCase::new("B"));
402
403        assert!(UniCase::new("A") < UniCase::new("b"));
404        assert!(UniCase::new("aa") > UniCase::new("a"));
405
406        assert!(UniCase::new("a") < UniCase::new("aa"));
407        assert!(UniCase::new("a") < UniCase::new("AA"));
408    }
409
410    #[test]
411    fn test_from_impls() {
412        let view: &'static str = "foobar";
413        let _: UniCase<&'static str> = view.into();
414        let _: UniCase<&str> = view.into();
415        let _: UniCase<String> = view.into();
416
417        let owned: String = view.to_owned();
418        let _: UniCase<&str> = (&owned).into();
419        let _: UniCase<String> = owned.into();
420    }
421
422    #[test]
423    fn test_into_impls() {
424        let view: UniCase<&'static str> = UniCase::new("foobar");
425        let _: &'static str = view.into();
426        let _: &str = view.into();
427
428        let owned: UniCase<String> = "foobar".into();
429        let _: String = owned.clone().into();
430        let _: &str = owned.as_ref();
431    }
432
433    #[test]
434    fn test_unicase_unicode_const() {
435        const _UNICASE: UniCase<&'static str> = UniCase::unicode("");
436    }
437}
unicase/lib.rs

unicase/
lib.rs