unicase/
lib.rs

1#![cfg_attr(test, deny(missing_docs))]
2#![cfg_attr(test, deny(warnings))]
3#![doc(html_root_url = "https://docs.rs/unicase/2.7.0")]
4#![cfg_attr(feature = "nightly", feature(test))]
5#![cfg_attr(all(__unicase__core_and_alloc, not(test),), no_std)]
6
7//! # UniCase
8//!
9//! UniCase provides a way of specifying strings that are case-insensitive.
10//!
11//! UniCase supports full [Unicode case
12//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
13//! utilize faster ASCII case comparisons, if both strings are ASCII.
14//!
15//! Using the `UniCase::new()` constructor will check the string to see if it
16//! is all ASCII. When a `UniCase` is compared against another, if both are
17//! ASCII, it will use the faster comparison.
18//!
//! This crate also provides the `Ascii` type, which always uses ASCII case
//! comparisons. Use it when the text is already known to be ASCII.
21//!
22//! ## Example
23//!
24//! ```rust
25//! use unicase::UniCase;
26//!
27//! let a = UniCase::new("Maße");
28//! let b = UniCase::new("MASSE");
29//! let c = UniCase::new("mase");
30//!
31//! assert_eq!(a, b);
32//! assert!(b != c);
33//! ```
34//!
35//! ## Ascii
36//!
37//! ```rust
38//! use unicase::Ascii;
39//!
40//! let a = Ascii::new("foobar");
41//! let b = Ascii::new("FoObAr");
42//!
43//! assert_eq!(a, b);
44//! ```
45
46#[cfg(feature = "nightly")]
47extern crate test;
48
49#[cfg(all(__unicase__core_and_alloc, not(test)))]
50extern crate alloc;
51#[cfg(all(__unicase__core_and_alloc, not(test)))]
52use alloc::string::String;
53
54#[cfg(not(all(__unicase__core_and_alloc, not(test))))]
55extern crate std as alloc;
56#[cfg(not(all(__unicase__core_and_alloc, not(test))))]
57extern crate std as core;
58
59use alloc::borrow::Cow;
60#[cfg(__unicase__iter_cmp)]
61use core::cmp::Ordering;
62use core::fmt;
63use core::hash::{Hash, Hasher};
64use core::ops::{Deref, DerefMut};
65use core::str::FromStr;
66
67use self::unicode::Unicode;
68
69mod ascii;
70mod unicode;
71
/// Case Insensitive wrapper of strings.
///
/// `UniCase::new` scans the text once and records whether it is all ASCII;
/// `UniCase::ascii` and `UniCase::unicode` skip that scan.
#[derive(Clone, Copy)]
pub struct UniCase<S>(Encoding<S>);
75
/// Case Insensitive wrapper of Ascii strings.
///
/// Comparisons through this type ignore ASCII case only (see `eq_ascii`).
#[derive(Clone, Copy, Debug, Default)]
pub struct Ascii<S>(S);
79
80/// Compare two string-like types for case-less equality, using unicode folding.
81///
82/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
83///
84/// Note: This will perform a scan for ASCII characters before doing the
85/// the comparison. See `UniCase` for more information.
86#[inline]
87pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
88    UniCase::new(left) == UniCase::new(right)
89}
90
91/// Compare two string-like types for case-less equality, ignoring ASCII case.
92///
93/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
94#[inline]
95pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
96    Ascii(left) == Ascii(right)
97}
98
// Private tag recording which wrapper (and therefore which comparison
// strategy) a `UniCase` holds.
#[derive(Clone, Copy, Debug)]
enum Encoding<S> {
    // Value wrapped in `Ascii`; built by `UniCase::new` for all-ASCII text
    // and by `UniCase::ascii`.
    Ascii(Ascii<S>),
    // Value wrapped in `Unicode`; built by `UniCase::new` for text that is
    // not all ASCII and by `UniCase::unicode`.
    Unicode(Unicode<S>),
}
104
// Borrows the value wrapped inside an `Encoding`, whichever variant it is.
//
// `inner!(mut place)` yields a mutable borrow of the wrapped value and
// `inner!(place)` yields a shared borrow. The argument is matched in place
// with `ref` bindings, so nothing is moved out of it.
macro_rules! inner {
    (mut $e:expr) => {{
        match $e {
            Encoding::Ascii(ref mut wrapped) => &mut wrapped.0,
            Encoding::Unicode(ref mut wrapped) => &mut wrapped.0,
        }
    }};
    ($e:expr) => {{
        match $e {
            Encoding::Ascii(ref wrapped) => &wrapped.0,
            Encoding::Unicode(ref wrapped) => &wrapped.0,
        }
    }};
}
119
120impl<S: AsRef<str> + Default> Default for UniCase<S> {
121    fn default() -> Self {
122        Self::new(Default::default())
123    }
124}
125
126impl<S: AsRef<str>> UniCase<S> {
127    /// Creates a new `UniCase`.
128    ///
129    /// Note: This scans the text to determine if it is all ASCII or not.
130    pub fn new(s: S) -> UniCase<S> {
131        #[cfg(not(__unicase__core_and_alloc))]
132        #[allow(deprecated, unused)]
133        use std::ascii::AsciiExt;
134
135        if s.as_ref().is_ascii() {
136            UniCase(Encoding::Ascii(Ascii(s)))
137        } else {
138            UniCase(Encoding::Unicode(Unicode(s)))
139        }
140    }
141}
142
143impl<S> UniCase<S> {
144    /// Creates a new `UniCase`, skipping the ASCII check.
145    #[cfg(__unicase__const_fns)]
146    pub const fn unicode(s: S) -> UniCase<S> {
147        UniCase(Encoding::Unicode(Unicode(s)))
148    }
149
150    /// Creates a new `UniCase`, skipping the ASCII check.
151    ///
152    /// For Rust versions >= 1.31, this is a `const fn`.
153    #[cfg(not(__unicase__const_fns))]
154    pub fn unicode(s: S) -> UniCase<S> {
155        UniCase(Encoding::Unicode(Unicode(s)))
156    }
157
158    /// Creates a new `UniCase` which performs only ASCII case folding.
159    #[cfg(__unicase__const_fns)]
160    pub const fn ascii(s: S) -> UniCase<S> {
161        UniCase(Encoding::Ascii(Ascii(s)))
162    }
163
164    /// Creates a new `UniCase` which performs only ASCII case folding.
165    ///
166    /// For Rust versions >= 1.31, this is a `const fn`.
167    #[cfg(not(__unicase__const_fns))]
168    pub fn ascii(s: S) -> UniCase<S> {
169        UniCase(Encoding::Ascii(Ascii(s)))
170    }
171
172    /// Return `true` if this instance will only perform ASCII case folding.
173    pub fn is_ascii(&self) -> bool {
174        match self.0 {
175            Encoding::Ascii(_) => true,
176            Encoding::Unicode(_) => false,
177        }
178    }
179
180    /// Unwraps the inner value held by this `UniCase`.
181    #[inline]
182    pub fn into_inner(self) -> S {
183        match self.0 {
184            Encoding::Ascii(s) => s.0,
185            Encoding::Unicode(s) => s.0,
186        }
187    }
188}
189
190impl<S> Deref for UniCase<S> {
191    type Target = S;
192    #[inline]
193    fn deref<'a>(&'a self) -> &'a S {
194        inner!(self.0)
195    }
196}
197
198impl<S> DerefMut for UniCase<S> {
199    #[inline]
200    fn deref_mut<'a>(&'a mut self) -> &'a mut S {
201        inner!(mut self.0)
202    }
203}
204
205impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
206    #[inline]
207    fn as_ref(&self) -> &str {
208        inner!(self.0).as_ref()
209    }
210}
211
212impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
213    #[inline]
214    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
215        fmt::Debug::fmt(inner!(self.0), fmt)
216    }
217}
218
219impl<S: fmt::Display> fmt::Display for UniCase<S> {
220    #[inline]
221    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
222        fmt::Display::fmt(inner!(self.0), fmt)
223    }
224}
225
226impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
227    #[inline]
228    fn eq(&self, other: &UniCase<S2>) -> bool {
229        match (&self.0, &other.0) {
230            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x == y,
231            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x == y,
232            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => &Unicode(x.as_ref()) == y,
233            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => x == &Unicode(y.as_ref()),
234        }
235    }
236}
237
// Marker impl with no methods of its own; the comparison logic lives in the
// `PartialEq` impl for `UniCase`.
impl<S: AsRef<str>> Eq for UniCase<S> {}
239
240impl<S: AsRef<str>> Hash for UniCase<S> {
241    #[inline]
242    fn hash<H: Hasher>(&self, hasher: &mut H) {
243        match self.0 {
244            Encoding::Ascii(ref s) => s.hash(hasher),
245            Encoding::Unicode(ref s) => s.hash(hasher),
246        }
247    }
248}
249
250impl<S> From<Ascii<S>> for UniCase<S> {
251    fn from(ascii: Ascii<S>) -> Self {
252        UniCase(Encoding::Ascii(ascii))
253    }
254}
255
// Generates `impl From<$from> for UniCase<$to>`.
//
// The three-part form names the method used to turn `$from` into `$to`;
// the two-part form defaults that method to `into`. The converted value is
// wrapped with `UniCase::unicode`.
macro_rules! from_impl {
    ($from:ty => $to:ty; $by:ident) => {
        impl<'a> From<$from> for UniCase<$to> {
            fn from(s: $from) -> Self {
                let converted: $to = s.$by();
                UniCase::unicode(converted)
            }
        }
    };
    ($from:ty => $to:ty) => {
        from_impl!($from => $to; into);
    };
}
266
// Generates `impl Into<$to> for UniCase<$to>`, which unwraps the `UniCase`
// through `into_inner`.
macro_rules! into_impl {
    ($to:ty) => {
        impl<'a> Into<$to> for UniCase<$to> {
            fn into(self) -> $to {
                UniCase::into_inner(self)
            }
        }
    };
}
276
277impl<S: AsRef<str>> From<S> for UniCase<S> {
278    fn from(s: S) -> Self {
279        UniCase::unicode(s)
280    }
281}
282
// `From` conversions into `UniCase`. Each expansion converts the source with
// the named method (`into` when none is given) and wraps the result with
// `UniCase::unicode`.
from_impl!(&'a str => Cow<'a, str>);
from_impl!(String => Cow<'a, str>);
from_impl!(&'a str => String);
from_impl!(Cow<'a, str> => String; into_owned);
from_impl!(&'a String => &'a str; as_ref);

// `Into` conversions that unwrap a `UniCase` back into the value it holds.
into_impl!(&'a str);
into_impl!(String);
into_impl!(Cow<'a, str>);
292
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> PartialOrd for UniCase<T> {
    /// Always returns `Some`: the answer is taken from the `Ord` impl.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
300
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> Ord for UniCase<T> {
    /// Orders two `UniCase` values case-insensitively.
    ///
    /// Matching encodings are compared directly. When the encodings differ,
    /// both sides are viewed as string slices and compared as `Unicode`.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        match self.0 {
            Encoding::Ascii(ref lhs) => match other.0 {
                Encoding::Ascii(ref rhs) => lhs.cmp(rhs),
                Encoding::Unicode(ref rhs) => {
                    Unicode(lhs.as_ref()).cmp(&Unicode(rhs.0.as_ref()))
                }
            },
            Encoding::Unicode(ref lhs) => match other.0 {
                Encoding::Ascii(ref rhs) => {
                    Unicode(lhs.0.as_ref()).cmp(&Unicode(rhs.as_ref()))
                }
                Encoding::Unicode(ref rhs) => lhs.cmp(rhs),
            },
        }
    }
}
317
318impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
319    type Err = <S as FromStr>::Err;
320    fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
321        s.parse().map(UniCase::new)
322    }
323}
324
325#[cfg(test)]
326mod tests {
327    use super::UniCase;
328    #[cfg(__unicase__default_hasher)]
329    use std::collections::hash_map::DefaultHasher;
330    #[cfg(not(__unicase__default_hasher))]
331    use std::hash::SipHasher as DefaultHasher;
332    use std::hash::{Hash, Hasher};
333
334    fn hash<T: Hash>(t: &T) -> u64 {
335        let mut s = DefaultHasher::new();
336        t.hash(&mut s);
337        s.finish()
338    }
339
340    #[test]
341    fn test_copy_for_refs() {
342        fn foo<T>(_: UniCase<T>) {}
343
344        let a = UniCase::new("foobar");
345        foo(a);
346        foo(a);
347    }
348
349    #[test]
350    fn test_eq_ascii() {
351        let a = UniCase::new("foobar");
352        let b = UniCase::new("FOOBAR");
353        let c = UniCase::ascii("FoObAr");
354
355        assert_eq!(a, b);
356        assert_eq!(b, a);
357        assert_eq!(a, c);
358        assert_eq!(c, a);
359        assert_eq!(hash(&a), hash(&b));
360        assert_eq!(hash(&a), hash(&c));
361        assert!(a.is_ascii());
362        assert!(b.is_ascii());
363        assert!(c.is_ascii());
364    }
365
366    #[test]
367    fn test_eq_unicode() {
368        let a = UniCase::new("στιγμας");
369        let b = UniCase::new("στιγμασ");
370        assert_eq!(a, b);
371        assert_eq!(b, a);
372        assert_eq!(hash(&a), hash(&b));
373    }
374
375    #[test]
376    fn test_eq_unicode_left_is_substring() {
377        // https://github.com/seanmonstar/unicase/issues/38
378        let a = UniCase::unicode("foo");
379        let b = UniCase::unicode("foobar");
380
381        assert!(a != b);
382        assert!(b != a);
383    }
384
385    #[cfg(feature = "nightly")]
386    #[bench]
387    fn bench_unicase_ascii(b: &mut ::test::Bencher) {
388        b.bytes = b"foobar".len() as u64;
389        let x = UniCase::new("foobar");
390        let y = UniCase::new("FOOBAR");
391        b.iter(|| assert_eq!(x, y));
392    }
393
394    #[cfg(feature = "nightly")]
395    static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
396
397    #[cfg(feature = "nightly")]
398    #[inline(never)]
399    fn is_ascii(bytes: &[u8]) -> bool {
400        #[allow(unused, deprecated)]
401        use std::ascii::AsciiExt;
402        bytes.is_ascii()
403    }
404
405    #[cfg(feature = "nightly")]
406    #[bench]
407    fn bench_is_ascii(b: &mut ::test::Bencher) {
408        b.iter(|| assert!(is_ascii(SUBJECT)));
409    }
410
411    #[cfg(feature = "nightly")]
412    #[bench]
413    fn bench_is_utf8(b: &mut ::test::Bencher) {
414        b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
415    }
416
417    #[cfg(__unicase__iter_cmp)]
418    #[test]
419    fn test_case_cmp() {
420        assert!(UniCase::new("a") < UniCase::new("B"));
421
422        assert!(UniCase::new("A") < UniCase::new("b"));
423        assert!(UniCase::new("aa") > UniCase::new("a"));
424
425        assert!(UniCase::new("a") < UniCase::new("aa"));
426        assert!(UniCase::new("a") < UniCase::new("AA"));
427    }
428
429    #[test]
430    fn test_from_impls() {
431        let view: &'static str = "foobar";
432        let _: UniCase<&'static str> = view.into();
433        let _: UniCase<&str> = view.into();
434        let _: UniCase<String> = view.into();
435
436        let owned: String = view.to_owned();
437        let _: UniCase<&str> = (&owned).into();
438        let _: UniCase<String> = owned.into();
439    }
440
441    #[test]
442    fn test_into_impls() {
443        let view: UniCase<&'static str> = UniCase::new("foobar");
444        let _: &'static str = view.into();
445        let _: &str = view.into();
446
447        let owned: UniCase<String> = "foobar".into();
448        let _: String = owned.clone().into();
449        let _: &str = owned.as_ref();
450    }
451
452    #[cfg(__unicase__const_fns)]
453    #[test]
454    fn test_unicase_unicode_const() {
455        const _UNICASE: UniCase<&'static str> = UniCase::unicode("");
456    }
457}