char_collection/
char_collection.rs

Help
1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use anyhow::{format_err, Error};
6use std::clone::Clone;
7use std::cmp::Ordering;
8use std::hash::{Hash, Hasher};
9use std::ops::Range;
10use unic_char_range::{chars, CharIter, CharRange};
11
12/// A trait for objects that represent one or more disjoint, non-adjacent
13/// [CharRanges](unic_char_range::CharRange).
14pub trait MultiCharRange {
15    /// Iterate over the disjoint, non-adjacent [CharRange]s in the collection in ascending order.
16    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a>;
17    /// The number of ranges in the collection.
18    fn range_count(&self) -> usize;
19}
20
21/// A collection of `char`s (i.e. Unicode code points), used for storing large continuous ranges
22/// efficiently.
23///
24/// Lookups and insertions are O(log <var>R</var>), where <var>R</var> is the number of disjoint
25/// ranges in the collection.
26///
27/// The easiest way to create instances is using the
28/// [char_collect!](::char_collection::char_collect) macro.
29///
30/// ```
31/// use char_collection::CharCollection;
32///
33/// let mut collection: CharCollection = char_collect!('a'..='d', 'x'..='z');
34/// char_collection += 'e';
35/// char_collection += chars!('p'..='t');
36/// assert_eq!(
37///     collection.iter_ranges().collect(),
38///     vec![chars!('a'..='e'), chars!('p'..='t'), chars!('x'..='z')]);
39///
40/// assert!(collection.contains(&'c'));
41/// assert!(collection.contains_range(chars!('q'..='s')));
42/// assert!(!collection.contains(&'9'));
43///
44/// collection -= chars!('t'..='y');
45/// assert_eq!(
46///     collection.iter_ranges().collect(),
47///     vec![chars!('a'..='e', chars!('p'..'s'), chars!('z'..='z'))]);
48/// ```
49///
50/// TODO(kpozin): Implement IntoIter.
51#[derive(Clone, Debug, Eq, PartialEq, Default)]
52pub struct CharCollection {
53    ranges: Vec<CharRange>,
54}
55
56impl CharCollection {
57    /// Create a new, empty `CharCollection`.
58    pub fn new() -> CharCollection {
59        CharCollection::default()
60    }
61
62    /// Create a new `CharCollection` from a list of disjoint, non-adjacent `CharRange`s, pre-sorted
63    /// in ascending code point order.
64    ///
65    /// This factory method is primarily intended for use in deserializing valid representations of
66    /// `CharCollections`. Will return an error if ranges are out of order, overlapping, or
67    /// adjacent.
68    pub fn from_sorted_ranges<T>(ranges: T) -> Result<CharCollection, Error>
69    where
70        T: IntoIterator<Item = CharRange>,
71    {
72        // If the original `ranges` is also a Vec, this doesn't result in an extra copy.
73        let collection = CharCollection { ranges: ranges.into_iter().collect() };
74        let ranges: &Vec<CharRange> = &collection.ranges;
75        match (1..ranges.len()).find(|i| (ranges[*i].low as i64 - ranges[*i - 1].high as i64) <= 1)
76        {
77            Some(i) => Err(format_err!(
78                "These ranges are out of order, overlapping, or adjacent: {}, {}",
79                format_range(&ranges[i - 1]),
80                format_range(&ranges[i])
81            )),
82            None => Ok(collection),
83        }
84    }
85
86    /// Create a new `CharCollection` from a list of `char`s, pre-sorted in ascending code point
87    /// order.
88    ///
89    /// This factory method is primarily intended for use in deserializing valid representations of
90    /// `CharCollections`. Will return an error if chars are out of order or contain duplicates.
91    pub fn from_sorted_chars<T>(chars: T) -> Result<CharCollection, Error>
92    where
93        T: IntoIterator<Item = char>,
94    {
95        let mut collection = CharCollection::new();
96        for ch in chars.into_iter() {
97            collection.append(ch)?;
98        }
99        Ok(collection)
100    }
101
102    /// Iterate over all the `char`s in the collection.
103    pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
104        self.ranges.iter().flat_map(CharRange::iter)
105    }
106
107    /// Test whether the collection contains a specific `char`.
108    ///
109    /// The time complexity is O(log <var>R</var>), where <var>R</var> is the number of ranges in
110    /// the collection.
111    pub fn contains(&self, ch: &char) -> bool {
112        self.find_containing_range(ch).is_ok()
113    }
114
115    /// Test whether the collection contains an entire range of characters.
116    ///
117    /// The time complexity is O(log <var>R</var>), where <var>R</var> is the number of ranges in
118    /// the collection.
119    pub fn contains_range(&self, range: &CharRange) -> bool {
120        if range.is_empty() {
121            return false;
122        }
123
124        let lower_existing_range = self.find_containing_range(&range.low);
125        let upper_existing_range = self.find_containing_range(&range.high);
126
127        // Fully enclosed in existing range.
128        return lower_existing_range == upper_existing_range && lower_existing_range.is_ok();
129    }
130
131    /// Insert a `char` or other collection of chars into this collection.
132    ///
133    /// Returns `&mut self` for easy chaining.
134    ///
135    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
136    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
137    /// `to_add`.
138    pub fn insert<V: MultiCharRange>(&mut self, to_add: &V) -> &mut Self {
139        to_add.iter_ranges().for_each(|range| self.insert_char_range(&range));
140        self
141    }
142
143    /// Appends a `char` to the end of the existing collection. Panics if the given `char` is not
144    /// higher than the highest code point in the existing collection.
145    ///
146    /// Returns `&mut self` for easy chaining.
147    ///
148    /// The time complexity is O(1).
149    pub fn append(&mut self, ch: char) -> Result<&mut Self, Error> {
150        let mut coalesced = false;
151        if let Some(last_range) = self.ranges.last_mut() {
152            if last_range.cmp_char(ch) != Ordering::Less {
153                return Err(format_err!("Cannot append {} after {}", ch, last_range.high));
154            }
155            if are_chars_adjacent(&last_range.high, &ch) {
156                last_range.high = ch;
157                coalesced = true;
158            }
159        }
160        if !coalesced {
161            self.ranges.push(chars!(ch..=ch));
162        }
163        Ok(self)
164    }
165
166    /// Appends a `CharRange` to the end of the existing collection. Panics if the given range is
167    /// not higher than the highest code point in the existing collection. (The new range _may_ be
168    /// adjacent to the previous highest range, but may not overlap.)
169    ///
170    /// Returns `&mut self` for easy chaining.
171    ///
172    /// The time complexity is O(1).
173    pub fn append_range(&mut self, range: CharRange) -> Result<&mut Self, Error> {
174        let mut coalesced = false;
175        if let Some(last_range) = self.ranges.last_mut() {
176            if last_range.cmp_char(range.low) != Ordering::Less {
177                return Err(format_err!(
178                    "Cannot append {} after {}",
179                    format_range(&range),
180                    last_range.high
181                ));
182            }
183            if are_chars_adjacent(&last_range.high, &range.low) {
184                last_range.high = range.high;
185                coalesced = true;
186            }
187        }
188        if !coalesced {
189            self.ranges.push(range);
190        }
191        Ok(self)
192    }
193
194    /// Remove a `char` or other collection of chars from this collection.
195    ///
196    /// Returns `&mut self` for easy chaining.
197    ///
198    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
199    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
200    /// `to_remove`.
201    pub fn remove<V: MultiCharRange>(&mut self, to_remove: &V) -> &mut Self {
202        to_remove.iter_ranges().for_each(|range| self.remove_char_range(&range));
203        self
204    }
205
206    /// Remove all entries from this collection.
207    ///
208    /// Returns `&mut self` for easy chaining.
209    pub fn clear(&mut self) -> &mut Self {
210        self.ranges.clear();
211        self
212    }
213
214    /// Return the set union of this collection and another one.
215    ///
216    /// The time complexity is O(min(<var>R</var>, <var>T</var>) log(<var>R</var> + <var>T</var>)),
217    /// where <var>R</var> is the number of ranges in this collection and <var>T</var> is the number
218    /// of ranges in `rhs`.
219    pub fn union<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
220        let mut result: CharCollection;
221        if self.range_count() > rhs.range_count() {
222            result = self.clone();
223            result.insert(rhs);
224        } else {
225            result = rhs.into();
226            result.insert(self);
227        }
228        result
229    }
230
231    /// Return the set intersection of this collection and another one.
232    ///
233    /// The time complexity is O(min(<var>R</var>, <var>T</var>) log(<var>R</var> + <var>T</var>)),
234    /// where <var>R</var> is the number of ranges in this collection and <var>T</var> is the number
235    /// of ranges in `rhs`.
236    pub fn intersection<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
237        let mut result: CharCollection;
238        if self.range_count() > rhs.range_count() {
239            result = self.clone();
240            let rhs: CharCollection = rhs.into();
241            result.remove(&rhs.complement());
242        } else {
243            result = rhs.into();
244            result.remove(&self.complement());
245        }
246        result
247    }
248
249    /// Return the (non-symmetric) set difference of this collection and another one.
250    ///
251    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
252    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
253    /// `rhs`.
254    pub fn difference<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
255        let mut result: CharCollection = self.clone();
256        result.remove(rhs);
257        result
258    }
259
260    /// Return the set complement of this collection (over the universe of `char`s).
261    ///
262    /// The time complexity is O(<var>R</var>), where <var>R</var> is the number of ranges in this
263    /// collection.
264    pub fn complement(&self) -> CharCollection {
265        if self.ranges.is_empty() {
266            return CharCollection::from(&CharRange::all());
267        }
268
269        let mut result_ranges: Vec<CharRange> = Vec::new();
270
271        if self.ranges[0].low != '\u{0}' {
272            result_ranges.push(CharRange::open_right('\u{0}', self.ranges[0].low));
273        }
274
275        let mut prev_high = self.ranges[0].high;
276
277        for range in &self.ranges[1..] {
278            result_ranges.push(CharRange::open(prev_high, range.low));
279            prev_high = range.high;
280        }
281
282        if prev_high != std::char::MAX {
283            result_ranges.push(CharRange::open_left(prev_high, std::char::MAX));
284        }
285
286        CharCollection { ranges: result_ranges }
287    }
288
289    /// Insert a single `CharRange`.
290    ///
291    /// Depending on how the new range relates to existing ranges in
292    /// the collection, it might be subsumed by an existing range, modify the endpoints of an
293    /// existing range, or replace one or more existing ranges.
294    fn insert_char_range(&mut self, new_range: &CharRange) {
295        if new_range.is_empty() {
296            return;
297        }
298
299        let lower_existing_range = self.find_containing_range(&new_range.low);
300        let upper_existing_range = self.find_containing_range(&new_range.high);
301
302        // Fully enclosed in existing range.
303        if lower_existing_range == upper_existing_range && lower_existing_range.is_ok() {
304            return;
305        }
306
307        let new_low: char;
308        let new_high: char;
309
310        let remove_from_idx: usize;
311        let remove_to_idx: usize;
312
313        match lower_existing_range {
314            Ok((idx, lower_existing_range)) => {
315                new_low = lower_existing_range.low;
316                remove_from_idx = idx;
317            }
318            Err(idx) => {
319                new_low = new_range.low;
320                remove_from_idx = idx;
321            }
322        }
323
324        match upper_existing_range {
325            Ok((idx, higher_existing_range)) => {
326                new_high = higher_existing_range.high;
327                remove_to_idx = idx + 1;
328            }
329            Err(idx) => {
330                new_high = new_range.high;
331                remove_to_idx = idx;
332            }
333        }
334
335        self.replace_ranges(chars!(new_low..=new_high), remove_from_idx..remove_to_idx);
336    }
337
338    /// Remove a single `CharRange`.
339    ///
340    /// Depending on how the removed range relates to existing ranges in the collection, it might
341    /// remove or modify the endpoints of existing ranges.
342    fn remove_char_range(&mut self, range_to_remove: &CharRange) {
343        if range_to_remove.is_empty() {
344            return;
345        }
346
347        let lower_existing_range = self.find_containing_range(&range_to_remove.low);
348        let upper_existing_range = self.find_containing_range(&range_to_remove.high);
349
350        let mut replacement_ranges: Vec<CharRange> = Vec::new();
351
352        let remove_from_idx: usize;
353        let remove_to_idx: usize;
354
355        match lower_existing_range {
356            Ok((idx, lower_existing_range)) => {
357                if lower_existing_range.low < range_to_remove.low {
358                    replacement_ranges
359                        .push(CharRange::open_right(lower_existing_range.low, range_to_remove.low));
360                }
361                remove_from_idx = idx;
362            }
363            Err(idx) => remove_from_idx = idx,
364        }
365
366        match upper_existing_range {
367            Ok((idx, higher_existing_range)) => {
368                if range_to_remove.high < higher_existing_range.high {
369                    replacement_ranges.push(CharRange::open_left(
370                        range_to_remove.high,
371                        higher_existing_range.high,
372                    ));
373                }
374                remove_to_idx = idx + 1;
375            }
376            Err(idx) => {
377                remove_to_idx = idx;
378            }
379        }
380
381        self.ranges.splice(remove_from_idx..remove_to_idx, replacement_ranges);
382    }
383
384    /// Delete all the existing `CharRange`s that fall within `indices_to_replace` in the vector,
385    /// and insert `char_range_to_insert` in their place. If the newly formed range is adjacent to
386    /// a kept range on its left or right, coalesce them.
387    fn replace_ranges(
388        &mut self,
389        mut char_range_to_insert: CharRange,
390        mut indices_to_replace: Range<usize>,
391    ) {
392        // If the newly formed range is adjacent to the range on its left, coalesce the two.
393        if indices_to_replace.start > 0 {
394            let prev_char_range = self.ranges[indices_to_replace.start - 1];
395            if are_chars_adjacent(&prev_char_range.high, &char_range_to_insert.low) {
396                char_range_to_insert.low = prev_char_range.low;
397                indices_to_replace.start -= 1;
398            }
399        }
400
401        // If the newly formed range is adjacent to the range on its right, coalesce the two.
402        if indices_to_replace.end < self.ranges.len() {
403            let next_char_range = self.ranges[indices_to_replace.end];
404            if are_chars_adjacent(&char_range_to_insert.high, &next_char_range.low) {
405                char_range_to_insert.high = next_char_range.high;
406                indices_to_replace.end += 1;
407            }
408        }
409
410        self.ranges.splice(indices_to_replace, vec![char_range_to_insert]);
411    }
412
413    fn find_containing_range(&self, query: &char) -> Result<(usize, CharRange), usize> {
414        let result = self.ranges.binary_search_by(|range| range.cmp_char(query.clone()));
415        match result {
416            Ok(index) => Ok((index, self.ranges[index])),
417            Err(index) => Err(index),
418        }
419    }
420}
421
422impl MultiCharRange for CharCollection {
423    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
424        Box::new(self.ranges.iter().map(|range| range.clone()))
425    }
426
427    fn range_count(&self) -> usize {
428        self.ranges.len()
429    }
430}
431
432#[allow(clippy::derived_hash_with_manual_eq)] // TODO(https://fxbug.dev/42177010)
433impl Hash for CharCollection {
434    fn hash<H: Hasher>(&self, state: &mut H) {
435        self.ranges.iter().for_each(|range| hash_char_range(range, state));
436    }
437}
438
439fn hash_char_range<H: Hasher>(range: &CharRange, state: &mut H) {
440    range.low.hash(state);
441    range.high.hash(state);
442}
443
444fn are_chars_adjacent(left: &char, right: &char) -> bool {
445    let mut iter: CharIter = CharRange::open_right(left.clone(), right.clone()).iter();
446    match iter.next_back() {
447        None => false,
448        Some(next_right) => left == &next_right,
449    }
450}
451
452fn format_range(range: &CharRange) -> String {
453    format!("{}..={}", range.low, range.high)
454}
455
456#[cfg(test)]
457mod tests {
458    use super::{are_chars_adjacent, CharCollection};
459    use anyhow::Error;
460    use std::char;
461    use unic_char_range::{chars, CharRange};
462
463    #[test]
464    fn test_from_sorted_ranges() -> Result<(), Error> {
465        let expected = char_collect!('a'..='d', 'g'..='l', 'z');
466        let actual = CharCollection::from_sorted_ranges(vec![
467            chars!('a'..='d'),
468            chars!('g'..='l'),
469            chars!('z'..='z'),
470        ])?;
471        assert_eq!(actual, expected);
472        Ok(())
473    }
474
475    #[test]
476    fn test_from_sorted_ranges_out_of_order() {
477        assert!(CharCollection::from_sorted_ranges(vec![
478            chars!('g'..='l'),
479            chars!('a'..='d'),
480            chars!('z'..='z'),
481        ])
482        .is_err());
483    }
484
485    #[test]
486    fn test_from_sorted_ranges_overlap() {
487        assert!(CharCollection::from_sorted_ranges(vec![
488            chars!('a'..='d'),
489            chars!('c'..='l'),
490            chars!('z'..='z'),
491        ])
492        .is_err());
493    }
494
495    #[test]
496    fn test_from_sorted_ranges_adjacent() {
497        assert!(
498            CharCollection::from_sorted_ranges(vec![chars!('a'..='d'), chars!('e'..='g')]).is_err()
499        );
500    }
501
502    #[test]
503    fn test_from_sorted_chars() -> Result<(), Error> {
504        let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'z'];
505        let expected = char_collect!('a'..='d', 'g'..='l', 'z');
506        let actual = CharCollection::from_sorted_chars(chars)?;
507        assert_eq!(actual, expected);
508        Ok(())
509    }
510
511    #[test]
512    fn test_from_sorted_chars_out_of_order() {
513        let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'e'];
514        assert!(CharCollection::from_sorted_chars(chars).is_err());
515    }
516
517    #[test]
518    fn test_find_containing_range() {
519        let collection = char_collect!({ ('a'..='d') + ('g'..='j') + ('l'..='o') + 'z' });
520        assert_eq!(collection.find_containing_range(&'0'), Err(0));
521        assert_eq!(collection.find_containing_range(&'c'), Ok((0, chars!('a'..='d'))));
522        assert_eq!(collection.find_containing_range(&'e'), Err(1));
523    }
524
525    #[test]
526    fn test_insert_initial() {
527        let collection = char_collect!('a'..='d');
528        assert_eq!(collection.ranges, vec![chars!('a'..='d')])
529    }
530
531    #[test]
532    fn test_insert_exact_match() {
533        let mut collection = char_collect!('a'..='d', 'g'..='l');
534        collection += 'a'..='d';
535        assert_eq!(collection.ranges, vec![chars!('a'..='d'), chars!('g'..='l')]);
536    }
537
538    #[test]
539    fn test_insert_non_overlapping_sorted() {
540        let collection = char_collect!('a'..='d', 'g'..='j', 'l'..='o');
541        assert_eq!(
542            collection.ranges,
543            vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
544        );
545    }
546
547    #[test]
548    fn test_insert_non_overlapping_unsorted() {
549        let collection = char_collect!('l'..='o', 'a'..='d', 'l'..='o', 'a'..='d', 'g'..='j');
550        assert_eq!(
551            collection.ranges,
552            vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
553        );
554    }
555
556    #[test]
557    fn test_insert_overlapping_all_existent() {
558        let mut collection = char_collect!('l'..='o', 'a'..='d');
559
560        collection += 'a'..='o';
561        assert_eq!(collection.ranges, vec![chars!('a'..='o')]);
562    }
563
564    #[test]
565    fn test_insert_overlapping_some_existent() {
566        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
567
568        collection += 'i'..='n';
569        assert_eq!(
570            collection.ranges,
571            vec![chars!('c'..='e'), chars!('i'..='n'), chars!('p'..='s')]
572        );
573    }
574
575    #[test]
576    fn test_insert_overlapping_with_intersections() {
577        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
578
579        collection += 'd'..='k';
580        assert_eq!(collection.ranges, vec![chars!('c'..='m'), chars!('p'..='s')]);
581    }
582
583    #[test]
584    fn test_insert_coalesce_adjacent_ranges() {
585        let mut collection = char_collect!('a'..='c', 'j'..='m');
586
587        collection += 'd'..='i';
588        assert_eq!(collection.ranges, vec![chars!('a'..='m')]);
589    }
590
591    #[test]
592    fn test_append() -> Result<(), Error> {
593        let mut collection = char_collect!('a'..='c');
594
595        collection.append('d')?.append('g')?.append('h')?.append('i')?.append('z')?;
596        assert_eq!(collection, char_collect!('a'..='d', 'g'..='i', 'z'));
597        Ok(())
598    }
599
600    #[test]
601    fn test_append_out_of_order() -> Result<(), Error> {
602        let mut collection = char_collect!('a'..='c');
603        assert!(collection
604            .append('d')?
605            .append('g')?
606            .append('h')?
607            .append('i')?
608            .append('e')
609            .is_err());
610        Ok(())
611    }
612
613    #[test]
614    fn test_append_range() -> Result<(), Error> {
615        let mut collection = char_collect!('a'..='c');
616        collection.append_range(chars!('g'..='i'))?.append_range(chars!('j'..='m'))?;
617        assert_eq!(collection, char_collect!('a'..='c', 'g'..='m'));
618        Ok(())
619    }
620
621    #[test]
622    fn test_append_range_out_of_order() -> Result<(), Error> {
623        let mut collection = char_collect!('a'..='c');
624        assert!(collection
625            .append_range(chars!('g'..='i'))?
626            .append_range(chars!('j'..='m'))?
627            .append_range(chars!('k'..='m'))
628            .is_err());
629        Ok(())
630    }
631
632    #[test]
633    fn test_remove_exact_range() {
634        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
635
636        collection -= 'j'..='m';
637        assert_eq!(collection.ranges, vec![chars!('c'..='e'), chars!['p'..='s']]);
638    }
639
640    #[test]
641    fn test_remove_overlapping_all_existent() {
642        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
643
644        collection -= 'c'..='s';
645        assert_eq!(collection.ranges, vec![]);
646    }
647
648    #[test]
649    fn test_remove_overlapping_all_existent_superset() {
650        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
651
652        collection -= 'a'..='z';
653        assert_eq!(collection.ranges, vec![]);
654    }
655
656    #[test]
657    fn test_remove_one_subrange() {
658        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
659
660        collection -= 'k'..='l';
661        assert_eq!(
662            collection.ranges,
663            vec![chars!('c'..='e'), chars!('j'..='j'), chars!('m'..='m'), chars!('p'..='s')]
664        );
665    }
666
667    #[test]
668    fn test_remove_intersection() {
669        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
670
671        collection -= 'd'..='q';
672        assert_eq!(collection.ranges, vec![chars!('c'..='c'), chars!('r'..='s')]);
673    }
674
675    #[test]
676    fn test_complement_simple() {
677        let collection = char_collect!(0x10..=0x50, 0x70..=0x70, 0x99..=0x640);
678        assert_eq!(
679            collection.complement(),
680            char_collect!(0x00..=0x0F, 0x51..=0x6F, 0x71..=0x98, 0x641..=(char::MAX as u32))
681        );
682    }
683
684    #[test]
685    fn test_complement_all() {
686        let collection = char_collect!(CharRange::all());
687        assert_eq!(collection.complement(), char_collect!());
688    }
689
690    #[test]
691    fn test_complement_none() {
692        let collection = char_collect!();
693        assert_eq!(collection.complement(), char_collect!(CharRange::all()));
694    }
695
696    #[test]
697    fn test_complement_includes_min_and_max() {
698        let collection = char_collect!(0x0..=0x10, 0x40..=0x50, 0xCCCC..=(char::MAX as u32));
699        assert_eq!(collection.complement(), char_collect!(0x11..=0x3F, 0x51..=0xCCCB));
700    }
701
702    #[test]
703    fn test_union() {
704        let collection_a = char_collect!('a'..='g', 'm'..='z', 'B'..='R');
705        let collection_b = char_collect!('e'..='q', 'W'..='Y');
706
707        let expected = char_collect!('a'..='z', 'B'..='R', 'W'..='Y');
708        assert_eq!(collection_a.union(&collection_b), expected);
709        assert_eq!(collection_b.union(&collection_a), expected);
710    }
711
712    #[test]
713    fn test_intersection() {
714        let collection_a = char_collect!('a'..='g', 'm'..='z');
715        let collection_b = char_collect!('e'..='q');
716
717        let expected = char_collect!('e'..='g', 'm'..='q');
718        assert_eq!(collection_a.intersection(&collection_b), expected);
719        assert_eq!(collection_b.intersection(&collection_a), expected);
720    }
721
722    #[test]
723    fn test_macro_expressions() {
724        use unicode_blocks::UnicodeBlockId::Arabic;
725
726        let collection =
727            char_collect!({ ('c'..='e') + ('f'..='h') - ('a'..='d') + Arabic + (0x5..=0x42) });
728        assert_eq!(collection, char_collect!(0x5..=0x42, 'e'..='h', Arabic));
729    }
730
731    #[test]
732    fn test_iter() {
733        let collection = char_collect!('a'..='c', 'j'..='l', 'x'..='z');
734
735        let v = collection.iter().collect::<Vec<char>>();
736        assert_eq!(v, vec!['a', 'b', 'c', 'j', 'k', 'l', 'x', 'y', 'z']);
737    }
738
739    #[test]
740    fn test_are_chars_adjacent() {
741        assert!(are_chars_adjacent(&'a', &'b'));
742        assert!(!are_chars_adjacent(&'b', &'a'));
743        assert!(!are_chars_adjacent(&'a', &'c'));
744    }
745}
char_collection/char_collection.rs

char_collection/
char_collection.rs