1use anyhow::{format_err, Error};
6use std::clone::Clone;
7use std::cmp::Ordering;
8use std::hash::{Hash, Hasher};
9use std::ops::Range;
10use unic_char_range::{chars, CharIter, CharRange};
11
12pub trait MultiCharRange {
15 fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a>;
17 fn range_count(&self) -> usize;
19}
20
21#[derive(Clone, Debug, Eq, PartialEq, Default)]
52pub struct CharCollection {
53 ranges: Vec<CharRange>,
54}
55
56impl CharCollection {
57 pub fn new() -> CharCollection {
59 CharCollection::default()
60 }
61
62 pub fn from_sorted_ranges<T>(ranges: T) -> Result<CharCollection, Error>
69 where
70 T: IntoIterator<Item = CharRange>,
71 {
72 let collection = CharCollection { ranges: ranges.into_iter().collect() };
74 let ranges: &Vec<CharRange> = &collection.ranges;
75 match (1..ranges.len()).find(|i| (ranges[*i].low as i64 - ranges[*i - 1].high as i64) <= 1)
76 {
77 Some(i) => Err(format_err!(
78 "These ranges are out of order, overlapping, or adjacent: {}, {}",
79 format_range(&ranges[i - 1]),
80 format_range(&ranges[i])
81 )),
82 None => Ok(collection),
83 }
84 }
85
86 pub fn from_sorted_chars<T>(chars: T) -> Result<CharCollection, Error>
92 where
93 T: IntoIterator<Item = char>,
94 {
95 let mut collection = CharCollection::new();
96 for ch in chars.into_iter() {
97 collection.append(ch)?;
98 }
99 Ok(collection)
100 }
101
102 pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
104 self.ranges.iter().flat_map(CharRange::iter)
105 }
106
107 pub fn contains(&self, ch: &char) -> bool {
112 self.find_containing_range(ch).is_ok()
113 }
114
115 pub fn contains_range(&self, range: &CharRange) -> bool {
120 if range.is_empty() {
121 return false;
122 }
123
124 let lower_existing_range = self.find_containing_range(&range.low);
125 let upper_existing_range = self.find_containing_range(&range.high);
126
127 return lower_existing_range == upper_existing_range && lower_existing_range.is_ok();
129 }
130
131 pub fn insert<V: MultiCharRange>(&mut self, to_add: &V) -> &mut Self {
139 to_add.iter_ranges().for_each(|range| self.insert_char_range(&range));
140 self
141 }
142
143 pub fn append(&mut self, ch: char) -> Result<&mut Self, Error> {
150 let mut coalesced = false;
151 if let Some(last_range) = self.ranges.last_mut() {
152 if last_range.cmp_char(ch) != Ordering::Less {
153 return Err(format_err!("Cannot append {} after {}", ch, last_range.high));
154 }
155 if are_chars_adjacent(&last_range.high, &ch) {
156 last_range.high = ch;
157 coalesced = true;
158 }
159 }
160 if !coalesced {
161 self.ranges.push(chars!(ch..=ch));
162 }
163 Ok(self)
164 }
165
166 pub fn append_range(&mut self, range: CharRange) -> Result<&mut Self, Error> {
174 let mut coalesced = false;
175 if let Some(last_range) = self.ranges.last_mut() {
176 if last_range.cmp_char(range.low) != Ordering::Less {
177 return Err(format_err!(
178 "Cannot append {} after {}",
179 format_range(&range),
180 last_range.high
181 ));
182 }
183 if are_chars_adjacent(&last_range.high, &range.low) {
184 last_range.high = range.high;
185 coalesced = true;
186 }
187 }
188 if !coalesced {
189 self.ranges.push(range);
190 }
191 Ok(self)
192 }
193
194 pub fn remove<V: MultiCharRange>(&mut self, to_remove: &V) -> &mut Self {
202 to_remove.iter_ranges().for_each(|range| self.remove_char_range(&range));
203 self
204 }
205
206 pub fn clear(&mut self) -> &mut Self {
210 self.ranges.clear();
211 self
212 }
213
214 pub fn union<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
220 let mut result: CharCollection;
221 if self.range_count() > rhs.range_count() {
222 result = self.clone();
223 result.insert(rhs);
224 } else {
225 result = rhs.into();
226 result.insert(self);
227 }
228 result
229 }
230
231 pub fn intersection<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
237 let mut result: CharCollection;
238 if self.range_count() > rhs.range_count() {
239 result = self.clone();
240 let rhs: CharCollection = rhs.into();
241 result.remove(&rhs.complement());
242 } else {
243 result = rhs.into();
244 result.remove(&self.complement());
245 }
246 result
247 }
248
249 pub fn difference<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
255 let mut result: CharCollection = self.clone();
256 result.remove(rhs);
257 result
258 }
259
260 pub fn complement(&self) -> CharCollection {
265 if self.ranges.is_empty() {
266 return CharCollection::from(&CharRange::all());
267 }
268
269 let mut result_ranges: Vec<CharRange> = Vec::new();
270
271 if self.ranges[0].low != '\u{0}' {
272 result_ranges.push(CharRange::open_right('\u{0}', self.ranges[0].low));
273 }
274
275 let mut prev_high = self.ranges[0].high;
276
277 for range in &self.ranges[1..] {
278 result_ranges.push(CharRange::open(prev_high, range.low));
279 prev_high = range.high;
280 }
281
282 if prev_high != std::char::MAX {
283 result_ranges.push(CharRange::open_left(prev_high, std::char::MAX));
284 }
285
286 CharCollection { ranges: result_ranges }
287 }
288
289 fn insert_char_range(&mut self, new_range: &CharRange) {
295 if new_range.is_empty() {
296 return;
297 }
298
299 let lower_existing_range = self.find_containing_range(&new_range.low);
300 let upper_existing_range = self.find_containing_range(&new_range.high);
301
302 if lower_existing_range == upper_existing_range && lower_existing_range.is_ok() {
304 return;
305 }
306
307 let new_low: char;
308 let new_high: char;
309
310 let remove_from_idx: usize;
311 let remove_to_idx: usize;
312
313 match lower_existing_range {
314 Ok((idx, lower_existing_range)) => {
315 new_low = lower_existing_range.low;
316 remove_from_idx = idx;
317 }
318 Err(idx) => {
319 new_low = new_range.low;
320 remove_from_idx = idx;
321 }
322 }
323
324 match upper_existing_range {
325 Ok((idx, higher_existing_range)) => {
326 new_high = higher_existing_range.high;
327 remove_to_idx = idx + 1;
328 }
329 Err(idx) => {
330 new_high = new_range.high;
331 remove_to_idx = idx;
332 }
333 }
334
335 self.replace_ranges(chars!(new_low..=new_high), remove_from_idx..remove_to_idx);
336 }
337
338 fn remove_char_range(&mut self, range_to_remove: &CharRange) {
343 if range_to_remove.is_empty() {
344 return;
345 }
346
347 let lower_existing_range = self.find_containing_range(&range_to_remove.low);
348 let upper_existing_range = self.find_containing_range(&range_to_remove.high);
349
350 let mut replacement_ranges: Vec<CharRange> = Vec::new();
351
352 let remove_from_idx: usize;
353 let remove_to_idx: usize;
354
355 match lower_existing_range {
356 Ok((idx, lower_existing_range)) => {
357 if lower_existing_range.low < range_to_remove.low {
358 replacement_ranges
359 .push(CharRange::open_right(lower_existing_range.low, range_to_remove.low));
360 }
361 remove_from_idx = idx;
362 }
363 Err(idx) => remove_from_idx = idx,
364 }
365
366 match upper_existing_range {
367 Ok((idx, higher_existing_range)) => {
368 if range_to_remove.high < higher_existing_range.high {
369 replacement_ranges.push(CharRange::open_left(
370 range_to_remove.high,
371 higher_existing_range.high,
372 ));
373 }
374 remove_to_idx = idx + 1;
375 }
376 Err(idx) => {
377 remove_to_idx = idx;
378 }
379 }
380
381 self.ranges.splice(remove_from_idx..remove_to_idx, replacement_ranges);
382 }
383
384 fn replace_ranges(
388 &mut self,
389 mut char_range_to_insert: CharRange,
390 mut indices_to_replace: Range<usize>,
391 ) {
392 if indices_to_replace.start > 0 {
394 let prev_char_range = self.ranges[indices_to_replace.start - 1];
395 if are_chars_adjacent(&prev_char_range.high, &char_range_to_insert.low) {
396 char_range_to_insert.low = prev_char_range.low;
397 indices_to_replace.start -= 1;
398 }
399 }
400
401 if indices_to_replace.end < self.ranges.len() {
403 let next_char_range = self.ranges[indices_to_replace.end];
404 if are_chars_adjacent(&char_range_to_insert.high, &next_char_range.low) {
405 char_range_to_insert.high = next_char_range.high;
406 indices_to_replace.end += 1;
407 }
408 }
409
410 self.ranges.splice(indices_to_replace, vec![char_range_to_insert]);
411 }
412
413 fn find_containing_range(&self, query: &char) -> Result<(usize, CharRange), usize> {
414 let result = self.ranges.binary_search_by(|range| range.cmp_char(query.clone()));
415 match result {
416 Ok(index) => Ok((index, self.ranges[index])),
417 Err(index) => Err(index),
418 }
419 }
420}
421
422impl MultiCharRange for CharCollection {
423 fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
424 Box::new(self.ranges.iter().map(|range| range.clone()))
425 }
426
427 fn range_count(&self) -> usize {
428 self.ranges.len()
429 }
430}
431
432#[allow(clippy::derived_hash_with_manual_eq)] impl Hash for CharCollection {
434 fn hash<H: Hasher>(&self, state: &mut H) {
435 self.ranges.iter().for_each(|range| hash_char_range(range, state));
436 }
437}
438
439fn hash_char_range<H: Hasher>(range: &CharRange, state: &mut H) {
440 range.low.hash(state);
441 range.high.hash(state);
442}
443
444fn are_chars_adjacent(left: &char, right: &char) -> bool {
445 let mut iter: CharIter = CharRange::open_right(left.clone(), right.clone()).iter();
446 match iter.next_back() {
447 None => false,
448 Some(next_right) => left == &next_right,
449 }
450}
451
452fn format_range(range: &CharRange) -> String {
453 format!("{}..={}", range.low, range.high)
454}
455
456#[cfg(test)]
457mod tests {
458 use super::{are_chars_adjacent, CharCollection};
459 use anyhow::Error;
460 use std::char;
461 use unic_char_range::{chars, CharRange};
462
463 #[test]
464 fn test_from_sorted_ranges() -> Result<(), Error> {
465 let expected = char_collect!('a'..='d', 'g'..='l', 'z');
466 let actual = CharCollection::from_sorted_ranges(vec![
467 chars!('a'..='d'),
468 chars!('g'..='l'),
469 chars!('z'..='z'),
470 ])?;
471 assert_eq!(actual, expected);
472 Ok(())
473 }
474
475 #[test]
476 fn test_from_sorted_ranges_out_of_order() {
477 assert!(CharCollection::from_sorted_ranges(vec![
478 chars!('g'..='l'),
479 chars!('a'..='d'),
480 chars!('z'..='z'),
481 ])
482 .is_err());
483 }
484
485 #[test]
486 fn test_from_sorted_ranges_overlap() {
487 assert!(CharCollection::from_sorted_ranges(vec![
488 chars!('a'..='d'),
489 chars!('c'..='l'),
490 chars!('z'..='z'),
491 ])
492 .is_err());
493 }
494
495 #[test]
496 fn test_from_sorted_ranges_adjacent() {
497 assert!(
498 CharCollection::from_sorted_ranges(vec![chars!('a'..='d'), chars!('e'..='g')]).is_err()
499 );
500 }
501
502 #[test]
503 fn test_from_sorted_chars() -> Result<(), Error> {
504 let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'z'];
505 let expected = char_collect!('a'..='d', 'g'..='l', 'z');
506 let actual = CharCollection::from_sorted_chars(chars)?;
507 assert_eq!(actual, expected);
508 Ok(())
509 }
510
511 #[test]
512 fn test_from_sorted_chars_out_of_order() {
513 let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'e'];
514 assert!(CharCollection::from_sorted_chars(chars).is_err());
515 }
516
517 #[test]
518 fn test_find_containing_range() {
519 let collection = char_collect!({ ('a'..='d') + ('g'..='j') + ('l'..='o') + 'z' });
520 assert_eq!(collection.find_containing_range(&'0'), Err(0));
521 assert_eq!(collection.find_containing_range(&'c'), Ok((0, chars!('a'..='d'))));
522 assert_eq!(collection.find_containing_range(&'e'), Err(1));
523 }
524
525 #[test]
526 fn test_insert_initial() {
527 let collection = char_collect!('a'..='d');
528 assert_eq!(collection.ranges, vec![chars!('a'..='d')])
529 }
530
531 #[test]
532 fn test_insert_exact_match() {
533 let mut collection = char_collect!('a'..='d', 'g'..='l');
534 collection += 'a'..='d';
535 assert_eq!(collection.ranges, vec![chars!('a'..='d'), chars!('g'..='l')]);
536 }
537
538 #[test]
539 fn test_insert_non_overlapping_sorted() {
540 let collection = char_collect!('a'..='d', 'g'..='j', 'l'..='o');
541 assert_eq!(
542 collection.ranges,
543 vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
544 );
545 }
546
547 #[test]
548 fn test_insert_non_overlapping_unsorted() {
549 let collection = char_collect!('l'..='o', 'a'..='d', 'l'..='o', 'a'..='d', 'g'..='j');
550 assert_eq!(
551 collection.ranges,
552 vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
553 );
554 }
555
556 #[test]
557 fn test_insert_overlapping_all_existent() {
558 let mut collection = char_collect!('l'..='o', 'a'..='d');
559
560 collection += 'a'..='o';
561 assert_eq!(collection.ranges, vec![chars!('a'..='o')]);
562 }
563
564 #[test]
565 fn test_insert_overlapping_some_existent() {
566 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
567
568 collection += 'i'..='n';
569 assert_eq!(
570 collection.ranges,
571 vec![chars!('c'..='e'), chars!('i'..='n'), chars!('p'..='s')]
572 );
573 }
574
575 #[test]
576 fn test_insert_overlapping_with_intersections() {
577 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
578
579 collection += 'd'..='k';
580 assert_eq!(collection.ranges, vec![chars!('c'..='m'), chars!('p'..='s')]);
581 }
582
583 #[test]
584 fn test_insert_coalesce_adjacent_ranges() {
585 let mut collection = char_collect!('a'..='c', 'j'..='m');
586
587 collection += 'd'..='i';
588 assert_eq!(collection.ranges, vec![chars!('a'..='m')]);
589 }
590
591 #[test]
592 fn test_append() -> Result<(), Error> {
593 let mut collection = char_collect!('a'..='c');
594
595 collection.append('d')?.append('g')?.append('h')?.append('i')?.append('z')?;
596 assert_eq!(collection, char_collect!('a'..='d', 'g'..='i', 'z'));
597 Ok(())
598 }
599
600 #[test]
601 fn test_append_out_of_order() -> Result<(), Error> {
602 let mut collection = char_collect!('a'..='c');
603 assert!(collection
604 .append('d')?
605 .append('g')?
606 .append('h')?
607 .append('i')?
608 .append('e')
609 .is_err());
610 Ok(())
611 }
612
613 #[test]
614 fn test_append_range() -> Result<(), Error> {
615 let mut collection = char_collect!('a'..='c');
616 collection.append_range(chars!('g'..='i'))?.append_range(chars!('j'..='m'))?;
617 assert_eq!(collection, char_collect!('a'..='c', 'g'..='m'));
618 Ok(())
619 }
620
621 #[test]
622 fn test_append_range_out_of_order() -> Result<(), Error> {
623 let mut collection = char_collect!('a'..='c');
624 assert!(collection
625 .append_range(chars!('g'..='i'))?
626 .append_range(chars!('j'..='m'))?
627 .append_range(chars!('k'..='m'))
628 .is_err());
629 Ok(())
630 }
631
632 #[test]
633 fn test_remove_exact_range() {
634 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
635
636 collection -= 'j'..='m';
637 assert_eq!(collection.ranges, vec![chars!('c'..='e'), chars!['p'..='s']]);
638 }
639
640 #[test]
641 fn test_remove_overlapping_all_existent() {
642 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
643
644 collection -= 'c'..='s';
645 assert_eq!(collection.ranges, vec![]);
646 }
647
648 #[test]
649 fn test_remove_overlapping_all_existent_superset() {
650 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
651
652 collection -= 'a'..='z';
653 assert_eq!(collection.ranges, vec![]);
654 }
655
656 #[test]
657 fn test_remove_one_subrange() {
658 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
659
660 collection -= 'k'..='l';
661 assert_eq!(
662 collection.ranges,
663 vec![chars!('c'..='e'), chars!('j'..='j'), chars!('m'..='m'), chars!('p'..='s')]
664 );
665 }
666
667 #[test]
668 fn test_remove_intersection() {
669 let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
670
671 collection -= 'd'..='q';
672 assert_eq!(collection.ranges, vec![chars!('c'..='c'), chars!('r'..='s')]);
673 }
674
675 #[test]
676 fn test_complement_simple() {
677 let collection = char_collect!(0x10..=0x50, 0x70..=0x70, 0x99..=0x640);
678 assert_eq!(
679 collection.complement(),
680 char_collect!(0x00..=0x0F, 0x51..=0x6F, 0x71..=0x98, 0x641..=(char::MAX as u32))
681 );
682 }
683
684 #[test]
685 fn test_complement_all() {
686 let collection = char_collect!(CharRange::all());
687 assert_eq!(collection.complement(), char_collect!());
688 }
689
690 #[test]
691 fn test_complement_none() {
692 let collection = char_collect!();
693 assert_eq!(collection.complement(), char_collect!(CharRange::all()));
694 }
695
696 #[test]
697 fn test_complement_includes_min_and_max() {
698 let collection = char_collect!(0x0..=0x10, 0x40..=0x50, 0xCCCC..=(char::MAX as u32));
699 assert_eq!(collection.complement(), char_collect!(0x11..=0x3F, 0x51..=0xCCCB));
700 }
701
702 #[test]
703 fn test_union() {
704 let collection_a = char_collect!('a'..='g', 'm'..='z', 'B'..='R');
705 let collection_b = char_collect!('e'..='q', 'W'..='Y');
706
707 let expected = char_collect!('a'..='z', 'B'..='R', 'W'..='Y');
708 assert_eq!(collection_a.union(&collection_b), expected);
709 assert_eq!(collection_b.union(&collection_a), expected);
710 }
711
712 #[test]
713 fn test_intersection() {
714 let collection_a = char_collect!('a'..='g', 'm'..='z');
715 let collection_b = char_collect!('e'..='q');
716
717 let expected = char_collect!('e'..='g', 'm'..='q');
718 assert_eq!(collection_a.intersection(&collection_b), expected);
719 assert_eq!(collection_b.intersection(&collection_a), expected);
720 }
721
722 #[test]
723 fn test_macro_expressions() {
724 use unicode_blocks::UnicodeBlockId::Arabic;
725
726 let collection =
727 char_collect!({ ('c'..='e') + ('f'..='h') - ('a'..='d') + Arabic + (0x5..=0x42) });
728 assert_eq!(collection, char_collect!(0x5..=0x42, 'e'..='h', Arabic));
729 }
730
731 #[test]
732 fn test_iter() {
733 let collection = char_collect!('a'..='c', 'j'..='l', 'x'..='z');
734
735 let v = collection.iter().collect::<Vec<char>>();
736 assert_eq!(v, vec!['a', 'b', 'c', 'j', 'k', 'l', 'x', 'y', 'z']);
737 }
738
739 #[test]
740 fn test_are_chars_adjacent() {
741 assert!(are_chars_adjacent(&'a', &'b'));
742 assert!(!are_chars_adjacent(&'b', &'a'));
743 assert!(!are_chars_adjacent(&'a', &'c'));
744 }
745}