unicode_bidi/
lib.rs

1// Copyright 2015 The Servo Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed
11//! right-to-left and left-to-right text.  It is written in safe Rust, compatible with the
12//! current stable release.
13//!
14//! ## Example
15//!
16//! ```rust
17//! use unicode_bidi::BidiInfo;
18//!
19//! // This example text is defined using `concat!` because some browsers
20//! // and text editors have trouble displaying bidi strings.
21//! let text = concat![
22//!   "א",
23//!   "ב",
24//!   "ג",
25//!   "a",
26//!   "b",
27//!   "c",
28//! ];
29//!
30//! // Resolve embedding levels within the text.  Pass `None` to detect the
31//! // paragraph level automatically.
32//! let bidi_info = BidiInfo::new(&text, None);
33//!
34//! // This paragraph has embedding level 1 because its first strong character is RTL.
35//! assert_eq!(bidi_info.paragraphs.len(), 1);
36//! let para = &bidi_info.paragraphs[0];
37//! assert_eq!(para.level.number(), 1);
38//! assert_eq!(para.level.is_rtl(), true);
39//!
40//! // Re-ordering is done after wrapping each paragraph into a sequence of
41//! // lines. For this example, I'll just use a single line that spans the
42//! // entire paragraph.
43//! let line = para.range.clone();
44//!
45//! let display = bidi_info.reorder_line(para, line);
46//! assert_eq!(display, concat![
47//!   "a",
48//!   "b",
49//!   "c",
50//!   "ג",
51//!   "ב",
52//!   "א",
53//! ]);
54//! ```
55//!
56//! [tr9]: <http://www.unicode.org/reports/tr9/>
57
58#![forbid(unsafe_code)]
59
60#![cfg_attr(feature="flame_it", feature(plugin, custom_attribute))]
61#![cfg_attr(feature="flame_it", plugin(flamer))]
62
63
64#[macro_use]
65extern crate matches;
66
67#[cfg(feature = "serde")]
68#[macro_use]
69extern crate serde;
70
71#[cfg(all(feature = "serde", test))]
72extern crate serde_test;
73
74#[cfg(feature = "flame_it")]
75extern crate flame;
76
77
78pub mod deprecated;
79pub mod format_chars;
80pub mod level;
81
82mod char_data;
83mod explicit;
84mod implicit;
85mod prepare;
86
87pub use char_data::{BidiClass, bidi_class, UNICODE_VERSION};
88pub use level::{Level, LTR_LEVEL, RTL_LEVEL};
89pub use prepare::LevelRun;
90
91use std::borrow::Cow;
92use std::cmp::{max, min};
93use std::iter::repeat;
94use std::ops::Range;
95
96use BidiClass::*;
97use format_chars as chars;
98
99
100/// Bidi information about a single paragraph
101#[derive(Debug, PartialEq)]
102pub struct ParagraphInfo {
103    /// The paragraphs boundaries within the text, as byte indices.
104    ///
105    /// TODO: Shrink this to only include the starting index?
106    pub range: Range<usize>,
107
108    /// The paragraph embedding level.
109    ///
110    /// <http://www.unicode.org/reports/tr9/#BD4>
111    pub level: Level,
112}
113
114/// Initial bidi information of the text.
115///
116/// Contains the text paragraphs and `BidiClass` of its characters.
117#[derive(PartialEq, Debug)]
118pub struct InitialInfo<'text> {
119    /// The text
120    pub text: &'text str,
121
122    /// The BidiClass of the character at each byte in the text.
123    /// If a character is multiple bytes, its class will appear multiple times in the vector.
124    pub original_classes: Vec<BidiClass>,
125
126    /// The boundaries and level of each paragraph within the text.
127    pub paragraphs: Vec<ParagraphInfo>,
128}
129
130impl<'text> InitialInfo<'text> {
131    /// Find the paragraphs and BidiClasses in a string of text.
132    ///
133    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
134    ///
135    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
136    /// character is found before the matching PDI.  If no strong character is found, the class will
137    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
138    #[cfg_attr(feature = "flame_it", flame)]
139    pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo {
140        let mut original_classes = Vec::with_capacity(text.len());
141
142        // The stack contains the starting byte index for each nested isolate we're inside.
143        let mut isolate_stack = Vec::new();
144        let mut paragraphs = Vec::new();
145
146        let mut para_start = 0;
147        let mut para_level = default_para_level;
148
149        #[cfg(feature = "flame_it")] flame::start("InitialInfo::new(): iter text.char_indices()");
150
151        for (i, c) in text.char_indices() {
152            let class = bidi_class(c);
153
154            #[cfg(feature = "flame_it")] flame::start("original_classes.extend()");
155
156            original_classes.extend(repeat(class).take(c.len_utf8()));
157
158            #[cfg(feature = "flame_it")] flame::end("original_classes.extend()");
159
160            match class {
161
162                B => {
163                    // P1. Split the text into separate paragraphs. The paragraph separator is kept
164                    // with the previous paragraph.
165                    let para_end = i + c.len_utf8();
166                    paragraphs.push(ParagraphInfo {
167                        range: para_start..para_end,
168                        // P3. If no character is found in p2, set the paragraph level to zero.
169                        level: para_level.unwrap_or(LTR_LEVEL),
170                    });
171                    // Reset state for the start of the next paragraph.
172                    para_start = para_end;
173                    // TODO: Support defaulting to direction of previous paragraph
174                    //
175                    // <http://www.unicode.org/reports/tr9/#HL1>
176                    para_level = default_para_level;
177                    isolate_stack.clear();
178                }
179
180                L | R | AL => {
181                    match isolate_stack.last() {
182                        Some(&start) => {
183                            if original_classes[start] == FSI {
184                                // X5c. If the first strong character between FSI and its matching
185                                // PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
186                                for j in 0..chars::FSI.len_utf8() {
187                                    original_classes[start + j] =
188                                        if class == L { LRI } else { RLI };
189                                }
190                            }
191                        }
192
193                        None => {
194                            if para_level.is_none() {
195                                // P2. Find the first character of type L, AL, or R, while skipping
196                                // any characters between an isolate initiator and its matching
197                                // PDI.
198                                para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL });
199                            }
200                        }
201                    }
202                }
203
204                RLI | LRI | FSI => {
205                    isolate_stack.push(i);
206                }
207
208                PDI => {
209                    isolate_stack.pop();
210                }
211
212                _ => {}
213            }
214        }
215        if para_start < text.len() {
216            paragraphs.push(ParagraphInfo {
217                range: para_start..text.len(),
218                level: para_level.unwrap_or(LTR_LEVEL),
219            });
220        }
221        assert_eq!(original_classes.len(), text.len());
222
223        #[cfg(feature = "flame_it")] flame::end("InitialInfo::new(): iter text.char_indices()");
224
225        InitialInfo {
226            text,
227            original_classes,
228            paragraphs,
229        }
230    }
231}
232
233/// Bidi information of the text.
234///
235/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text.  If a
236/// character is multiple bytes wide, then its class and level will appear multiple times in these
237/// vectors.
238// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
239#[derive(Debug, PartialEq)]
240pub struct BidiInfo<'text> {
241    /// The text
242    pub text: &'text str,
243
244    /// The BidiClass of the character at each byte in the text.
245    pub original_classes: Vec<BidiClass>,
246
247    /// The directional embedding level of each byte in the text.
248    pub levels: Vec<Level>,
249
250    /// The boundaries and paragraph embedding level of each paragraph within the text.
251    ///
252    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
253    /// Or just don't include the first paragraph, which always starts at 0?
254    pub paragraphs: Vec<ParagraphInfo>,
255}
256
257impl<'text> BidiInfo<'text> {
258    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
259    ///
260    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
261    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
262    ///
263    /// TODO: Support auto-RTL base direction
264    #[cfg_attr(feature = "flame_it", flame)]
265    pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo {
266        let InitialInfo {
267            original_classes,
268            paragraphs,
269            ..
270        } = InitialInfo::new(text, default_para_level);
271
272        let mut levels = Vec::<Level>::with_capacity(text.len());
273        let mut processing_classes = original_classes.clone();
274
275        for para in &paragraphs {
276            let text = &text[para.range.clone()];
277            let original_classes = &original_classes[para.range.clone()];
278            let processing_classes = &mut processing_classes[para.range.clone()];
279
280            let new_len = levels.len() + para.range.len();
281            levels.resize(new_len, para.level);
282            let levels = &mut levels[para.range.clone()];
283
284            explicit::compute(
285                text,
286                para.level,
287                original_classes,
288                levels,
289                processing_classes,
290            );
291
292            let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels);
293            for sequence in &sequences {
294                implicit::resolve_weak(sequence, processing_classes);
295                implicit::resolve_neutral(sequence, levels, processing_classes);
296            }
297            implicit::resolve_levels(processing_classes, levels);
298
299            assign_levels_to_removed_chars(para.level, original_classes, levels);
300        }
301
302        BidiInfo {
303            text,
304            original_classes,
305            paragraphs,
306            levels,
307        }
308    }
309
310    /// Re-order a line based on resolved levels and return only the embedding levels, one `Level`
311    /// per *byte*.
312    #[cfg_attr(feature = "flame_it", flame)]
313    pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
314        let (levels, _) = self.visual_runs(para, line.clone());
315        levels
316    }
317
318    /// Re-order a line based on resolved levels and return only the embedding levels, one `Level`
319    /// per *character*.
320    #[cfg_attr(feature = "flame_it", flame)]
321    pub fn reordered_levels_per_char(
322        &self,
323        para: &ParagraphInfo,
324        line: Range<usize>,
325    ) -> Vec<Level> {
326        let levels = self.reordered_levels(para, line);
327        self.text.char_indices().map(|(i, _)| levels[i]).collect()
328    }
329
330
331    /// Re-order a line based on resolved levels and return the line in display order.
332    #[cfg_attr(feature = "flame_it", flame)]
333    pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
334        let (levels, runs) = self.visual_runs(para, line.clone());
335
336        // If all isolating run sequences are LTR, no reordering is needed
337        if runs.iter().all(|run| levels[run.start].is_ltr()) {
338            return self.text[line.clone()].into();
339        }
340
341        let mut result = String::with_capacity(line.len());
342        for run in runs {
343            if levels[run.start].is_rtl() {
344                result.extend(self.text[run].chars().rev());
345            } else {
346                result.push_str(&self.text[run]);
347            }
348        }
349        result.into()
350    }
351
352    /// Find the level runs within a line and return them in visual order.
353    ///
354    /// `line` is a range of bytes indices within `levels`.
355    ///
356    /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
357    #[cfg_attr(feature = "flame_it", flame)]
358    pub fn visual_runs(
359        &self,
360        para: &ParagraphInfo,
361        line: Range<usize>,
362    ) -> (Vec<Level>, Vec<LevelRun>) {
363        assert!(line.start <= self.levels.len());
364        assert!(line.end <= self.levels.len());
365
366        let mut levels = self.levels.clone();
367
368        // Reset some whitespace chars to paragraph level.
369        // <http://www.unicode.org/reports/tr9/#L1>
370        let line_str: &str = &self.text[line.clone()];
371        let mut reset_from: Option<usize> = Some(0);
372        let mut reset_to: Option<usize> = None;
373        for (i, c) in line_str.char_indices() {
374            match self.original_classes[i] {
375                // Ignored by X9
376                RLE | LRE | RLO | LRO | PDF | BN => {}
377                // Segment separator, Paragraph separator
378                B | S => {
379                    assert_eq!(reset_to, None);
380                    reset_to = Some(i + c.len_utf8());
381                    if reset_from == None {
382                        reset_from = Some(i);
383                    }
384                }
385                // Whitespace, isolate formatting
386                WS | FSI | LRI | RLI | PDI => {
387                    if reset_from == None {
388                        reset_from = Some(i);
389                    }
390                }
391                _ => {
392                    reset_from = None;
393                }
394            }
395            if let (Some(from), Some(to)) = (reset_from, reset_to) {
396                #[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
397                for j in from..to {
398                    levels[j] = para.level;
399                }
400                reset_from = None;
401                reset_to = None;
402            }
403        }
404        if let Some(from) = reset_from {
405            #[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
406            for j in from..line_str.len() {
407                levels[j] = para.level;
408            }
409        }
410
411        // Find consecutive level runs.
412        let mut runs = Vec::new();
413        let mut start = line.start;
414        let mut run_level = levels[start];
415        let mut min_level = run_level;
416        let mut max_level = run_level;
417
418        for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) {
419            if new_level != run_level {
420                // End of the previous run, start of a new one.
421                runs.push(start..i);
422                start = i;
423                run_level = new_level;
424                min_level = min(run_level, min_level);
425                max_level = max(run_level, max_level);
426            }
427        }
428        runs.push(start..line.end);
429
430        let run_count = runs.len();
431
432        // Re-order the odd runs.
433        // <http://www.unicode.org/reports/tr9/#L2>
434
435        // Stop at the lowest *odd* level.
436        min_level = min_level.new_lowest_ge_rtl().expect("Level error");
437
438        while max_level >= min_level {
439            // Look for the start of a sequence of consecutive runs of max_level or higher.
440            let mut seq_start = 0;
441            while seq_start < run_count {
442                if self.levels[runs[seq_start].start] < max_level {
443                    seq_start += 1;
444                    continue;
445                }
446
447                // Found the start of a sequence. Now find the end.
448                let mut seq_end = seq_start + 1;
449                while seq_end < run_count {
450                    if self.levels[runs[seq_end].start] < max_level {
451                        break;
452                    }
453                    seq_end += 1;
454                }
455
456                // Reverse the runs within this sequence.
457                runs[seq_start..seq_end].reverse();
458
459                seq_start = seq_end;
460            }
461            max_level.lower(1).expect(
462                "Lowering embedding level below zero",
463            );
464        }
465
466        (levels, runs)
467    }
468
469    /// If processed text has any computed RTL levels
470    ///
471    /// This information is usually used to skip re-ordering of text when no RTL level is present
472    #[inline]
473    pub fn has_rtl(&self) -> bool {
474        level::has_rtl(&self.levels)
475    }
476}
477
478/// Assign levels to characters removed by rule X9.
479///
480/// The levels assigned to these characters are not specified by the algorithm.  This function
481/// assigns each one the level of the previous character, to avoid breaking level runs.
482#[cfg_attr(feature = "flame_it", flame)]
483fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
484    for i in 0..levels.len() {
485        if prepare::removed_by_x9(classes[i]) {
486            levels[i] = if i > 0 { levels[i - 1] } else { para_level };
487        }
488    }
489}
490
491
#[cfg(test)]
mod tests {
    use super::*;

    /// `InitialInfo::new` reports per-byte classes and paragraph boundaries.
    #[test]
    fn test_initial_text_info() {
        let text = "a1";
        assert_eq!(
            InitialInfo::new(text, None),
            InitialInfo {
                text,
                original_classes: vec![L, EN],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..2,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );

        let text = "غ א";
        assert_eq!(
            InitialInfo::new(text, None),
            InitialInfo {
                text,
                original_classes: vec![AL, AL, WS, R, R],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..5,
                        level: RTL_LEVEL,
                    },
                ],
            }
        );

        // The paragraph separator stays with the paragraph it terminates.
        let text = "a\u{2029}b";
        assert_eq!(
            InitialInfo::new(text, None),
            InitialInfo {
                text,
                original_classes: vec![L, B, B, B, L],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..4,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 4..5,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );

        // An FSI followed by a strong RTL character is re-classified as RLI.
        let text = format!("{}א{}a", chars::FSI, chars::PDI);
        assert_eq!(
            InitialInfo::new(&text, None),
            InitialInfo {
                text: &text,
                original_classes: vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..9,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );
    }

    /// `BidiInfo::new` resolves per-byte embedding levels.
    #[test]
    fn test_process_text() {
        let text = "abc123";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[0, 0, 0, 0, 0, 0]),
                original_classes: vec![L, L, L, EN, EN, EN],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..6,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );

        let text = "abc אבג";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
                original_classes: vec![L, L, L, WS, R, R, R, R, R, R],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..10,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );
        assert_eq!(
            BidiInfo::new(text, Some(RTL_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
                original_classes: vec![L, L, L, WS, R, R, R, R, R, R],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..10,
                        level: RTL_LEVEL,
                    },
                ],
            }
        );

        let text = "אבג abc";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
                original_classes: vec![R, R, R, R, R, R, WS, L, L, L],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..10,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );
        assert_eq!(
            BidiInfo::new(text, None),
            BidiInfo {
                text,
                levels: Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
                original_classes: vec![R, R, R, R, R, R, WS, L, L, L],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..10,
                        level: RTL_LEVEL,
                    },
                ],
            }
        );

        let text = "غ2ظ א2ג";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
                original_classes: vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..11,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );

        let text = "a א.\nג";
        assert_eq!(
            BidiInfo::new(text, None),
            BidiInfo {
                text,
                original_classes: vec![L, WS, R, R, CS, B, R, R],
                levels: Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..6,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 6..8,
                        level: RTL_LEVEL,
                    },
                ],
            }
        );

        // BidiTest:69635 (AL ET EN)
        let bidi_info = BidiInfo::new("\u{060B}\u{20CF}\u{06F9}", None);
        assert_eq!(bidi_info.original_classes, vec![AL, AL, ET, ET, ET, EN, EN]);
    }

    #[test]
    fn test_bidi_info_has_rtl() {
        // ASCII only
        assert_eq!(BidiInfo::new("123", None).has_rtl(), false);
        assert_eq!(BidiInfo::new("123", Some(LTR_LEVEL)).has_rtl(), false);
        assert_eq!(BidiInfo::new("123", Some(RTL_LEVEL)).has_rtl(), false);
        assert_eq!(BidiInfo::new("abc", None).has_rtl(), false);
        assert_eq!(BidiInfo::new("abc", Some(LTR_LEVEL)).has_rtl(), false);
        assert_eq!(BidiInfo::new("abc", Some(RTL_LEVEL)).has_rtl(), false);
        assert_eq!(BidiInfo::new("abc 123", None).has_rtl(), false);
        assert_eq!(BidiInfo::new("abc\n123", None).has_rtl(), false);

        // With Hebrew
        assert_eq!(BidiInfo::new("אבּג", None).has_rtl(), true);
        assert_eq!(BidiInfo::new("אבּג", Some(LTR_LEVEL)).has_rtl(), true);
        assert_eq!(BidiInfo::new("אבּג", Some(RTL_LEVEL)).has_rtl(), true);
        assert_eq!(BidiInfo::new("abc אבּג", None).has_rtl(), true);
        assert_eq!(BidiInfo::new("abc\nאבּג", None).has_rtl(), true);
        assert_eq!(BidiInfo::new("אבּג abc", None).has_rtl(), true);
        assert_eq!(BidiInfo::new("אבּג\nabc", None).has_rtl(), true);
        assert_eq!(BidiInfo::new("אבּג 123", None).has_rtl(), true);
        assert_eq!(BidiInfo::new("אבּג\n123", None).has_rtl(), true);
    }

    /// Re-order every paragraph of `text` as a single line spanning the whole paragraph.
    fn reorder_paras(text: &str) -> Vec<Cow<str>> {
        let bidi_info = BidiInfo::new(text, None);
        bidi_info
            .paragraphs
            .iter()
            .map(|para| bidi_info.reorder_line(para, para.range.clone()))
            .collect()
    }

    #[test]
    fn test_reorder_line() {
        // Bidi_Class: L L L B L L L B L L L
        assert_eq!(
            reorder_paras("abc\ndef\nghi"),
            vec!["abc\n", "def\n", "ghi"]
        );

        // Bidi_Class: L L EN B L L EN B L L EN
        assert_eq!(
            reorder_paras("ab1\nde2\ngh3"),
            vec!["ab1\n", "de2\n", "gh3"]
        );

        // Bidi_Class: L L L B AL AL AL
        assert_eq!(reorder_paras("abc\nابج"), vec!["abc\n", "جبا"]);

        // Bidi_Class: AL AL AL B L L L
        assert_eq!(reorder_paras("ابج\nabc"), vec!["\nجبا", "abc"]);

        assert_eq!(reorder_paras("1.-2"), vec!["1.-2"]);
        assert_eq!(reorder_paras("1-.2"), vec!["1-.2"]);
        assert_eq!(reorder_paras("abc אבג"), vec!["abc גבא"]);

        // Numbers being weak LTR characters, cannot reorder strong RTL
        assert_eq!(reorder_paras("123 אבג"), vec!["גבא 123"]);

        assert_eq!(reorder_paras("abc\u{202A}def"), vec!["abc\u{202A}def"]);

        assert_eq!(
            reorder_paras("abc\u{202A}def\u{202C}ghi"),
            vec!["abc\u{202A}def\u{202C}ghi"]
        );

        assert_eq!(
            reorder_paras("abc\u{2066}def\u{2069}ghi"),
            vec!["abc\u{2066}def\u{2069}ghi"]
        );

        // Testing for RLE Character
        assert_eq!(
            reorder_paras("\u{202B}abc אבג\u{202C}"),
            vec!["\u{202B}\u{202C}גבא abc"]
        );

        // Testing neutral characters
        assert_eq!(reorder_paras("אבג? אבג"), vec!["גבא ?גבא"]);

        // Testing neutral characters with special case
        assert_eq!(reorder_paras("A אבג?"), vec!["A גבא?"]);

        // Testing neutral characters with Implicit RTL Marker
        assert_eq!(
            reorder_paras("A אבג?\u{200F}"),
            vec!["A \u{200F}?גבא"]
        );
        assert_eq!(reorder_paras("אבג abc"), vec!["abc גבא"]);
        assert_eq!(
            reorder_paras("abc\u{2067}.-\u{2069}ghi"),
            vec!["abc\u{2067}-.\u{2069}ghi"]
        );

        assert_eq!(
            reorder_paras("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"]
        );

        // With mirrorable characters in RTL run
        assert_eq!(reorder_paras("א(ב)ג."), vec![".ג)ב(א"]);

        // With mirrorable characters on level boundary
        assert_eq!(
            reorder_paras("אב(גד[&ef].)gh"),
            vec!["ef].)gh&[דג(בא"]
        );
    }

    /// Per-byte display levels of every paragraph of `text`, each treated as a single line.
    fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfo::new(text, None);
        bidi_info
            .paragraphs
            .iter()
            .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
            .collect()
    }

    /// Per-character display levels of every paragraph of `text`, each treated as a single line.
    fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfo::new(text, None);
        bidi_info
            .paragraphs
            .iter()
            .map(|para| {
                bidi_info.reordered_levels_per_char(para, para.range.clone())
            })
            .collect()
    }

    #[test]
    fn test_reordered_levels() {

        // BidiTest:946 (LRI PDI)
        // NOTE(review): U+2067 is RLI, whereas LRI is U+2066 -- confirm against BidiTest.txt
        // whether the label or the literal is the intended one.
        let text = "\u{2067}\u{2069}";
        assert_eq!(
            reordered_levels_for_paras(text),
            vec![Level::vec(&[0, 0, 0, 0, 0, 0])]
        );
        assert_eq!(
            reordered_levels_per_char_for_paras(text),
            vec![Level::vec(&[0, 0])]
        );

        /* TODO
        // BidiTest:69635 (AL ET EN)
        let text = "\u{060B}\u{20CF}\u{06F9}";
        assert_eq!(
            reordered_levels_for_paras(text),
            vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])]
        );
        assert_eq!(
            reordered_levels_per_char_for_paras(text),
            vec![Level::vec(&[1, 1, 2])]
        );
         */

        /* TODO
        // BidiTest:291284 (AN RLI PDF R)
        assert_eq!(
            reordered_levels_per_char_for_paras("\u{0605}\u{2067}\u{202C}\u{0590}"),
            vec![&["2", "0", "x", "1"]]
        );
         */
    }
}
848
849
#[cfg(all(feature = "serde", test))]
mod serde_tests {
    use serde_test::{Token, assert_tokens};
    use super::*;

    /// A vector of levels serializes as a sequence of `Level` newtype structs wrapping `u8`s.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let bidi_info = BidiInfo::new(text, None);
        let levels = bidi_info.levels;
        assert_eq!(text.as_bytes().len(), 10);
        assert_eq!(levels.len(), 10);

        // One level per byte: four single-byte LTR characters ("abc" and the space), then
        // three two-byte RTL characters.
        let level_numbers = [0u8, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        let mut expected = vec![Token::Seq { len: Some(10) }];
        for &number in level_numbers.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(number));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}