unicode_bidi/
explicit.rs

1// Copyright 2015 The Servo Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! 3.3.2 Explicit Levels and Directions
11//!
12//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14use super::char_data::{BidiClass, is_rtl};
15use super::level::Level;
16
17use BidiClass::*;
18
19/// Compute explicit embedding levels for one paragraph of text (X1-X8).
20///
21/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
22/// for each char in `text`.
23#[cfg_attr(feature = "flame_it", flame)]
24pub fn compute(
25    text: &str,
26    para_level: Level,
27    original_classes: &[BidiClass],
28    levels: &mut [Level],
29    processing_classes: &mut [BidiClass],
30) {
31    assert_eq!(text.len(), original_classes.len());
32
33    // <http://www.unicode.org/reports/tr9/#X1>
34    let mut stack = DirectionalStatusStack::new();
35    stack.push(para_level, OverrideStatus::Neutral);
36
37    let mut overflow_isolate_count = 0u32;
38    let mut overflow_embedding_count = 0u32;
39    let mut valid_isolate_count = 0u32;
40
41    for (i, c) in text.char_indices() {
42        match original_classes[i] {
43
44            // Rules X2-X5c
45            RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
46                let last_level = stack.last().level;
47
48                // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
49                let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
50                if is_isolate {
51                    levels[i] = last_level;
52                    match stack.last().status {
53                        OverrideStatus::RTL => processing_classes[i] = R,
54                        OverrideStatus::LTR => processing_classes[i] = L,
55                        _ => {}
56                    }
57                }
58
59                let new_level = if is_rtl(original_classes[i]) {
60                    last_level.new_explicit_next_rtl()
61                } else {
62                    last_level.new_explicit_next_ltr()
63                };
64                if new_level.is_ok() && overflow_isolate_count == 0 &&
65                    overflow_embedding_count == 0
66                {
67                    let new_level = new_level.unwrap();
68                    stack.push(
69                        new_level,
70                        match original_classes[i] {
71                            RLO => OverrideStatus::RTL,
72                            LRO => OverrideStatus::LTR,
73                            RLI | LRI | FSI => OverrideStatus::Isolate,
74                            _ => OverrideStatus::Neutral,
75                        },
76                    );
77                    if is_isolate {
78                        valid_isolate_count += 1;
79                    } else {
80                        // The spec doesn't explicitly mention this step, but it is necessary.
81                        // See the reference implementations for comparison.
82                        levels[i] = new_level;
83                    }
84                } else if is_isolate {
85                    overflow_isolate_count += 1;
86                } else if overflow_isolate_count == 0 {
87                    overflow_embedding_count += 1;
88                }
89            }
90
91            // <http://www.unicode.org/reports/tr9/#X6a>
92            PDI => {
93                if overflow_isolate_count > 0 {
94                    overflow_isolate_count -= 1;
95                } else if valid_isolate_count > 0 {
96                    overflow_embedding_count = 0;
97                    loop {
98                        // Pop everything up to and including the last Isolate status.
99                        match stack.vec.pop() {
100                            None |
101                            Some(Status { status: OverrideStatus::Isolate, .. }) => break,
102                            _ => continue,
103                        }
104                    }
105                    valid_isolate_count -= 1;
106                }
107                let last = stack.last();
108                levels[i] = last.level;
109                match last.status {
110                    OverrideStatus::RTL => processing_classes[i] = R,
111                    OverrideStatus::LTR => processing_classes[i] = L,
112                    _ => {}
113                }
114            }
115
116            // <http://www.unicode.org/reports/tr9/#X7>
117            PDF => {
118                if overflow_isolate_count > 0 {
119                    continue;
120                }
121                if overflow_embedding_count > 0 {
122                    overflow_embedding_count -= 1;
123                    continue;
124                }
125                if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
126                    stack.vec.pop();
127                }
128                // The spec doesn't explicitly mention this step, but it is necessary.
129                // See the reference implementations for comparison.
130                levels[i] = stack.last().level;
131            }
132
133            // Nothing
134            B | BN => {}
135
136            // <http://www.unicode.org/reports/tr9/#X6>
137            _ => {
138                let last = stack.last();
139                levels[i] = last.level;
140                match last.status {
141                    OverrideStatus::RTL => processing_classes[i] = R,
142                    OverrideStatus::LTR => processing_classes[i] = L,
143                    _ => {}
144                }
145            }
146        }
147
148        // Handle multi-byte characters.
149        for j in 1..c.len_utf8() {
150            levels[i + j] = levels[i];
151            processing_classes[i + j] = processing_classes[i];
152        }
153    }
154}
155
156/// Entries in the directional status stack:
157struct Status {
158    level: Level,
159    status: OverrideStatus,
160}
161
/// Directional override / isolate status stored in each directional status stack entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OverrideStatus {
    /// No override is in effect (paragraph base entry and plain embeddings).
    Neutral,
    /// Right-to-left override: affected characters are processed as class `R`.
    RTL,
    /// Left-to-right override: affected characters are processed as class `L`.
    LTR,
    /// Entry pushed by an isolate initiator (`RLI`, `LRI` or `FSI`).
    Isolate,
}
169
170struct DirectionalStatusStack {
171    vec: Vec<Status>,
172}
173
174impl DirectionalStatusStack {
175    fn new() -> Self {
176        DirectionalStatusStack { vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2) }
177    }
178
179    fn push(&mut self, level: Level, status: OverrideStatus) {
180        self.vec.push(Status { level, status });
181    }
182
183    fn last(&self) -> &Status {
184        self.vec.last().unwrap()
185    }
186}