vte/
lib.rs

1//! Parser for implementing virtual terminal emulators
2//!
3//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
4//! state machine]. The state machine doesn't assign meaning to the parsed data
5//! and is thus not itself sufficient for writing a terminal emulator. Instead,
6//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
9//!
10//! # Examples
11//!
12//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. One quick way to see it in action is to
//! pipe `vim` into it:
15//!
16//! ```ignore
17//! cargo build --release --example parselog
18//! vim | target/release/examples/parselog
19//! ```
20//!
21//! Just type `:q` to exit.
22//!
23//! # Differences from original state machine description
24//!
25//! * UTF-8 Support for Input
26//! * OSC Strings can be terminated by 0x07
27//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they
28//!   no longer work in all states.
29//!
30//! [`Parser`]: struct.Parser.html
31//! [`Perform`]: trait.Perform.html
32//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
33#![no_std]
34
35extern crate utf8parse as utf8;
36
37use core::mem;
38
39mod table;
40mod definitions;
41
42use definitions::{Action, State, unpack};
43
44use table::{EXIT_ACTIONS, ENTRY_ACTIONS, STATE_CHANGE};
45
46impl State {
47    /// Get exit action for this state
48    #[inline(always)]
49    pub fn exit_action(&self) -> Action {
50        unsafe {
51            *EXIT_ACTIONS.get_unchecked(*self as usize)
52        }
53    }
54
55    /// Get entry action for this state
56    #[inline(always)]
57    pub fn entry_action(&self) -> Action {
58        unsafe {
59            *ENTRY_ACTIONS.get_unchecked(*self as usize)
60        }
61    }
62}
63
64
/// Capacity of the intermediates buffer; further intermediates set the `ignoring` flag
const MAX_INTERMEDIATES: usize = 2;
/// Capacity in bytes of the buffer holding the raw contents of an OSC string
const MAX_OSC_RAW: usize = 1024;
/// Capacity of both the numeric parameter list and the OSC parameter list
const MAX_PARAMS: usize = 16;
68
69struct VtUtf8Receiver<'a, P: Perform + 'a>(&'a mut P, &'a mut State);
70
71impl<'a, P: Perform> utf8::Receiver for VtUtf8Receiver<'a, P> {
72    fn codepoint(&mut self, c: char) {
73        self.0.print(c);
74        *self.1 = State::Ground;
75    }
76
77    fn invalid_sequence(&mut self) {
78        self.0.print('�');
79        *self.1 = State::Ground;
80    }
81}
82
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// All buffers are fixed-size arrays, so the parser itself never allocates.
///
/// [`Perform`]: trait.Perform.html
pub struct Parser {
    /// Current state of the state machine
    state: State,
    /// Intermediate bytes stored by the `Collect` action
    intermediates: [u8; MAX_INTERMEDIATES],
    /// Number of valid bytes in `intermediates`
    intermediate_idx: usize,
    /// Completed numeric parameters of the current sequence
    params: [i64; MAX_PARAMS],
    /// Value of the parameter currently being accumulated digit by digit
    param: i64,
    /// Whether a digit has been accumulated into `param` since the last separator
    collecting_param: bool,
    /// Number of valid entries in `params`
    num_params: usize,
    /// Raw bytes of the current OSC string; `;` separators are not stored
    osc_raw: [u8; MAX_OSC_RAW],
    /// `(start, end)` ranges into `osc_raw`, one per OSC parameter
    osc_params: [(usize, usize); MAX_PARAMS],
    /// Next write position in `osc_raw`
    osc_idx: usize,
    /// Number of valid entries in `osc_params`
    osc_num_params: usize,
    /// Set when more than `MAX_INTERMEDIATES` intermediates arrive; passed to the
    /// performer as the `ignore` flag
    ignoring: bool,
    /// Decoder that is fed the input bytes while the parser is in `State::Utf8`
    utf8_parser: utf8::Parser,
}
101
102impl Parser {
103    /// Create a new Parser
104    pub fn new() -> Parser {
105        Parser {
106            state: State::Ground,
107            intermediates: [0u8; MAX_INTERMEDIATES],
108            intermediate_idx: 0,
109            params: [0i64; MAX_PARAMS],
110            param: 0,
111            collecting_param: false,
112            num_params: 0,
113            osc_raw: [0; MAX_OSC_RAW],
114            osc_params: [(0, 0); MAX_PARAMS],
115            osc_idx: 0,
116            osc_num_params: 0,
117            ignoring: false,
118            utf8_parser: utf8::Parser::new(),
119        }
120    }
121
122    #[inline]
123    fn params(&self) -> &[i64] {
124        &self.params[..self.num_params]
125    }
126
127    #[inline]
128    fn intermediates(&self) -> &[u8] {
129        &self.intermediates[..self.intermediate_idx]
130    }
131
132    /// Advance the parser state
133    ///
134    /// Requires a [`Perform`] in case `byte` triggers an action
135    ///
136    /// [`Perform`]: trait.Perform.html
137    #[inline]
138    pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
139        // Utf8 characters are handled out-of-band.
140        if let State::Utf8 = self.state {
141            self.process_utf8(performer, byte);
142            return;
143        }
144
145        // Handle state changes in the anywhere state before evaluating changes
146        // for current state.
147        let mut change = STATE_CHANGE[State::Anywhere as usize][byte as usize];
148
149        if change == 0 {
150            change = STATE_CHANGE[self.state as usize][byte as usize];
151        }
152
153        // Unpack into a state and action
154        let (state, action) = unpack(change);
155
156        self.perform_state_change(performer, state, action, byte);
157    }
158
159    #[inline]
160    fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
161        where P: Perform
162    {
163        let mut receiver = VtUtf8Receiver(performer, &mut self.state);
164        let utf8_parser = &mut self.utf8_parser;
165        utf8_parser.advance(&mut receiver, byte);
166    }
167
168    #[inline]
169    fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
170        where P: Perform
171    {
172        macro_rules! maybe_action {
173            ($action:expr, $arg:expr) => {
174                match $action {
175                    Action::None => (),
176                    action => {
177                        self.perform_action(performer, action, $arg);
178                    },
179                }
180            }
181        }
182
183        match state {
184            State::Anywhere => {
185                // Just run the action
186                self.perform_action(performer, action, byte);
187            },
188            state => {
189                // Exit action for previous state
190                let exit_action = self.state.exit_action();
191                maybe_action!(exit_action, 0);
192
193                // Transition action
194                maybe_action!(action, byte);
195
196                // Entry action for new state
197                maybe_action!(state.entry_action(), 0);
198
199                // Assume the new state
200                self.state = state;
201            }
202        }
203    }
204
205    /// Separate method for osc_dispatch that borrows self as read-only
206    ///
207    /// The aliasing is needed here for multiple slices into self.osc_raw
208    #[inline]
209    fn osc_dispatch<P: Perform>(&self, performer: &mut P) {
210        let mut slices: [&[u8]; MAX_PARAMS] = unsafe { mem::uninitialized() };
211
212        for i in 0..self.osc_num_params {
213            let indices = self.osc_params[i];
214            slices[i] = &self.osc_raw[indices.0..indices.1];
215        }
216
217        performer.osc_dispatch(
218            &slices[..self.osc_num_params],
219        );
220    }
221
222    #[inline]
223    fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
224        match action {
225            Action::Print => performer.print(byte as char),
226            Action::Execute => performer.execute(byte),
227            Action::Hook => {
228                performer.hook(
229                    self.params(),
230                    self.intermediates(),
231                    self.ignoring,
232                );
233            },
234            Action::Put => performer.put(byte),
235            Action::OscStart => {
236                self.osc_idx = 0;
237                self.osc_num_params = 0;
238            },
239            Action::OscPut => {
240                let idx = self.osc_idx;
241                if idx == self.osc_raw.len() {
242                    return;
243                }
244
245                // Param separator
246                if byte == b';' {
247                    let param_idx = self.osc_num_params;
248                    match param_idx {
249                        // Only process up to MAX_PARAMS
250                        MAX_PARAMS => return,
251
252                        // First param is special - 0 to current byte index
253                        0 => {
254                            self.osc_params[param_idx] = (0, idx);
255                        },
256
257                        // All other params depend on previous indexing
258                        _ => {
259                            let prev = self.osc_params[param_idx - 1];
260                            let begin = prev.1;
261                            self.osc_params[param_idx] = (begin, idx);
262                        }
263                    }
264
265                    self.osc_num_params += 1;
266                } else {
267                    self.osc_raw[idx] = byte;
268                    self.osc_idx += 1;
269                }
270            },
271            Action::OscEnd => {
272                let param_idx = self.osc_num_params;
273                let idx = self.osc_idx;
274
275                match param_idx {
276                    // Finish last parameter if not already maxed
277                    MAX_PARAMS => (),
278
279                    // First param is special - 0 to current byte index
280                    0 => {
281                        self.osc_params[param_idx] = (0, idx);
282                        self.osc_num_params += 1;
283                    },
284
285                    // All other params depend on previous indexing
286                    _ => {
287                        let prev = self.osc_params[param_idx - 1];
288                        let begin = prev.1;
289                        self.osc_params[param_idx] = (begin, idx);
290                        self.osc_num_params += 1;
291                    }
292                }
293                self.osc_dispatch(performer);
294            },
295            Action::Unhook => performer.unhook(),
296            Action::CsiDispatch => {
297                if self.collecting_param {
298                    let idx = self.num_params;
299                    self.params[idx] = self.param;
300                    self.num_params += 1;
301                }
302                performer.csi_dispatch(
303                    self.params(),
304                    self.intermediates(),
305                    self.ignoring,
306                    byte as char
307                );
308
309                self.num_params = 0;
310                self.param = 0;
311                self.collecting_param = false;
312            }
313            Action::EscDispatch => {
314                performer.esc_dispatch(
315                    self.params(),
316                    self.intermediates(),
317                    self.ignoring,
318                    byte
319                );
320            },
321            Action::Ignore | Action::None => (),
322            Action::Collect => {
323                if self.intermediate_idx == MAX_INTERMEDIATES {
324                    self.ignoring = true;
325                } else {
326                    self.intermediates[self.intermediate_idx] = byte;
327                    self.intermediate_idx += 1;
328                }
329            },
330            Action::Param => {
331                if byte == b';' {
332                    // Completed a param
333                    let idx = self.num_params;
334
335                    if idx == MAX_PARAMS - 1 {
336                        return;
337                    }
338
339                    self.params[idx] = self.param;
340                    self.param = 0;
341                    self.num_params += 1;
342                    self.collecting_param = false;
343                } else {
344                    // Continue collecting bytes into param
345                    self.param = self.param.saturating_mul(10);
346                    self.param = self.param.saturating_add((byte - b'0') as i64);
347                    self.collecting_param = true;
348                }
349            },
350            Action::Clear => {
351                self.intermediate_idx = 0;
352                self.num_params = 0;
353                self.ignoring = false;
354            },
355            Action::BeginUtf8 => {
356                self.process_utf8(performer, byte);
357            },
358        }
359    }
360}
361
/// Performs actions requested by the Parser
///
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
/// movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <https://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
pub trait Perform {
    /// Draw a character to the screen and update states
    fn print(&mut self, c: char);

    /// Execute a C0 or C1 control function
    fn execute(&mut self, byte: u8);

    /// Invoked when a final character arrives in first part of device control string
    ///
    /// The control function should be determined from the private marker, final character, and
    /// execute with a parameter list. A handler should be selected for remaining characters in the
    /// string; the handler function should subsequently be called by `put` for every character in
    /// the control string.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn hook(&mut self, params: &[i64], intermediates: &[u8], ignore: bool);

    /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
    /// will also be passed to the handler.
    fn put(&mut self, byte: u8);

    /// Called when a device control string is terminated
    ///
    /// The previously selected handler should be notified that the DCS has
    /// terminated.
    fn unhook(&mut self);

    /// Dispatch an operating system command
    ///
    /// `params` holds the `;`-separated parts of the command string as raw
    /// bytes.
    fn osc_dispatch(&mut self, params: &[&[u8]]);

    /// A final character has arrived for a CSI sequence
    ///
    /// `c` is that final character.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn csi_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, c: char);

    /// The final character of an escape sequence has arrived.
    ///
    /// `byte` is that final character.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn esc_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, byte: u8);
}
415
416#[cfg(test)]
417#[macro_use]
418extern crate std;
419
420#[cfg(test)]
421mod tests {
422    use std::vec::Vec;
423    use super::{Parser, Perform};
424    use core::i64;
425
426    static OSC_BYTES: &'static [u8] = &[0x1b, 0x5d, // Begin OSC
427        b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l',
428        b'm', b'-', b'd', b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o',
429        b'd', b'e', b'/', b'a', b'l', b'a', b'c', b'r', b'i', b't', b't', b'y',
430        0x07 // End OSC
431    ];
432
433    #[derive(Default)]
434    struct OscDispatcher {
435        dispatched_osc: bool,
436        params: Vec<Vec<u8>>,
437    }
438
439    // All empty bodies except osc_dispatch
440    impl Perform for OscDispatcher {
441        fn print(&mut self, _: char) {}
442        fn execute(&mut self, _byte: u8) {}
443        fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {}
444        fn put(&mut self, _byte: u8) {}
445        fn unhook(&mut self) {}
446        fn osc_dispatch(&mut self, params: &[&[u8]]) {
447            // Set a flag so we know these assertions all run
448            self.dispatched_osc = true;
449            self.params = params.iter().map(|p| p.to_vec()).collect();
450        }
451        fn csi_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {}
452        fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {}
453    }
454
455    #[derive(Default)]
456    struct CsiDispatcher {
457        dispatched_csi: bool,
458        params: Vec<Vec<i64>>,
459    }
460
461    impl Perform for CsiDispatcher {
462        fn print(&mut self, _: char) {}
463        fn execute(&mut self, _byte: u8) {}
464        fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {}
465        fn put(&mut self, _byte: u8) {}
466        fn unhook(&mut self) {}
467        fn osc_dispatch(&mut self, _params: &[&[u8]]) { }
468        fn csi_dispatch(&mut self, params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {
469            self.dispatched_csi = true;
470            self.params.push(params.to_vec());
471        }
472        fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {}
473    }
474
475
476    #[test]
477    fn parse_osc() {
478        // Create dispatcher and check state
479        let mut dispatcher = OscDispatcher::default();
480        assert_eq!(dispatcher.dispatched_osc, false);
481
482        // Run parser using OSC_BYTES
483        let mut parser = Parser::new();
484        for byte in OSC_BYTES {
485            parser.advance(&mut dispatcher, *byte);
486        }
487
488        // Check that flag is set and thus osc_dispatch assertions ran.
489        assert!(dispatcher.dispatched_osc);
490        assert_eq!(dispatcher.params.len(), 2);
491        assert_eq!(dispatcher.params[0], &OSC_BYTES[2..3]);
492        assert_eq!(dispatcher.params[1], &OSC_BYTES[4..(OSC_BYTES.len() - 1)]);
493    }
494
495    #[test]
496    fn parse_empty_osc() {
497        // Create dispatcher and check state
498        let mut dispatcher = OscDispatcher::default();
499        assert_eq!(dispatcher.dispatched_osc, false);
500
501        // Run parser using OSC_BYTES
502        let mut parser = Parser::new();
503        for byte in &[0x1b, 0x5d, 0x07] {
504            parser.advance(&mut dispatcher, *byte);
505        }
506
507        // Check that flag is set and thus osc_dispatch assertions ran.
508        assert!(dispatcher.dispatched_osc);
509    }
510
511    #[test]
512    fn parse_osc_max_params() {
513        use MAX_PARAMS;
514
515        static INPUT: &'static [u8] = b"\x1b];;;;;;;;;;;;;;;;;\x1b";
516
517        // Create dispatcher and check state
518        let mut dispatcher = OscDispatcher::default();
519        assert_eq!(dispatcher.dispatched_osc, false);
520
521        // Run parser using OSC_BYTES
522        let mut parser = Parser::new();
523        for byte in INPUT {
524            parser.advance(&mut dispatcher, *byte);
525        }
526
527        // Check that flag is set and thus osc_dispatch assertions ran.
528        assert!(dispatcher.dispatched_osc);
529        assert_eq!(dispatcher.params.len(), MAX_PARAMS);
530        for param in dispatcher.params.iter() {
531            assert_eq!(param.len(), 0);
532        }
533
534    }
535
536    #[test]
537    fn parse_csi_max_params() {
538        use MAX_PARAMS;
539
540        static INPUT: &'static [u8] = b"\x1b[1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;p";
541
542        // Create dispatcher and check state
543        let mut dispatcher = CsiDispatcher::default();
544        assert!(!dispatcher.dispatched_csi);
545
546
547        // Run parser using OSC_BYTES
548        let mut parser = Parser::new();
549        for byte in INPUT {
550            parser.advance(&mut dispatcher, *byte);
551        }
552
553        // Check that flag is set and thus csi_dispatch assertions ran.
554        assert!(dispatcher.dispatched_csi);
555        assert_eq!(dispatcher.params.len(), 1);
556        assert_eq!(dispatcher.params[0].len(), MAX_PARAMS);
557
558    }
559
560    #[test]
561    fn parse_semi_set_underline() {
562
563        // Create dispatcher and check state
564        let mut dispatcher = CsiDispatcher::default();
565
566        // Run parser using OSC_BYTES
567        let mut parser = Parser::new();
568        for byte in b"\x1b[;4m" {
569            parser.advance(&mut dispatcher, *byte);
570        }
571
572        // Check that flag is set and thus osc_dispatch assertions ran.
573        assert_eq!(dispatcher.params[0], &[0, 4]);
574    }
575
576    #[test]
577    fn parse_long_csi_param() {
578        // The important part is the parameter, which is (i64::MAX + 1)
579        static INPUT: &'static [u8] = b"\x1b[9223372036854775808m";
580
581        let mut dispatcher = CsiDispatcher::default();
582
583        let mut parser = Parser::new();
584        for byte in INPUT {
585            parser.advance(&mut dispatcher, *byte);
586        }
587
588        assert_eq!(dispatcher.params[0], &[i64::MAX as i64]);
589
590    }
591
592    #[test]
593    fn parse_osc_with_utf8_arguments() {
594        static INPUT: &'static [u8] = &[
595            0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27,
596            0xc2, 0xaf, 0x5c, 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f,
597            0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26, 0x20, 0x73, 0x6c, 0x65, 0x65,
598            0x70, 0x20, 0x31, 0x07
599        ];
600
601        // Create dispatcher and check state
602        let mut dispatcher = OscDispatcher { params: vec![], dispatched_osc: false };
603
604        // Run parser using OSC_BYTES
605        let mut parser = Parser::new();
606        for byte in INPUT {
607            parser.advance(&mut dispatcher, *byte);
608        }
609
610        // Check that flag is set and thus osc_dispatch assertions ran.
611        assert_eq!(dispatcher.params[0], &[b'2']);
612        assert_eq!(dispatcher.params[1], &INPUT[5..(INPUT.len() - 1)]);
613    }
614}