utf8parse/
types.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
//! Types supporting the UTF-8 parser
#![allow(non_camel_case_types)]
use core::mem;

/// States the parser can be in.
///
/// There is a state for each initial input of the 3 and 4 byte sequences since
/// the following bytes are subject to different conditions than a tail byte.
///
/// The explicit discriminants are the values `pack` stores in the low nibble
/// of a packed byte and that `unpack` turns back into a `State`. The enum is
/// `#[repr(u8)]` so that it is guaranteed to be laid out as a single byte
/// holding exactly that discriminant, which the `u8` -> `State` transmute in
/// `unpack` relies on.
// NOTE(review): `dead_code` is presumably allowed because variants are mostly
// materialised through `unpack` rather than named directly -- confirm.
#[allow(dead_code)]
#[derive(Debug, Copy, Clone)]
#[repr(u8)]
pub enum State {
    /// Ground state; expect anything
    Ground = 0,
    /// 3 tail bytes
    Tail3 = 1,
    /// 2 tail bytes
    Tail2 = 2,
    /// 1 tail byte
    Tail1 = 3,
    /// UTF8-3 starting with E0
    U3_2_e0 = 4,
    /// UTF8-3 starting with ED
    U3_2_ed = 5,
    /// UTF8-4 starting with F0
    Utf8_4_3_f0 = 6,
    /// UTF8-4 starting with F4
    Utf8_4_3_f4 = 7,
}

/// Action to take when receiving a byte
///
/// The explicit discriminants are the values `pack` stores in the high nibble
/// of a packed byte and that `unpack` turns back into an `Action`. The enum is
/// `#[repr(u8)]` so that it is guaranteed to be laid out as a single byte
/// holding exactly that discriminant, which the `u8` -> `Action` transmute in
/// `unpack` relies on.
// NOTE(review): `dead_code` is presumably allowed because variants are mostly
// materialised through `unpack` rather than named directly -- confirm.
#[allow(dead_code)]
#[derive(Debug, Copy, Clone)]
#[repr(u8)]
pub enum Action {
    /// Unexpected byte; sequence is invalid
    InvalidSequence = 0,
    /// Received valid 7-bit ASCII byte which can be directly emitted.
    EmitByte = 1,
    /// Set the bottom continuation byte
    SetByte1 = 2,
    /// Set the 2nd-from-last continuation byte
    SetByte2 = 3,
    /// Set the 2nd-from-last byte which is part of a two byte sequence
    SetByte2Top = 4,
    /// Set the 3rd-from-last continuation byte
    SetByte3 = 5,
    /// Set the 3rd-from-last byte which is part of a three byte sequence
    SetByte3Top = 6,
    /// Set the top byte of a four byte sequence.
    SetByte4 = 7,
}

/// Encode a state/action pair into a single byte.
///
/// The state's discriminant occupies the low nibble (bits 0-3) and the
/// action's discriminant occupies the high nibble (bits 4-7).
#[inline]
#[allow(dead_code)]
pub fn pack(state: State, action: Action) -> u8 {
    let low_nibble = state as u8;
    let high_nibble = (action as u8) << 4;
    high_nibble | low_nibble
}

/// Convert a u8 to a state and action
///
/// # Unsafety
///
/// If this function is called with a byte that wasn't encoded with the `pack`
/// function in this module, there is no guarantee that a valid state and action
/// can be produced.
#[inline]
pub unsafe fn unpack(val: u8) -> (State, Action) {
    (
        // State is stored in bottom 4 bits
        mem::transmute(val & 0x0f),

        // Action is stored in top 4 bits
        mem::transmute(val >> 4),
    )
}